chatgpt_embeddings/gpt_1_embeddings_training.py
administrator cc2bb88989 init
2023-03-31 18:23:38 +08:00

53 lines
1.6 KiB
Python

# Reference: OpenAI Cookbook, "Question answering using embeddings"
# https://github.com/openai/openai-cookbook/blob/main/examples/Question_answering_using_embeddings.ipynb
import openai
import pandas as pd
import time
from gpt_0_basic_info import api_key, excel_file_path, csv_file_path
# OpenAI model identifiers.
# COMPLETIONS_MODEL is not referenced by any code visible in this file.
COMPLETIONS_MODEL = "text-davinci-003"
# Default value of the ``model`` parameter of get_embedding().
EMBEDDING_MODEL = "text-embedding-ada-002"
def get_embedding(text: str, open_ai_api_key: str, model: str = EMBEDDING_MODEL) -> list[float]:
    """
    Request the embedding vector for ``text`` from the OpenAI Embeddings API.

    The module-wide ``openai.api_key`` is set to ``open_ai_api_key`` before
    the request is made, so the key remains set on the ``openai`` module
    after this function returns.

    Args:
        text: The text to embed.
        open_ai_api_key: API key assigned to ``openai.api_key``.
        model: Name of the embedding model passed to the API.

    Returns:
        The ``embedding`` entry of the first item in the response's
        ``data`` list.
    """
    openai.api_key = open_ai_api_key
    response = openai.Embedding.create(input=text, model=model)
    first_item = response["data"][0]
    return first_item["embedding"]
def compute_doc_embeddings(df: pd.DataFrame, open_ai_api_key: str):
    """
    Attach an OpenAI embedding to every row of ``df``.

    For each row, the value of the ``QandA`` column is passed to
    ``get_embedding`` and the result is stored in a new ``embeddings``
    column. The row label is printed as progress output, and the loop
    sleeps for one second after each request.

    Args:
        df: Dataframe with a ``QandA`` column. It is modified in place.
        open_ai_api_key: API key forwarded to ``get_embedding``.

    Returns:
        The same dataframe object, now carrying the ``embeddings`` column.
    """
    # Start from an empty-string column cast to object dtype so that each
    # cell can later be assigned the value returned by get_embedding().
    df['embeddings'] = ''
    df['embeddings'] = df['embeddings'].astype('object')

    for row_label, row in df.iterrows():
        print(row_label)
        vector = get_embedding(row.QandA, open_ai_api_key)
        df.at[row_label, 'embeddings'] = vector
        time.sleep(1)

    return df
def getembeddings(api_key, excelfilepath, csvfilepath):
    """
    Build an embeddings CSV from a prompt/completion spreadsheet.

    Steps:
      1. Read the Excel file at ``excelfilepath``.
      2. Remove newline characters from each ``prompt`` and prepend a fixed
         question prefix; prepend a fixed answer prefix to each
         ``completion``.
      3. Concatenate the two columns into a ``QandA`` column.
      4. Compute an embedding per row via ``compute_doc_embeddings``.
      5. Write only the ``QandA`` and ``embeddings`` columns to
         ``csvfilepath`` (no index column, ``utf-8_sig`` encoding).

    Args:
        api_key: OpenAI API key forwarded to ``compute_doc_embeddings``.
        excelfilepath: Path of the input Excel file.
        csvfilepath: Path of the CSV file to write.
    """
    def as_question(raw):
        # Strip newlines first, then wrap the text with the question prefix.
        return "当有人问:" + raw.replace('\n', '') + ''

    def as_answer(raw):
        return "请回答:" + raw

    df = pd.read_excel(excelfilepath)
    df['prompt'] = df['prompt'].apply(as_question)
    df['completion'] = df['completion'].apply(as_answer)
    df['QandA'] = df['prompt'] + df['completion']

    embedded = compute_doc_embeddings(df, api_key)
    output = embedded[['QandA', 'embeddings']]
    output.to_csv(csvfilepath, index=False, encoding='utf-8_sig')
# Script entry point: build the embeddings CSV using the settings imported
# from gpt_0_basic_info.
if __name__ == '__main__':
    getembeddings(
        api_key=api_key,
        excelfilepath=excel_file_path,
        csvfilepath=csv_file_path,
    )