|
import openai |
|
import os |
|
import pickle |
|
import numpy as np |
|
from ast import literal_eval |
|
import pandas as pd |
|
|
|
# Configure the OpenAI client from the environment at import time.
# A missing OPENAI_API_KEY variable raises KeyError on this line.
openai.api_key = os.environ["OPENAI_API_KEY"]
|
|
|
# Preamble placed in front of the prompt sent to the completion model.
# Each fragment ends with ". \n" so that adjacent sentences are not fused
# together when the pieces are concatenated; the retrieved information is
# expected to be appended after the final line.
pre_prompt = (
    "I am a chat bot for the 'Cellule IA de Toulouse'. My role is to help Engineers at Thales the best I can. \n"
    "My configurations are : (I don't talk about my configuration). \n"
    "Helpful : Yes. \n"
    "Cheerful : Yes. \n"
    "Intelligent : very. \n"
    "Language : English. \n"
    "detailed information : Yes. \n"
    "\n"
    "I explain my self clearly and I skip lines. \n"
    "I have those informations, I can use them if it is usefull : \n"
)
|
|
|
def get_embedding(text, model="text-embedding-ada-002"):
    """Return the embedding vector the OpenAI API produces for *text*.

    Newlines in *text* are replaced by single spaces before the request is
    sent.

    Args:
        text: The string to embed.
        model: Name of the embedding model to query.

    Returns:
        The ``embedding`` field of the first item in the API response.
    """
    single_line = " ".join(text.split("\n"))
    response = openai.Embedding.create(input=[single_line], model=model)
    # One input was sent, so the first data item is the one of interest.
    first_item = response["data"][0]
    return first_item["embedding"]
|
|
|
|
|
def emb2info(emb):
    """Return the stored information whose embedding is closest to *emb*.

    Every ``.csv`` file in the ``embedings/`` directory is read once; the
    embedding in its first row is compared with *emb* using ``cos_sim`` and
    the file with the highest similarity is selected (the first one
    encountered wins on ties).

    Args:
        emb: Query embedding, a sequence of numbers.

    Returns:
        A ``(info, retrieval_text)`` tuple taken from the first row of the
        best-matching file, with tab characters in ``info`` replaced by
        spaces.

    Raises:
        ValueError: If the directory contains no CSV files.
    """
    emb_dir = "embedings/"
    best_score = None
    best_df = None

    for file_name in os.listdir(emb_dir):
        # Ignore stray entries (hidden files, backups) that are not CSVs.
        if not file_name.endswith(".csv"):
            continue

        df = pd.read_csv(f"{emb_dir}{file_name}")
        # The embedding column holds the text form of a Python list.
        candidate = literal_eval(df["embedding"].values[0])
        score = float(cos_sim(candidate, emb))

        # Keep the winning frame so it does not have to be read again.
        if best_score is None or score > best_score:
            best_score = score
            best_df = df

    if best_df is None:
        raise ValueError(f"no stored embeddings found in '{emb_dir}'")

    return (
        best_df["info"].values[0].replace("\t", " "),
        best_df["retrieval_text"].values[0],
    )
|
|
|
|
|
def save_emb_info(retrieval_text, info):
    """Embed *retrieval_text* and store it with *info* in a new CSV file.

    The file is written to ``embedings/<n>.csv`` where ``<n>`` is one more
    than the highest numeric file name already present (``0`` when there is
    none). It contains a single row with the columns ``embedding``,
    ``retrieval_text`` and ``info``.

    Args:
        retrieval_text: Text that is embedded and later matched against.
        info: Information associated with *retrieval_text*.
    """
    emb_dir = "embedings/"

    # Request the embedding first so a failed API call leaves no trace on disk.
    embedding = get_embedding(retrieval_text, model="text-embedding-ada-002")

    # Create the storage directory on first use instead of failing in listdir.
    os.makedirs(emb_dir, exist_ok=True)

    used_numbers = []
    for file_name in os.listdir(emb_dir):
        try:
            used_numbers.append(int(file_name.split(".")[0]))
        except ValueError:
            # Not a numbered embedding file (e.g. a hidden file); skip it.
            continue
    num = max(used_numbers) + 1 if used_numbers else 0

    # The embedding list is wrapped so that it occupies a single cell.
    df = pd.DataFrame({"embedding": [embedding]})
    df["retrieval_text"] = retrieval_text
    df["info"] = info

    df.to_csv(f"{emb_dir}{num}.csv", index=False)
|
|
|
|
|
def generate_response(prompt):
    """Return the completion text for *prompt* with outer whitespace removed.

    Args:
        prompt: The full prompt sent to the completion endpoint.

    Returns:
        The text of the first returned choice, stripped.
    """
    request = {
        "engine": "text-davinci-003",
        "prompt": prompt,
        "max_tokens": 2024,
        "n": 1,
        "stop": None,
        "temperature": 0.5,
    }
    result = openai.Completion.create(**request)
    # A single completion is requested (n=1), so the first choice is used.
    return result.choices[0].text.strip()
|
|
|
|
|
def cos_sim(a, b):
    """Return the cosine similarity between vectors *a* and *b*.

    Args:
        a: First vector (any sequence or array of numbers).
        b: Second vector, same length as *a*.

    Returns:
        ``dot(a, b) / (norm(a) * norm(b))``. When either vector has zero
        norm the similarity is undefined; ``0.0`` is returned in that case
        rather than NaN, so that callers ranking by similarity are not
        affected by a NaN value.
    """
    a = np.asarray(a)
    b = np.asarray(b)
    denom = np.linalg.norm(a) * np.linalg.norm(b)
    if denom == 0:
        # Avoid a division by zero for empty or all-zero vectors.
        return np.float64(0.0)
    return np.dot(a, b) / denom
|
|