Spaces:
Sleeping
Sleeping
import numpy as np | |
import pandas as pd | |
from sentence_transformers import SentenceTransformer | |
from sklearn.metrics.pairwise import cosine_similarity | |
# Table holding the predefined question/answer pairs
qna_df = pd.read_csv('./data/qa_data.csv')[['์ง๋ฌธ', '๋ต๋ณ']]

# Keep only the text that follows the label marker in each cell.
# Splitting once and taking the last piece (a) leaves a cell unchanged when the
# marker is missing instead of raising IndexError, and (b) keeps the rest of
# the text intact if the marker happens to appear again later in the cell.
qna_df['์ง๋ฌธ'] = qna_df['์ง๋ฌธ'].apply(lambda x: x.split('์ง๋ฌธ\n', 1)[-1])
qna_df['๋ต๋ณ'] = qna_df['๋ต๋ณ'].apply(lambda x: x.split('๋ต๋ณ\n', 1)[-1])

# Load the SentenceTransformer embedding model
embedding_model = SentenceTransformer('jeonseonjin/embedding_BAAI-bge-m3')

# Embed every stored question once at start-up so that each incoming query
# only needs a single encode() call plus a similarity lookup.
query_texts = qna_df['์ง๋ฌธ'].to_list()
query_embeddings = embedding_model.encode(query_texts)
# Query-answer lookup function
def qna_answer_to_query(new_query, embedding_model=embedding_model, query_embeddings=query_embeddings, top_k=1, verbose=True):
    """Return the stored question most similar to ``new_query`` and its answer.

    Args:
        new_query: Text of the incoming question.
        embedding_model: Object whose ``encode`` method turns a list of texts
            into embeddings; defaults to the module-level model.
        query_embeddings: Embeddings of the stored questions; defaults to the
            module-level ``query_embeddings``. Row ``i`` is looked up in the
            module-level ``query_texts`` / ``qna_df``, so a custom value must
            keep that ordering.
        top_k: Currently unused; only the single best match is returned.
        verbose: When truthy, print the match, its score and its answer.

    Returns:
        A ``(similar_query, similarity, similar_answer)`` tuple, where
        ``similarity`` is the cosine similarity rounded to two decimals.
    """
    # Embed the incoming query (encoded as a one-element batch)
    new_query_embedding = embedding_model.encode([new_query])

    # One query was encoded, so the similarity matrix has a single row;
    # take that row explicitly rather than relying on a flattened argmax.
    scores = cosine_similarity(new_query_embedding, query_embeddings)[0]

    # Index of the stored question with the highest cosine similarity
    most_similar_idx = int(np.argmax(scores))
    similarity = np.round(scores[most_similar_idx], 2)

    # Fetch the matching question and its answer
    similar_query = query_texts[most_similar_idx]
    similar_answer = qna_df.iloc[most_similar_idx]['๋ต๋ณ']

    if verbose:
        print("๊ฐ์ฅ ๋น์ทํ ์ง๋ฌธ : ", similar_query)
        print("๊ฐ์ฅ ๋น์ทํ ์ง๋ฌธ์ ์ ์ฌ๋ : ", similarity)
        print("๊ฐ์ฅ ๋น์ทํ ์ง๋ฌธ์ ๋ต: ", similar_answer)

    return similar_query, similarity, similar_answer
import gradio as gr | |
# Chat handler that answers a question (uses qna_answer_to_query)
def chat_with(message, history):
    """Answer ``message`` and append the exchange to the chat history.

    Args:
        message: Text the user submitted.
        history: List of ``(user_message, bot_response)`` tuples shown in the
            chatbot so far; ``None`` is treated as an empty conversation.

    Returns:
        ``(history, "")``. The submit handler wires its outputs as
        ``[chatbot, msg]``, so the first value refreshes the chatbot and the
        empty string clears the input textbox. Returning the history list for
        the textbox slot, as before, pushed a list into a text field.
    """
    # Defensive: start a fresh conversation if no history list was supplied
    if history is None:
        history = []

    # Element 2 of the lookup result is the answer text
    response = qna_answer_to_query(message)[2]

    # Record the turn as a (user, bot) pair
    history.append((message, response))

    return history, ""
# Build the Gradio chatbot interface
with gr.Blocks() as demo:
    # Widgets: conversation view, question input box, and a reset button
    chatbot = gr.Chatbot()
    msg = gr.Textbox(label="์ง๋ฌธ ์ ๋ ฅ")
    clear = gr.Button("๋ํ ๊ธฐ๋ก ์ด๊ธฐํ")

    # Submitting the textbox runs chat_with on (message, current history);
    # its two return values are routed to the chatbot and the textbox.
    msg.submit(
        fn=chat_with,
        inputs=[msg, chatbot],
        outputs=[chatbot, msg],
    )

    # The reset button replaces the chatbot contents with an empty list
    clear.click(
        fn=lambda: [],
        inputs=None,
        outputs=chatbot,
        queue=False,
    )

# Start the app.
# NOTE(review): placed after the Blocks context by convention; the original
# indentation is not recoverable from this view - confirm.
demo.launch(share=True)