# roomnumber103's picture
# model upload
# 9484ade
import numpy as np
import pandas as pd
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
# Table of predefined question/answer pairs (only these two columns are kept).
qna_df = pd.read_csv('./data/qa_data.csv')[['์งˆ๋ฌธ', '๋‹ต๋ณ€']]

# Drop the "<header>\n" marker from every cell by keeping element 1 of the
# split, i.e. the text that follows the first occurrence of the marker.
qna_df['์งˆ๋ฌธ'] = qna_df['์งˆ๋ฌธ'].map(
    lambda cell: cell.split('์งˆ๋ฌธ\n')[1]
)
qna_df['๋‹ต๋ณ€'] = qna_df['๋‹ต๋ณ€'].map(
    lambda cell: cell.split('๋‹ต๋ณ€\n')[1]
)

# Load the SentenceTransformer embedding model.
embedding_model = SentenceTransformer('jeonseonjin/embedding_BAAI-bge-m3')

# Embed every stored question once, up front, so lookups only need to
# encode the incoming query.
query_texts = qna_df['์งˆ๋ฌธ'].tolist()
query_embeddings = embedding_model.encode(query_texts)
# Query -> answer lookup over the precomputed question embeddings.
def qna_answer_to_query(new_query, embedding_model=embedding_model, query_embeddings=query_embeddings, top_k=1, verbose=True):
    """Return the stored question closest to ``new_query`` and its answer.

    The query is encoded with ``embedding_model`` and scored against
    ``query_embeddings`` by cosine similarity; the highest-scoring entry is
    looked up in the module-level ``query_texts`` and ``qna_df``.

    Args:
        new_query: Text to look up.
        embedding_model: Object whose ``encode`` method is called with a
            one-element list; defaults to the module-level model.
        query_embeddings: Embeddings to compare against; defaults to the
            module-level embeddings of ``query_texts``.
        top_k: Accepted but not used by the current implementation; only
            the single best match is returned.
        verbose: When equal to ``True``, print the match, its similarity
            and its answer.

    Returns:
        Tuple ``(similar_query, similarity, similar_answer)`` where
        ``similarity`` is the cosine similarity rounded to two decimals.
    """
    # Encode the incoming query as a single-item batch.
    encoded_query = embedding_model.encode([new_query])

    # One query against all stored questions -> a single row of scores.
    score_row = cosine_similarity(encoded_query, query_embeddings)[0]

    # Position of the best-scoring stored question.
    best_idx = score_row.argmax()
    best_score = np.round(score_row[best_idx], 2)

    # Fetch the matching question text and its answer.
    matched_query = query_texts[best_idx]
    matched_answer = qna_df['๋‹ต๋ณ€'].iloc[best_idx]

    if verbose == True:
        report = (
            ("๊ฐ€์žฅ ๋น„์Šทํ•œ ์งˆ๋ฌธ : ", matched_query),
            ("๊ฐ€์žฅ ๋น„์Šทํ•œ ์งˆ๋ฌธ์˜ ์œ ์‚ฌ๋„ : ", best_score),
            ("๊ฐ€์žฅ ๋น„์Šทํ•œ ์งˆ๋ฌธ์˜ ๋‹ต: ", matched_answer),
        )
        for label, value in report:
            print(label, value)

    return matched_query, best_score, matched_answer
import gradio as gr
# Gradio submit callback: answers a question via qna_answer_to_query.
def chat_with(message, history):
    """Answer ``message`` and record the exchange in the chat history.

    The event wiring in this file routes the two return values to
    ``[chatbot, msg]``: the first updates the Chatbot, the second is written
    into the input Textbox. Returning the history for both (as before) put
    the history list into the Textbox, so the second value is now an empty
    string, which clears the input after each submission.

    Args:
        message: Text submitted by the user.
        history: List of ``(user_message, bot_response)`` tuples; ``None``
            is treated as an empty history.

    Returns:
        Tuple ``(history, "")``: the updated history and the new (empty)
        Textbox content.
    """
    # Defensive: start a fresh history if none was supplied.
    if history is None:
        history = []

    # Element 2 of the lookup result is the answer text.
    response = qna_answer_to_query(message)[2]

    # Record the exchange as a (user, bot) pair.
    history.append((message, response))

    return history, ""
# Build the Gradio chat interface.
with gr.Blocks() as demo:
    # Component that displays the conversation so far.
    chatbot = gr.Chatbot()
    # Text box where the user types a question.
    msg = gr.Textbox(label="์งˆ๋ฌธ ์ž…๋ ฅ")
    # Button that resets the conversation.
    clear = gr.Button(value="๋Œ€ํ™” ๊ธฐ๋ก ์ดˆ๊ธฐํ™”")

    # On submit, pass the text and current history to chat_with; its two
    # return values are written to the chatbot and the text box, in order.
    msg.submit(
        fn=chat_with,
        inputs=[msg, chatbot],
        outputs=[chatbot, msg],
    )

    # On click, replace the chatbot contents with an empty history.
    clear.click(
        fn=lambda: [],
        inputs=None,
        outputs=chatbot,
        queue=False,
    )

# Start the app; share=True is passed through to launch().
demo.launch(share=True)