|
from datasets import load_dataset |
|
from datasets import Dataset |
|
from sentence_transformers import SentenceTransformer |
|
import faiss |
|
import time |
|
from datetime import datetime |
|
import json |
|
|
|
import uuid |
|
import pandas as pd |
|
from llama_cpp import Llama |
|
|
|
from threading import Thread |
|
from huggingface_hub import Repository, upload_file |
|
import os |
|
|
|
|
|
# Hugging Face access token read from the environment.
# NOTE(review): the env var is spelled 'HF_Token' here — confirm this matches
# the deployment configuration (such tokens are conventionally 'HF_TOKEN').
HF_TOKEN = os.getenv('HF_Token')

# Intended chat-transcript file name; not referenced anywhere in this chunk —
# presumably used by logging/upload code elsewhere; verify before removing.
logfile = 'DiabetesChatLog.txt'

# Load the full (non-streaming) train split of the knowledge-base dataset.
data = load_dataset("Namitg02/Test", split='train', streaming=False)

# Number of rows in the dataset.
length = len(data)

# Sentence-embedding model used to embed user queries; the dataset's
# "embeddings" column is assumed to have been produced with a compatible
# model — TODO confirm.
embedding_model = SentenceTransformer("all-MiniLM-L6-v2")
embedding_dim = embedding_model.get_sentence_embedding_dimension()

# Exact (brute-force) L2 FAISS index attached to the dataset's precomputed
# "embeddings" column; enables data.get_nearest_examples() lookups below.
index = faiss.IndexFlatL2(embedding_dim)
data.add_faiss_index("embeddings", custom_index=index)
|
|
|
|
|
|
|
|
|
# System prompt sent with every request; instructs the model to answer
# conversationally from retrieved context and to refuse rather than invent
# answers. Runtime string — do not edit casually.
SYS_PROMPT = """You are an assistant for answering questions like a medical person.
You are given the extracted parts of document, a question and history of questions and answers . Provide a brief conversational answer.
If you do not know the answer, just say "I do not know." Do not make up an answer. Don't repeat the SYS_PROMPT or say that you are referring to document or an article."""
|
|
|
|
|
|
|
|
|
|
|
# Local quantized Llama-2-7B chat model served through llama.cpp bindings.
model = Llama(
    model_path="./llama-2-7b-chat.Q4_K_M.gguf",  # 4-bit Q4_K_M GGUF weights, expected in CWD
    n_gpu_layers = 0,  # CPU-only inference
    temperature=0.75,
    n_ctx = 4096,      # context window (tokens)
    top_p=0.95
)
|
|
|
|
|
def search(query: str, k: int = 2 ):
    """Embed *query* and return its *k* nearest neighbours from the dataset.

    Uses the module-level sentence-transformer to encode the query, then
    queries the FAISS index attached to the dataset's "embeddings" column.

    Returns:
        A ``(scores, retrieved_examples)`` pair as produced by
        ``Dataset.get_nearest_examples``.
    """
    query_vector = embedding_model.encode(query)
    result = data.get_nearest_examples("embeddings", query_vector, k=k)
    return result
|
|
|
|
|
|
|
def format_prompt(prompt, retrieved_documents, k, history, memory_limit=3):
    """Assemble the user prompt sent to the LLM.

    Concatenates the current question, the top-*k* retrieved context
    passages, and up to *memory_limit* most recent (user, bot) turns of
    chat history in Llama-style ``[INST]`` tags.

    Args:
        prompt: the user's current question.
        retrieved_documents: mapping whose ``'0'`` column holds the
            retrieved passage texts (dataset column name).
        k: number of retrieved passages to include.
        history: list of ``(user_message, bot_message)`` pairs.
        memory_limit: maximum number of past turns to append.

    Returns:
        The assembled prompt string.
    """
    formatted = f"Question:{prompt}\nContext:"
    for idx in range(k):
        # '0' is the text column of the retrieved dataset rows.
        formatted += f"{retrieved_documents['0'][idx]}\n"

    # The slice already yields the whole history when it is shorter than
    # memory_limit (and nothing when empty), so no special-casing is needed.
    for user_message, bot_message in history[-memory_limit:]:
        formatted += f"<s>[INST] {user_message} [/INST] {bot_message} </s>"

    return formatted
|
|
|
|
|
|
|
|
|
def talk(prompt, history):
    """Gradio chat handler: RAG answer generation with simulated streaming.

    Retrieves the top-k passages for *prompt*, combines them with recent
    *history* into a single user message, queries the local Llama model,
    appends the exchange to a uniquely named log file, then yields the
    response one extra character every 50 ms so the UI appears to stream.

    Args:
        prompt: the user's current question.
        history: list of (user_message, bot_message) pairs from the UI.

    Yields:
        Growing prefixes of the model's response string.
    """
    k = 2
    scores, retrieved_documents = search(prompt, k)

    formatted_prompt = format_prompt(prompt, retrieved_documents, k, history, memory_limit=3)
    messages = [
        {"role": "system", "content": SYS_PROMPT},
        {"role": "user", "content": formatted_prompt},
    ]

    # Non-streaming completion; the streaming effect is simulated below.
    completion = model.create_chat_completion(
        messages=messages, max_tokens=400, stop=["</s>"], stream=False
    )
    response = completion['choices'][0]['message']['content']

    # Append the exchange to a collision-free per-message log file.
    # The handle is named `log_file` (not `data`) to avoid shadowing the
    # module-level dataset of the same name.
    historylog = f"{prompt} \n {response} "
    try:
        unique_filename = f"file_{uuid.uuid4()}.txt"
        with open(unique_filename, "a") as log_file:
            log_file.write(historylog)
    except IOError as e:
        print(f"An error occurred: {e}")

    # Simulated token streaming for the Gradio chat widget.
    for i in range(len(response)):
        time.sleep(0.05)
        yield response[: i + 1]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Copy shown in the Gradio app header.
TITLE = "AI Copilot for Diabetes Patients"

DESCRIPTION = "I provide answers to concerns related to Diabetes"

# NOTE(review): mid-file import; conventionally this belongs at the top of
# the file with the other imports.
import gradio as gr
|
|
|
# Chat UI wiring: `talk` is a generator callback, so each yielded prefix
# updates the chatbot message in place (typewriter effect).
demo = gr.ChatInterface(
    fn=talk,
    chatbot=gr.Chatbot(
        show_label=True,
        show_share_button=True,
        show_copy_button=True,
        likeable=True,
        layout="bubble",
        bubble_full_width=False,
    ),
    theme="Soft",
    examples=[["what is Diabetes?"]],
    title=TITLE,
    description=DESCRIPTION,
)

print("check14")

# Start the Gradio server (blocking call).
demo.launch()
|
|