Namitg02 commited on
Commit
0389f9b
·
verified ·
1 Parent(s): 86cee1d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +169 -0
app.py CHANGED
@@ -0,0 +1,169 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from datasets import load_dataset
2
+ from datasets import Dataset
3
+ from sentence_transformers import SentenceTransformer
4
+ import faiss
5
+ import time
6
+ import json
7
+ #import torch
8
+ import pandas as pd
9
+ from llama_cpp import Llama
10
+ #from langchain_community.llms import LlamaCpp
11
+ from threading import Thread
12
+ from huggingface_hub import Repository, upload_file
13
+ import os
14
+
15
+
16
# --- Configuration -----------------------------------------------------------
# Hub access token read from the environment (None when the variable is unset).
HF_TOKEN = os.getenv('HF_Token')
#Log_Path="./Logfolder"
logfile = 'DiabetesChatLog.txt'
# Seed structure for a prompt/output log; starts with one empty record.
historylog = [{"Prompt": '', "Output": ''}]

# --- Retrieval corpus --------------------------------------------------------
# Load the full train split up front (streaming disabled); the FAISS index
# below is attached to this object.
data = load_dataset("Namitg02/Test", split='train', streaming=False)
length = len(data)

# Sentence encoder used for queries; its output dimension sizes the index.
embedding_model = SentenceTransformer("all-MiniLM-L6-v2")
embedding_dim = embedding_model.get_sentence_embedding_dimension()

# Flat L2 index registered on the dataset's "embeddings" column.
index = faiss.IndexFlatL2(embedding_dim)
data.add_faiss_index("embeddings", custom_index=index)

#question = "How can I reverse Diabetes?"

# --- Prompting ---------------------------------------------------------------
# System message: tells the model how to answer the question.
SYS_PROMPT = """You are an assistant for answering questions.
You are given the extracted parts of documents and a question. Provide a conversational answer.
If you don't know the answer, just say "I do not know." Don't make up an answer. Don't repeat the SYS_PROMPT."""

# Other models that were considered:
#   TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF (tinyllama-1.1b-chat-v1.0.Q5_K_M.gguf),
#   TinyLlama/TinyLlama-1.1B-Chat-v0.6, andrijdavid/TinyLlama-1.1B-Chat-v1.0-GGUF

# --- Text generation model ---------------------------------------------------
# NOTE(review): temperature / max_tokens / top_p look like generation-time
# settings rather than model-loading options -- confirm that Llama() actually
# applies them instead of ignoring them.
model = Llama(
    model_path="./llama-2-7b-chat.Q4_K_M.gguf",
    n_ctx=4096,
    n_gpu_layers=0,
    temperature=0.75,
    top_p=0.95,
    max_tokens=500,
    # chat_format="llama-2",
    # eos_tokens=terminators
    # callback_manager=callback_manager,
    # verbose=True, # Verbose is required to pass to the callback manager
)
60
+
61
def search(query: str, k: int = 2):
    """Embed ``query`` and look up its ``k`` nearest rows in the dataset.

    The text is encoded with the module-level ``embedding_model`` and matched
    against the "embeddings" index attached to ``data``. ``talk`` calls this
    with the user's prompt.

    Args:
        query: Text to search for.
        k: Number of nearest examples to request.

    Returns:
        A ``(scores, retrieved_examples)`` tuple exactly as produced by
        ``data.get_nearest_examples``: the retrieval scores and the matching
        examples (presumably a dict keyed by column name -- verify against
        the datasets documentation).
    """
    query_vector = embedding_model.encode(query)
    nearest = data.get_nearest_examples("embeddings", query_vector, k=k)
    scores, retrieved_examples = nearest
    return scores, retrieved_examples
71
+
72
def format_prompt(prompt, retrieved_documents, k, history, memory_limit=3):
    """Build the user message for the LLM from the question, context and history.

    Called by ``talk`` to append the retrieved passages (and the most recent
    conversation turns) to the user's question.

    Args:
        prompt: The user's current question.
        retrieved_documents: Mapping whose ``'0'`` entry is the list of
            retrieved passage texts.
        k: Maximum number of passages to include. If fewer than ``k`` passages
            are available, only those are used.
        history: Earlier ``(user_message, bot_message)`` pairs, oldest first.
            May be empty or ``None``.
        memory_limit: Maximum number of most recent exchanges to append;
            zero or a negative value appends none.

    Returns:
        ``"Question:<prompt>\\nContext:"`` followed by each passage on its own
        line and then, when there is history, one
        ``"<s>[INST] user [/INST] bot </s>"`` segment per remembered exchange.
    """
    # Slice instead of indexing range(k) so a short result list cannot raise.
    passages = retrieved_documents['0'][:max(k, 0)]
    text = f"Question:{prompt}\nContext:" + "".join(f"{doc}\n" for doc in passages)

    print("historyinfo")
    print(history)
    if not history:
        return text

    # history[-0:] would be the whole list, so handle a non-positive limit
    # explicitly.
    recent = history[-memory_limit:] if memory_limit > 0 else []

    print("checkwohist")
    text += "".join(
        f"<s>[INST] {user_message} [/INST] {bot_message} </s>"
        for user_message, bot_message in recent
    )
    print("checkwthhist2")
    print(text)
    return text
98
+
99
def talk(prompt, history):
    """Answer ``prompt`` using retrieved context and the chat model.

    Retrieves the nearest documents with ``search``, builds the user message
    with ``format_prompt`` (including up to three past exchanges), sends it
    together with ``SYS_PROMPT`` to ``model`` and returns the reply text.

    Args:
        prompt: The user's latest message.
        history: Earlier conversation turns supplied by the chat UI; passed
            through to ``format_prompt``.

    Returns:
        The ``content`` of the first choice in the completion response.
    """
    k = 2  # number of retrieved documents
    _scores, retrieved_documents = search(prompt, k)
    print(retrieved_documents.keys())

    formatted_prompt = format_prompt(prompt, retrieved_documents, k, history, memory_limit=3)
    print("check5")
    # Widen pandas' display column width (only affects how pandas objects print).
    pd.options.display.max_colwidth = 4000

    # The message structure must follow the chat format expected by the
    # text generation model.
    messages = [
        {"role": "system", "content": SYS_PROMPT},
        {"role": "user", "content": formatted_prompt},
    ]
    print(messages)

    # Sampling settings are passed here because they are per-generation
    # options; supplying them only when constructing the model does not
    # influence create_chat_completion. "</s>" marks the end of a sequence.
    completion = model.create_chat_completion(
        messages=messages,
        max_tokens=1000,
        temperature=0.75,
        top_p=0.95,
        stop=["</s>"],
        stream=False,
    )
    print("check 7")
    answer = completion['choices'][0]['message']['content']
    print(answer)
    return answer
124
+ # text = ""
125
+ # for output in stream:
126
+ # text += output['choices'][0]['message']['content']
127
+ # print(f"{output}")
128
+ # print("check3H")
129
+ # print(text)
130
+ # yield text
131
+
132
+
133
+
134
+ # calling the model to generate response based on message/ input
135
+ # do_sample if set to True uses strategies to select the next token from the probability distribution over the entire vocabulary
136
+ # temperature controls randomness: the higher the temperature, the more random the output
137
+ # only the tokens comprising the top_p probability mass are considered for responses
138
+ # This output is a data structure containing all the information returned by generate(), but that can also be used as tuple or dictionary.
139
+
140
+
141
+
142
# Text shown at the top of the chat page.
TITLE = "AI Copilot for Diabetes Patients"
DESCRIPTION = "I provide answers to concerns related to Diabetes"

import gradio as gr

# Chat UI: every submitted message is routed to talk(), which in turn calls
# the retrieval and prompt-formatting helpers.
demo = gr.ChatInterface(
    fn=talk,
    title=TITLE,
    description=DESCRIPTION,
    theme="Soft",
    examples=[["what is Diabetes?"]],
    chatbot=gr.Chatbot(
        layout="bubble",
        bubble_full_width=False,
        likeable=True,
        show_label=True,
        show_copy_button=True,
        show_share_button=True,
    ),
)
print("check14")

# Disabled: pushing the conversation log to the Hub.
#print(historylog)
#memory_panda = pd.DataFrame(historylog)
#Logfile = Dataset.from_pandas(memory_panda)
#Logfile.push_to_hub("Namitg02/Logfile",token = HF_TOKEN)

# Start the web app.
demo.launch()