8bitnand committed
Commit 174deaa · Parent: aae4036

pulled code from gpus

Files changed (2):
  1. app.py +21 -15
  2. model.py +13 -10
app.py CHANGED
@@ -4,28 +4,32 @@ from model import RAGModel, load_configs
 
 
 def run_on_start():
-    global r
-    global configs
-    configs = load_configs(config_file="rag.configs.yml")
-    r = RAGModel(configs)
+
+    if "configs" not in st.session_state:
+        st.session_state.configs = configs = load_configs(config_file="rag.configs.yml")
+    if "model" not in st.session_state:
+        st.session_state.model = RAGModel(configs)
+
+run_on_start()
 
 
 def search(query):
     g = GoogleSearch(query)
     data = g.all_page_data
-    d = Document(data, min_char_len=configs["document"]["min_char_length"])
-    st.session_state.doc = d.doc()[0]
+    d = Document(data, min_char_len=st.session_state.configs["document"]["min_char_length"])
+    st.session_state.doc = d.doc()
 
 
-st.title("LLM powred Google search")
+st.title("Search Here Instead of Google")
 
 if "messages" not in st.session_state:
-    run_on_start()
     st.session_state.messages = []
 
 if "doc" not in st.session_state:
     st.session_state.doc = None
 
+if "refresh" not in st.session_state:
+    st.session_state.refresh = True
 
 for message in st.session_state.messages:
     with st.chat_message(message["role"]):
@@ -36,15 +40,17 @@ if prompt := st.chat_input("Search Here insetad of Google"):
     st.chat_message("user").markdown(prompt)
     st.session_state.messages.append({"role": "user", "content": prompt})
 
-    search(prompt)
-    s, u = SemanticSearch(
-        prompt,
+    if st.session_state.refresh:
+        st.session_state.refresh = False
+        search(prompt)
+
+    s = SemanticSearch(
         st.session_state.doc,
-        configs["model"]["embeding_model"],
-        configs["model"]["device"],
+        st.session_state.configs["model"]["embeding_model"],
+        st.session_state.configs["model"]["device"],
     )
-    topk = s.semantic_search(query=prompt, k=32)
-    output = r.answer_query(query=prompt, topk_items=topk)
+    topk, u = s.semantic_search(query=prompt, k=32)
+    output = st.session_state.model.answer_query(query=prompt, topk_items=topk)
     response = output
     with st.chat_message("assistant"):
         st.markdown(response)
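
Why the session-state change matters: Streamlit re-executes the entire script on every chat interaction, so the old module-level globals re-read rag.configs.yml and rebuilt RAGModel on each message. st.session_state persists across those reruns within one browser session, so the expensive load now happens once. A minimal sketch of the pattern, with a stand-in object in place of RAGModel:

import streamlit as st

# st.session_state survives the rerun Streamlit triggers on every widget
# interaction, so expensive objects are constructed at most once per session.
if "model" not in st.session_state:
    st.session_state.model = object()  # stand-in for RAGModel(configs)

st.write("model id:", id(st.session_state.model))  # stable across reruns

The new refresh flag uses the same mechanism: search(prompt) runs only while the flag is True, so the Google scrape happens once per session rather than on every rerun.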
model.py CHANGED
@@ -4,7 +4,7 @@ from transformers import BitsAndBytesConfig
 from transformers.utils import is_flash_attn_2_available
 import yaml
 import torch
-
+import nltk
 
 def load_configs(config_file: str) -> dict:
     with open(config_file, "r") as f:
@@ -35,13 +35,16 @@ class RAGModel:
 
     def create_prompt(self, query, topk_items: list[str]):
 
-        context = "_ " + "\n-".join(c for c in topk_items)
+        context = "\n-".join(c for c in topk_items)
 
-        base_prompt = f"""Give time for yourself to read the context and then answer the query.
+        base_prompt = f"""You are an alternate to goole search. Your job is to answer the user query in as detailed manner as possible.
+        you have access to the internet and other relevent data related to the user's question.
+        Give time for yourself to read the context and user query and extract relevent data and then answer the query.
+        make sure your answers is as detailed as posssbile.
         Do not return thinking process, just return the answer.
-        If you do not find the answer, or if the query is offesnsive or in any other way harmfull just return "I'm not aware of it"
-        Now use the following context items to answer the user query.
-        context: {context}.
+        Give the output structured as a Wikipedia article.
+        Now use the following context items to answer the user query
+        context: {context}
         user query : {query}
         """
 
@@ -56,16 +59,16 @@ class RAGModel:
 
         prompt = self.create_prompt(query, topk_items)
         input_ids = self.tokenizer(prompt, return_tensors="pt").to(self.device)
-        output = self.model.generate(**input_ids, max_new_tokens=512)
+        output = self.model.generate(**input_ids, temperature=0.7, max_new_tokens=512, do_sample=True)
         text = self.tokenizer.decode(output[0])
+        text = text.replace(prompt, "").replace("<bos>", "").replace("<eos>", "")
 
-        return text
 
+        return text
 
 if __name__ == "__main__":
-
     configs = load_configs(config_file="rag.configs.yml")
-    query = "what is computer vision"
+    query = "Explain F1 racing for a beginer"
     g = GoogleSearch(query)
     data = g.all_page_data
     d = Document(data, 512)
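
Why the decode cleanup is needed: a causal LM's generate() returns the prompt tokens followed by the newly generated ones, so tokenizer.decode(output[0]) reproduces the whole prompt, plus markers like <bos>/<eos>, before the answer. The commit strips these with string replace; below is a sketch of the same cleanup done by slicing token ids instead, which also survives whitespace changes in the tokenizer round-trip. The checkpoint name is an assumption — the real one comes from rag.configs.yml, which this diff does not show.

from transformers import AutoModelForCausalLM, AutoTokenizer

# Hypothetical checkpoint; the <bos>/<eos> markers suggest a Gemma-style model.
name = "google/gemma-2b-it"
tokenizer = AutoTokenizer.from_pretrained(name)
model = AutoModelForCausalLM.from_pretrained(name)

prompt = "context: ...\nuser query : what is computer vision\n"
inputs = tokenizer(prompt, return_tensors="pt")
# do_sample=True is what makes temperature take effect; without it,
# generation is greedy and the temperature argument is ignored.
output = model.generate(**inputs, do_sample=True, temperature=0.7, max_new_tokens=64)

# output[0] = prompt ids + generated ids: slice off the prompt instead of
# replacing the decoded string, and let decode drop the special tokens.
new_tokens = output[0][inputs["input_ids"].shape[1]:]
answer = tokenizer.decode(new_tokens, skip_special_tokens=True)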