Commit 174deaa by 8bitnand (parent: aae4036): pulled code from gpus
app.py CHANGED
@@ -4,28 +4,32 @@ from model import RAGModel, load_configs
 
 
 def run_on_start():
-
-
-
-
+
+    if "configs" not in st.session_state:
+        st.session_state.configs = configs = load_configs(config_file="rag.configs.yml")
+    if "model" not in st.session_state:
+        st.session_state.model = RAGModel(configs)
+
+run_on_start()
 
 
 def search(query):
     g = GoogleSearch(query)
     data = g.all_page_data
-    d = Document(data, min_char_len=configs["document"]["min_char_length"])
-    st.session_state.doc = d.doc()
+    d = Document(data, min_char_len=st.session_state.configs["document"]["min_char_length"])
+    st.session_state.doc = d.doc()
 
 
-st.title("
+st.title("Search Here Instead of Google")
 
 if "messages" not in st.session_state:
-    run_on_start()
     st.session_state.messages = []
 
 if "doc" not in st.session_state:
     st.session_state.doc = None
 
+if "refresh" not in st.session_state:
+    st.session_state.refresh = True
 
 for message in st.session_state.messages:
     with st.chat_message(message["role"]):

@@ -36,15 +40,17 @@ if prompt := st.chat_input("Search Here insetad of Google"):
     st.chat_message("user").markdown(prompt)
     st.session_state.messages.append({"role": "user", "content": prompt})
 
-
-
-    prompt
+    if st.session_state.refresh:
+        st.session_state.refresh = False
+        search(prompt)
+
+    s = SemanticSearch(
         st.session_state.doc,
-        configs["model"]["embeding_model"],
-        configs["model"]["device"],
+        st.session_state.configs["model"]["embeding_model"],
+        st.session_state.configs["model"]["device"],
     )
-    topk = s.semantic_search(query=prompt, k=32)
-    output =
+    topk, u = s.semantic_search(query=prompt, k=32)
+    output = st.session_state.model.answer_query(query=prompt, topk_items=topk)
     response = output
     with st.chat_message("assistant"):
         st.markdown(response)
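The app.py change moves all heavyweight setup behind st.session_state checks: run_on_start() loads the configs and builds the RAGModel only if they are not already cached, a refresh flag makes sure the Google scrape and Document build run once per session, and the chat handler then answers from SemanticSearch results via st.session_state.model.answer_query(). Below is a minimal, self-contained sketch of that session-state caching pattern; ExpensiveModel is a hypothetical stand-in for RAGModel, and the search/retrieval steps are left out.

import time

import streamlit as st


class ExpensiveModel:
    # Stand-in for RAGModel: something slow to construct that we only want to load once.
    def __init__(self):
        time.sleep(2)  # pretend we are loading weights onto a GPU

    def answer(self, query: str) -> str:
        return f"echo: {query}"


def run_on_start():
    # Streamlit reruns the whole script on every interaction, so cache the
    # expensive object in session state instead of rebuilding it each time.
    if "model" not in st.session_state:
        st.session_state.model = ExpensiveModel()


run_on_start()

st.title("Session-state caching sketch")

if prompt := st.chat_input("Ask something"):
    st.chat_message("user").markdown(prompt)
    with st.chat_message("assistant"):
        st.markdown(st.session_state.model.answer(prompt))

Saved to a file and launched with streamlit run, the two-second constructor only executes on the first interaction; later prompts reuse the cached object, which is exactly what the commit does for RAGModel and the loaded configs.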
model.py CHANGED
@@ -4,7 +4,7 @@ from transformers import BitsAndBytesConfig
 from transformers.utils import is_flash_attn_2_available
 import yaml
 import torch
-
+import nltk
 
 def load_configs(config_file: str) -> dict:
     with open(config_file, "r") as f:

@@ -35,13 +35,16 @@ class RAGModel:
 
     def create_prompt(self, query, topk_items: list[str]):
 
-        context =
+        context = "\n-".join(c for c in topk_items)
 
-        base_prompt = f"""
+        base_prompt = f"""You are an alternate to goole search. Your job is to answer the user query in as detailed manner as possible.
+        you have access to the internet and other relevent data related to the user's question.
+        Give time for yourself to read the context and user query and extract relevent data and then answer the query.
+        make sure your answers is as detailed as posssbile.
         Do not return thinking process, just return the answer.
-
-        Now use the following context items to answer the user query
-        context: {context}
+        Give the output structured as a Wikipedia article.
+        Now use the following context items to answer the user query
+        context: {context}
         user query : {query}
         """
 

@@ -56,16 +59,16 @@ class RAGModel:
 
         prompt = self.create_prompt(query, topk_items)
         input_ids = self.tokenizer(prompt, return_tensors="pt").to(self.device)
-        output = self.model.generate(**input_ids, max_new_tokens=512)
+        output = self.model.generate(**input_ids, temperature=0.7, max_new_tokens=512, do_sample=True)
         text = self.tokenizer.decode(output[0])
+        text = text.replace(prompt, "").replace("<bos>", "").replace("<eos>", "")
 
-        return text
 
+        return text
 
 if __name__ == "__main__":
-
     configs = load_configs(config_file="rag.configs.yml")
-    query = "
+    query = "Explain F1 racing for a beginer"
     g = GoogleSearch(query)
     data = g.all_page_data
     d = Document(data, 512)
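In model.py, answer_query() now samples instead of greedy-decoding (do_sample=True with temperature=0.7) and scrubs the decoded string, removing the echoed prompt and the <bos>/<eos> special tokens before returning it. The snippet below is a standalone sketch of that generate-and-clean step using plain transformers; "google/gemma-2b-it" is only an assumed stand-in checkpoint, since the real model name comes from rag.configs.yml, which this diff does not show.

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

model_id = "google/gemma-2b-it"  # assumption: any causal LM whose tokenizer uses <bos>/<eos>
device = "cuda" if torch.cuda.is_available() else "cpu"

tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id).to(device)

prompt = "context: ...\nuser query : Explain F1 racing for a beginner"
input_ids = tokenizer(prompt, return_tensors="pt").to(device)

# Sampled decoding, mirroring the parameters added in this commit.
output = model.generate(**input_ids, temperature=0.7, max_new_tokens=512, do_sample=True)
text = tokenizer.decode(output[0])

# The decoded string repeats the prompt and includes special tokens,
# so strip them the same way the updated answer_query() does.
text = text.replace(prompt, "").replace("<bos>", "").replace("<eos>", "")
print(text)

Passing skip_special_tokens=True to tokenizer.decode would drop <bos>/<eos> in one call; the replace() chain above just mirrors what the commit ships.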