Spaces:
Sleeping
Sleeping
Mehmet Emin Aydin
committed on
Update app.py
Browse files
app.py
CHANGED
@@ -15,10 +15,10 @@ from datetime import datetime
|
|
15 |
import io
|
16 |
from dotenv import load_dotenv
|
17 |
from groq import Groq
|
18 |
-
|
19 |
log_data = []
|
20 |
|
21 |
-
client = Groq(api_key="
|
22 |
|
23 |
class User:
|
24 |
def __init__(self, username):
|
@@ -27,12 +27,12 @@ class User:
|
|
27 |
self.embedder = "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2"
|
28 |
|
29 |
|
30 |
-
def upload_documents(user: User, files) -> tuple[str, int]:
|
31 |
text = _extract_text_from_document(files)
|
32 |
chunks = _chunk_text(text)
|
33 |
-
status_code = _create_embeddings_and_save(user, chunks)
|
34 |
if status_code == 200:
|
35 |
-
return "Document uploaded successfully.", 200
|
36 |
else:
|
37 |
return "Failed to upload document.", 500
|
38 |
|
@@ -66,43 +66,46 @@ def _chunk_text(text: str) -> list[str]:
|
|
66 |
return text_splitter.split_text(text)
|
67 |
|
68 |
|
69 |
-
def _create_embeddings_and_save(user: User, chunks: any) -> int:
|
70 |
embeddings = HuggingFaceEmbeddings(model_name=user.embedder)
|
71 |
vector_store = FAISS.from_texts(chunks, embeddings, metadatas=[{"source": f"{user.username}:{i}"} for i in range(len(chunks))])
|
72 |
-
|
73 |
-
return 200
|
74 |
|
75 |
|
76 |
-
def ask_question(user: User, question: str,
|
77 |
-
if api_key:
|
78 |
-
os.environ["GOOGLE_API_KEY"] = api_key
|
79 |
-
else:
|
80 |
-
is_loaded = load_dotenv()
|
81 |
-
if not is_loaded:
|
82 |
-
return "API key not found.", 400
|
83 |
|
84 |
docs = vector_store.similarity_search(question)
|
85 |
retrieved_chunks = docs[0].page_content + docs[1].page_content + docs[2].page_content
|
86 |
-
prompt =
|
87 |
|
88 |
try:
|
89 |
-
response = get_completion(prompt
|
90 |
-
except Exception:
|
91 |
-
return "
|
92 |
|
93 |
-
answer = response
|
94 |
_log(user, question, retrieved_chunks, response)
|
95 |
return answer, 200
|
96 |
|
97 |
|
98 |
def get_completion(prompt, model="llama3-8b-8192"):
|
99 |
-
messages = [
|
100 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
101 |
response = client.chat.completions.create(
|
102 |
model=model,
|
103 |
messages=messages,
|
104 |
temperature=0,
|
105 |
)
|
|
|
106 |
return response.choices[0].message.content.strip()
|
107 |
|
108 |
|
@@ -142,8 +145,8 @@ def upload_document():
|
|
142 |
st.write(file_details)
|
143 |
|
144 |
user = User(username=username)
|
145 |
-
response, status_code = upload_documents(user, uploaded_files)
|
146 |
-
|
147 |
if status_code == 200:
|
148 |
st.success(response)
|
149 |
else:
|
@@ -152,7 +155,6 @@ def upload_document():
|
|
152 |
|
153 |
def ask_question_ui(vector_store : FAISS):
|
154 |
username = st.text_input("Enter a username (just something that represents you):")
|
155 |
-
api_key = st.text_input("Add your Google API key. It is free. Key acquisition video: [https://www.youtube.com/watch?v=brCkpzAD0gc]: (If you do not trust you can download and use the app in your local too)", type="password")
|
156 |
question = st.text_area("Enter the question you want to ask in your document (the more detailed your question, the more accurate an answer you will get):")
|
157 |
|
158 |
if st.button("Ask"):
|
@@ -162,7 +164,7 @@ def ask_question_ui(vector_store : FAISS):
|
|
162 |
st.warning("Please enter a username.")
|
163 |
else:
|
164 |
user = User(username=username)
|
165 |
-
answer, status_code = ask_question(user, question,
|
166 |
|
167 |
if status_code == 200:
|
168 |
st.success("Answer: " + answer)
|
@@ -170,4 +172,6 @@ def ask_question_ui(vector_store : FAISS):
|
|
170 |
st.error("Error: " + answer)
|
171 |
|
172 |
if __name__ == "__main__":
|
|
|
|
|
173 |
main()
|
|
|
15 |
import io
|
16 |
from dotenv import load_dotenv
|
17 |
from groq import Groq
|
18 |
+
load_dotenv()
|
19 |
log_data = []
|
20 |
|
21 |
+
client = Groq(api_key=os.getenv("GROQ_API_KEY"))
|
22 |
|
23 |
class User:
|
24 |
def __init__(self, username):
|
|
|
27 |
self.embedder = "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2"
|
28 |
|
29 |
|
30 |
+
def upload_documents(user: User, files) -> tuple[str, int, FAISS]:
|
31 |
text = _extract_text_from_document(files)
|
32 |
chunks = _chunk_text(text)
|
33 |
+
status_code , vector_store = _create_embeddings_and_save(user, chunks)
|
34 |
if status_code == 200:
|
35 |
+
return "Document uploaded successfully.", 200 , vector_store
|
36 |
else:
|
37 |
return "Failed to upload document.", 500
|
38 |
|
|
|
66 |
return text_splitter.split_text(text)
|
67 |
|
68 |
|
69 |
+
def _create_embeddings_and_save(user: User, chunks: any) -> tuple[int, FAISS]:
|
70 |
embeddings = HuggingFaceEmbeddings(model_name=user.embedder)
|
71 |
vector_store = FAISS.from_texts(chunks, embeddings, metadatas=[{"source": f"{user.username}:{i}"} for i in range(len(chunks))])
|
72 |
+
|
73 |
+
return 200, vector_store
|
74 |
|
75 |
|
76 |
+
def ask_question(user: User, question: str, vector_store : FAISS) -> tuple[str, int]:
|
|
|
|
|
|
|
|
|
|
|
|
|
77 |
|
78 |
docs = vector_store.similarity_search(question)
|
79 |
retrieved_chunks = docs[0].page_content + docs[1].page_content + docs[2].page_content
|
80 |
+
prompt = f'Question: "{question}"\nContext: "{retrieved_chunks}"'
|
81 |
|
82 |
try:
|
83 |
+
response = get_completion(prompt)
|
84 |
+
except Exception as e:
|
85 |
+
return f"LLM connection failed.{e}", 400
|
86 |
|
87 |
+
answer = f'{response}\n\n**<Most Related Chunk>**\n\n{retrieved_chunks}'
|
88 |
_log(user, question, retrieved_chunks, response)
|
89 |
return answer, 200
|
90 |
|
91 |
|
92 |
def get_completion(prompt, model="llama3-8b-8192"):
|
93 |
+
messages = [
|
94 |
+
{
|
95 |
+
"role": "system",
|
96 |
+
"content": "Based on the context provided, answer the question as an easy-to-understand assistant. Ensure that the answer is concise, directly addresses the question, and is in the same language as the question."
|
97 |
+
},
|
98 |
+
{
|
99 |
+
"role": "user",
|
100 |
+
"content": prompt
|
101 |
+
}
|
102 |
+
]
|
103 |
response = client.chat.completions.create(
|
104 |
model=model,
|
105 |
messages=messages,
|
106 |
temperature=0,
|
107 |
)
|
108 |
+
|
109 |
return response.choices[0].message.content.strip()
|
110 |
|
111 |
|
|
|
145 |
st.write(file_details)
|
146 |
|
147 |
user = User(username=username)
|
148 |
+
response, status_code , vector_store= upload_documents(user, uploaded_files)
|
149 |
+
st.session_state.vector_store = vector_store
|
150 |
if status_code == 200:
|
151 |
st.success(response)
|
152 |
else:
|
|
|
155 |
|
156 |
def ask_question_ui(vector_store : FAISS):
|
157 |
username = st.text_input("Enter a username (just something that represents you):")
|
|
|
158 |
question = st.text_area("Enter the question you want to ask in your document (the more detailed your question, the more accurate an answer you will get):")
|
159 |
|
160 |
if st.button("Ask"):
|
|
|
164 |
st.warning("Please enter a username.")
|
165 |
else:
|
166 |
user = User(username=username)
|
167 |
+
answer, status_code = ask_question(user, question, vector_store)
|
168 |
|
169 |
if status_code == 200:
|
170 |
st.success("Answer: " + answer)
|
|
|
172 |
st.error("Error: " + answer)
|
173 |
|
174 |
if __name__ == "__main__":
|
175 |
+
if "vector_store" not in st.session_state:
|
176 |
+
st.session_state.vector_store = {}
|
177 |
main()
|