nickmuchi committed on
Commit
f1e7785
·
1 Parent(s): 0681909

Update functions.py

Browse files
Files changed (1) hide show
  1. functions.py +47 -48
functions.py CHANGED
@@ -26,16 +26,12 @@ from pydub import AudioSegment
26
  from langchain.docstore.document import Document
27
  from langchain.embeddings import HuggingFaceEmbeddings,HuggingFaceInstructEmbeddings
28
  from langchain.vectorstores import FAISS
29
- from langchain.chains.qa_with_sources import load_qa_with_sources_chain
30
  from langchain.text_splitter import RecursiveCharacterTextSplitter
31
- from langchain.llms import OpenAI
32
  from langchain.callbacks.base import CallbackManager
33
  from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
34
- from langchain.chains.chat_vector_db.prompts import CONDENSE_QUESTION_PROMPT
35
- from langchain import VectorDBQA
36
-
37
 
38
- from langchain.chat_models import ChatOpenAI
39
  from langchain.prompts.chat import (
40
  ChatPromptTemplate,
41
  SystemMessagePromptTemplate,
@@ -64,36 +60,40 @@ margin-bottom: 2.5rem">{}</div> """
64
 
65
  #Stuff Chain Type Prompt template
66
 
67
- system_template="""Use only the following pieces of finance context to answer the users question thoroughly.
68
- Do not use any information not provided in the context and remember you are a finance expert.
69
- If you don't know the answer, just say that you don't know, don't try to make up an answer.
70
- ALWAYS return a "SOURCES" part in your answer.
71
- The "SOURCES" part should be a reference to the source of the document from which you got your answer.
72
-
73
- Remember, do not reference any information not given in the context.
74
-
75
- Follow the below format when answering:
76
-
77
- Question: [question here]
78
- Helpful Answer: [answer here]
79
- SOURCES: xyz
80
-
81
- If there is no sources found please return the below:
82
-
83
- ```
84
- The answer is foo
85
- SOURCES: Please refer to references section
86
- ```
87
 
88
- Begin!
89
- ----------------
90
- {context}"""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
91
 
92
- messages = [
93
- SystemMessagePromptTemplate.from_template(system_template),
94
- HumanMessagePromptTemplate.from_template("{question}")
95
- ]
96
- prompt = ChatPromptTemplate.from_messages(messages)
97
 
98
  ###################### Functions #######################################################################################
99
 
@@ -140,9 +140,9 @@ def process_corpus(corpus, title, embedding_model, chunk_size=1000, overlap=50):
140
 
141
  embeddings = gen_embeddings(embedding_model)
142
 
143
- docsearch = FAISS.from_texts(texts, embeddings, metadatas=[{"source": i} for i in range(len(texts))])
144
 
145
- return docsearch
146
 
147
  @st.cache_data
148
  def chunk_and_preprocess_text(text,thresh=500):
@@ -192,23 +192,22 @@ def embed_text(query,title,embedding_model,_docsearch):
192
 
193
  '''Embed text and generate semantic search scores'''
194
 
 
195
  # llm = OpenAI(temperature=0)
196
- chat_llm = ChatOpenAI(streaming=True, callback_manager=CallbackManager([StreamingStdOutCallbackHandler()]), verbose=True, temperature=0)
 
 
 
 
 
197
 
198
  title = title.split()[0].lower()
199
 
200
- chain_type_kwargs = {"prompt": prompt}
201
-
202
- chain = VectorDBQA.from_chain_type(
203
- llm=chat_llm,
204
- chain_type="stuff",
205
- vectorstore=_docsearch,
206
- chain_type_kwargs=chain_type_kwargs,
207
- return_source_documents=True,
208
- k=3
209
- )
210
 
211
- answer = chain({"query": query})
212
 
213
  return answer
214
 
 
26
  from langchain.docstore.document import Document
27
  from langchain.embeddings import HuggingFaceEmbeddings,HuggingFaceInstructEmbeddings
28
  from langchain.vectorstores import FAISS
 
29
  from langchain.text_splitter import RecursiveCharacterTextSplitter
30
+ from langchain.chat_models import ChatOpenAI
31
  from langchain.callbacks.base import CallbackManager
32
  from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
33
+ from langchain.chains import ConversationalRetrievalChain
 
 
34
 
 
35
  from langchain.prompts.chat import (
36
  ChatPromptTemplate,
37
  SystemMessagePromptTemplate,
 
60
 
61
  #Stuff Chain Type Prompt template
62
 
63
+ # def load_prompt()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
64
 
65
+ # system_template="""Use only the following pieces of earnings context to answer the users question thoroughly.
66
+ # Do not use any information not provided in the context and remember you are a finance expert.
67
+ # If you don't know the answer, just say that you don't know, don't try to make up an answer.
68
+ # ALWAYS return a "SOURCES" part in your answer.
69
+ # The "SOURCES" part should be a reference to the source of the document from which you got your answer.
70
+
71
+ # Remember, do not reference any information not given in the context.
72
+
73
+ # Follow the below format when answering:
74
+
75
+ # Question: [question here]
76
+ # Helpful Answer: [answer here]
77
+ # SOURCES: xyz
78
+
79
+ # If there is no sources found please return the below:
80
+
81
+ # ```
82
+ # The answer is: foo
83
+ # SOURCES: Please refer to references section
84
+ # ```
85
+
86
+ # Begin!
87
+ # ----------------
88
+ # {context}"""
89
+
90
+ # messages = [
91
+ # SystemMessagePromptTemplate.from_template(system_template),
92
+ # HumanMessagePromptTemplate.from_template("{question}")
93
+ # ]
94
+ # prompt = ChatPromptTemplate.from_messages(messages)
95
 
96
+ # return prompt
 
 
 
 
97
 
98
  ###################### Functions #######################################################################################
99
 
 
140
 
141
  embeddings = gen_embeddings(embedding_model)
142
 
143
+ vectorstore = FAISS.from_texts(texts, embeddings, metadatas=[{"source": i} for i in range(len(texts))])
144
 
145
+ return vectorstore
146
 
147
  @st.cache_data
148
  def chunk_and_preprocess_text(text,thresh=500):
 
192
 
193
  '''Embed text and generate semantic search scores'''
194
 
195
+ chat_history = []
196
  # llm = OpenAI(temperature=0)
197
+ chat_llm = ChatOpenAI(streaming=True,
198
+ model_name = 'gpt-3.5-turbo',
199
+ callback_manager=CallbackManager([StreamingStdOutCallbackHandler()]),
200
+ verbose=True,
201
+ temperature=0
202
+ )
203
 
204
  title = title.split()[0].lower()
205
 
206
+ chain = ConversationalRetrievalChain.from_llm(chat_llm,
207
+ retriever= _docsearch.as_retriever(),
208
+ return_source_documents=True)
 
 
 
 
 
 
 
209
 
210
+ answer = chain({"question": question, "chat_history": chat_history})
211
 
212
  return answer
213