Spaces:

nickmuchi
/

Earnings-Call-Analysis-Whisperer

Running

App Files Community

nickmuchi committed on Apr 23, 2023

Commit

f1e7785

1 Parent(s): 0681909

Update functions.py

Browse files

Files changed (1) hide show

functions.py +47 -48

functions.py CHANGED Viewed

@@ -26,16 +26,12 @@ from pydub import AudioSegment
 from langchain.docstore.document import Document
 from langchain.embeddings import HuggingFaceEmbeddings,HuggingFaceInstructEmbeddings
 from langchain.vectorstores import FAISS
-from langchain.chains.qa_with_sources import load_qa_with_sources_chain
 from langchain.text_splitter import RecursiveCharacterTextSplitter
-from langchain.llms import OpenAI
 from langchain.callbacks.base import CallbackManager
 from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
-from langchain.chains.chat_vector_db.prompts import CONDENSE_QUESTION_PROMPT
-from langchain import VectorDBQA
-from langchain.chat_models import ChatOpenAI
 from langchain.prompts.chat import (
     ChatPromptTemplate,
     SystemMessagePromptTemplate,
@@ -64,36 +60,40 @@ margin-bottom: 2.5rem">{}</div> """
 #Stuff Chain Type Prompt template
-system_template="""Use only the following pieces of finance context to answer the users question thoroughly.
-Do not use any information not provided in the context and remember you are a finance expert.
-If you don't know the answer, just say that you don't know, don't try to make up an answer.
-ALWAYS return a "SOURCES" part in your answer.
-The "SOURCES" part should be a reference to the source of the document from which you got your answer.
-Remember, do not reference any information not given in the context.
-Follow the below format when answering:
-Question: [question here]
-Helpful Answer: [answer here]
-SOURCES: xyz
-If there is no sources found please return the below:
-```
-The answer is foo
-SOURCES: Please refer to references section
-```
-Begin!
-----------------
-{context}"""
-messages = [
-    SystemMessagePromptTemplate.from_template(system_template),
-    HumanMessagePromptTemplate.from_template("{question}")
-]
-prompt = ChatPromptTemplate.from_messages(messages)
 ###################### Functions #######################################################################################
@@ -140,9 +140,9 @@ def process_corpus(corpus, title, embedding_model, chunk_size=1000, overlap=50):
     embeddings = gen_embeddings(embedding_model)
-    docsearch = FAISS.from_texts(texts, embeddings, metadatas=[{"source": i} for i in range(len(texts))])
-    return docsearch
 @st.cache_data
 def chunk_and_preprocess_text(text,thresh=500):
@@ -192,23 +192,22 @@ def embed_text(query,title,embedding_model,_docsearch):
     '''Embed text and generate semantic search scores'''
     # llm = OpenAI(temperature=0)
-    chat_llm = ChatOpenAI(streaming=True, callback_manager=CallbackManager([StreamingStdOutCallbackHandler()]), verbose=True, temperature=0)
     title = title.split()[0].lower()
-    chain_type_kwargs = {"prompt": prompt}
-    chain = VectorDBQA.from_chain_type(
-        llm=chat_llm,
-        chain_type="stuff",
-        vectorstore=_docsearch,
-        chain_type_kwargs=chain_type_kwargs,
-        return_source_documents=True,
-        k=3
-        )
-    answer = chain({"query": query})
     return answer

 from langchain.docstore.document import Document
 from langchain.embeddings import HuggingFaceEmbeddings,HuggingFaceInstructEmbeddings
 from langchain.vectorstores import FAISS
 from langchain.text_splitter import RecursiveCharacterTextSplitter
+from langchain.chat_models import ChatOpenAI
 from langchain.callbacks.base import CallbackManager
 from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
+from langchain.chains import ConversationalRetrievalChain
 from langchain.prompts.chat import (
     ChatPromptTemplate,
     SystemMessagePromptTemplate,
 #Stuff Chain Type Prompt template
+# def load_prompt()
+#     system_template="""Use only the following pieces of earnings context to answer the users question thoroughly.
+#     Do not use any information not provided in the context and remember you are a finance expert.
+#     If you don't know the answer, just say that you don't know, don't try to make up an answer.
+#     ALWAYS return a "SOURCES" part in your answer.
+#     The "SOURCES" part should be a reference to the source of the document from which you got your answer.
+#     Remember, do not reference any information not given in the context.
+#     Follow the below format when answering:
+#     Question: [question here]
+#     Helpful Answer: [answer here]
+#     SOURCES: xyz
+#     If there is no sources found please return the below:
+#     ```
+#     The answer is: foo
+#     SOURCES: Please refer to references section
+#     ```
+#     Begin!
+#     ----------------
+#     {context}"""
+#     messages = [
+#         SystemMessagePromptTemplate.from_template(system_template),
+#         HumanMessagePromptTemplate.from_template("{question}")
+#     ]
+#     prompt = ChatPromptTemplate.from_messages(messages)
+#     return prompt
 ###################### Functions #######################################################################################
     embeddings = gen_embeddings(embedding_model)
+    vectorstore = FAISS.from_texts(texts, embeddings, metadatas=[{"source": i} for i in range(len(texts))])
+    return vectorstore
 @st.cache_data
 def chunk_and_preprocess_text(text,thresh=500):
     '''Embed text and generate semantic search scores'''
+    chat_history = []
     # llm = OpenAI(temperature=0)
+    chat_llm = ChatOpenAI(streaming=True,
+                          model_name = 'gpt-3.5-turbo',
+                          callback_manager=CallbackManager([StreamingStdOutCallbackHandler()]),
+                          verbose=True,
+                          temperature=0
+                         )
     title = title.split()[0].lower()
+    chain = ConversationalRetrievalChain.from_llm(chat_llm,
+                                                  retriever= _docsearch.as_retriever(),
+                                                  return_source_documents=True)
+    answer = chain({"question": question, "chat_history": chat_history})
     return answer