Joshua Sundance Bailey committed
Commit: 47c2ffc
Parent(s): 622ac66

qagen & summarize
langchain-streamlit-demo/app.py
CHANGED
@@ -7,12 +7,12 @@ import anthropic
 import langsmith.utils
 import openai
 import streamlit as st
-from langchain import LLMChain
 from langchain.callbacks import StreamlitCallbackHandler
 from langchain.callbacks.base import BaseCallbackHandler
 from langchain.callbacks.tracers.langchain import LangChainTracer, wait_for_all_tracers
 from langchain.callbacks.tracers.run_collector import RunCollectorCallbackHandler
 from langchain.chains import RetrievalQA
+from langchain.chains.llm import LLMChain
 from langchain.chat_models import ChatOpenAI, ChatAnyscale, ChatAnthropic
 from langchain.document_loaders import PyPDFLoader
 from langchain.embeddings import OpenAIEmbeddings
@@ -26,6 +26,7 @@ from langsmith.client import Client
 from streamlit_feedback import streamlit_feedback
 
 from qagen import get_qa_gen_chain, combine_qa_pair_lists
+from summarize import get_summarization_chain
 
 __version__ = "0.0.6"
 
@@ -216,7 +217,14 @@ with sidebar:
     )
     document_chat_chain_type = st.selectbox(
        label="Document Chat Chain Type",
-        options=[
+        options=[
+            "stuff",
+            "refine",
+            "map_reduce",
+            "map_rerank",
+            "Q&A Generation",
+            "Summarization",
+        ],
        index=0,
        help=chain_type_help,
        disabled=not document_chat,
@@ -331,13 +339,7 @@ if st.session_state.llm:
     # --- Document Chat ---
     if st.session_state.retriever:
         if document_chat_chain_type == "Summarization":
-
-            # st.session_state.doc_chain = RetrievalQA.from_chain_type(
-            #     llm=st.session_state.llm,
-            #     chain_type=chain_type,
-            #     retriever=st.session_state.retriever,
-            #     memory=MEMORY,
-            # )
+            st.session_state.doc_chain = "summarization"
         elif document_chat_chain_type == "Q&A Generation":
             st.session_state.doc_chain = get_qa_gen_chain(st.session_state.llm)
 
@@ -393,7 +395,17 @@ if st.session_state.llm:
     full_response: Union[str, None]
     if use_document_chat:
         if document_chat_chain_type == "Summarization":
-
+            st.session_state.doc_chain = get_summarization_chain(
+                st.session_state.llm,
+                prompt,
+            )
+            full_response = st.session_state.doc_chain.run(
+                st.session_state.texts,
+                callbacks=callbacks,
+                tags=["Streamlit Chat"],
+            )
+
+            st.markdown(full_response)
         elif document_chat_chain_type == "Q&A Generation":
             config: Dict[str, Any] = dict(
                 callbacks=callbacks,
@@ -409,14 +421,21 @@ if st.session_state.llm:
                 config,
             )
             results = combine_qa_pair_lists(raw_results).QuestionAnswerPairs
-
-
-
+
+            def _to_str(idx, qap):
+                question_piece = f"{idx}. **Q:** {qap.question}"
+                whitespace = " " * (len(str(idx)) + 2)
+                answer_piece = f"{whitespace}**A:** {qap.answer}"
+                return f"{question_piece}\n{answer_piece}"
+
+            output_text = "\n\n".join(
+                [
+                    _to_str(idx, qap)
+                    for idx, qap in enumerate(results, start=1)
+                ],
             )
-
-
-            st.markdown(f"{idx}. **A:** {result.answer}")
-            st.markdown("\n")
+
+            st.markdown(output_text)
 
     else:
         st_handler = StreamlitCallbackHandler(st.container())
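For reference, the Q&A Generation branch now assembles one markdown string instead of calling `st.markdown` once per pair. Below is a minimal standalone sketch of that formatting; the `QuestionAnswerPair` dataclass is a hypothetical stand-in for the pydantic model defined in `qagen.py`.

```python
from dataclasses import dataclass


@dataclass
class QuestionAnswerPair:
    # Hypothetical stand-in for the qagen.py model, just for illustration.
    question: str
    answer: str


def _to_str(idx: int, qap: QuestionAnswerPair) -> str:
    question_piece = f"{idx}. **Q:** {qap.question}"
    # Pad with spaces matching the width of "N. " so A: sits under Q:.
    whitespace = " " * (len(str(idx)) + 2)
    answer_piece = f"{whitespace}**A:** {qap.answer}"
    return f"{question_piece}\n{answer_piece}"


pairs = [
    QuestionAnswerPair("What does the chain return?", "A list of Q&A pairs."),
    QuestionAnswerPair("How are pairs joined?", "With a blank line between them."),
]
print("\n\n".join(_to_str(i, qap) for i, qap in enumerate(pairs, start=1)))
```

Note also that the Summarization branch only stores a sentinel value at setup time; the real chain is built per message, since it needs the user's prompt to fill the `{query}` slot in the templates below.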
langchain-streamlit-demo/summarize.py
ADDED
@@ -0,0 +1,51 @@
+from langchain.chains.base import Chain
+from langchain.chains.summarize import load_summarize_chain
+from langchain.prompts import PromptTemplate
+from langchain.schema.language_model import BaseLanguageModel
+
+prompt_template = """Write a concise summary of the following text, based on the user input.
+User input: {query}
+Text:
+```
+{text}
+```
+CONCISE SUMMARY:"""
+
+refine_template = (
+    "You are iteratively crafting a summary of the text below based on the user input\n"
+    "User input: {query}"
+    "We have provided an existing summary up to a certain point: {existing_answer}\n"
+    "We have the opportunity to refine the existing summary"
+    "(only if needed) with some more context below.\n"
+    "------------\n"
+    "{text}\n"
+    "------------\n"
+    "Given the new context, refine the original summary.\n"
+    "If the context isn't useful, return the original summary.\n"
+    "If the context is useful, refine the summary to include the new context.\n"
+    "Your contribution is helping to build a comprehensive summary of a large body of knowledge.\n"
+    "You do not have the complete context, so do not discard pieces of the original summary."
+)
+
+
+def get_summarization_chain(
+    llm: BaseLanguageModel,
+    prompt: str,
+) -> Chain:
+    _prompt = PromptTemplate.from_template(
+        prompt_template,
+        partial_variables={"query": prompt},
+    )
+    refine_prompt = PromptTemplate.from_template(
+        refine_template,
+        partial_variables={"query": prompt},
+    )
+    return load_summarize_chain(
+        llm=llm,
+        chain_type="refine",
+        question_prompt=_prompt,
+        refine_prompt=refine_prompt,
+        return_intermediate_steps=False,
+        input_key="input_documents",
+        output_key="output_text",
+    )
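For context, a hedged usage sketch of the new helper, assuming the legacy `langchain` `Chain.run` API that the rest of this app uses; the `Document` contents and the `ChatOpenAI` settings are invented for illustration.

```python
from langchain.chat_models import ChatOpenAI
from langchain.schema import Document

from summarize import get_summarization_chain

# Two fake chunks standing in for st.session_state.texts in app.py.
docs = [
    Document(page_content="First chunk of the uploaded PDF."),
    Document(page_content="Second chunk of the uploaded PDF."),
]

# The user's chat message is baked into both prompts via partial_variables,
# so the chain's only remaining input is input_documents.
chain = get_summarization_chain(ChatOpenAI(temperature=0), "Summarize the key findings.")

# run() maps a single positional argument onto input_key="input_documents".
summary = chain.run(docs)
print(summary)
```

Because `chain_type="refine"`, the chain summarizes the first document with `question_prompt`, then folds each subsequent document into the running summary with `refine_prompt`, which is why both templates share the partialed `{query}` variable.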