Joshua Sundance Bailey committed
Commit 883f3be · Parent: 048798b
research assistant
Files changed:
- .pre-commit-config.yaml +3 -1
- langchain-streamlit-demo/app.py +30 -17
- langchain-streamlit-demo/llm_resources.py +43 -31
- langchain-streamlit-demo/research_assistant/__init__.py +3 -0
- langchain-streamlit-demo/research_assistant/chain.py +16 -0
- langchain-streamlit-demo/research_assistant/search/__init__.py +0 -0
- langchain-streamlit-demo/research_assistant/search/web.py +180 -0
- langchain-streamlit-demo/research_assistant/writer.py +75 -0
- requirements.txt +2 -0
.pre-commit-config.yaml CHANGED
@@ -44,6 +44,8 @@ repos:
     rev: v1.5.1
     hooks:
       - id: mypy
+        additional_dependencies:
+          - types-requests
   - repo: https://github.com/asottile/add-trailing-comma
     rev: v3.1.0
     hooks:
@@ -60,4 +62,4 @@ repos:
     rev: 1.7.5
     hooks:
       - id: bandit
-        args: ["-x", "tests/*.py"]
+        args: ["-x", "tests/*.py", "-s", "B113"]
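Bandit's B113 check flags requests calls made without a timeout; the new research_assistant/search/web.py (added below) calls requests.get(url) bare, which is presumably why the hook now skips B113 rather than failing. For reference, a minimal sketch of what satisfying the check would look like instead (the URL and the 10-second value are illustrative, not from this commit):

    import requests

    # An explicit timeout satisfies bandit's B113 (request-without-timeout) check.
    response = requests.get("https://example.com", timeout=10)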
langchain-streamlit-demo/app.py CHANGED
@@ -5,23 +5,24 @@ import anthropic
 import langsmith.utils
 import openai
 import streamlit as st
+from langchain.callbacks import StreamlitCallbackHandler
 from langchain.callbacks.tracers.langchain import LangChainTracer, wait_for_all_tracers
 from langchain.callbacks.tracers.run_collector import RunCollectorCallbackHandler
 from langchain.memory import ConversationBufferMemory, StreamlitChatMessageHistory
 from langchain.prompts import ChatPromptTemplate, MessagesPlaceholder
 from langchain.schema.document import Document
 from langchain.schema.retriever import BaseRetriever
+from langchain.tools import Tool
 from langsmith.client import Client
 from streamlit_feedback import streamlit_feedback
 
 from defaults import default_values
-
 from llm_resources import (
-    get_runnable,
+    get_agent,
     get_llm,
     get_texts_and_multiretriever,
-    StreamHandler,
 )
+from research_assistant.chain import chain as research_assistant_chain
 
 __version__ = "1.1.0"
 
@@ -378,6 +379,15 @@ st.session_state.llm = get_llm(
     },
 )
 
+research_assistant_tool = Tool.from_function(
+    func=lambda s: research_assistant_chain.invoke({"question": s}),
+    name="web-research-assistant",
+    description="this assistant returns a report based on web research",
+)
+
+TOOLS = [research_assistant_tool]
+st.session_state.agent = get_agent(TOOLS, STMEMORY, st.session_state.llm)
+
 # --- Chat History ---
 for msg in STMEMORY.messages:
     st.chat_message(
@@ -430,24 +440,27 @@ if st.session_state.llm:
 
         full_response: Union[str, None] = None
 
+        # stream_handler = StreamHandler(message_placeholder)
+        # callbacks.append(stream_handler)
+
+        st_callback = StreamlitCallbackHandler(st.container())
+        callbacks.append(st_callback)
+
         message_placeholder = st.empty()
-        stream_handler = StreamHandler(message_placeholder)
-        callbacks.append(stream_handler)
-
-        st.session_state.chain = get_runnable(
-            use_document_chat,
-            document_chat_chain_type,
-            st.session_state.llm,
-            st.session_state.retriever,
-            MEMORY,
-            chat_prompt,
-            prompt,
-            STMEMORY,
-        )
+        # st.session_state.chain = get_runnable(
+        #     use_document_chat,
+        #     document_chat_chain_type,
+        #     st.session_state.llm,
+        #     st.session_state.retriever,
+        #     MEMORY,
+        #     chat_prompt,
+        #     prompt,
+        #     STMEMORY,
+        # )
 
         # --- LLM call ---
         try:
-            full_response = st.session_state.chain.invoke(prompt, config)
+            full_response = st.session_state.agent.invoke(prompt, config)
 
         except (openai.AuthenticationError, anthropic.AuthenticationError):
             st.error(
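The net effect of the app.py changes: the research assistant chain is wrapped as a single Tool, handed to the new get_agent helper at startup, and each chat turn now goes through agent.invoke instead of the old get_runnable chain. A minimal sketch of the same wiring outside Streamlit, assuming the langchain==0.0.348 API pinned in requirements.txt; ChatMessageHistory stands in for the Streamlit-backed STMEMORY:

    from langchain.chat_models import ChatOpenAI
    from langchain.memory import ChatMessageHistory
    from langchain.tools import Tool

    from llm_resources import get_agent
    from research_assistant.chain import chain as research_assistant_chain

    # The agent passes the tool a plain string; the lambda adapts it to the
    # {"question": ...} input the research assistant chain expects.
    research_assistant_tool = Tool.from_function(
        func=lambda s: research_assistant_chain.invoke({"question": s}),
        name="web-research-assistant",
        description="this assistant returns a report based on web research",
    )

    agent = get_agent(
        tools=[research_assistant_tool],
        chat_history=ChatMessageHistory(),  # stand-in for STMEMORY
        llm=ChatOpenAI(model="gpt-3.5-turbo"),  # needs OpenAI function calling
    )
    print(agent.invoke("Summarize recent serverless GPU offerings."))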
langchain-streamlit-demo/llm_resources.py CHANGED
@@ -32,6 +32,48 @@ from langchain_core.messages import SystemMessage
 from defaults import DEFAULT_CHUNK_SIZE, DEFAULT_CHUNK_OVERLAP, DEFAULT_RETRIEVER_K
 from qagen import get_rag_qa_gen_chain
 from summarize import get_rag_summarization_chain
+from langchain.tools.base import BaseTool
+from langchain.schema.chat_history import BaseChatMessageHistory
+from langchain.llms.base import BaseLLM
+
+
+def get_agent(
+    tools: list[BaseTool],
+    chat_history: BaseChatMessageHistory,
+    llm: BaseLLM,
+):
+    memory_key = "agent_history"
+    system_message = SystemMessage(
+        content=(
+            "Do your best to answer the questions. "
+            "Feel free to use any tools available to look up "
+            "relevant information, only if necessary"
+        ),
+    )
+    prompt = OpenAIFunctionsAgent.create_prompt(
+        system_message=system_message,
+        extra_prompt_messages=[MessagesPlaceholder(variable_name=memory_key)],
+    )
+    agent = OpenAIFunctionsAgent(llm=llm, tools=tools, prompt=prompt)
+
+    agent_memory = AgentTokenBufferMemory(
+        chat_memory=chat_history,
+        memory_key=memory_key,
+        llm=llm,
+    )
+
+    agent_executor = AgentExecutor(
+        agent=agent,
+        tools=tools,
+        memory=agent_memory,
+        verbose=True,
+        return_intermediate_steps=True,
+    )
+    return (
+        {"input": RunnablePassthrough()}
+        | agent_executor
+        | (lambda output: output["output"])
+    )
 
 
 def get_runnable(
@@ -69,38 +111,8 @@ def get_runnable(
         "Retrieves custom context provided by the user for this conversation. Use this if you cannot answer immediately and confidently.",
     )
     tools = [tool]
-    memory_key = "agent_history"
-    system_message = SystemMessage(
-        content=(
-            "Do your best to answer the questions. "
-            "Feel free to use any tools available to look up "
-            "relevant information, only if necessary"
-        ),
-    )
-    prompt = OpenAIFunctionsAgent.create_prompt(
-        system_message=system_message,
-        extra_prompt_messages=[MessagesPlaceholder(variable_name=memory_key)],
-    )
-    agent = OpenAIFunctionsAgent(llm=llm, tools=tools, prompt=prompt)
 
-    agent_memory = AgentTokenBufferMemory(
-        chat_memory=chat_history,
-        memory_key=memory_key,
-        llm=llm,
-    )
-
-    agent_executor = AgentExecutor(
-        agent=agent,
-        tools=tools,
-        memory=agent_memory,
-        verbose=True,
-        return_intermediate_steps=True,
-    )
-    return (
-        {"input": RunnablePassthrough()}
-        | agent_executor
-        | (lambda output: output["output"])
-    )
+    return get_agent(tools, chat_history, llm)
 
 
 def get_llm(
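get_agent is a pure extract-function refactor: the body that used to live inline in get_runnable is unchanged, so get_runnable and app.py now build the same agent. Its return value leans on LCEL coercion: piping a dict literal into a runnable turns the dict into a RunnableParallel, and a trailing lambda becomes a RunnableLambda, which here unwraps the executor's {"output": ...} dict into a plain string. That coercion in isolation (the uppercasing function is a stand-in for AgentExecutor):

    from langchain_core.runnables import RunnableLambda, RunnablePassthrough

    # Stand-in for AgentExecutor: takes {"input": ...}, returns {"output": ...}.
    fake_executor = RunnableLambda(lambda d: {"output": d["input"].upper()})

    pipeline = (
        {"input": RunnablePassthrough()}  # dict literal -> RunnableParallel
        | fake_executor
        | (lambda o: o["output"])  # lambda -> RunnableLambda
    )
    assert pipeline.invoke("hello") == "HELLO"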
langchain-streamlit-demo/research_assistant/__init__.py ADDED
@@ -0,0 +1,3 @@
+from research_assistant.chain import chain
+
+__all__ = ["chain"]
langchain-streamlit-demo/research_assistant/chain.py ADDED
@@ -0,0 +1,16 @@
+from langchain_core.pydantic_v1 import BaseModel
+from langchain_core.runnables import RunnablePassthrough
+
+from research_assistant.search.web import chain as search_chain
+from research_assistant.writer import chain as writer_chain
+
+chain_notypes = (
+    RunnablePassthrough().assign(research_summary=search_chain) | writer_chain
+)
+
+
+class InputType(BaseModel):
+    question: str
+
+
+chain = chain_notypes.with_types(input_type=InputType)
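RunnablePassthrough().assign is what stitches the two stages together: the incoming {"question": ...} dict flows through unchanged while a research_summary key is computed by the search chain, so the writer chain receives both fields its templates need. A sketch of the end-to-end call (the question is illustrative; it hits DuckDuckGo and OpenAI, so it needs network access and OPENAI_API_KEY):

    from research_assistant.chain import chain

    report = chain.invoke({"question": "Is quantum error correction practical yet?"})
    print(report)  # markdown research report, sources listed at the end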
langchain-streamlit-demo/research_assistant/search/__init__.py ADDED
(empty file)
langchain-streamlit-demo/research_assistant/search/web.py ADDED
@@ -0,0 +1,180 @@
+import json
+from typing import Any
+
+import requests
+from bs4 import BeautifulSoup
+from langchain.chat_models import ChatOpenAI
+from langchain.prompts import ChatPromptTemplate
+from langchain.retrievers.tavily_search_api import TavilySearchAPIRetriever
+from langchain.utilities import DuckDuckGoSearchAPIWrapper
+from langchain_core.messages import SystemMessage
+from langchain_core.output_parsers import StrOutputParser
+from langchain_core.runnables import (
+    ConfigurableField,
+    Runnable,
+    RunnableLambda,
+    RunnableParallel,
+    RunnablePassthrough,
+)
+
+RESULTS_PER_QUESTION = 3
+
+ddg_search = DuckDuckGoSearchAPIWrapper()
+
+
+def scrape_text(url: str):
+    # Send a GET request to the webpage
+    try:
+        response = requests.get(url)
+
+        # Check if the request was successful
+        if response.status_code == 200:
+            # Parse the content of the request with BeautifulSoup
+            soup = BeautifulSoup(response.text, "html.parser")
+
+            # Extract all text from the webpage
+            page_text = soup.get_text(separator=" ", strip=True)
+
+            # Return the extracted text
+            return page_text
+        else:
+            return f"Failed to retrieve the webpage: Status code {response.status_code}"
+    except Exception as e:
+        print(e)
+        return f"Failed to retrieve the webpage: {e}"
+
+
+def web_search(query: str, num_results: int):
+    results = ddg_search.results(query, num_results)
+    return [r["link"] for r in results]
+
+
+get_links: Runnable[Any, Any] = (
+    RunnablePassthrough()
+    | RunnableLambda(
+        lambda x: [
+            {"url": url, "question": x["question"]}
+            for url in web_search(query=x["question"], num_results=RESULTS_PER_QUESTION)
+        ],
+    )
+).configurable_alternatives(
+    ConfigurableField("search_engine"),
+    default_key="duckduckgo",
+    tavily=RunnableLambda(lambda x: x["question"])
+    | RunnableParallel(
+        {
+            "question": RunnablePassthrough(),
+            "results": TavilySearchAPIRetriever(k=RESULTS_PER_QUESTION),
+        },
+    )
+    | RunnableLambda(
+        lambda x: [
+            {"url": result.metadata["source"], "question": x["question"]}
+            for result in x["results"]
+        ],
+    ),
+)
+
+
+SEARCH_PROMPT = ChatPromptTemplate.from_messages(
+    [
+        ("system", "{agent_prompt}"),
+        (
+            "user",
+            "Write 3 google search queries to search online that form an "
+            "objective opinion from the following: {question}\n"
+            "You must respond with a list of strings in the following format: "
+            '["query 1", "query 2", "query 3"].',
+        ),
+    ],
+)
+
+AUTO_AGENT_INSTRUCTIONS = """
+This task involves researching a given topic, regardless of its complexity or the availability of a definitive answer. The research is conducted by a specific agent, defined by its type and role, with each agent requiring distinct instructions.
+Agent
+The agent is determined by the field of the topic and the specific name of the agent that could be utilized to research the topic provided. Agents are categorized by their area of expertise, and each agent type is associated with a corresponding emoji.
+
+examples:
+task: "should I invest in apple stocks?"
+response:
+{
+    "agent": "💰 Finance Agent",
+    "agent_role_prompt": "You are a seasoned finance analyst AI assistant. Your primary goal is to compose comprehensive, astute, impartial, and methodically arranged financial reports based on provided data and trends."
+}
+task: "could reselling sneakers become profitable?"
+response:
+{
+    "agent": "📈 Business Analyst Agent",
+    "agent_role_prompt": "You are an experienced AI business analyst assistant. Your main objective is to produce comprehensive, insightful, impartial, and systematically structured business reports based on provided business data, market trends, and strategic analysis."
+}
+task: "what are the most interesting sites in Tel Aviv?"
+response:
+{
+    "agent": "🌍 Travel Agent",
+    "agent_role_prompt": "You are a world-travelled AI tour guide assistant. Your main purpose is to draft engaging, insightful, unbiased, and well-structured travel reports on given locations, including history, attractions, and cultural insights."
+}
+"""  # noqa: E501
+CHOOSE_AGENT_PROMPT = ChatPromptTemplate.from_messages(
+    [SystemMessage(content=AUTO_AGENT_INSTRUCTIONS), ("user", "task: {task}")],
+)
+
+SUMMARY_TEMPLATE = """{text}
+
+-----------
+
+Using the above text, answer in short the following question:
+
+> {question}
+
+-----------
+If the question cannot be answered using the text, simply summarize the text. Include all factual information, numbers, stats, etc. if available."""  # noqa: E501
+SUMMARY_PROMPT = ChatPromptTemplate.from_template(SUMMARY_TEMPLATE)
+
+scrape_and_summarize: Runnable[Any, Any] = (
+    RunnableParallel(
+        {
+            "question": lambda x: x["question"],
+            "text": lambda x: scrape_text(x["url"])[:10000],
+            "url": lambda x: x["url"],
+        },
+    )
+    | RunnableParallel(
+        {
+            "summary": SUMMARY_PROMPT | ChatOpenAI(temperature=0) | StrOutputParser(),
+            "url": lambda x: x["url"],
+        },
+    )
+    | RunnableLambda(lambda x: f"Source Url: {x['url']}\nSummary: {x['summary']}")
+)
+
+multi_search = get_links | scrape_and_summarize.map() | (lambda x: "\n".join(x))
+
+
+def load_json(s):
+    try:
+        return json.loads(s)
+    except Exception:
+        return {}
+
+
+search_query = SEARCH_PROMPT | ChatOpenAI(temperature=0) | StrOutputParser() | load_json
+choose_agent = (
+    CHOOSE_AGENT_PROMPT | ChatOpenAI(temperature=0) | StrOutputParser() | load_json
+)
+
+get_search_queries = (
+    RunnablePassthrough().assign(
+        agent_prompt=RunnableParallel({"task": lambda x: x})
+        | choose_agent
+        | (lambda x: x.get("agent_role_prompt")),
+    )
+    | search_query
+)
+
+
+chain = (
+    get_search_queries
+    | (lambda x: [{"question": q} for q in x])
+    | multi_search.map()
+    | (lambda x: "\n\n".join(x))
+)
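Because get_links is declared with configurable_alternatives, the search backend is chosen per call rather than at import time: the default key "duckduckgo" runs the DuckDuckGo branch, and selecting "tavily" swaps in the Tavily retriever (which requires TAVILY_API_KEY). A sketch of picking the alternative through the standard LCEL config dict (the question is illustrative):

    from research_assistant.search.web import chain as search_chain

    summaries = search_chain.invoke(
        {"question": "What is retrieval-augmented generation?"},
        config={"configurable": {"search_engine": "tavily"}},
    )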
langchain-streamlit-demo/research_assistant/writer.py ADDED
@@ -0,0 +1,75 @@
+from langchain.chat_models import ChatOpenAI
+from langchain.prompts import ChatPromptTemplate
+from langchain_core.output_parsers import StrOutputParser
+from langchain_core.runnables import ConfigurableField
+
+WRITER_SYSTEM_PROMPT = "You are an AI critical thinker research assistant. Your sole purpose is to write well-written, critically acclaimed, objective and structured reports on given text."  # noqa: E501
+
+
+# Report prompts from https://github.com/assafelovic/gpt-researcher/blob/master/gpt_researcher/master/prompts.py
+RESEARCH_REPORT_TEMPLATE = """Information:
+--------
+{research_summary}
+--------
+
+Using the above information, answer the following question or topic: "{question}" in a detailed report -- \
+The report should focus on the answer to the question, should be well structured, informative, \
+in depth, with facts and numbers if available and a minimum of 1,200 words.
+
+You should strive to write the report as long as you can using all relevant and necessary information provided.
+You must write the report with markdown syntax.
+You MUST determine your own concrete and valid opinion based on the given information. Do NOT defer to general and meaningless conclusions.
+Write all used source urls at the end of the report, and make sure to not add duplicated sources, but only one reference for each.
+You must write the report in APA format.
+Please do your best, this is very important to my career."""  # noqa: E501
+
+
+RESOURCE_REPORT_TEMPLATE = """Information:
+--------
+{research_summary}
+--------
+
+Based on the above information, generate a bibliography recommendation report for the following question or topic: "{question}". \
+The report should provide a detailed analysis of each recommended resource, explaining how each source can contribute to finding answers to the research question. \
+Focus on the relevance, reliability, and significance of each source. \
+Ensure that the report is well-structured, informative, in-depth, and follows Markdown syntax. \
+Include relevant facts, figures, and numbers whenever available. \
+The report should have a minimum length of 1,200 words.
+
+Please do your best, this is very important to my career."""  # noqa: E501
+
+OUTLINE_REPORT_TEMPLATE = """Information:
+--------
+{research_summary}
+--------
+
+Using the above information, generate an outline for a research report in Markdown syntax for the following question or topic: "{question}". \
+The outline should provide a well-structured framework for the research report, including the main sections, subsections, and key points to be covered. \
+The research report should be detailed, informative, in-depth, and a minimum of 1,200 words. \
+Use appropriate Markdown syntax to format the outline and ensure readability.
+
+Please do your best, this is very important to my career."""  # noqa: E501
+
+model = ChatOpenAI(temperature=0)
+prompt = ChatPromptTemplate.from_messages(
+    [
+        ("system", WRITER_SYSTEM_PROMPT),
+        ("user", RESEARCH_REPORT_TEMPLATE),
+    ],
+).configurable_alternatives(
+    ConfigurableField("report_type"),
+    default_key="research_report",
+    resource_report=ChatPromptTemplate.from_messages(
+        [
+            ("system", WRITER_SYSTEM_PROMPT),
+            ("user", RESOURCE_REPORT_TEMPLATE),
+        ],
+    ),
+    outline_report=ChatPromptTemplate.from_messages(
+        [
+            ("system", WRITER_SYSTEM_PROMPT),
+            ("user", OUTLINE_REPORT_TEMPLATE),
+        ],
+    ),
+)
+chain = prompt | model | StrOutputParser()
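The writer applies the same configurable_alternatives pattern to the prompt itself, keyed by report_type, so one chain serves all three report styles. A sketch of requesting the outline variant (the inputs are illustrative):

    from research_assistant.writer import chain as writer_chain

    outline = writer_chain.invoke(
        {
            "question": "Is quantum error correction practical yet?",
            "research_summary": "Source Url: https://example.com\nSummary: ...",
        },
        config={"configurable": {"report_type": "outline_report"}},
    )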
requirements.txt CHANGED
@@ -1,4 +1,6 @@
 anthropic==0.7.7
+beautifulsoup4==4.12.2
+duckduckgo-search==4.0.0
 faiss-cpu==1.7.4
 langchain==0.0.348
 langsmith==0.0.69