Spaces:

philmui
/

globe

Runtime error

App Files Files Community

philmui committed on May 9, 2023

Commit

ca7e9c6

1 Parent(s): 7ab4d1f

adding an app

Browse files

Files changed (16) hide show

.gitattributes +1 -0
.gitignore +160 -0
LICENSE +21 -0
README.md +1 -1
agents.py +117 -0
app.py +118 -0
data/sales_data.csv +0 -0
images/.DS_Store +0 -0
images/chinook.png +0 -0
images/plugins.png +0 -0
images/salesforce.png +0 -0
models.py +90 -0
requirements.txt +15 -0
sandbox/flant5.py +16 -0
sandbox/google.py +16 -0
sandbox/test.py +61 -0

.gitattributes CHANGED Viewed

@@ -32,3 +32,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text

 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+*.sqlite filter=lfs diff=lfs merge=lfs -text

.gitignore ADDED Viewed

	@@ -0,0 +1,160 @@

+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+# C extensions
+*.so
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+# PyInstaller
+#  Usually these files are written by a python script from a template
+#  before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+cover/
+# Translations
+*.mo
+*.pot
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+# Flask stuff:
+instance/
+.webassets-cache
+# Scrapy stuff:
+.scrapy
+# Sphinx documentation
+docs/_build/
+# PyBuilder
+.pybuilder/
+target/
+# Jupyter Notebook
+.ipynb_checkpoints
+# IPython
+profile_default/
+ipython_config.py
+# pyenv
+#   For a library or package, you might want to ignore these files since the code is
+#   intended to run in multiple environments; otherwise, check them in:
+# .python-version
+# pipenv
+#   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+#   However, in case of collaboration, if having platform-specific dependencies or dependencies
+#   having no cross-platform support, pipenv may install dependencies that don't work, or not
+#   install all needed dependencies.
+#Pipfile.lock
+# poetry
+#   Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
+#   This is especially recommended for binary packages to ensure reproducibility, and is more
+#   commonly ignored for libraries.
+#   https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
+#poetry.lock
+# pdm
+#   Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
+#pdm.lock
+#   pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
+#   in version control.
+#   https://pdm.fming.dev/#use-with-ide
+.pdm.toml
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
+__pypackages__/
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+# SageMath parsed files
+*.sage.py
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+# Spyder project settings
+.spyderproject
+.spyproject
+# Rope project settings
+.ropeproject
+# mkdocs documentation
+/site
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+# Pyre type checker
+.pyre/
+# pytype static type analyzer
+.pytype/
+# Cython debug symbols
+cython_debug/
+# PyCharm
+#  JetBrains specific template is maintained in a separate JetBrains.gitignore that can
+#  be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
+#  and can be added to the global gitignore or merged into this file.  For a more nuclear
+#  option (not recommended) you can uncomment the following to ignore the entire idea folder.
+#.idea/

LICENSE ADDED Viewed

	@@ -0,0 +1,21 @@

+MIT License
+Copyright (c) 2023 Phil Mui
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.

README.md CHANGED Viewed

@@ -1,6 +1,6 @@
 ---
 title: Globe
-emoji: 🏃
 colorFrom: blue
 colorTo: indigo
 sdk: streamlit

 ---
 title: Globe
+emoji: 🌎
 colorFrom: blue
 colorTo: indigo
 sdk: streamlit

agents.py ADDED Viewed

	@@ -0,0 +1,117 @@

+##############################################################################
+# Agent interfaces that bridge private capability agents (pandas,
+# sql, ...), 3rd party plugin agents (search, weather, movie, ...),
+# and 3rd party LLMs
+#
+# @philmui
+# Mon May 1 18:34:45 PDT 2023
+##############################################################################
+from langchain.schema import HumanMessage
+from langchain.prompts import PromptTemplate, ChatPromptTemplate, \
+                              HumanMessagePromptTemplate
+from models import load_chat_agent, load_chained_agent, load_sales_agent, \
+                   load_sqlite_agent
+import logging
+logger = logging.getLogger(__name__)
+# To parse outputs and get structured data back
+from langchain.output_parsers import StructuredOutputParser, ResponseSchema
+instruct_template = """
+Please answer this question clearly with easy to follow reasoning:
+{query}
+If you don't know the answer, just reply: not available.
+"""
+instruct_prompt = PromptTemplate(
+    input_variables=["query"],
+    template=instruct_template
+)
+response_schemas = [
+    ResponseSchema(name="artist",
+                   description="The name of the musical artist"),
+    ResponseSchema(name="song",
+                   description="The name of the song that the artist plays")
+]
+output_parser = StructuredOutputParser.from_response_schemas(response_schemas)
+format_instructions = output_parser.get_format_instructions()
+LOCAL_MAGIC_TOKENS = ["my company", "for us", "our company", "our sales"]
+DIGITAL_MAGIC_TOKENS = ["digital media", "our database", "our digital"]
+def is_magic(sentence, magic_tokens):
+    return any([t in sentence.lower() for t in magic_tokens])
+chat_prompt = ChatPromptTemplate(
+    messages=[
+        HumanMessagePromptTemplate.from_template(
+            "Given a command from the user, extract the artist and \
+             song names \n{format_instructions}\n{user_prompt}")
+    ],
+    input_variables=["user_prompt"],
+    partial_variables={"format_instructions": format_instructions}
+)
+def chatAgent(chat_message):
+    try:
+        agent = load_chat_agent(verbose=True)
+        output = agent([HumanMessage(content=chat_message)])
+    except:
+        output = "Please rephrase and try chat again."
+    return output
+def instructAgent(question_text, model_name):
+    output = ""
+    if is_magic(question_text, LOCAL_MAGIC_TOKENS):
+        output = salesAgent(question_text)
+        print(f"🔹 salesAgent")
+    elif is_magic(question_text, DIGITAL_MAGIC_TOKENS):
+        output = chinookAgent(question_text, model_name)
+        print(f"🔹 chinookAgent")
+    else:
+        try:
+            instruction = instruct_prompt.format(query=question_text)
+            logger.info(f"instruction: {instruction}")
+            agent = load_chained_agent(verbose=True, model_name=model_name)
+            response = agent([instruction])
+            if response is None or "not available" in response["output"]:
+                response = ""
+            else:
+                output = response['output']
+                logger.info(f"🔹 Steps: {response['intermediate_steps']}")
+        except Exception as e:
+            output = "Please rephrase and try again ..."
+            print(f"\t{e}")
+    return output
+def salesAgent(instruction):
+    output = ""
+    try:
+        agent = load_sales_agent(verbose=True)
+        output = agent.run(instruction)
+        print("panda> " + output)
+    except:
+        output = "Please rephrase and try again for company sales data"
+    return output
+def chinookAgent(instruction, model_name):
+    output = ""
+    try:
+        agent = load_sqlite_agent(model_name)
+        output = agent.run(instruction)
+        print("chinook> " + output)
+    except:
+        output = "Please rephrase and try again for digital media data"
+    return output

app.py ADDED Viewed

	@@ -0,0 +1,118 @@

+##############################################################################
+# Main script that builds the UI & connects the logic for an LLM-driven
+# query frontend to a "Global Commerce" demo app.
+#
+# @philmui
+# Mon May 1 18:34:45 PDT 2023
+##############################################################################
+import streamlit as st
+from agents import instructAgent, salesAgent, chinookAgent, chatAgent
+##############################################################################
+st.set_page_config(page_title="Global",
+                   page_icon=":cart:",
+                   layout="wide")
+st.header("📦 Global 🛍️")
+col1, col2 = st.columns([1,1])
+with col1:
+    option_llm = st.selectbox(
+        "Model",
+        ('text-davinci-003',
+         'text-babbage-001',
+         'text-curie-001',
+         'text-ada-001',
+         'gpt-4',
+         'gpt-3.5-turbo',
+         'google/flan-t5-xl',
+         'databricks/dolly-v2-3b',
+         'bigscience/bloom-1b7')
+    )
+with col2:
+    option_mode = st.selectbox(
+        "LLM mode",
+        ("Instruct (all)",
+         "Chat (high temperature)",
+         "Wolfram-Alpha",
+         "Internal-Sales",
+         "Internal-Merchant"
+         )
+    )
+def get_question():
+    input_text = st.text_area(label="Your question ...",
+                              placeholder="Ask me anything ...",
+                              key="question_text", label_visibility="collapsed")
+    return input_text
+question_text = get_question()
+if question_text and len(question_text) > 1:
+    output=""
+    if option_mode == "Internal-Sales":
+        output = salesAgent(question_text)
+    elif option_mode == "Internal-Merchant":
+        output = chinookAgent(question_text, option_llm)
+    elif option_mode.startswith("Chat"):
+        response = chatAgent(question_text)
+        if response and response.content:
+            output = response.content
+        else:
+            output = response
+    else:
+        output = instructAgent(question_text, option_llm)
+    height = min(2*len(output), 280)
+    st.text_area(label="In response ...",
+                 value=output, height=height)
+##############################################################################
+st.markdown(
+    """
+    <style>
+    textarea[aria-label^="ex"] {
+            font-size: 0.8em !important;
+            font-family: Arial, sans-serif !important;
+            color: gray !important;
+    }
+    </style>
+    """,
+    unsafe_allow_html=True,
+)
+st.markdown("#### 3 types of reasoning:")
+col1, col2, col3 = st.columns([1,1,1])
+with col1:
+    st.markdown("__Common sense reasoning__")
+    st.text_area(label="ex1", label_visibility="collapsed", height=120,
+                 value="🔹 Why is the sky blue?\n" +
+                       "🔹 How to avoid touching a hot stove?\n" +
+                       "🔹 Please give tips to win a 3200m track race?\n" +
+                       "🔹 Please advise on how best to prepare for retirement?"
+                       )
+with col2:
+    st.markdown("__Local ('secure') reasoning__")
+    st.text_area(label="ex2", label_visibility="collapsed", height=120,
+                 value="🔹 For my company, what is the total sales " +
+                       "broken down by month?\n" +
+                       "🔹 How many total artists are there in each "+
+                       "genres in our digital media database?")
+with col3:
+    st.markdown("__Enhanced reasoning__ [🎵](https://www.youtube.com/watch?v=hTTUaImgCyU&t=62s)")
+    st.text_area(label="ex3", label_visibility="collapsed", height=120,
+                 value="🔹 Who is the president of South Korea?  " +
+                       "What is his favorite song? How old is he? " +
+                       "What is the smallest prime greater than his age?\n" +
+                       "🔹 What is the derivative of f(x)=3*log(x)*sin(x)?")
+st.image(image="images/plugins.png", width=700, caption="salesforce.com")
+st.image(image="images/chinook.png", width=420, caption="Digital Media Schema")
+##############################################################################

data/sales_data.csv ADDED Viewed

The diff for this file is too large to render. See raw diff

images/.DS_Store ADDED Viewed

Binary file (6.15 kB). View file

images/chinook.png ADDED Viewed

images/plugins.png ADDED Viewed

images/salesforce.png ADDED Viewed

models.py ADDED Viewed

	@@ -0,0 +1,90 @@

+##############################################################################
+# Utility methods for building LLMs and agent models
+#
+# @philmui
+# Mon May 1 18:34:45 PDT 2023
+##############################################################################
+import os
+import pandas as pd
+from langchain.agents import AgentType, load_tools, initialize_agent,\
+                            create_pandas_dataframe_agent
+from langchain.chat_models import ChatOpenAI
+from langchain.llms import OpenAI
+from langchain import SQLDatabase, SQLDatabaseChain, HuggingFaceHub
+OPENAI_LLMS = [
+    'text-davinci-003',
+    'text-babbage-001',
+    'text-curie-001',
+    'text-ada-001'
+]
+OPENAI_CHAT_LLMS = [
+    'gpt-3.5-turbo',
+    'gpt-4',
+]
+HUGGINGFACE_LLMS = [
+    'google/flan-t5-xl',
+    'databricks/dolly-v2-3b',
+    'bigscience/bloom-1b7'
+]
+HUGGINGFACEHUB_API_TOKEN = os.getenv("HUGGINGFACEHUB_API_TOKEN")
+def createLLM(model_name="text-davinci-003", temperature=0):
+    llm = None
+    if model_name in OPENAI_LLMS:
+        llm = OpenAI(model_name=model_name, temperature=temperature)
+    elif model_name in OPENAI_CHAT_LLMS:
+        llm = ChatOpenAI(model_name=model_name, temperature=temperature)
+    elif model_name in HUGGINGFACE_LLMS:
+        llm = HuggingFaceHub(repo_id=model_name,
+                             model_kwargs={"temperature":1e-10})
+    return llm
+def load_chat_agent(verbose=True):
+    return createLLM(OPENAI_CHAT_LLMS[0], temperature=0.5)
+def load_sales_agent(verbose=True):
+    '''
+    Hard-coded agent that gates an internal sales CSV file for demo
+    '''
+    chat = createLLM(OPENAI_CHAT_LLMS[0], temperature=0.5)
+    df = pd.read_csv("data/sales_data.csv")
+    agent = create_pandas_dataframe_agent(chat, df, verbose=verbose)
+    return agent
+def load_sqlite_agent(model_name="text-davinci-003"):
+    '''
+    Hard-coded agent that gates a sqlite DB of digital media for demo
+    '''
+    llm = createLLM(OPENAI_LLMS[0])
+    sqlite_db_path = "./data/Chinook_Sqlite.sqlite"
+    db = SQLDatabase.from_uri(f"sqlite:///{sqlite_db_path}")
+    db_chain = SQLDatabaseChain(llm=llm, database=db, verbose=True)
+    return db_chain
+from langchain.tools import DuckDuckGoSearchRun, GoogleSearchRun
+from langchain.utilities import GoogleSearchAPIWrapper
+def load_chained_agent(verbose=True, model_name="text-davinci-003"):
+    llm = createLLM(model_name)
+    toolkit = [DuckDuckGoSearchRun()]
+    toolkit += load_tools(["serpapi", "open-meteo-api", "news-api",
+                           "python_repl", "wolfram-alpha", "llm-math",
+                           "pal-math", "pal-colored-objects"],
+                            llm=llm,
+                            serpapi_api_key=os.getenv('SERPAPI_API_KEY'),
+                            news_api_key=os.getenv('NEWS_API_KEY'),
+                            tmdb_bearer_token=os.getenv('TMDB_BEARER_TOKEN')
+                            )
+    agent = initialize_agent(toolkit,
+                             llm,
+                             agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
+                             verbose=verbose,
+                             return_intermediate_steps=True)
+    return agent

requirements.txt ADDED Viewed

	@@ -0,0 +1,15 @@

+charset-normalizer
+chromadb
+fastapi
+duckduckgo-search
+google-api-python-client
+google-search-results
+langchain
+nltk
+numpy
+openai
+pandas
+pdfminer.six
+streamlit
+tabulate
+unstructured

sandbox/flant5.py ADDED Viewed

	@@ -0,0 +1,16 @@

+import numpy as np
+import matplotlib.pyplot as plt
+from transformers import T5Tokenizer, T5ForConditionalGeneration
+model_path = "/Users/pmui/models/flan-t5-xl"
+tokenizer = T5Tokenizer.from_pretrained(model_path)
+model = T5ForConditionalGeneration.from_pretrained(model_path, device_map="auto")
+def inference(input_text):
+  input_ids = tokenizer(input_text, return_tensors="pt").input_ids
+  outputs = model.generate(input_ids, max_length=200, bos_token_id=0)
+  result = tokenizer.decode(outputs[0], skip_special_tokens=True)
+  print(result)
+input_text = "What is the tallest building in the world?"
+inference(input_text)

sandbox/google.py ADDED Viewed

	@@ -0,0 +1,16 @@

+import os
+from googleapiclient.discovery import build
+import pprint
+my_api_key = os.getenv("GOOGLE_API_KEY")
+my_cse_id = os.getenv("GOOGLE_CSE_ID")
+def google_search(search_term, api_key, cse_id, **kwargs):
+    service = build("customsearch", "v1", developerKey=api_key)
+    res = service.cse().list(q=search_term, cx=cse_id, **kwargs).execute()
+    return res['items']
+results = google_search(
+    'stackoverflow site:en.wikipedia.org', my_api_key, my_cse_id, num=10)
+for result in results:
+    pprint.pprint(result)

sandbox/test.py ADDED Viewed

	@@ -0,0 +1,61 @@

+import os
+from models import load_chained_agent
+from agents import chatAgent
+import langchain
+from langchain.agents import load_tools
+from langchain.agents import initialize_agent
+from langchain.chat_models import ChatOpenAI
+from langchain.llms import OpenAI
+# print(chatAgent("why is the sky blue?"))
+# try:
+#     prompt_formatted = prompt.format(query="""
+#     Who is the president of South Korea?  What is his age?  What is the digit sum of his age?
+#     """)
+#     agent = load_chained_agent(verbose=True)
+#     response = agent({"input": prompt_formatted})
+#     print(response["output"])
+# except Exception as e:
+#     print(e)
+from langchain.tools import DuckDuckGoSearchRun, GoogleSearchRun
+from langchain.utilities import GoogleSearchAPIWrapper
+def load_chained_agent(verbose=True, model_name="text-davinci-003"):
+    llm = OpenAI(model_name=model_name, temperature=0)
+    toolkit = [GoogleSearchRun(), DuckDuckGoSearchRun()]
+    toolkit += load_tools(["open-meteo-api", "news-api",
+                          "python_repl", "wolfram-alpha",
+                          "pal-math", "pal-colored-objects"],
+                            llm=llm,
+                            serpapi_api_key=os.getenv('SERPAPI_API_KEY'),
+                            news_api_key=os.getenv('NEWS_API_KEY'),
+                            tmdb_bearer_token=os.getenv('TMDB_BEARER_TOKEN')
+                            )
+    agent = initialize_agent(toolkit,
+                             llm,
+                             agent="zero-shot-react-description",
+                             verbose=verbose,
+                             return_intermediate_steps=True)
+    return agent
+PROMPT = "Who is the president of South Korea?  How old is he?  What is the smallest prime greater than his age?"
+if __name__ == '__main__':
+    agent = load_chained_agent()
+    response = agent(PROMPT)
+    if response is not None:
+        """
+        print("Steps: ")
+        for action in response['intermediate_steps']:
+            print()
+            print(f"==> Tool: {action[0].tool}")
+            print(f"    Input: {action[0].tool_input}")
+            print(f"    Thought: {action[0].log}")
+            print(f"    Finding: {action[1]}")
+        """
+        print(f"input: {response['input']}")
+        print(f"output: {response['output']}")