Spaces:
Runtime error
Runtime error
File size: 2,855 Bytes
9f54a3b |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 |
import streamlit as st
from openai import OpenAI
import os
import sys
from langchain.callbacks import StreamlitCallbackHandler
from dotenv import load_dotenv, dotenv_values
load_dotenv()
# Seed a placeholder entry in Streamlit's per-session state the first time the
# script runs for a session; later reruns leave the existing value alone.
if 'key' not in st.session_state:
    st.session_state['key'] = 'value'
# Initialize the OpenAI-compatible client, but point it at the Hugging Face
# Inference API (TGI) endpoint instead of the default OpenAI endpoint.
client = OpenAI(
    base_url="https://api-inference.huggingface.co/v1",
    # Read the access token from the environment (load_dotenv() above also
    # picks it up from a local .env file) so it is never hard-coded here.
    # If the variable is unset this passes None, and the client falls back to
    # its own default credential lookup exactly as before.
    api_key=os.environ.get("HUGGINGFACEHUB_API_TOKEN"),
)
# Supported models: display name shown in the UI -> Hugging Face repo id.
model_links = {
    "Mistral": "mistralai/Mistral-7B-Instruct-v0.2",
    "Gemma": "google/gemma-7b-it",
}

# Display names offered in the model picker, in the order declared above.
models = list(model_links)
# Sidebar dropdown that lets the user choose which model to chat with.
selected_model = st.sidebar.selectbox("Select Model", models)

# Resolve the chosen display name to its repo id.
repo_id = model_links[selected_model]

st.title(f'ChatBot Using {selected_model}')

# Record the repo id in session state under the model's display name the
# first time that model is selected in this session.
if selected_model not in st.session_state:
    st.session_state[selected_model] = repo_id
# Start with an empty chat history the first time the session runs.
if "messages" not in st.session_state:
    st.session_state.messages = []

# Streamlit re-executes the script on every interaction, so replay the stored
# conversation to keep earlier turns visible.
for past_message in st.session_state.messages:
    with st.chat_message(past_message["role"]):
        st.markdown(past_message["content"])
# Handle one chat turn: runs only on the rerun in which the user submitted
# text through the chat input box.
if prompt := st.chat_input("What is up?"):
    # Show the user's message in its own chat bubble.
    with st.chat_message("user"):
        st.markdown(prompt)
    # Store it so it is replayed on later reruns and included in the request.
    st.session_state.messages.append({"role": "user", "content": prompt})

    # Request a streamed completion for the whole conversation so far and
    # render it inside the assistant's chat bubble.
    with st.chat_message("assistant"):
        stream = client.chat.completions.create(
            model=model_links[selected_model],
            messages=[
                {"role": m["role"], "content": m["content"]}
                for m in st.session_state.messages
            ],
            temperature=0.5,
            stream=True,
            max_tokens=3000,
        )
        # write_stream displays the chunks as they arrive; its return value is
        # what gets stored as the assistant's reply below.
        response = st.write_stream(stream)

    # Store the assistant's reply so the next turn sees the full history.
    st.session_state.messages.append({"role": "assistant", "content": response})