Spaces:
Runtime error
Runtime error
File size: 2,686 Bytes
38585cf 406a63c 38585cf 406a63c 38585cf 406a63c |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 |
import os
from huggingface_hub import InferenceClient
import gradio as gr
from gradio_client import Client
# Chat model used to write the answers; the access token is read from the
# HF_TOKEN environment variable (a missing variable raises KeyError at import).
model_id = "mistralai/Mixtral-8x7B-Instruct-v0.1"
inference_client = InferenceClient(
    model=model_id,
    token=os.environ["HF_TOKEN"],
)

# Space queried for documentation excerpts related to the user's question.
docs_embeddings_space_id = "huggingchat/hf-docs"
gradio_client = Client(docs_embeddings_space_id)

# System message sent at the start of every conversation.
# from https://huggingface.co/chat/settings/assistants/65f33e95d854946bb3f88dde
# NOTE: the wording (including the "realted" typo) is kept verbatim from that source.
SYSTEM_PROMPT = (
    "You are a Hugging Face AI expert. "
    "Use the provided context to answer user questions. "
    "If the request is not realted to Hugging Face Hub or Hugging Face open source libraries, "
    'you MUST respond with: "I can only chat about Hugging Face" and STOP answering.'
)
def generate(prompt, history):
    """Stream an answer to ``prompt`` grounded in retrieved docs excerpts.

    The function first asks ``gradio_client`` for excerpts matching the
    prompt, then sends the system prompt, the previous turns and the
    excerpt-augmented question to ``inference_client`` and streams the reply.

    Args:
        prompt: The user's latest message.
        history: Iterable of ``(user_msg, assistant_msg)`` pairs for the
            earlier turns of the conversation.

    Yields:
        The answer text accumulated so far, followed by a trailing
        "sources:" footer built from the retrieval result.

    Raises:
        gr.Error: If retrieval or generation fails; the original exception
            is attached as the cause.
    """
    try:
        # step 1: get relevant docs excerpts
        rag_content, sourced_md = gradio_client.predict(
            query_text=prompt,
            output_option="RAG-friendly",
            api_name="/predict",
        )

        # step 2: generate answer
        # The question and the intro sentence are separated by a real newline;
        # a backslash line-continuation here would glue them together.
        processed_prompt = (
            f'Answer the question: "{prompt}"\n'
            "Here are relevant extract from docs that you can use to generate the answer:\n"
            "=====================\n"
            f"{rag_content}\n"
            "====================="
        )

        messages = [{"role": "system", "content": SYSTEM_PROMPT}]
        for user_msg, assistant_msg in history:
            # Earlier answers carry the sources footer added below; strip it so
            # the model only sees its own text. A turn without a reply (None)
            # is replayed as an empty assistant message instead of crashing.
            assistant_msg = (assistant_msg or "").split("\n\nsources:")[0]
            messages.extend([
                {"role": "user", "content": user_msg},
                {"role": "assistant", "content": assistant_msg},
            ])
        messages.append({"role": "user", "content": processed_prompt})

        generate_kwargs = dict(
            temperature=0.6,
            max_tokens=8192,
            top_p=0.95,
        )
        sources_footer = f"\n\nsources: {sourced_md}"

        output = ""
        streamed = False
        for token in inference_client.chat_completion(messages, stream=True, **generate_kwargs):
            # A streamed chunk may carry no choices or a delta whose content is
            # None; skip those rather than failing on `str += None`.
            if not token.choices:
                continue
            new_content = token.choices[0].delta.content
            if not new_content:
                continue
            output += new_content
            streamed = True
            yield output + sources_footer

        if not streamed:
            # Nothing was generated: still surface the sources to the user.
            yield output + sources_footer
        return output + sources_footer
    except Exception as e:
        # gr.Error takes a message string; keep the original exception as cause.
        raise gr.Error(str(e)) from e
# Sample questions offered to the user in the chat UI.
examples = [
    "How do upload a model?",
    "Can I change the color of my Space?",
    "How do I finetune Stable Diffusion with Lora?",
    "How do I run a model found on the Hugging Face Hub?",
]

# Chat interface backed by the streaming `generate` handler.
demo = gr.ChatInterface(
    fn=generate,
    title="HF Docs Bot 🤗",
    chatbot=gr.Chatbot(
        layout="panel",
        likeable=True,
        show_copy_button=True,
        show_label=False,
        show_share_button=False,
    ),
    examples=examples,
    cache_examples=False,
    concurrency_limit=400,
    # The stop/retry/undo/clear buttons are all set to None
    # (per the Gradio docs, None means the button is not created).
    stop_btn=None,
    retry_btn=None,
    undo_btn=None,
    clear_btn=None,
)

# Start the app; the API page is turned off via show_api=False.
demo.launch(show_api=False)
|