mishig (HF staff) committed on
Commit 406a63c
1 Parent(s): 335a0b3

Update app.py

Files changed (1)
  app.py +75 -65
app.py CHANGED
@@ -1,69 +1,79 @@
-import time
 import os
+from huggingface_hub import InferenceClient
 import gradio as gr
-import torch
-from transformers import AutoModel, AutoTokenizer
-import meilisearch
-
-tokenizer = AutoTokenizer.from_pretrained('BAAI/bge-base-en-v1.5')
-model = AutoModel.from_pretrained('BAAI/bge-base-en-v1.5')
-model.eval()
-
-cuda_available = torch.cuda.is_available()
-print(f"CUDA available: {cuda_available}")
-
-meilisearch_client = meilisearch.Client("https://edge.meilisearch.com", os.environ["MEILISEARCH_KEY"])
-meilisearch_index_name = "docs-embed"
-meilisearch_index = meilisearch_client.index(meilisearch_index_name)
-
-output_options = ["RAG-friendly", "human-friendly"]
-
-def search_embeddings(query_text, output_option):
-    start_time_embedding = time.time()
-    query_prefix = 'Represent this sentence for searching code documentation: '
-    query_tokens = tokenizer(query_prefix + query_text, padding=True, truncation=True, return_tensors='pt', max_length=512)
-    # step1: tokenizer the query
-    with torch.no_grad():
-        # Compute token embeddings
-        model_output = model(**query_tokens)
-        sentence_embeddings = model_output[0][:, 0]
-    # normalize embeddings
-    sentence_embeddings = torch.nn.functional.normalize(sentence_embeddings, p=2, dim=1)
-    sentence_embeddings_list = sentence_embeddings[0].tolist()
-    elapsed_time_embedding = time.time() - start_time_embedding
-
-    # step2: search meilisearch
-    start_time_meilisearch = time.time()
-    response = meilisearch_index.search(
-        "", opt_params={"vector": sentence_embeddings_list, "hybrid": {"semanticRatio": 1.0}, "limit": 5, "attributesToRetrieve": ["text", "source_page_url", "source_page_title", "library"]}
-    )
-    elapsed_time_meilisearch = time.time() - start_time_meilisearch
-    hits = response["hits"]
-
-    sources_md = [f"[\"{hit['source_page_title']}\"]({hit['source_page_url']})" for hit in hits]
-    sources_md = ", ".join(sources_md)
-
-    # step3: present the results in markdown
-    if output_option == "human-friendly":
-        md = f"Stats:\n\nembedding time: {elapsed_time_embedding:.2f}s\n\nmeilisearch time: {elapsed_time_meilisearch:.2f}s\n\n---\n\n"
-        for hit in hits:
-            text, source_page_url, source_page_title = hit["text"], hit["source_page_url"], hit["source_page_title"]
-            source = f"src: [\"{source_page_title}\"]({source_page_url})"
-            md += text + f"\n\n{source}\n\n---\n\n"
-        return md, sources_md
-    elif output_option == "RAG-friendly":
-        hit_texts = [hit["text"] for hit in hits]
-        hit_text_str = "\n------------\n".join(hit_texts)
-        return hit_text_str, sources_md
-
-
-demo = gr.Interface(
-    fn=search_embeddings,
-    inputs=[gr.Textbox(label="enter your query", placeholder="Type Markdown here...", lines=10), gr.Radio(label="Select an output option", choices=output_options, value="RAG-friendly")],
-    outputs=[gr.Markdown(), gr.Markdown()],
-    title="HF Docs Emebddings Explorer",
-    allow_flagging="never"
+from gradio_client import Client
+
+
+model_id = "mistralai/Mixtral-8x7B-Instruct-v0.1"
+inference_client = InferenceClient(model_id, token=os.environ["HF_TOKEN"])
+docs_embeddings_space_id = "huggingchat/hf-docs"
+gradio_client = Client(docs_embeddings_space_id)
+
+
+SYSTEM_PROMPT = "You are a Hugging Face AI expert. Use the provided context to answer user questions. If the request is not related to Hugging Face Hub or Hugging Face open source libraries, you MUST respond with: \"I can only chat about Hugging Face\" and STOP answering."  # from https://huggingface.co/chat/settings/assistants/65f33e95d854946bb3f88dde
+
+
+def generate(prompt, history):
+    try:
+        # step 1: get relevant docs excerpts
+        rag_content, sourced_md = gradio_client.predict(
+            query_text=prompt,
+            output_option="RAG-friendly",
+            api_name="/predict"
+        )
+
+        # step 2: generate the answer
+        processed_prompt = f'''Answer the question: "{prompt}"\
+
+Here are relevant extracts from the docs that you can use to generate the answer:
+=====================
+{rag_content}
+====================='''
+
+        messages = [{"role": "system", "content": SYSTEM_PROMPT}]
+        for user_msg, assistant_msg in history:
+            assistant_msg = assistant_msg.split("\n\nsources:")[0]
+            messages.extend([{"role": "user", "content": user_msg}, {"role": "assistant", "content": assistant_msg}])
+
+        messages.append({"role": "user", "content": processed_prompt})
+
+        generate_kwargs = dict(
+            temperature=0.6,
+            max_tokens=8192,
+            top_p=0.95,
+        )
+
+        output = ""
+
+        for token in inference_client.chat_completion(messages, stream=True, **generate_kwargs):
+            new_content = token.choices[0].delta.content
+            output += new_content
+            yield output + f"\n\nsources: {sourced_md}"
+
+        return output + f"\n\nsources: {sourced_md}"
+    except Exception as e:
+        raise gr.Error(e)
+
+
+
+examples = ["How do I upload a model?",
+            "Can I change the color of my Space?",
+            "How do I finetune Stable Diffusion with Lora?",
+            "How do I run a model found on the Hugging Face Hub?"]
+
+
+
+demo = gr.ChatInterface(
+    fn=generate,
+    chatbot=gr.Chatbot(show_label=False, show_share_button=False, show_copy_button=True, likeable=True, layout="panel"),
+    title="HF Docs Bot 🤗",
+    examples=examples,
+    concurrency_limit=400,
+    stop_btn=None,
+    retry_btn=None,
+    undo_btn=None,
+    clear_btn=None,
+    cache_examples=False
 )
 
-if __name__ == "__main__":
-    demo.launch()
+demo.launch(show_api=False)
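
For reference, a minimal sketch (not part of this commit) of how the docs-embeddings Space that the new app.py depends on can be queried on its own with gradio_client, using the same endpoint and arguments as generate(); the query string below is only an illustrative placeholder:

from gradio_client import Client

# Query the "huggingchat/hf-docs" Space the same way generate() does:
# it returns RAG-friendly doc excerpts plus a markdown list of source links.
client = Client("huggingchat/hf-docs")
rag_content, sources_md = client.predict(
    query_text="How do I push a model to the Hub?",  # illustrative query
    output_option="RAG-friendly",
    api_name="/predict",
)
print(sources_md)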