mishig (HF staff) committed on
Commit 406a63c
1 Parent(s): 335a0b3

Update app.py

Files changed (1)
  app.py +75 -65
app.py CHANGED
@@ -1,69 +1,79 @@
-import time
 import os
+from huggingface_hub import InferenceClient
 import gradio as gr
-import torch
-from transformers import AutoModel, AutoTokenizer
-import meilisearch
-
-tokenizer = AutoTokenizer.from_pretrained('BAAI/bge-base-en-v1.5')
-model = AutoModel.from_pretrained('BAAI/bge-base-en-v1.5')
-model.eval()
-
-cuda_available = torch.cuda.is_available()
-print(f"CUDA available: {cuda_available}")
-
-meilisearch_client = meilisearch.Client("https://edge.meilisearch.com", os.environ["MEILISEARCH_KEY"])
-meilisearch_index_name = "docs-embed"
-meilisearch_index = meilisearch_client.index(meilisearch_index_name)
-
-output_options = ["RAG-friendly", "human-friendly"]
-
-def search_embeddings(query_text, output_option):
-    start_time_embedding = time.time()
-    query_prefix = 'Represent this sentence for searching code documentation: '
-    query_tokens = tokenizer(query_prefix + query_text, padding=True, truncation=True, return_tensors='pt', max_length=512)
-    # step1: tokenizer the query
-    with torch.no_grad():
-        # Compute token embeddings
-        model_output = model(**query_tokens)
-        sentence_embeddings = model_output[0][:, 0]
-    # normalize embeddings
-    sentence_embeddings = torch.nn.functional.normalize(sentence_embeddings, p=2, dim=1)
-    sentence_embeddings_list = sentence_embeddings[0].tolist()
-    elapsed_time_embedding = time.time() - start_time_embedding
-
-    # step2: search meilisearch
-    start_time_meilisearch = time.time()
-    response = meilisearch_index.search(
-        "", opt_params={"vector": sentence_embeddings_list, "hybrid": {"semanticRatio": 1.0}, "limit": 5, "attributesToRetrieve": ["text", "source_page_url", "source_page_title", "library"]}
-    )
-    elapsed_time_meilisearch = time.time() - start_time_meilisearch
-    hits = response["hits"]
-
-    sources_md = [f"[\"{hit['source_page_title']}\"]({hit['source_page_url']})" for hit in hits]
-    sources_md = ", ".join(sources_md)
-
-    # step3: present the results in markdown
-    if output_option == "human-friendly":
-        md = f"Stats:\n\nembedding time: {elapsed_time_embedding:.2f}s\n\nmeilisearch time: {elapsed_time_meilisearch:.2f}s\n\n---\n\n"
-        for hit in hits:
-            text, source_page_url, source_page_title = hit["text"], hit["source_page_url"], hit["source_page_title"]
-            source = f"src: [\"{source_page_title}\"]({source_page_url})"
-            md += text + f"\n\n{source}\n\n---\n\n"
-        return md, sources_md
-    elif output_option == "RAG-friendly":
-        hit_texts = [hit["text"] for hit in hits]
-        hit_text_str = "\n------------\n".join(hit_texts)
-        return hit_text_str, sources_md
-
-
-demo = gr.Interface(
-    fn=search_embeddings,
-    inputs=[gr.Textbox(label="enter your query", placeholder="Type Markdown here...", lines=10), gr.Radio(label="Select an output option", choices=output_options, value="RAG-friendly")],
-    outputs=[gr.Markdown(), gr.Markdown()],
-    title="HF Docs Emebddings Explorer",
-    allow_flagging="never"
+from gradio_client import Client
+
+
+model_id = "mistralai/Mixtral-8x7B-Instruct-v0.1"
+inference_client = InferenceClient(model_id, token=os.environ["HF_TOKEN"])
+docs_embeddings_space_id = "huggingchat/hf-docs"
+gradio_client = Client(docs_embeddings_space_id)
+
+
+SYSTEM_PROMPT = "You are a Hugging Face AI expert. Use the provided context to answer user questions. If the request is not related to Hugging Face Hub or Hugging Face open source libraries, you MUST respond with: \"I can only chat about Hugging Face\" and STOP answering."  # from https://huggingface.co/chat/settings/assistants/65f33e95d854946bb3f88dde
+
+
+def generate(prompt, history):
+    try:
+        # step 1: get relevant docs excerpts
+        rag_content, sourced_md = gradio_client.predict(
+            query_text=prompt,
+            output_option="RAG-friendly",
+            api_name="/predict"
+        )
+
+        # step 2: generate the answer
+        processed_prompt = f'''Answer the question: "{prompt}"\
+
+Here are relevant extracts from the docs that you can use to generate the answer:
+=====================
+{rag_content}
+====================='''
+
+        messages = [{"role": "system", "content": SYSTEM_PROMPT}]
+        for user_msg, assistant_msg in history:
+            assistant_msg = assistant_msg.split("\n\nsources:")[0]
+            messages.extend([{"role": "user", "content": user_msg}, {"role": "assistant", "content": assistant_msg}])
+
+        messages.append({"role": "user", "content": processed_prompt})
+
+        generate_kwargs = dict(
+            temperature=0.6,
+            max_tokens=8192,
+            top_p=0.95,
+        )
+
+        output = ""
+
+        for token in inference_client.chat_completion(messages, stream=True, **generate_kwargs):
+            new_content = token.choices[0].delta.content
+            output += new_content
+            yield output + f"\n\nsources: {sourced_md}"
+
+        return output + f"\n\nsources: {sourced_md}"
+    except Exception as e:
+        raise gr.Error(e)
+
+
+
+examples = ["How do I upload a model?",
+            "Can I change the color of my Space?",
+            "How do I finetune Stable Diffusion with Lora?",
+            "How do I run a model found on the Hugging Face Hub?"]
+
+
+
+demo = gr.ChatInterface(
+    fn=generate,
+    chatbot=gr.Chatbot(show_label=False, show_share_button=False, show_copy_button=True, likeable=True, layout="panel"),
+    title="HF Docs Bot 🤗",
+    examples=examples,
+    concurrency_limit=400,
+    stop_btn=None,
+    retry_btn=None,
+    undo_btn=None,
+    clear_btn=None,
+    cache_examples=False
 )
 
-if __name__ == "__main__":
-    demo.launch()
+demo.launch(show_api=False)
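
For reference, a minimal sketch (not part of this commit) of how the docs-embeddings Space that the new app.py depends on can be queried on its own with gradio_client, using the same endpoint and arguments as generate(); the query string below is only an illustrative placeholder:

from gradio_client import Client

# Query the "huggingchat/hf-docs" Space the same way generate() does:
# it returns RAG-friendly doc excerpts plus a markdown list of source links.
client = Client("huggingchat/hf-docs")
rag_content, sources_md = client.predict(
    query_text="How do I push a model to the Hub?",  # illustrative query
    output_option="RAG-friendly",
    api_name="/predict",
)
print(sources_md)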