ariG23498 HF staff committed on
Commit
f03fa61
·
1 Parent(s): c5e4aa3
Files changed (2) hide show
  1. app.py +76 -0
  2. requirements.txt +2 -0
app.py ADDED
@@ -0,0 +1,76 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from sentence_transformers import SentenceTransformer
2
+ import numpy as np
3
+ import requests
4
+
5
+ import gradio as gr
6
+
7
def request_and_response(url, timeout=30):
    """Fetch ``url`` with an HTTP GET and return the decoded JSON body.

    Args:
        url: Address of an endpoint that answers with JSON.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` may block indefinitely on a stalled
            connection.

    Returns:
        The parsed JSON payload, exactly as ``response.json()`` yields it.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.Timeout: If the server does not answer within ``timeout``.
    """
    response = requests.get(url, timeout=timeout)
    # Fail loudly on HTTP errors instead of trying to treat an error page
    # as if it were the expected payload.
    response.raise_for_status()
    return response.json()
11
+
12
def extract_abstracts_and_ids(papers):
    """Split the paper listing into parallel lists of abstracts and ids.

    Every entry of ``papers`` is read as ``entry["paper"]["summary"]`` and
    ``entry["paper"]["id"]``.

    Returns:
        A ``(abstracts, paper_ids)`` tuple of two lists, each in the same
        order as ``papers``.
    """
    # First pass: collect every abstract.
    abstracts = []
    for entry in papers:
        abstracts.append(entry["paper"]["summary"])

    # Second pass: collect the matching ids.
    paper_ids = []
    for entry in papers:
        paper_ids.append(entry["paper"]["id"])

    return abstracts, paper_ids
16
+
17
def get_embeddings(model, texts):
    """Encode ``texts`` with ``model`` and return what ``model.encode`` yields."""
    return model.encode(texts)
20
+
21
def compute_similarity(model, embeddings1, embeddings2):
    """Score ``embeddings1`` against ``embeddings2`` via ``model.similarity``."""
    return model.similarity(embeddings1, embeddings2)
24
+
25
def find_closest(similarities, paper_ids):
    """Return the paper id at the position of the largest similarity score.

    ``np.argmax`` is called without an axis, so the position refers to the
    flattened ``similarities``.
    """
    return paper_ids[np.argmax(similarities)]
29
+
30
+ # Step 0: Load the all-MiniLM-L6-v2 sentence-embedding model once, at import time
31
+ model = SentenceTransformer("all-MiniLM-L6-v2")
32
+
33
+ # Step 1: Fetch the daily-papers listing from the API (done once, at import time)
34
+ papers = request_and_response("https://hf.co/api/daily_papers")
35
+
36
+ # Step 2: Extract the abstracts and paper ids as two parallel lists
37
+ abstracts, paper_ids = extract_abstracts_and_ids(papers)
38
+
39
+ # Step 3: Embed the paper abstracts up front; the query is embedded per request in get_closest_paper
40
+ abstract_embeddings = get_embeddings(model, abstracts)
41
+
42
def get_closest_paper(query):
    """Look up the daily paper whose abstract best matches ``query``.

    The query is embedded with the module-level ``model`` and scored against
    the precomputed ``abstract_embeddings``; the winning paper's record is
    then fetched from the papers API.

    Returns:
        A ``(title, url, summary)`` tuple for the best-matching paper.
    """
    # Step 4: Score the embedded query against every abstract embedding
    scores = compute_similarity(
        model,
        get_embeddings(model, [query]),
        abstract_embeddings,
    )

    # Step 5: Pick the id of the highest-scoring paper
    top_id = find_closest(scores, paper_ids)

    # Step 6: Fetch that paper's record for its title and summary
    record = request_and_response(f"https://hf.co/api/papers/{top_id}")

    return record["title"], f"https://huggingface.co/papers/{top_id}", record["summary"]
57
+
58
+
59
+ # Build the Gradio UI: intro text, a query box, three output boxes (title, link, abstract)
+ # and a Submit button whose click runs get_closest_paper(query) and fills the outputs in order.
+ with gr.Blocks() as iface:
60
+ gr.Markdown("""\
61
+ # Query AK's Daily Paper Collection
62
+ Query papers you want to read from AK's daily paper collection. Ask what you want to read,
63
+ and we will provide you with the paper id that serves your query the best. It is a work in progress (please be kind)\
64
+
65
+ Thanks to Tom Arsen for reviewing the code and working alongside.""")
66
+ query = gr.Textbox(placeholder="What do you have in mind?")
67
+ with gr.Row():
68
+ title = gr.Textbox()
69
+ paper_link = gr.Textbox()
70
+ abstract = gr.Textbox()
71
+ btn = gr.Button(value="Submit")
72
+ btn.click(get_closest_paper, query, [title, paper_link, abstract])
73
+
74
+
75
+ if __name__ == "__main__":
76
+ iface.launch()
requirements.txt ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ gradio
2
+ sentence-transformers