from sentence_transformers import SentenceTransformer import numpy as np import requests import gradio as gr def request_and_response(url): response = requests.get(url) papers = response.json() return papers def extract_abstracts_and_ids(papers): abstracts = [paper["paper"]["summary"] for paper in papers] paper_ids = [paper["paper"]["id"] for paper in papers] return abstracts, paper_ids def get_embeddings(model, texts): embeddings = model.encode(texts) return embeddings def compute_similarity(model, embeddings1, embeddings2): similarities = model.similarity(embeddings1, embeddings2) return similarities def find_closest(similarities, paper_ids): best_match_idx = np.argmax(similarities) best_match_id = paper_ids[best_match_idx] return best_match_id # Step 0: Get the model model = SentenceTransformer("all-MiniLM-L6-v2") # Step 1: Get papers from API papers = request_and_response("https://hf.co/api/daily_papers") # Step 2: Extract abstracts and paper ids abstracts, paper_ids = extract_abstracts_and_ids(papers) # Step 3: Embed Query and the Abstracts of papers abstract_embeddings = get_embeddings(model, abstracts) def get_closest_paper(query): query_embeddings = get_embeddings(model, [query]) # Step 4: Find similarity scores similarities = compute_similarity(model, query_embeddings, abstract_embeddings) # Step 5: Find the closest match best_match_id = find_closest(similarities, paper_ids) # Step 6: Get the best match paper title and id paper = request_and_response(f"https://hf.co/api/papers/{best_match_id}") title = paper["title"] summary = paper["summary"] return title, f"https://huggingface.co/papers/{best_match_id}", summary with gr.Blocks() as iface: gr.Markdown("""\ # Query AK's Daily Paper Collection Query papers you want to read from AK's daily paper collection. Ask what you want to read, and we will provide you with the paper id that serves your query the best. It is a work in progress (please be kind)\ Thanks to Tom Arsen for reviewing the code and working alongside.""") query = gr.Textbox(placeholder="What do you have in mind?") with gr.Row(): title = gr.Textbox() paper_link = gr.Textbox() abstract = gr.Textbox() btn = gr.Button(value="Submit") btn.click(get_closest_paper, query, [title, paper_link, abstract]) if __name__ == "__main__": iface.launch()