Spaces:

Kexin2000
/

tkx_final_project

Sleeping

App Files Files Community

Kexin2000 committed on Jan 14, 2024

Commit

b9a0194

•

1 Parent(s): 0945cdf

Upload 3 files

Browse files

Files changed (3) hide show

app.py +129 -0
requirements.in +3 -0
requirements.txt +62 -0

app.py ADDED Viewed

	@@ -0,0 +1,129 @@

+import urllib.request
+import fitz
+import re
+import numpy as np
+import tensorflow_hub as hub
+import openai
+import gradio as gr
+import os
+from sklearn.neighbors import NearestNeighbors
+import requests
+from cachetools import cached, TTLCache
# Lifetime of cached API responses: six hours, expressed in seconds.
CACHE_TIME = 6 * 60 * 60
# Module-wide recommender object; it is only initialised to None in the visible code.
recommender = None
# Globals and helpers for the second feature (paper recommendations) follow.
@cached(cache=TTLCache(maxsize=500, ttl=CACHE_TIME))
def get_recommendations_from_semantic_scholar(semantic_scholar_id: str):
    """Fetch recommended papers for one paper from the Semantic Scholar API.

    Results are memoised per ``semantic_scholar_id`` for ``CACHE_TIME`` seconds
    (at most 500 entries). Failed calls raise and are therefore not cached.

    Args:
        semantic_scholar_id: Paper identifier sent as the single positive
            example, e.g. ``"ArXiv:1706.03762"``.

    Returns:
        The value of the ``"recommendedPapers"`` key of the JSON response.

    Raises:
        gr.Error: If the request fails, the body is not valid JSON, or the
            response has no ``"recommendedPapers"`` key.
    """
    try:
        response = requests.post(
            "https://api.semanticscholar.org/recommendations/v1/papers/",
            json={
                "positivePaperIds": [semantic_scholar_id],
            },
            params={"fields": "externalIds,title,year", "limit": 10},
            # Without a timeout a stalled connection would block the UI forever.
            timeout=30,
        )
        return response.json()["recommendedPapers"]
    except (requests.RequestException, ValueError, KeyError) as e:
        # Network failures and malformed JSON are surfaced the same way as a
        # missing key so the user sees a Gradio error instead of a traceback.
        raise gr.Error(
            "获取推荐时出错，如果这是一篇新论文或尚未被Semantic Scholar索引，则可能尚未有推荐。"
        ) from e
def filter_recommendations(recommendations, max_paper_count=5):
    """Keep only recommendations that have an arXiv id, capped in number.

    Args:
        recommendations: Iterable of recommendation dicts. Each may carry an
            ``"externalIds"`` mapping; entries where it is missing or ``None``
            are treated as having no arXiv id instead of raising.
        max_paper_count: Maximum number of entries to return.

    Returns:
        A new list with the first ``max_paper_count`` recommendations whose
        ``externalIds["ArXiv"]`` is not ``None``, in their original order.
    """
    arxiv_papers = [
        rec
        for rec in recommendations
        # ``or {}`` guards against an explicit null ``externalIds`` value.
        if (rec.get("externalIds") or {}).get("ArXiv") is not None
    ]
    # Slicing already handles lists shorter than the cap.
    return arxiv_papers[:max_paper_count]
@cached(cache=TTLCache(maxsize=500, ttl=CACHE_TIME))
def get_paper_title_from_arxiv_id(arxiv_id):
    """Look up a paper's title through the Hugging Face papers API.

    Results are memoised per ``arxiv_id`` for ``CACHE_TIME`` seconds
    (at most 500 entries).

    Args:
        arxiv_id: Identifier appended to ``https://huggingface.co/api/papers/``.

    Returns:
        The ``"title"`` field of the JSON response.

    Raises:
        gr.Error: If the request, JSON decoding, or title lookup fails.
    """
    try:
        response = requests.get(
            f"https://huggingface.co/api/papers/{arxiv_id}",
            # Without a timeout a stalled connection would block the UI forever.
            timeout=30,
        )
        return response.json()["title"]
    except Exception as e:
        # Log to the console as well, then surface the failure in the UI.
        print(f"获取论文标题时出错 {arxiv_id}: {e}")
        raise gr.Error(f"获取论文标题时出错 {arxiv_id}: {e}") from e
def format_recommendation_into_markdown(arxiv_id, recommendations):
    """Render recommendations as a Markdown bullet list.

    Args:
        arxiv_id: Id of the paper the recommendations belong to. It is not
            used in the output; the parameter is kept for the existing callers.
        recommendations: Iterable of dicts with ``"title"`` and
            ``externalIds["ArXiv"]``; ``"year"`` is optional.

    Returns:
        A Markdown string: a header line followed by one bullet per paper
        linking to its Hugging Face papers page. The ``(year)`` suffix is
        omitted when the year is missing or ``None`` instead of printing
        ``(None)``.
    """
    parts = ["以下论文由Semantic Scholar API推荐\n\n"]
    for rec in recommendations:
        hub_paper_url = f"https://huggingface.co/papers/{rec['externalIds']['ArXiv']}"
        year = rec.get("year")
        year_suffix = f" ({year})" if year is not None else ""
        parts.append(f"* [{rec['title']}]({hub_paper_url}){year_suffix}\n")
    # Join once instead of growing the string inside the loop.
    return "".join(parts)
def parse_arxiv_id_from_paper_url(url):
    """Extract the trailing path segment of a paper URL as its arXiv id.

    Surrounding whitespace, a query string, a fragment, a trailing slash and
    a ``.pdf`` suffix are ignored, so ``https://huggingface.co/papers/1706.03762``
    and ``https://arxiv.org/pdf/1706.03762.pdf`` both yield ``"1706.03762"``.
    """
    path = url.strip().split("?", 1)[0].split("#", 1)[0].rstrip("/")
    arxiv_id = path.rsplit("/", 1)[-1]
    if arxiv_id.lower().endswith(".pdf"):
        arxiv_id = arxiv_id[: -len(".pdf")]
    return arxiv_id


def return_recommendations(url):
    """Return Markdown-formatted paper recommendations for a paper URL.

    The arXiv id is parsed from ``url``, recommendations are fetched for
    ``"ArXiv:<id>"``, reduced to arXiv-hosted papers, and rendered as Markdown.

    Args:
        url: URL of the paper whose last path segment is its arXiv id.

    Returns:
        The Markdown string produced by ``format_recommendation_into_markdown``.

    Raises:
        gr.Error: If no id can be parsed from ``url`` or fetching fails.
    """
    arxiv_id = parse_arxiv_id_from_paper_url(url or "")
    if not arxiv_id:
        # An empty textbox would otherwise query the API for "ArXiv:".
        raise gr.Error("请输入有效的论文URL。")
    recommendations = get_recommendations_from_semantic_scholar(f"ArXiv:{arxiv_id}")
    filtered_recommendations = filter_recommendations(recommendations)
    return format_recommendation_into_markdown(arxiv_id, filtered_recommendations)
# Gradio interface: page title and introductory text shown at the top.
title = 'PDF GPT Turbo'
description = """ PDF GPT Turbo允许您与PDF文件交流。它使用Google的Universal Sentence Encoder与Deep averaging network（DAN）来提供无幻觉的响应，通过提高OpenAI的嵌入质量。它在方括号（[Page No.]）中引用页码，显示信息的位置，增强了响应的可信度。"""
# Predefined questions offered as clickable examples.
questions = [
    "研究调查了什么？",
    "能否提供本文的摘要？",
    "这项研究使用了什么方法？",
    # Add more questions here as needed.
]
# The CSS length needs a unit; a bare "1200" is ignored by browsers.
with gr.Blocks(css="""#chatbot { font-size: 14px; min-height: 1200px; }""") as demo:
    gr.Markdown(f'<center><h3>{title}</h3></center>')
    gr.Markdown(description)
    with gr.Row():
        # Left column: credentials, document source, question and model choice.
        with gr.Group():
            gr.Markdown('<p style="text-align:center">在这里获取您的Open AI API密钥 <a href="https://platform.openai.com/account/api-keys">here</a></p>')
            with gr.Accordion("API Key"):
                # Textbox masks its input via type='password'.
                openAI_key = gr.Textbox(label='在此输入您的OpenAI API密钥', type='password')
                url = gr.Textbox(label='在此输入PDF的URL   (示例: https://arxiv.org/pdf/1706.03762.pdf )')
                gr.Markdown("<center><h4>或</h4></center>")
                file = gr.File(label='在此上传您的PDF/研究论文/书籍', file_types=['.pdf'])
            question = gr.Textbox(label='在此输入您的问题')
            gr.Examples(
                [[q] for q in questions],
                inputs=[question],
                label="预定义问题：点击问题以自动填充输入框，然后按Enter键！",
            )
            # The initial selection is passed as `value`.
            model = gr.Radio([
                'gpt-3.5-turbo',
                'gpt-3.5-turbo-16k',
                'gpt-3.5-turbo-0613',
                'gpt-3.5-turbo-16k-0613',
                'text-davinci-003',
                'gpt-4',
                'gpt-4-32k'
            ], label='选择模型', value='gpt-3.5-turbo')
            # The deprecated `.style(full_width=True)` call was dropped;
            # NOTE(review): confirm the button still spans the column as intended.
            btn = gr.Button(value='提交')
        # Right column: chat history.
        with gr.Group():
            chatbot = gr.Chatbot(label="聊天历史", elem_id="chatbot")
    # Bind the submit button to question_answer.
    # NOTE(review): question_answer is not defined or imported anywhere in the
    # visible file, so this line raises NameError until it is provided.
    btn.click(
        question_answer,
        inputs=[chatbot, url, file, question, openAI_key, model],
        outputs=[chatbot],
    )
    # Second tab: paper recommendations. Components are created inside the Tab
    # context and the callback is attached through the button's click event.
    with gr.Tab("论文推荐"):
        paper_url = gr.Textbox(label="输入Hugging Face Papers的URL", lines=1)
        recommend_btn = gr.Button("获取推荐")
        recommendations_md = gr.Markdown()
        recommend_btn.click(
            return_recommendations,
            inputs=[paper_url],
            outputs=[recommendations_md],
        )
demo.launch()

requirements.in ADDED Viewed

	@@ -0,0 +1,3 @@

+cachetools
+gradio
+requests

requirements.txt ADDED Viewed

	@@ -0,0 +1,62 @@

+PyMuPDF
+scikit-learn
+tensorflow
+tensorflow-hub
+openai
+aiofiles==23.2.1
+altair==5.1.1
+annotated-types==0.5.0
+anyio==3.7.1
+attrs==23.1.0
+cachetools==5.3.1
+certifi==2023.7.22
+charset-normalizer==3.2.0
+click==8.1.7
+contourpy==1.1.1
+cycler==0.11.0
+fastapi==0.103.1
+ffmpy==0.3.1
+filelock==3.12.4
+fonttools==4.42.1
+fsspec==2023.9.2
+gradio==3.45.1
+gradio-client==0.5.2
+h11==0.14.0
+httpcore==0.18.0
+httpx==0.25.0
+huggingface-hub==0.17.3
+idna==3.4
+importlib-resources==6.1.0
+jinja2==3.1.2
+jsonschema==4.19.1
+jsonschema-specifications==2023.7.1
+kiwisolver==1.4.5
+markupsafe==2.1.3
+matplotlib==3.8.0
+numpy==1.26.0
+orjson==3.9.7
+packaging==23.1
+pandas==2.1.1
+pillow==10.0.1
+pydantic==2.4.1
+pydantic-core==2.10.1
+pydub==0.25.1
+pyparsing==3.1.1
+python-dateutil==2.8.2
+python-multipart==0.0.6
+pytz==2023.3.post1
+pyyaml==6.0.1
+referencing==0.30.2
+requests==2.31.0
+rpds-py==0.10.3
+semantic-version==2.10.0
+six==1.16.0
+sniffio==1.3.0
+starlette==0.27.0
+toolz==0.12.0
+tqdm==4.66.1
+typing-extensions==4.8.0
+tzdata==2023.3
+urllib3==2.0.5
+uvicorn==0.23.2
+websockets==11.0.3