Spaces:
Running
Running
shaocongma
committed on
Commit
·
365213e
1
Parent(s):
c9efba3
Edit UI.
Browse files- app.py +46 -36
- auto_backgrounds.py +38 -33
- latex_templates/pre_refs.bib +19 -16
- utils/prompts.py +9 -10
- utils/references.py +13 -13
app.py
CHANGED
@@ -1,7 +1,7 @@
|
|
1 |
import gradio as gr
|
2 |
import os
|
3 |
import openai
|
4 |
-
from auto_backgrounds import generate_backgrounds,
|
5 |
from utils.file_operations import hash_name
|
6 |
|
7 |
# note: App白屏bug:允许第三方cookie
|
@@ -9,12 +9,10 @@ from utils.file_operations import hash_name
|
|
9 |
# 6. get logs when the procedure is not completed. *
|
10 |
# 7. 自己的文件库; 更多的prompts
|
11 |
# 8. Decide on how to generate the main part of a paper * (Langchain/AutoGPT
|
12 |
-
# 9. Load .bibtex file to generate a pre-defined references list. *
|
13 |
# 1. 把paper改成纯JSON?
|
14 |
# 2. 实现别的功能
|
15 |
# 3. Check API Key GPT-4 Support.
|
16 |
# 8. Re-build some components using `langchain`
|
17 |
-
# - in `references.py`, use PromptTemplates.format -> str
|
18 |
# - in `gpt_interation`, use LLM
|
19 |
# 5. 从提供的bib文件中 找到cite和citedby的文章, 计算embeddings; 从整个paper list中 根据cos距离进行排序; 选取max_refs的文章
|
20 |
# future:
|
@@ -49,17 +47,12 @@ def clear_inputs(text1, text2):
|
|
49 |
|
50 |
|
51 |
def wrapped_generator(paper_title, paper_description, openai_api_key=None,
|
52 |
-
template="ICLR2022",
|
53 |
-
cache_mode=IS_CACHE_AVAILABLE
|
54 |
# if `cache_mode` is True, then follow the following steps:
|
55 |
# check if "title"+"description" have been generated before
|
56 |
# if so, download from the cloud storage, return it
|
57 |
# if not, generate the result.
|
58 |
-
if generator is None:
|
59 |
-
# todo: add a Dropdown to select which generator to use.
|
60 |
-
# generator = generate_backgrounds
|
61 |
-
generator = generate_draft
|
62 |
-
# generator = fake_generator
|
63 |
if openai_api_key is not None:
|
64 |
openai.api_key = openai_api_key
|
65 |
openai.Model.list()
|
@@ -80,13 +73,17 @@ def wrapped_generator(paper_title, paper_description, openai_api_key=None,
|
|
80 |
else:
|
81 |
# generate the result.
|
82 |
# output = fake_generate_backgrounds(title, description, openai_key)
|
83 |
-
|
84 |
-
|
|
|
|
|
85 |
upload_file(output)
|
86 |
return output
|
87 |
else:
|
88 |
# output = fake_generate_backgrounds(title, description, openai_key)
|
89 |
-
output =
|
|
|
|
|
90 |
return output
|
91 |
|
92 |
|
@@ -97,6 +94,14 @@ theme = gr.themes.Default(font=gr.themes.GoogleFont("Questrial"))
|
|
97 |
# button_primary_background_fill="#281A39"
|
98 |
# )
|
99 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
100 |
with gr.Blocks(theme=theme) as demo:
|
101 |
gr.Markdown('''
|
102 |
# Auto-Draft: 文献整理辅助工具
|
@@ -107,11 +112,7 @@ with gr.Blocks(theme=theme) as demo:
|
|
107 |
|
108 |
在这个Huggingface Organization里也提供一定额度的免费体验: [AUTO-ACADEMIC](https://huggingface.co/auto-academic).
|
109 |
|
110 |
-
如果有更多想法和建议欢迎加入QQ群里交流, 如果我在Space里更新了Key我会第一时间通知大家. 群号: ***249738228***.
|
111 |
-
|
112 |
-
## 用法
|
113 |
-
|
114 |
-
输入想要生成的论文名称(比如Playing Atari with Deep Reinforcement Learning), 点击Submit, 等待大概十分钟, 下载.zip格式的输出,在Overleaf上编译浏览.
|
115 |
''')
|
116 |
|
117 |
with gr.Row():
|
@@ -124,6 +125,9 @@ with gr.Blocks(theme=theme) as demo:
|
|
124 |
|
125 |
# 每个功能做一个tab
|
126 |
with gr.Tab("学术论文"):
|
|
|
|
|
|
|
127 |
title = gr.Textbox(value="Playing Atari with Deep Reinforcement Learning", lines=1, max_lines=1,
|
128 |
label="Title", info="论文标题")
|
129 |
|
@@ -131,33 +135,38 @@ with gr.Blocks(theme=theme) as demo:
|
|
131 |
description_pp = gr.Textbox(lines=5, label="Description (Optional)", visible=True,
|
132 |
info="对希望生成的论文的一些描述. 包括这篇论文的创新点, 主要贡献, 等.")
|
133 |
|
134 |
-
interactive = False
|
135 |
-
gr.Markdown('''
|
136 |
-
## 下面的功能我只做了UI, 还没来得及实现功能.
|
137 |
-
''')
|
138 |
with gr.Row():
|
139 |
with gr.Column():
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
140 |
gr.Markdown('''
|
141 |
-
|
142 |
-
|
143 |
-
通过上传.bib文件来控制GPT-4模型必须参考哪些文献.
|
144 |
''')
|
145 |
bibtex_file = gr.File(label="Upload .bib file", file_types=["text"],
|
146 |
-
interactive=
|
|
|
|
|
|
|
|
|
147 |
with gr.Column():
|
148 |
search_engine = gr.Dropdown(label="Search Engine",
|
149 |
choices=["ArXiv", "Semantic Scholar", "Google Scholar", "None"],
|
150 |
value= "Semantic Scholar",
|
151 |
-
interactive=
|
152 |
-
info="用于决定GPT-4用什么搜索引擎来搜索文献.
|
153 |
-
|
154 |
info="选择此筐表示将使用Semantic Scholar的TLDR作为文献的总结.",
|
155 |
-
interactive =
|
156 |
-
|
157 |
-
|
158 |
-
|
159 |
-
slider = gr.Slider(minimum=1, maximum=
|
160 |
-
|
161 |
|
162 |
with gr.Row():
|
163 |
clear_button_pp = gr.Button("Clear")
|
@@ -196,7 +205,8 @@ with gr.Blocks(theme=theme) as demo:
|
|
196 |
file_output = gr.File(label="Output")
|
197 |
|
198 |
clear_button_pp.click(fn=clear_inputs, inputs=[title, description_pp], outputs=[title, description_pp])
|
199 |
-
submit_button_pp.click(fn=wrapped_generator, inputs=[title, description_pp, key], outputs=file_output)
|
|
|
200 |
|
201 |
demo.queue(concurrency_count=1, max_size=5, api_open=False)
|
202 |
demo.launch()
|
|
|
1 |
import gradio as gr
|
2 |
import os
|
3 |
import openai
|
4 |
+
from auto_backgrounds import generate_backgrounds, generate_draft
|
5 |
from utils.file_operations import hash_name
|
6 |
|
7 |
# note: App白屏bug:允许第三方cookie
|
|
|
9 |
# 6. get logs when the procedure is not completed. *
|
10 |
# 7. 自己的文件库; 更多的prompts
|
11 |
# 8. Decide on how to generate the main part of a paper * (Langchain/AutoGPT
|
|
|
12 |
# 1. 把paper改成纯JSON?
|
13 |
# 2. 实现别的功能
|
14 |
# 3. Check API Key GPT-4 Support.
|
15 |
# 8. Re-build some components using `langchain`
|
|
|
16 |
# - in `gpt_interation`, use LLM
|
17 |
# 5. 从提供的bib文件中 找到cite和citedby的文章, 计算embeddings; 从整个paper list中 根据cos距离进行排序; 选取max_refs的文章
|
18 |
# future:
|
|
|
47 |
|
48 |
|
49 |
def wrapped_generator(paper_title, paper_description, openai_api_key=None,
|
50 |
+
template="ICLR2022", tldr=True, max_num_refs=50, sections=None, bib_refs=None, model="gpt-4",
|
51 |
+
cache_mode=IS_CACHE_AVAILABLE):
|
52 |
# if `cache_mode` is True, then follow the following steps:
|
53 |
# check if "title"+"description" have been generated before
|
54 |
# if so, download from the cloud storage, return it
|
55 |
# if not, generate the result.
|
|
|
|
|
|
|
|
|
|
|
56 |
if openai_api_key is not None:
|
57 |
openai.api_key = openai_api_key
|
58 |
openai.Model.list()
|
|
|
73 |
else:
|
74 |
# generate the result.
|
75 |
# output = fake_generate_backgrounds(title, description, openai_key)
|
76 |
+
output =generate_draft(paper_title, paper_description, template=template,
|
77 |
+
tldr=tldr, max_num_refs=max_num_refs,
|
78 |
+
sections=sections, bib_refs=bib_refs, model=model)
|
79 |
+
# output = generate_draft(paper_title, paper_description, template, "gpt-4")
|
80 |
upload_file(output)
|
81 |
return output
|
82 |
else:
|
83 |
# output = fake_generate_backgrounds(title, description, openai_key)
|
84 |
+
output =generate_draft(paper_title, paper_description, template=template,
|
85 |
+
tldr=tldr, max_num_refs=max_num_refs,
|
86 |
+
sections=sections, bib_refs=bib_refs, model=model)
|
87 |
return output
|
88 |
|
89 |
|
|
|
94 |
# button_primary_background_fill="#281A39"
|
95 |
# )
|
96 |
|
97 |
+
ACADEMIC_PAPER = """## 一键生成论文初稿
|
98 |
+
|
99 |
+
1. 在Title文本框中输入想要生成的论文名称(比如Playing Atari with Deep Reinforcement Learning).
|
100 |
+
2. 点击Submit. 等待大概十分钟.
|
101 |
+
3. 在右侧下载.zip格式的输出,在Overleaf上编译浏览.
|
102 |
+
"""
|
103 |
+
|
104 |
+
|
105 |
with gr.Blocks(theme=theme) as demo:
|
106 |
gr.Markdown('''
|
107 |
# Auto-Draft: 文献整理辅助工具
|
|
|
112 |
|
113 |
在这个Huggingface Organization里也提供一定额度的免费体验: [AUTO-ACADEMIC](https://huggingface.co/auto-academic).
|
114 |
|
115 |
+
如果有更多想法和建议欢迎加入QQ群里交流, 如果我在Space里更新了Key我会第一时间通知大家. 群号: ***249738228***.
|
|
|
|
|
|
|
|
|
116 |
''')
|
117 |
|
118 |
with gr.Row():
|
|
|
125 |
|
126 |
# 每个功能做一个tab
|
127 |
with gr.Tab("学术论文"):
|
128 |
+
gr.Markdown(ACADEMIC_PAPER)
|
129 |
+
|
130 |
+
|
131 |
title = gr.Textbox(value="Playing Atari with Deep Reinforcement Learning", lines=1, max_lines=1,
|
132 |
label="Title", info="论文标题")
|
133 |
|
|
|
135 |
description_pp = gr.Textbox(lines=5, label="Description (Optional)", visible=True,
|
136 |
info="对希望生成的论文的一些描述. 包括这篇论文的创新点, 主要贡献, 等.")
|
137 |
|
|
|
|
|
|
|
|
|
138 |
with gr.Row():
|
139 |
with gr.Column():
|
140 |
+
with gr.Row():
|
141 |
+
template = gr.Dropdown(label="Template", choices=["ICLR2022"], value="ICLR2022",
|
142 |
+
interactive=False,
|
143 |
+
info="生成论文的参考模板. (暂不支持修改)")
|
144 |
+
model_selection = gr.Dropdown(label="Model", choices=["gpt-4", "gpt-3.5-turbo"], value="gpt-4",
|
145 |
+
interactive=True,
|
146 |
+
info="生成论文用到的语言模型.")
|
147 |
gr.Markdown('''
|
148 |
+
上传.bib文件提供AI需要参考的文献.
|
|
|
|
|
149 |
''')
|
150 |
bibtex_file = gr.File(label="Upload .bib file", file_types=["text"],
|
151 |
+
interactive=True)
|
152 |
+
gr.Examples(
|
153 |
+
examples=["latex_templates/pre_refs.bib"],
|
154 |
+
inputs=bibtex_file
|
155 |
+
)
|
156 |
with gr.Column():
|
157 |
search_engine = gr.Dropdown(label="Search Engine",
|
158 |
choices=["ArXiv", "Semantic Scholar", "Google Scholar", "None"],
|
159 |
value= "Semantic Scholar",
|
160 |
+
interactive=False,
|
161 |
+
info="用于决定GPT-4用什么搜索引擎来搜索文献. (暂不支持修改)")
|
162 |
+
tldr_checkbox = gr.Checkbox(value=True, label="TLDR;",
|
163 |
info="选择此筐表示将使用Semantic Scholar的TLDR作为文献的总结.",
|
164 |
+
interactive = True)
|
165 |
+
sections = gr.CheckboxGroup(choices=["introduction", "related works", "backgrounds", "methodology", "experiments", "conclusion", "abstract"],
|
166 |
+
type="value", label="生成章节", interactive = True,
|
167 |
+
value=["introduction", "related works"])
|
168 |
+
slider = gr.Slider(minimum=1, maximum=100, value=50, step=1,
|
169 |
+
interactive = True, label="最大参考文献数目")
|
170 |
|
171 |
with gr.Row():
|
172 |
clear_button_pp = gr.Button("Clear")
|
|
|
205 |
file_output = gr.File(label="Output")
|
206 |
|
207 |
clear_button_pp.click(fn=clear_inputs, inputs=[title, description_pp], outputs=[title, description_pp])
|
208 |
+
# submit_button_pp.click(fn=wrapped_generator, inputs=[title, description_pp, key, template, tldr, slider, sections, bibtex_file], outputs=file_output)
|
209 |
+
submit_button_pp.click(fn=wrapped_generator, inputs=[title, description_pp, key, template, tldr_checkbox, slider, sections, bibtex_file, model_selection ], outputs=file_output)
|
210 |
|
211 |
demo.queue(concurrency_count=1, max_size=5, api_open=False)
|
212 |
demo.launch()
|
auto_backgrounds.py
CHANGED
@@ -30,8 +30,29 @@ def log_usage(usage, generating_target, print_out=True):
|
|
30 |
print(message)
|
31 |
logging.info(message)
|
32 |
|
33 |
-
def _generation_setup(title, description="", template="ICLR2022",
|
34 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
35 |
print("Generation setup...")
|
36 |
paper = {}
|
37 |
paper_body = {}
|
@@ -44,24 +65,16 @@ def _generation_setup(title, description="", template="ICLR2022", model="gpt-4",
|
|
44 |
print("Initialize the paper information ...")
|
45 |
input_dict = {"title": title, "description": description}
|
46 |
# keywords, usage = keywords_generation(input_dict, model="gpt-3.5-turbo", max_kw_refs=max_kw_refs)
|
47 |
-
keywords, usage = keywords_generation(input_dict)
|
48 |
-
print(f"keywords: {keywords}")
|
49 |
log_usage(usage, "keywords")
|
50 |
|
51 |
# generate keywords dictionary
|
52 |
keywords = {keyword:max_kw_refs for keyword in keywords}
|
53 |
-
|
54 |
-
# for keyword in json.loads(keywords):
|
55 |
-
# tmp[keyword] = max_kw_refs
|
56 |
-
# keywords = tmp
|
57 |
-
print(f"keywords: {keywords}")
|
58 |
|
59 |
-
ref = References()
|
60 |
ref.collect_papers(keywords, tldr=tldr)
|
61 |
-
|
62 |
-
# in tex_processing, remove all duplicated ids
|
63 |
-
# find most relevant papers; max_num_refs
|
64 |
-
all_paper_ids = ref.to_bibtex(bibtex_path)
|
65 |
|
66 |
print(f"The paper information has been initialized. References are saved to {bibtex_path}.")
|
67 |
|
@@ -70,11 +83,12 @@ def _generation_setup(title, description="", template="ICLR2022", model="gpt-4",
|
|
70 |
paper["references"] = ref.to_prompts()
|
71 |
paper["body"] = paper_body
|
72 |
paper["bibtex"] = bibtex_path
|
73 |
-
return paper, destination_folder, all_paper_ids
|
74 |
|
75 |
|
76 |
|
77 |
def generate_backgrounds(title, description="", template="ICLR2022", model="gpt-4"):
|
|
|
78 |
paper, destination_folder, _ = _generation_setup(title, description, template, model)
|
79 |
|
80 |
for section in ["introduction", "related works", "backgrounds"]:
|
@@ -92,25 +106,15 @@ def generate_backgrounds(title, description="", template="ICLR2022", model="gpt-
|
|
92 |
return make_archive(destination_folder, filename)
|
93 |
|
94 |
|
95 |
-
def
|
96 |
-
|
97 |
-
|
98 |
-
|
99 |
-
|
100 |
-
filename = hash_name(input_dict) + ".zip"
|
101 |
-
return make_archive("sample-output.pdf", filename)
|
102 |
-
|
103 |
-
|
104 |
-
def generate_draft(title, description="", template="ICLR2022", model="gpt-4", tldr=True, max_kw_refs=4):
|
105 |
-
paper, destination_folder, _ = _generation_setup(title, description, template, model, tldr, max_kw_refs)
|
106 |
-
raise
|
107 |
-
# todo: `list_of_methods` failed to be generated; find a solution ...
|
108 |
-
# print("Generating figures ...")
|
109 |
-
# usage = figures_generation(paper, destination_folder, model="gpt-3.5-turbo")
|
110 |
-
# log_usage(usage, "figures")
|
111 |
|
112 |
-
#
|
113 |
-
|
|
|
114 |
max_attempts = 4
|
115 |
attempts_count = 0
|
116 |
while attempts_count < max_attempts:
|
@@ -127,6 +131,7 @@ def generate_draft(title, description="", template="ICLR2022", model="gpt-4", tl
|
|
127 |
|
128 |
input_dict = {"title": title, "description": description, "generator": "generate_draft"}
|
129 |
filename = hash_name(input_dict) + ".zip"
|
|
|
130 |
return make_archive(destination_folder, filename)
|
131 |
|
132 |
|
|
|
30 |
print(message)
|
31 |
logging.info(message)
|
32 |
|
33 |
+
def _generation_setup(title, description="", template="ICLR2022", tldr=False,
|
34 |
+
max_kw_refs=10, max_num_refs=50, bib_refs=None):
|
35 |
+
"""
|
36 |
+
This function handles the setup process for paper generation; it consists of three steps:
|
37 |
+
1. Copy the template to the outputs folder. Create the log file `generation.log`
|
38 |
+
2. Collect references based on the given `title` and `description`
|
39 |
+
3. Generate the basic `paper` object (a dictionary)
|
40 |
+
|
41 |
+
Parameters:
|
42 |
+
title (str): The title of the paper.
|
43 |
+
description (str, optional): A short description or abstract for the paper. Defaults to an empty string.
|
44 |
+
template (str, optional): The template to be used for paper generation. Defaults to "ICLR2022".
|
45 |
+
tldr (bool, optional): A flag indicating whether to use the Semantic Scholar TL;DR (Too Long; Didn't Read) text, instead of the abstract, as the summary of each collected paper. Defaults to False.
|
46 |
+
max_kw_refs (int, optional): The maximum number of references that can be associated with each keyword. Defaults to 10.
|
47 |
+
max_num_refs (int, optional): The maximum number of references that can be included in the paper. Defaults to 50.
|
48 |
+
bib_refs (list, optional): A list of pre-existing references in BibTeX format. Defaults to None.
|
49 |
+
|
50 |
+
Returns:
|
51 |
+
tuple: A tuple containing the following elements:
|
52 |
+
- paper (dict): A dictionary containing the generated paper information.
|
53 |
+
- destination_folder (str): The path to the destination folder where the generation log is saved.
|
54 |
+
- all_paper_ids (list): A list of all paper IDs collected for the references.
|
55 |
+
"""
|
56 |
print("Generation setup...")
|
57 |
paper = {}
|
58 |
paper_body = {}
|
|
|
65 |
print("Initialize the paper information ...")
|
66 |
input_dict = {"title": title, "description": description}
|
67 |
# keywords, usage = keywords_generation(input_dict, model="gpt-3.5-turbo", max_kw_refs=max_kw_refs)
|
68 |
+
keywords, usage = keywords_generation(input_dict)
|
|
|
69 |
log_usage(usage, "keywords")
|
70 |
|
71 |
# generate keywords dictionary
|
72 |
keywords = {keyword:max_kw_refs for keyword in keywords}
|
73 |
+
print(f"keywords: {keywords}\n\n")
|
|
|
|
|
|
|
|
|
74 |
|
75 |
+
ref = References(title, bib_refs)
|
76 |
ref.collect_papers(keywords, tldr=tldr)
|
77 |
+
all_paper_ids = ref.to_bibtex(bibtex_path, max_num_refs) #todo: max_num_refs has not implemented yet
|
|
|
|
|
|
|
78 |
|
79 |
print(f"The paper information has been initialized. References are saved to {bibtex_path}.")
|
80 |
|
|
|
83 |
paper["references"] = ref.to_prompts()
|
84 |
paper["body"] = paper_body
|
85 |
paper["bibtex"] = bibtex_path
|
86 |
+
return paper, destination_folder, all_paper_ids #todo: use `all_paper_ids` to check if all citations are in this list
|
87 |
|
88 |
|
89 |
|
90 |
def generate_backgrounds(title, description="", template="ICLR2022", model="gpt-4"):
|
91 |
+
# todo: update this call to match the current `_generation_setup` signature (its fourth positional parameter is now `tldr`, not `model`)
|
92 |
paper, destination_folder, _ = _generation_setup(title, description, template, model)
|
93 |
|
94 |
for section in ["introduction", "related works", "backgrounds"]:
|
|
|
106 |
return make_archive(destination_folder, filename)
|
107 |
|
108 |
|
109 |
+
def generate_draft(title, description="", template="ICLR2022",
|
110 |
+
model="gpt-4", tldr=True, max_kw_refs=10, max_num_refs=30, sections=None, bib_refs=None):
|
111 |
+
# pre-processing `sections` parameter;
|
112 |
+
if sections is None:
|
113 |
+
sections = ["introduction", "related works", "backgrounds", "methodology", "experiments", "conclusion", "abstract"]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
114 |
|
115 |
+
# todo: add more parameters; select which section to generate; select maximum refs.
|
116 |
+
paper, destination_folder, _ = _generation_setup(title, description, template, tldr, max_kw_refs, max_num_refs, bib_refs)
|
117 |
+
for section in sections:
|
118 |
max_attempts = 4
|
119 |
attempts_count = 0
|
120 |
while attempts_count < max_attempts:
|
|
|
131 |
|
132 |
input_dict = {"title": title, "description": description, "generator": "generate_draft"}
|
133 |
filename = hash_name(input_dict) + ".zip"
|
134 |
+
print("\nMission completed.\n")
|
135 |
return make_archive(destination_folder, filename)
|
136 |
|
137 |
|
latex_templates/pre_refs.bib
CHANGED
@@ -1,17 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
|
2 |
-
@
|
3 |
-
|
4 |
-
|
5 |
-
|
6 |
-
|
7 |
-
|
8 |
-
|
9 |
-
|
10 |
-
|
11 |
-
|
12 |
-
|
13 |
-
author = {Ehud Lehrer , Eilon Solan , Omri N. Solan},
|
14 |
-
journal={arXiv preprint arXiv:1511.02377},
|
15 |
-
year = {2015},
|
16 |
-
url = {http://arxiv.org/abs/1511.02377v1}
|
17 |
-
}
|
|
|
1 |
+
@inproceedings{ma2020understanding,
|
2 |
+
title={Understanding the impact of model incoherence on convergence of incremental sgd with random reshuffle},
|
3 |
+
author={Ma, Shaocong and Zhou, Yi},
|
4 |
+
booktitle={International Conference on Machine Learning},
|
5 |
+
pages={6565--6574},
|
6 |
+
year={2020},
|
7 |
+
organization={PMLR}
|
8 |
+
}
|
9 |
|
10 |
+
@inproceedings{ma2020variance,
|
11 |
+
author = {Ma, Shaocong and Zhou, Yi and Zou, Shaofeng},
|
12 |
+
booktitle = {Advances in Neural Information Processing Systems},
|
13 |
+
editor = {H. Larochelle and M. Ranzato and R. Hadsell and M.F. Balcan and H. Lin},
|
14 |
+
pages = {14796--14806},
|
15 |
+
publisher = {Curran Associates, Inc.},
|
16 |
+
title = {Variance-Reduced Off-Policy TDC Learning: Non-Asymptotic Convergence Analysis},
|
17 |
+
url = {https://proceedings.neurips.cc/paper_files/paper/2020/file/a992995ef4f0439b258f2360dbb85511-Paper.pdf},
|
18 |
+
volume = {33},
|
19 |
+
year = {2020}
|
20 |
+
}
|
|
|
|
|
|
|
|
|
|
utils/prompts.py
CHANGED
@@ -33,16 +33,15 @@ def generate_experiments_prompts(paper_info):
|
|
33 |
######################################################################################################################
|
34 |
|
35 |
# two parameters: min_refs_num, max_refs_num
|
36 |
-
keywords_system_template = """You are an assistant designed to provide accurate and informative keywords of searching academic papers.
|
37 |
-
Instructions
|
38 |
-
- Your response should always be a Python list; e.g. ["keyword1", "keyword2", "keyword3"]
|
39 |
-
- The length of list should between {min_refs_num} and {max_refs_num}
|
40 |
-
- Use specific phrases as keywords and avoid using too general words (e.g. machine learning)"""
|
41 |
-
|
42 |
-
|
43 |
-
|
44 |
-
|
45 |
-
# - Use specific phrases instead of using too general words (e.g. machine learning)"""
|
46 |
|
47 |
# two parameters: min_refs_num, max_refs_num
|
48 |
exp_methods_system_template = """You are an assistant designed to provide most related algorithms or methods to a given paper title.
|
|
|
33 |
######################################################################################################################
|
34 |
|
35 |
# two parameters: min_refs_num, max_refs_num
|
36 |
+
# keywords_system_template = """You are an assistant designed to provide accurate and informative keywords of searching academic papers.
|
37 |
+
# Instructions
|
38 |
+
# - Your response should always be a Python list; e.g. ["keyword1", "keyword2", "keyword3"]
|
39 |
+
# - The length of list should between {min_refs_num} and {max_refs_num}
|
40 |
+
# - Use specific phrases as keywords and avoid using too general words (e.g. machine learning)"""
|
41 |
+
keywords_system_template = """You are an assistant designed to provide accurate and informative keywords of searching academic papers.\n
|
42 |
+
Instructions:\n
|
43 |
+
- Your response should follow the following output format: ["field1", "field2", "field3", "field4"]\n
|
44 |
+
- The length of this Python list should between {min_refs_num} and {max_refs_num}."""
|
|
|
45 |
|
46 |
# two parameters: min_refs_num, max_refs_num
|
47 |
exp_methods_system_template = """You are an assistant designed to provide most related algorithms or methods to a given paper title.
|
utils/references.py
CHANGED
@@ -150,7 +150,6 @@ def _collect_papers_ss(keyword, counts=3, tldr=False):
|
|
150 |
# turn the search result to a list of paper dictionary.
|
151 |
papers_ss = []
|
152 |
for raw_paper in search_results_ss:
|
153 |
-
print(raw_paper['title'])
|
154 |
if raw_paper["abstract"] is None:
|
155 |
continue
|
156 |
|
@@ -170,6 +169,8 @@ def _collect_papers_ss(keyword, counts=3, tldr=False):
|
|
170 |
abstract = raw_paper['tldr']['text']
|
171 |
else:
|
172 |
abstract = remove_newlines(raw_paper['abstract'])
|
|
|
|
|
173 |
embeddings_dict = raw_paper.get('embedding')
|
174 |
if embeddings_dict is None:
|
175 |
continue
|
@@ -203,14 +204,13 @@ def _collect_papers_ss(keyword, counts=3, tldr=False):
|
|
203 |
######################################################################################################################
|
204 |
|
205 |
class References:
|
206 |
-
def __init__(self):
|
207 |
-
|
208 |
-
|
209 |
-
|
210 |
-
|
211 |
-
|
212 |
-
|
213 |
-
self.papers = {}
|
214 |
|
215 |
def load_papers(self, bibtex, keyword):
|
216 |
self.papers[keyword] = load_papers_from_bibtex(bibtex)
|
@@ -230,14 +230,14 @@ class References:
|
|
230 |
for key, counts in keywords_dict.items():
|
231 |
self.papers[key] = _collect_papers_ss(key, counts, tldr)
|
232 |
|
233 |
-
def find_relevant(self, max_refs=30):
|
234 |
-
# todo: use embeddings to evaluate
|
235 |
-
pass
|
236 |
|
237 |
-
def to_bibtex(self, path_to_bibtex="ref.bib"):
|
238 |
"""
|
239 |
Turn the saved paper list into bibtex file "ref.bib". Return a list of all `paper_id`.
|
240 |
"""
|
|
|
|
|
|
|
241 |
papers = self._get_papers(keyword = "_all")
|
242 |
|
243 |
# clear the bibtex file
|
|
|
150 |
# turn the search result to a list of paper dictionary.
|
151 |
papers_ss = []
|
152 |
for raw_paper in search_results_ss:
|
|
|
153 |
if raw_paper["abstract"] is None:
|
154 |
continue
|
155 |
|
|
|
169 |
abstract = raw_paper['tldr']['text']
|
170 |
else:
|
171 |
abstract = remove_newlines(raw_paper['abstract'])
|
172 |
+
|
173 |
+
# some papers have no embeddings; handle this case
|
174 |
embeddings_dict = raw_paper.get('embedding')
|
175 |
if embeddings_dict is None:
|
176 |
continue
|
|
|
204 |
######################################################################################################################
|
205 |
|
206 |
class References:
|
207 |
+
def __init__(self, title, load_papers):
|
208 |
+
if load_papers is not None:
|
209 |
+
self.papers = {}
|
210 |
+
self.papers["customized_refs"] = load_papers_from_bibtex(load_papers)
|
211 |
+
else:
|
212 |
+
self.papers = {}
|
213 |
+
self.title = title
|
|
|
214 |
|
215 |
def load_papers(self, bibtex, keyword):
|
216 |
self.papers[keyword] = load_papers_from_bibtex(bibtex)
|
|
|
230 |
for key, counts in keywords_dict.items():
|
231 |
self.papers[key] = _collect_papers_ss(key, counts, tldr)
|
232 |
|
|
|
|
|
|
|
233 |
|
234 |
+
def to_bibtex(self, path_to_bibtex="ref.bib", max_num_refs=50):
|
235 |
"""
|
236 |
Turn the saved paper list into bibtex file "ref.bib". Return a list of all `paper_id`.
|
237 |
"""
|
238 |
+
# todo:
|
239 |
+
# use embeddings to evaluate; keep top k relevant references in papers
|
240 |
+
# send (title, .bib file) to evaluate embeddings; receive truncated papers
|
241 |
papers = self._get_papers(keyword = "_all")
|
242 |
|
243 |
# clear the bibtex file
|