shaocongma commited on
Commit
8e698eb
·
1 Parent(s): b4c6c2b

Update UI. Implement Pre-Defined References function.

Browse files
app.py CHANGED
@@ -6,15 +6,12 @@ from utils.file_operations import hash_name
6
 
7
  # note: App白屏bug:允许第三方cookie
8
  # todo:
9
- # 5. Use some simple method for simple tasks
10
- # (including: writing abstract, conclusion, generate keywords, generate figures...)
11
- # 5.1 Use GPT 3.5 for abstract, conclusion, ... (or may not)
12
- # 5.2 Use local LLM to generate keywords, figures, ...
13
- # 5.3 Use embedding to find most related papers (find a paper dataset)
14
- # 6. get logs when the procedure is not completed.
15
  # 7. 自己的文件库; 更多的prompts
16
- # 8. Decide on how to generate the main part of a paper
17
- # 9. Load .bibtex file to generate a pre-defined references list.
 
 
18
  # future:
19
  # 8. Change prompts to langchain
20
  # 4. add auto_polishing function
@@ -112,20 +109,72 @@ with gr.Blocks(theme=theme) as demo:
112
 
113
  输入想要生成的论文名称(比如Playing Atari with Deep Reinforcement Learning), 点击Submit, 等待大概十分钟, 下载.zip格式的输出,在Overleaf上编译浏览.
114
  ''')
 
115
  with gr.Row():
116
  with gr.Column(scale=2):
117
  key = gr.Textbox(value=openai_key, lines=1, max_lines=1, label="OpenAI Key",
118
  visible=not IS_OPENAI_API_KEY_AVAILABLE)
 
119
  # generator = gr.Dropdown(choices=["学术论文", "文献总结"], value="文献总结",
120
  # label="Selection", info="目前支持生成'学术论文'和'文献总结'.", interactive=True)
121
- title = gr.Textbox(value="Playing Atari with Deep Reinforcement Learning", lines=1, max_lines=1,
122
- label="Title", info="论文标题")
123
- description = gr.Textbox(lines=5, label="Description (Optional)", visible=True,
124
- info="对希望生成的论文的一些描述. 包括这篇论文的创新点, 主要贡献, 等.")
125
-
126
- with gr.Row():
127
- clear_button = gr.Button("Clear")
128
- submit_button = gr.Button("Submit", variant="primary")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
129
  with gr.Column(scale=1):
130
  style_mapping = {True: "color:white;background-color:green",
131
  False: "color:white;background-color:red"} # todo: to match website's style
@@ -137,8 +186,8 @@ with gr.Blocks(theme=theme) as demo:
137
  `OpenAI API`: <span style="{style_mapping[IS_OPENAI_API_KEY_AVAILABLE]}">{availability_mapping[IS_OPENAI_API_KEY_AVAILABLE]}</span>. `Cache`: <span style="{style_mapping[IS_CACHE_AVAILABLE]}">{availability_mapping[IS_CACHE_AVAILABLE]}</span>.''')
138
  file_output = gr.File(label="Output")
139
 
140
- clear_button.click(fn=clear_inputs, inputs=[title, description], outputs=[title, description])
141
- submit_button.click(fn=wrapped_generator, inputs=[title, description, key], outputs=file_output)
142
 
143
  demo.queue(concurrency_count=1, max_size=5, api_open=False)
144
  demo.launch()
 
6
 
7
  # note: App白屏bug:允许第三方cookie
8
  # todo:
9
+ # 6. get logs when the procedure is not completed. *
 
 
 
 
 
10
  # 7. 自己的文件库; 更多的prompts
11
+ # 8. Decide on how to generate the main part of a paper * (Langchain/AutoGPT
12
+ # 9. Load .bibtex file to generate a pre-defined references list. *
13
+ # 1. 把paper改成纯JSON?
14
+ # 2. 实现别的功能
15
  # future:
16
  # 8. Change prompts to langchain
17
  # 4. add auto_polishing function
 
109
 
110
  输入想要生成的论文名称(比如Playing Atari with Deep Reinforcement Learning), 点击Submit, 等待大概十分钟, 下载.zip格式的输出,在Overleaf上编译浏览.
111
  ''')
112
+
113
  with gr.Row():
114
  with gr.Column(scale=2):
115
  key = gr.Textbox(value=openai_key, lines=1, max_lines=1, label="OpenAI Key",
116
  visible=not IS_OPENAI_API_KEY_AVAILABLE)
117
+
118
  # generator = gr.Dropdown(choices=["学术论文", "文献总结"], value="文献总结",
119
  # label="Selection", info="目前支持生成'学术论文'和'文献总结'.", interactive=True)
120
+
121
+ # 每个功能做一个tab
122
+ with gr.Tab("学术论文"):
123
+ title = gr.Textbox(value="Playing Atari with Deep Reinforcement Learning", lines=1, max_lines=1,
124
+ label="Title", info="论文标题")
125
+
126
+ with gr.Accordion("高级设置", open=False):
127
+ description_pp = gr.Textbox(lines=5, label="Description (Optional)", visible=True,
128
+ info="对希望生成的论文的一些描述. 包括这篇论文的创新点, 主要贡献, 等.")
129
+
130
+ interactive = False
131
+ with gr.Row():
132
+ with gr.Column():
133
+ gr.Markdown('''
134
+ Upload .bib file (Optional)
135
+
136
+ 通过上传.bib文件来控制GPT-4模型必须参考哪些文献.
137
+ ''')
138
+ bibtex_file = gr.File(label="Upload .bib file", file_types=["text"],
139
+ interactive=interactive)
140
+ with gr.Column():
141
+ search_engine = gr.Dropdown(label="Search Engine",
142
+ choices=["ArXiv", "Semantic Scholar", "Google Scholar", "None"],
143
+ value= "Semantic Scholar",
144
+ interactive=interactive,
145
+ info="用于决定GPT-4用什么搜索引擎来搜索文献. 选择None的时候仅参考给定文献.")
146
+ tldr = gr.Checkbox(value=True, label="TLDR;",
147
+ info="选择此筐表示将使用Semantic Scholar的TLDR作为文献的总结.",
148
+ interactive = interactive),
149
+ use_cache = gr.Checkbox(label="总是重新生成",
150
+ info="选择此筐表示将不会读取已经生成好的文章.",
151
+ interactive = interactive)
152
+
153
+ with gr.Row():
154
+ clear_button_pp = gr.Button("Clear")
155
+ submit_button_pp = gr.Button("Submit", variant="primary")
156
+
157
+ with gr.Tab("文献综述"):
158
+ gr.Markdown('''
159
+ <h1 style="text-align: center;">Coming soon!</h1>
160
+ ''')
161
+ # topic = gr.Textbox(value="Deep Reinforcement Learning", lines=1, max_lines=1,
162
+ # label="Topic", info="文献主题")
163
+ # with gr.Accordion("Advanced Setting"):
164
+ # description_lr = gr.Textbox(lines=5, label="Description (Optional)", visible=True,
165
+ # info="对希望生成的综述的一些描述. 包括这篇论文的创新点, 主要贡献, 等.")
166
+ # with gr.Row():
167
+ # clear_button_lr = gr.Button("Clear")
168
+ # submit_button_lr = gr.Button("Submit", variant="primary")
169
+ with gr.Tab("论文润色"):
170
+ gr.Markdown('''
171
+ <h1 style="text-align: center;">Coming soon!</h1>
172
+ ''')
173
+ with gr.Tab("帮我想想该写什么论文!"):
174
+ gr.Markdown('''
175
+ <h1 style="text-align: center;">Coming soon!</h1>
176
+ ''')
177
+
178
  with gr.Column(scale=1):
179
  style_mapping = {True: "color:white;background-color:green",
180
  False: "color:white;background-color:red"} # todo: to match website's style
 
186
  `OpenAI API`: <span style="{style_mapping[IS_OPENAI_API_KEY_AVAILABLE]}">{availability_mapping[IS_OPENAI_API_KEY_AVAILABLE]}</span>. `Cache`: <span style="{style_mapping[IS_CACHE_AVAILABLE]}">{availability_mapping[IS_CACHE_AVAILABLE]}</span>.''')
187
  file_output = gr.File(label="Output")
188
 
189
+ clear_button_pp.click(fn=clear_inputs, inputs=[title, description_pp], outputs=[title, description_pp])
190
+ submit_button_pp.click(fn=wrapped_generator, inputs=[title, description_pp, key], outputs=file_output)
191
 
192
  demo.queue(concurrency_count=1, max_size=5, api_open=False)
193
  demo.launch()
latex_templates/pre_refs.bib ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ @article{1512.07669,
3
+ title = {Reinforcement Learning: Stochastic Approximation Algorithms for Markov
4
+ Decision Processes},
5
+ author = {Vikram Krishnamurthy},
6
+ journal={arXiv preprint arXiv:1512.07669},
7
+ year = {2015},
8
+ url = {http://arxiv.org/abs/1512.07669v1}
9
+ }
10
+
11
+ @article{1511.02377,
12
+ title = {The Value Functions of Markov Decision Processes},
13
+ author = {Ehud Lehrer and Eilon Solan and Omri N. Solan},
14
+ journal={arXiv preprint arXiv:1511.02377},
15
+ year = {2015},
16
+ url = {http://arxiv.org/abs/1511.02377v1}
17
+ }
utils/references.py CHANGED
@@ -1,18 +1,26 @@
1
- # Generate references
2
- # 1. select most correlated references from "references" dataset or Arxiv search engine.
3
- # 2. Generate bibtex from the selected papers. --> to_bibtex()
4
- # 3. Generate prompts from the selected papers: --> to_prompts()
5
- # {"paper_id": "paper summary"}
6
-
 
 
 
 
7
 
8
  import requests
9
  import re
 
 
 
10
 
11
 
12
  ######################################################################################################################
13
  # Some basic tools
14
  ######################################################################################################################
15
  def remove_newlines(serie):
 
16
  serie = serie.replace('\n', ' ')
17
  serie = serie.replace('\\n', ' ')
18
  serie = serie.replace(' ', ' ')
@@ -20,6 +28,47 @@ def remove_newlines(serie):
20
  return serie
21
 
22
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
23
  ######################################################################################################################
24
  # Semantic Scholar (SS) API
25
  ######################################################################################################################
@@ -79,7 +128,7 @@ def _collect_papers_ss(keyword, counts=3, tldr=False):
79
 
80
  def parse_search_results(search_results_ss):
81
  # turn the search result to a list of paper dictionary.
82
- papers = []
83
  for raw_paper in search_results_ss:
84
  if raw_paper["abstract"] is None:
85
  continue
@@ -100,14 +149,14 @@ def _collect_papers_ss(keyword, counts=3, tldr=False):
100
  result = {
101
  "paper_id": paper_id,
102
  "title": title,
103
- "abstract": abstract, # todo: compare results with tldr
104
  "link": link,
105
  "authors": authors_str,
106
  "year": year_str,
107
  "journal": journal
108
  }
109
- papers.append(result)
110
- return papers
111
 
112
  raw_results = ss_search(keyword, limit=counts)
113
  if raw_results is not None:
@@ -192,13 +241,13 @@ def _collect_papers_arxiv(keyword, counts=3, tldr=False):
192
  # References Class
193
  ######################################################################################################################
194
 
195
- # Each `paper` is a dictionary containing (1) paper_id (2) title (3) authors (4) year (5) link (6) abstract (7) journal
196
  class References:
197
  def __init__(self, load_papers=""):
198
  if load_papers:
199
- # todo: read a json file from the given path
200
- # this could be used to support pre-defined references
201
- pass
 
202
  else:
203
  self.papers = []
204
 
@@ -266,15 +315,20 @@ class References:
266
 
267
 
268
  if __name__ == "__main__":
269
- refs = References()
270
- keywords_dict = {
271
- "Deep Q-Networks": 15,
272
- "Policy Gradient Methods": 24,
273
- "Actor-Critic Algorithms": 4,
274
- "Model-Based Reinforcement Learning": 13,
275
- "Exploration-Exploitation Trade-off": 7
276
- }
277
- refs.collect_papers(keywords_dict, method="ss", tldr=True)
278
- for p in refs.papers:
279
- print(p["paper_id"])
280
- print(len(refs.papers))
 
 
 
 
 
 
1
+ # Each `paper` is a dictionary containing:
2
+ # (1) paper_id (2) title (3) authors (4) year (5) link (6) abstract (7) journal
3
+ #
4
+ # Generate references:
5
+ # `References` class:
6
+ # 1. Read a given .bib file to collect papers; use the `search_paper_abstract` function to fill in missing abstracts.
7
+ # 2. Given some keywords, use the ArXiv or Semantic Scholar API to find papers.
8
+ # 3. Generate bibtex from the selected papers. --> to_bibtex()
9
+ # 4. Generate prompts from the selected papers: --> to_prompts()
10
+ # A sample prompt: {"paper_id": "paper summary"}
11
 
12
  import requests
13
  import re
14
+ import bibtexparser
15
+ from scholarly import scholarly
16
+ from scholarly import ProxyGenerator
17
 
18
 
19
  ######################################################################################################################
20
  # Some basic tools
21
  ######################################################################################################################
22
  def remove_newlines(serie):
23
+ # This function is applied to the abstract of each paper to reduce the length of prompts.
24
  serie = serie.replace('\n', ' ')
25
  serie = serie.replace('\\n', ' ')
26
  serie = serie.replace(' ', ' ')
 
28
  return serie
29
 
30
 
31
def search_paper_abstract(title):
    """Look up a paper by title through `scholarly` and return its abstract.

    The request is routed through a ScraperAPI proxy, and only the first
    search hit is used. The proxy key is read from the ``SCRAPER_API_KEY``
    environment variable when it is set.

    Args:
        title: Title of the paper to search for.

    Returns:
        The abstract of the first search result after `remove_newlines`, or
        an empty string when the search yields no result or the result has
        no abstract.
    """
    import os  # local import so this block does not depend on file-level imports

    # NOTE(review): the literal fallback is a credential committed to the
    # repository. It is kept only so existing setups keep working; configure
    # SCRAPER_API_KEY instead, then rotate the key and delete the literal.
    api_key = os.environ.get("SCRAPER_API_KEY", "921b16f94d701308b9d9b4456ddde155")

    pg = ProxyGenerator()
    pg.ScraperAPI(api_key)
    scholarly.use_proxy(pg)

    # A default for next() turns "no hits" into an empty abstract instead of
    # letting StopIteration escape into the caller's loop.
    paper = next(scholarly.search_pubs(title), None)
    if paper is None:
        return ""

    try:
        abstract = paper['bib']['abstract']
    except KeyError:
        # The hit exists but carries no abstract.
        return ""
    return remove_newlines(abstract)
39
+
40
+
41
def load_papers_from_bibtex(bib_file_path):
    """Read a .bib file and convert its entries into paper dictionaries.

    Entries without a ``title`` field are skipped. When an entry has no
    ``abstract`` field, the abstract is looked up with
    `search_paper_abstract` using the entry's title.

    Args:
        bib_file_path: Path of the .bib file to read.

    Returns:
        A list of dictionaries with the keys ``paper_id``, ``title``,
        ``link``, ``abstract``, ``authors``, ``year`` and ``journal``.
        ``link`` is always an empty string; other fields missing from the
        entry are ``None``. The list is empty when the file has no usable
        entries.
    """
    # Explicit encoding so the result does not depend on the platform's
    # default locale encoding.
    with open(bib_file_path, encoding="utf-8") as bibtex_file:
        bib_database = bibtexparser.load(bibtex_file)

    bib_papers = []
    for bibitem in bib_database.entries:
        title = bibitem.get("title")
        if title is None:
            continue
        # A field value may be wrapped across several lines in the .bib
        # file; collapse the whitespace so the search query and the stored
        # title are a single clean line.
        title = " ".join(title.split())

        abstract = bibitem.get("abstract")
        if abstract is None:
            abstract = search_paper_abstract(title)

        bib_papers.append({
            "paper_id": bibitem.get("ID"),
            "title": title,
            "link": "",
            "abstract": abstract,
            "authors": bibitem.get("author"),
            "year": bibitem.get("year"),
            "journal": bibitem.get("journal")
        })
    return bib_papers
70
+
71
+
72
  ######################################################################################################################
73
  # Semantic Scholar (SS) API
74
  ######################################################################################################################
 
128
 
129
  def parse_search_results(search_results_ss):
130
  # turn the search result to a list of paper dictionary.
131
+ papers_ss = []
132
  for raw_paper in search_results_ss:
133
  if raw_paper["abstract"] is None:
134
  continue
 
149
  result = {
150
  "paper_id": paper_id,
151
  "title": title,
152
+ "abstract": abstract,
153
  "link": link,
154
  "authors": authors_str,
155
  "year": year_str,
156
  "journal": journal
157
  }
158
+ papers_ss.append(result)
159
+ return papers_ss
160
 
161
  raw_results = ss_search(keyword, limit=counts)
162
  if raw_results is not None:
 
241
  # References Class
242
  ######################################################################################################################
243
 
 
244
  class References:
245
  def __init__(self, load_papers=""):
246
  if load_papers:
247
+ # todo: (1) a very large .bib file may exceed the token limit; consider truncating to 5 or 10 entries
248
+ # (2) google scholar didn't give a full abstract for some papers ...
249
+ # (3) may use langchain to support long input
250
+ self.papers = load_papers_from_bibtex(load_papers)
251
  else:
252
  self.papers = []
253
 
 
315
 
316
 
317
if __name__ == "__main__":
    # Manual smoke test: parse the sample .bib file from the repository and
    # print every paper dictionary that was produced.
    import os

    # Keyword-search demo, kept for reference:
    # refs = References()
    # keywords_dict = {
    #     "Deep Q-Networks": 15,
    #     "Policy Gradient Methods": 24,
    #     "Actor-Critic Algorithms": 4,
    #     "Model-Based Reinforcement Learning": 13,
    #     "Exploration-Exploitation Trade-off": 7
    # }
    # refs.collect_papers(keywords_dict, method="ss", tldr=True)
    # for p in refs.papers:
    #     print(p["paper_id"])
    # print(len(refs.papers))

    # Resolve the sample file relative to this module (utils/references.py)
    # rather than through a machine-specific absolute path.
    repo_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
    bib = os.path.join(repo_root, "latex_templates", "pre_refs.bib")
    papers = load_papers_from_bibtex(bib)
    for paper in papers:
        print(paper)
utils/tex_processing.py CHANGED
@@ -2,16 +2,12 @@ import os
2
 
3
  def replace_title(save_to_path, title):
4
  # Define input and output file names
5
- # input_file_name = save_to_path + "/template.tex"
6
- # output_file_name = save_to_path + "/main.tex"
7
  input_file_name = os.path.join(save_to_path, "template.tex")
8
  output_file_name = os.path.join(save_to_path , "main.tex")
9
 
10
  # Open the input file and read its content
11
  with open(input_file_name, 'r') as infile:
12
  content = infile.read()
13
-
14
- # Replace all occurrences of "asdfgh" with "hahaha"
15
  content = content.replace(r"\title{TITLE} ", f"\\title{{{title}}} ")
16
 
17
  # Open the output file and write the modified content
@@ -29,3 +25,4 @@ def replace_title(save_to_path, title):
29
  # sometimes the output may include thebibliography and bibitem . remove all of it.
30
 
31
 
 
 
2
 
3
  def replace_title(save_to_path, title):
4
  # Define input and output file names
 
 
5
  input_file_name = os.path.join(save_to_path, "template.tex")
6
  output_file_name = os.path.join(save_to_path , "main.tex")
7
 
8
  # Open the input file and read its content
9
  with open(input_file_name, 'r') as infile:
10
  content = infile.read()
 
 
11
  content = content.replace(r"\title{TITLE} ", f"\\title{{{title}}} ")
12
 
13
  # Open the output file and write the modified content
 
25
  # sometimes the output may include thebibliography and bibitem . remove all of it.
26
 
27
 
28
+