shaocongma committed on
Commit
c304855
·
1 Parent(s): 365213e
app.py CHANGED
@@ -21,6 +21,9 @@ from utils.file_operations import hash_name
21
  # 1. Check if there are any duplicated citations
22
  # 2. Remove potential thebibliography and bibitem in .tex file
23
 
 
 
 
24
  openai_key = os.getenv("OPENAI_API_KEY")
25
  access_key_id = os.getenv('AWS_ACCESS_KEY_ID')
26
  secret_access_key = os.getenv('AWS_SECRET_ACCESS_KEY')
@@ -42,17 +45,19 @@ else:
42
  IS_OPENAI_API_KEY_AVAILABLE = False
43
 
44
 
45
- def clear_inputs(text1, text2):
46
  return "", ""
47
 
48
 
49
  def wrapped_generator(paper_title, paper_description, openai_api_key=None,
50
- template="ICLR2022", tldr=True, max_num_refs=50, sections=None, bib_refs=None, model="gpt-4",
51
  cache_mode=IS_CACHE_AVAILABLE):
52
  # if `cache_mode` is True, then follow the following steps:
53
  # check if "title"+"description" have been generated before
54
  # if so, download from the cloud storage, return it
55
  # if not, generate the result.
 
 
56
  if openai_api_key is not None:
57
  openai.api_key = openai_api_key
58
  openai.Model.list()
@@ -60,9 +65,8 @@ def wrapped_generator(paper_title, paper_description, openai_api_key=None,
60
  if cache_mode:
61
  from utils.storage import list_all_files, download_file, upload_file
62
  # check if "title"+"description" have been generated before
63
-
64
  input_dict = {"title": paper_title, "description": paper_description,
65
- "generator": "generate_draft"} # todo: modify here also
66
  file_name = hash_name(input_dict) + ".zip"
67
  file_list = list_all_files()
68
  # print(f"{file_name} will be generated. Check the file list {file_list}")
@@ -73,17 +77,17 @@ def wrapped_generator(paper_title, paper_description, openai_api_key=None,
73
  else:
74
  # generate the result.
75
  # output = fake_generate_backgrounds(title, description, openai_key)
76
- output =generate_draft(paper_title, paper_description, template=template,
77
- tldr=tldr, max_num_refs=max_num_refs,
78
- sections=sections, bib_refs=bib_refs, model=model)
79
  # output = generate_draft(paper_title, paper_description, template, "gpt-4")
80
  upload_file(output)
81
  return output
82
  else:
83
  # output = fake_generate_backgrounds(title, description, openai_key)
84
- output =generate_draft(paper_title, paper_description, template=template,
85
- tldr=tldr, max_num_refs=max_num_refs,
86
- sections=sections, bib_refs=bib_refs, model=model)
87
  return output
88
 
89
 
@@ -101,12 +105,12 @@ ACADEMIC_PAPER = """## 一键生成论文初稿
101
  3. 在右侧下载.zip格式的输出,在Overleaf上编译浏览.
102
  """
103
 
104
-
105
  with gr.Blocks(theme=theme) as demo:
106
  gr.Markdown('''
107
  # Auto-Draft: 文献整理辅助工具
108
 
109
- 本Demo提供对[Auto-Draft](https://github.com/CCCBora/auto-draft)的auto_draft功能的测试。通过输入想要生成的论文名称(比如Playing atari with deep reinforcement learning),即可由AI辅助生成论文模板.
 
110
 
111
  ***2023-05-03 Update***: 在公开版本中为大家提供了输入OpenAI API Key的地址, 如果有GPT-4的API KEY的话可以在这里体验!
112
 
@@ -127,7 +131,6 @@ with gr.Blocks(theme=theme) as demo:
127
  with gr.Tab("学术论文"):
128
  gr.Markdown(ACADEMIC_PAPER)
129
 
130
-
131
  title = gr.Textbox(value="Playing Atari with Deep Reinforcement Learning", lines=1, max_lines=1,
132
  label="Title", info="论文标题")
133
 
@@ -139,34 +142,37 @@ with gr.Blocks(theme=theme) as demo:
139
  with gr.Column():
140
  with gr.Row():
141
  template = gr.Dropdown(label="Template", choices=["ICLR2022"], value="ICLR2022",
142
- interactive=False,
143
- info="生成论文的参考模板. (暂不支持修改)")
144
- model_selection = gr.Dropdown(label="Model", choices=["gpt-4", "gpt-3.5-turbo"], value="gpt-4",
145
- interactive=True,
146
- info="生成论文用到的语言模型.")
 
147
  gr.Markdown('''
148
  上传.bib文件提供AI需要参考的文献.
149
  ''')
150
  bibtex_file = gr.File(label="Upload .bib file", file_types=["text"],
151
  interactive=True)
152
  gr.Examples(
153
- examples=["latex_templates/pre_refs.bib"],
154
  inputs=bibtex_file
155
  )
156
  with gr.Column():
157
  search_engine = gr.Dropdown(label="Search Engine",
158
  choices=["ArXiv", "Semantic Scholar", "Google Scholar", "None"],
159
- value= "Semantic Scholar",
160
  interactive=False,
161
  info="用于决定GPT-4用什么搜索引擎来搜索文献. (暂不支持修改)")
162
  tldr_checkbox = gr.Checkbox(value=True, label="TLDR;",
163
- info="选择此筐表示将使用Semantic Scholar的TLDR作为文献的总结.",
164
- interactive = True)
165
- sections = gr.CheckboxGroup(choices=["introduction", "related works", "backgrounds", "methodology", "experiments", "conclusion", "abstract"],
166
- type="value", label="生成章节", interactive = True,
167
- value=["introduction", "related works"])
 
 
168
  slider = gr.Slider(minimum=1, maximum=100, value=50, step=1,
169
- interactive = True, label="最大参考文献数目")
170
 
171
  with gr.Row():
172
  clear_button_pp = gr.Button("Clear")
@@ -205,8 +211,11 @@ with gr.Blocks(theme=theme) as demo:
205
  file_output = gr.File(label="Output")
206
 
207
  clear_button_pp.click(fn=clear_inputs, inputs=[title, description_pp], outputs=[title, description_pp])
208
- # submit_button_pp.click(fn=wrapped_generator, inputs=[title, description_pp, key, template, tldr, slider, sections, bibtex_file], outputs=file_output)
209
- submit_button_pp.click(fn=wrapped_generator, inputs=[title, description_pp, key, template, tldr_checkbox, slider, sections, bibtex_file, model_selection ], outputs=file_output)
 
 
 
210
 
211
  demo.queue(concurrency_count=1, max_size=5, api_open=False)
212
  demo.launch()
 
21
  # 1. Check if there are any duplicated citations
22
  # 2. Remove potential thebibliography and bibitem in .tex file
23
 
24
+ #######################################################################################################################
25
+ # Check if openai and cloud storage available
26
+ #######################################################################################################################
27
  openai_key = os.getenv("OPENAI_API_KEY")
28
  access_key_id = os.getenv('AWS_ACCESS_KEY_ID')
29
  secret_access_key = os.getenv('AWS_SECRET_ACCESS_KEY')
 
45
  IS_OPENAI_API_KEY_AVAILABLE = False
46
 
47
 
48
+ def clear_inputs(*args):
49
  return "", ""
50
 
51
 
52
  def wrapped_generator(paper_title, paper_description, openai_api_key=None,
53
+ paper_template="ICLR2022", tldr=True, max_num_refs=50, selected_sections=None, bib_refs=None, model="gpt-4",
54
  cache_mode=IS_CACHE_AVAILABLE):
55
  # if `cache_mode` is True, then follow the following steps:
56
  # check if "title"+"description" have been generated before
57
  # if so, download from the cloud storage, return it
58
  # if not, generate the result.
59
+ if bib_refs is not None:
60
+ bib_refs = bib_refs.name
61
  if openai_api_key is not None:
62
  openai.api_key = openai_api_key
63
  openai.Model.list()
 
65
  if cache_mode:
66
  from utils.storage import list_all_files, download_file, upload_file
67
  # check if "title"+"description" have been generated before
 
68
  input_dict = {"title": paper_title, "description": paper_description,
69
+ "generator": "generate_draft"}
70
  file_name = hash_name(input_dict) + ".zip"
71
  file_list = list_all_files()
72
  # print(f"{file_name} will be generated. Check the file list {file_list}")
 
77
  else:
78
  # generate the result.
79
  # output = fake_generate_backgrounds(title, description, openai_key)
80
+ output = generate_draft(paper_title, paper_description, template=paper_template,
81
+ tldr=tldr, max_num_refs=max_num_refs,
82
+ sections=selected_sections, bib_refs=bib_refs, model=model)
83
  # output = generate_draft(paper_title, paper_description, template, "gpt-4")
84
  upload_file(output)
85
  return output
86
  else:
87
  # output = fake_generate_backgrounds(title, description, openai_key)
88
+ output = generate_draft(paper_title, paper_description, template=paper_template,
89
+ tldr=tldr, max_num_refs=max_num_refs,
90
+ sections=selected_sections, bib_refs=bib_refs, model=model)
91
  return output
92
 
93
 
 
105
  3. 在右侧下载.zip格式的输出,在Overleaf上编译浏览.
106
  """
107
 
 
108
  with gr.Blocks(theme=theme) as demo:
109
  gr.Markdown('''
110
  # Auto-Draft: 文献整理辅助工具
111
 
112
+ 本Demo提供对[Auto-Draft](https://github.com/CCCBora/auto-draft)的auto_draft功能的测试.
113
+ 通过输入想要生成的论文名称(比如Playing atari with deep reinforcement learning),即可由AI辅助生成论文模板.
114
 
115
  ***2023-05-03 Update***: 在公开版本中为大家提供了输入OpenAI API Key的地址, 如果有GPT-4的API KEY的话可以在这里体验!
116
 
 
131
  with gr.Tab("学术论文"):
132
  gr.Markdown(ACADEMIC_PAPER)
133
 
 
134
  title = gr.Textbox(value="Playing Atari with Deep Reinforcement Learning", lines=1, max_lines=1,
135
  label="Title", info="论文标题")
136
 
 
142
  with gr.Column():
143
  with gr.Row():
144
  template = gr.Dropdown(label="Template", choices=["ICLR2022"], value="ICLR2022",
145
+ interactive=False,
146
+ info="生成论文的参考模板. (暂不支持修改)")
147
+ model_selection = gr.Dropdown(label="Model", choices=["gpt-4", "gpt-3.5-turbo"],
148
+ value="gpt-4",
149
+ interactive=True,
150
+ info="生成论文用到的语言模型.")
151
  gr.Markdown('''
152
  上传.bib文件提供AI需要参考的文献.
153
  ''')
154
  bibtex_file = gr.File(label="Upload .bib file", file_types=["text"],
155
  interactive=True)
156
  gr.Examples(
157
+ examples=["latex_templates/example_references.bib"],
158
  inputs=bibtex_file
159
  )
160
  with gr.Column():
161
  search_engine = gr.Dropdown(label="Search Engine",
162
  choices=["ArXiv", "Semantic Scholar", "Google Scholar", "None"],
163
+ value="Semantic Scholar",
164
  interactive=False,
165
  info="用于决定GPT-4用什么搜索引擎来搜索文献. (暂不支持修改)")
166
  tldr_checkbox = gr.Checkbox(value=True, label="TLDR;",
167
+ info="选择此筐表示将使用Semantic Scholar的TLDR作为文献的总结.",
168
+ interactive=True)
169
+ sections = gr.CheckboxGroup(
170
+ choices=["introduction", "related works", "backgrounds", "methodology", "experiments",
171
+ "conclusion", "abstract"],
172
+ type="value", label="生成章节", interactive=True,
173
+ value=["introduction", "related works"])
174
  slider = gr.Slider(minimum=1, maximum=100, value=50, step=1,
175
+ interactive=True, label="最大参考文献数目")
176
 
177
  with gr.Row():
178
  clear_button_pp = gr.Button("Clear")
 
211
  file_output = gr.File(label="Output")
212
 
213
  clear_button_pp.click(fn=clear_inputs, inputs=[title, description_pp], outputs=[title, description_pp])
214
+ # submit_button_pp.click(fn=wrapped_generator,
215
+ # inputs=[title, description_pp, key, template, tldr, slider, sections, bibtex_file], outputs=file_output)
216
+ submit_button_pp.click(fn=wrapped_generator,
217
+ inputs=[title, description_pp, key, template, tldr_checkbox, slider, sections, bibtex_file,
218
+ model_selection], outputs=file_output)
219
 
220
  demo.queue(concurrency_count=1, max_size=5, api_open=False)
221
  demo.launch()
auto_backgrounds.py CHANGED
@@ -107,7 +107,7 @@ def generate_backgrounds(title, description="", template="ICLR2022", model="gpt-
107
 
108
 
109
  def generate_draft(title, description="", template="ICLR2022",
110
- model="gpt-4", tldr=True, max_kw_refs=10, max_num_refs=30, sections=None, bib_refs=None):
111
  # pre-processing `sections` parameter;
112
  if sections is None:
113
  sections = ["introduction", "related works", "backgrounds", "methodology", "experiments", "conclusion", "abstract"]
 
107
 
108
 
109
  def generate_draft(title, description="", template="ICLR2022",
110
+ tldr=True, max_kw_refs=10, max_num_refs=30, sections=None, bib_refs=None, model="gpt-4"):
111
  # pre-processing `sections` parameter;
112
  if sections is None:
113
  sections = ["introduction", "related works", "backgrounds", "methodology", "experiments", "conclusion", "abstract"]
latex_templates/{pre_refs.bib → example_references.bib} RENAMED
File without changes
requirements.txt CHANGED
Binary files a/requirements.txt and b/requirements.txt differ
 
utils/references.py CHANGED
@@ -21,6 +21,7 @@
21
  import requests
22
  import re
23
  import bibtexparser
 
24
  from scholarly import scholarly
25
  from scholarly import ProxyGenerator
26
 
@@ -239,6 +240,8 @@ class References:
239
  # use embeddings to evaluate; keep top k relevant references in papers
240
  # send (title, .bib file) to evaluate embeddings; receive truncated papers
241
  papers = self._get_papers(keyword = "_all")
 
 
242
 
243
  # clear the bibtex file
244
  with open(path_to_bibtex, "w", encoding="utf-8") as file:
 
21
  import requests
22
  import re
23
  import bibtexparser
24
+ import random
25
  from scholarly import scholarly
26
  from scholarly import ProxyGenerator
27
 
 
240
  # use embeddings to evaluate; keep top k relevant references in papers
241
  # send (title, .bib file) to evaluate embeddings; receive truncated papers
242
  papers = self._get_papers(keyword = "_all")
243
+ random.shuffle(papers)
244
+ papers = papers[:max_num_refs]
245
 
246
  # clear the bibtex file
247
  with open(path_to_bibtex, "w", encoding="utf-8") as file:
utils/tex_processing.py CHANGED
@@ -19,10 +19,11 @@ def replace_title(save_to_path, title):
19
 
20
  # check if citations are in bibtex.
21
 
22
-
23
  # replace citations
24
 
25
  # sometimes the output may include thebibliography and bibitem . remove all of it.
26
 
 
 
27
 
28
 
 
19
 
20
  # check if citations are in bibtex.
21
 
 
22
  # replace citations
23
 
24
  # sometimes the output may include thebibliography and bibitem . remove all of it.
25
 
26
+ # return all .png files and replace them with placeholders.
27
+
28
 
29