qingxu99 committed on
Commit
35c3c0f
·
1 Parent(s): 42eef1b

新增latex文章校对纠错功能

Browse files
crazy_functional.py CHANGED
@@ -131,6 +131,7 @@ def get_crazy_functions():
131
  from crazy_functions.谷歌检索小助手 import 谷歌检索小助手
132
  from crazy_functions.理解PDF文档内容 import 理解PDF文档内容标准文件输入
133
  from crazy_functions.Latex全文润色 import Latex中文润色
 
134
  from crazy_functions.Latex全文翻译 import Latex中译英
135
  from crazy_functions.Latex全文翻译 import Latex英译中
136
  from crazy_functions.批量Markdown翻译 import Markdown中译英
@@ -168,12 +169,18 @@ def get_crazy_functions():
168
  "AsButton": False, # 加入下拉菜单中
169
  "Function": HotReload(理解PDF文档内容标准文件输入)
170
  },
171
- "[测试功能] 英文Latex项目全文润色(输入路径或上传压缩包)": {
172
  # HotReload 的意思是热更新,修改函数插件代码后,不需要重启程序,代码直接生效
173
  "Color": "stop",
174
  "AsButton": False, # 加入下拉菜单中
175
  "Function": HotReload(Latex英文润色)
176
  },
 
 
 
 
 
 
177
  "[测试功能] 中文Latex项目全文润色(输入路径或上传压缩包)": {
178
  # HotReload 的意思是热更新,修改函数插件代码后,不需要重启程序,代码直接生效
179
  "Color": "stop",
 
131
  from crazy_functions.谷歌检索小助手 import 谷歌检索小助手
132
  from crazy_functions.理解PDF文档内容 import 理解PDF文档内容标准文件输入
133
  from crazy_functions.Latex全文润色 import Latex中文润色
134
+ from crazy_functions.Latex全文润色 import Latex英文纠错
135
  from crazy_functions.Latex全文翻译 import Latex中译英
136
  from crazy_functions.Latex全文翻译 import Latex英译中
137
  from crazy_functions.批量Markdown翻译 import Markdown中译英
 
169
  "AsButton": False, # 加入下拉菜单中
170
  "Function": HotReload(理解PDF文档内容标准文件输入)
171
  },
172
+ "英文Latex项目全文润色(输入路径或上传压缩包)": {
173
  # HotReload 的意思是热更新,修改函数插件代码后,不需要重启程序,代码直接生效
174
  "Color": "stop",
175
  "AsButton": False, # 加入下拉菜单中
176
  "Function": HotReload(Latex英文润色)
177
  },
178
+ "英文Latex项目全文纠错(输入路径或上传压缩包)": {
179
+ # HotReload 的意思是热更新,修改函数插件代码后,不需要重启程序,代码直接生效
180
+ "Color": "stop",
181
+ "AsButton": False, # 加入下拉菜单中
182
+ "Function": HotReload(Latex英文纠错)
183
+ },
184
  "[测试功能] 中文Latex项目全文润色(输入路径或上传压缩包)": {
185
  # HotReload 的意思是热更新,修改函数插件代码后,不需要重启程序,代码直接生效
186
  "Color": "stop",
crazy_functions/Latex全文润色.py CHANGED
@@ -1,6 +1,6 @@
1
- from toolbox import update_ui
2
- from toolbox import CatchException, report_execption, write_results_to_file
3
- fast_debug = False
4
 
5
  class PaperFileGroup():
6
  def __init__(self):
@@ -34,8 +34,27 @@ class PaperFileGroup():
34
  self.sp_file_tag.append(self.file_paths[index] + f".part-{j}.tex")
35
 
36
  print('Segmentation: done')
37
-
38
- def 多文件润色(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, language='en'):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
39
  import time, os, re
40
  from .crazy_utils import request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency
41
 
@@ -58,28 +77,27 @@ def 多文件润色(file_manifest, project_folder, llm_kwargs, plugin_kwargs, ch
58
  pfg.run_file_split(max_token_limit=1024)
59
  n_split = len(pfg.sp_file_contents)
60
 
61
- # <-------- 抽取摘要 ---------->
62
- # if language == 'en':
63
- # abs_extract_inputs = f"Please write an abstract for this paper"
64
-
65
- # # 单线,获取文章meta信息
66
- # paper_meta_info = yield from request_gpt_model_in_new_thread_with_ui_alive(
67
- # inputs=abs_extract_inputs,
68
- # inputs_show_user=f"正在抽取摘要信息。",
69
- # llm_kwargs=llm_kwargs,
70
- # chatbot=chatbot, history=[],
71
- # sys_prompt="Your job is to collect information from materials。",
72
- # )
73
 
74
  # <-------- 多线程润色开始 ---------->
75
  if language == 'en':
76
- inputs_array = ["Below is a section from an academic paper, polish this section to meet the academic standard, improve the grammar, clarity and overall readability, do not modify any latex command such as \section, \cite and equations:" +
 
 
 
 
 
 
 
77
  f"\n\n{frag}" for frag in pfg.sp_file_contents]
78
  inputs_show_user_array = [f"Polish {f}" for f in pfg.sp_file_tag]
79
  sys_prompt_array = ["You are a professional academic paper writer." for _ in range(n_split)]
80
  elif language == 'zh':
81
- inputs_array = [f"以下是一篇学术论文中的一段内容,请将此部分润色以满足学术标准,提高语法、清晰度和整体可读性,不要修改任何LaTeX命令,例如\section,\cite和方程式:" +
82
- f"\n\n{frag}" for frag in pfg.sp_file_contents]
 
 
 
 
83
  inputs_show_user_array = [f"润色 {f}" for f in pfg.sp_file_tag]
84
  sys_prompt_array=["你是一位专业的中文学术论文作家。" for _ in range(n_split)]
85
 
@@ -95,6 +113,18 @@ def 多文件润色(file_manifest, project_folder, llm_kwargs, plugin_kwargs, ch
95
  scroller_max_len = 80
96
  )
97
 
 
 
 
 
 
 
 
 
 
 
 
 
98
  # <-------- 整理结果,退出 ---------->
99
  create_report_file_name = time.strftime("%Y-%m-%d-%H-%M-%S", time.localtime()) + f"-chatgpt.polish.md"
100
  res = write_results_to_file(gpt_response_collection, file_name=create_report_file_name)
@@ -172,4 +202,42 @@ def Latex中文润色(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_p
172
  report_execption(chatbot, history, a = f"解析项目: {txt}", b = f"找不到任何.tex文件: {txt}")
173
  yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
174
  return
175
- yield from 多文件润色(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, language='zh')
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from toolbox import update_ui, trimmed_format_exc
2
+ from toolbox import CatchException, report_execption, write_results_to_file, zip_folder
3
+
4
 
5
  class PaperFileGroup():
6
  def __init__(self):
 
34
  self.sp_file_tag.append(self.file_paths[index] + f".part-{j}.tex")
35
 
36
  print('Segmentation: done')
37
def merge_result(self):
    """Reassemble per-fragment results into one string per source file.

    Reads ``self.sp_file_result`` (one result string per fragment) and
    ``self.sp_file_index`` (for each fragment, the position in
    ``self.file_paths`` of the file it belongs to), and stores the
    concatenation of every file's fragments, in fragment order, in
    ``self.file_result``. Files that received no fragment get an empty
    string. Returns nothing.
    """
    # Gather the fragments of each file first and join once at the end,
    # rather than growing every string with repeated ``+=``.
    buckets = [[] for _ in self.file_paths]
    for fragment, file_idx in zip(self.sp_file_result, self.sp_file_index):
        buckets[file_idx].append(fragment)
    self.file_result = ["".join(parts) for parts in buckets]
41
+
42
def write_result(self):
    """Write every merged result next to its source file.

    For each pair taken from ``self.file_paths`` and ``self.file_result``
    the text is written, UTF-8 encoded, to ``<path>.polish.tex``.

    Returns:
        The list of output paths, in the same order as ``self.file_paths``.
    """
    written = []
    for src_path, content in zip(self.file_paths, self.file_result):
        target = src_path + '.polish.tex'
        with open(target, 'w', encoding='utf8') as out:
            # The path is recorded as soon as the file has been opened.
            written.append(target)
            out.write(content)
    return written
49
+
50
def zip_result(self):
    """Archive the project folder into ``./gpt_log/<timestamp>-polished.zip``.

    The folder handed to ``zip_folder`` is the deepest directory that
    contains every path in ``self.file_paths``. Using only the directory
    of the first path would leave out files that live in sibling or
    parent directories. Does nothing when ``self.file_paths`` is empty.
    """
    import os, time
    if not self.file_paths:
        # Nothing was processed, so there is nothing to archive.
        return
    # Common ancestor of all source directories; equals the first file's
    # directory when every file sits in (or below) that directory.
    folder = os.path.commonpath([os.path.dirname(p) for p in self.file_paths])
    t = time.strftime("%Y-%m-%d-%H-%M-%S", time.localtime())
    zip_folder(folder, './gpt_log/', f'{t}-polished.zip')
55
+
56
+
57
+ def 多文件润色(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, language='en', mode='polish'):
58
  import time, os, re
59
  from .crazy_utils import request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency
60
 
 
77
  pfg.run_file_split(max_token_limit=1024)
78
  n_split = len(pfg.sp_file_contents)
79
 
 
 
 
 
 
 
 
 
 
 
 
 
80
 
81
  # <-------- 多线程润色开始 ---------->
82
  if language == 'en':
83
+ if mode == 'polish':
84
+ inputs_array = ["Below is a section from an academic paper, polish this section to meet the academic standard, " +
85
+ "improve the grammar, clarity and overall readability, do not modify any latex command such as \section, \cite and equations:" +
86
+ f"\n\n{frag}" for frag in pfg.sp_file_contents]
87
+ else:
88
+ inputs_array = [r"Below is a section from an academic paper, proofread this section." +
89
+ r"Do not modify any latex command such as \section, \cite, \begin, \item and equations. " +
90
+ r"Answer me only with the revised text:" +
91
  f"\n\n{frag}" for frag in pfg.sp_file_contents]
92
  inputs_show_user_array = [f"Polish {f}" for f in pfg.sp_file_tag]
93
  sys_prompt_array = ["You are a professional academic paper writer." for _ in range(n_split)]
94
  elif language == 'zh':
95
+ if mode == 'polish':
96
+ inputs_array = [f"以下是一篇学术论文中的一段内容,请将此部分润色以满足学术标准,提高语法、清晰度和整体可读性,不要修改任何LaTeX命令,例如\section,\cite和方程式:" +
97
+ f"\n\n{frag}" for frag in pfg.sp_file_contents]
98
+ else:
99
+ inputs_array = [f"以下是一篇学术论文中的一段内容,请对这部分内容进行语法矫正。不要修改任何LaTeX命令,例如\section,\cite和方程式:" +
100
+ f"\n\n{frag}" for frag in pfg.sp_file_contents]
101
  inputs_show_user_array = [f"润色 {f}" for f in pfg.sp_file_tag]
102
  sys_prompt_array=["你是一位专业的中文学术论文作家。" for _ in range(n_split)]
103
 
 
113
  scroller_max_len = 80
114
  )
115
 
116
+ pfg.sp_file_result = []
117
+ for i_say, gpt_say in zip(gpt_response_collection[0::2], gpt_response_collection[1::2]):
118
+ pfg.sp_file_result.append(gpt_say)
119
+
120
+ # <-------- 文本碎片重组为完整的tex文件,整理结果为压缩包 ---------->
121
+ try:
122
+ pfg.merge_result()
123
+ pfg.write_result()
124
+ pfg.zip_result()
125
+ except:
126
+ print(trimmed_format_exc())
127
+
128
  # <-------- 整理结果,退出 ---------->
129
  create_report_file_name = time.strftime("%Y-%m-%d-%H-%M-%S", time.localtime()) + f"-chatgpt.polish.md"
130
  res = write_results_to_file(gpt_response_collection, file_name=create_report_file_name)
 
202
  report_execption(chatbot, history, a = f"解析项目: {txt}", b = f"找不到任何.tex文件: {txt}")
203
  yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
204
  return
205
+ yield from 多文件润色(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, language='zh')
206
+
207
+
208
+
209
+
210
@CatchException
def Latex英文纠错(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
    """Plugin entry point: proofread all .tex files of an English LaTeX project.

    ``txt`` is used as the project folder. Every ``*.tex`` file found
    recursively below it is handed to ``多文件润色`` with
    ``language='en'`` and ``mode='proofread'``. The function is a
    generator: it yields whatever ``update_ui`` and ``多文件润色`` yield.
    It stops early, after reporting to the chat, when ``tiktoken``
    cannot be imported, when ``txt`` does not exist, or when no .tex
    file is found. ``web_port`` is accepted but not used here.
    """
    import glob, os

    # Announce what the plugin does and who contributed it.
    chatbot.append(["函数插件功能?", "对整个Latex项目进行纠错。函数插件贡献者: Binary-Husky"])
    yield from update_ui(chatbot=chatbot, history=history)  # refresh the UI

    # Probe the dependency; on failure tell the user how to install it.
    try:
        import tiktoken
    except:
        report_execption(
            chatbot,
            history,
            a=f"解析项目: {txt}",
            b=f"导入软件依赖失败。使用该模块需要额外依赖,安装方法```pip install --upgrade tiktoken```。",
        )
        yield from update_ui(chatbot=chatbot, history=history)  # refresh the UI
        return

    # Drop the earlier conversation so the model input cannot overflow.
    history = []

    if not os.path.exists(txt):
        if txt == "":
            txt = '空空如也的输入栏'
        report_execption(chatbot, history, a=f"解析项目: {txt}", b=f"找不到本地项目或无权访问: {txt}")
        yield from update_ui(chatbot=chatbot, history=history)  # refresh the UI
        return
    project_folder = txt

    file_manifest = list(glob.glob(f'{project_folder}/**/*.tex', recursive=True))
    if not file_manifest:
        report_execption(chatbot, history, a=f"解析项目: {txt}", b=f"找不到任何.tex文件: {txt}")
        yield from update_ui(chatbot=chatbot, history=history)  # refresh the UI
        return

    yield from 多文件润色(
        file_manifest, project_folder, llm_kwargs, plugin_kwargs,
        chatbot, history, system_prompt,
        language='en', mode='proofread',
    )
242
+
243
+
toolbox.py CHANGED
@@ -465,7 +465,7 @@ def on_report_generated(files, chatbot):
465
  if len(report_files) == 0:
466
  return None, chatbot
467
  # files.extend(report_files)
468
- chatbot.append(['汇总报告如何远程获取?', '汇总报告已经添加到右侧“文件上传区”(可能处于折叠状态),请查收。'])
469
  return report_files, chatbot
470
 
471
  def is_openai_api_key(key):
@@ -721,3 +721,43 @@ def clip_history(inputs, history, tokenizer, max_token_limit):
721
 
722
  history = everything[1:]
723
  return history
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
465
  if len(report_files) == 0:
466
  return None, chatbot
467
  # files.extend(report_files)
468
+ chatbot.append(['报告如何远程获取?', '报告已经添加到右侧“文件上传区”(可能处于折叠状态),请查收。'])
469
  return report_files, chatbot
470
 
471
  def is_openai_api_key(key):
 
721
 
722
  history = everything[1:]
723
  return history
724
+
725
+ """
726
+ ========================================================================
727
+ 第三部分
728
+ 其他小工具:
729
+ - zip_folder: 把某个路径下所有文件压缩,然后转移到指定的另一个路径中(gpt写的)
730
+ ========================================================================
731
+ """
732
+
733
def zip_folder(source_folder, dest_folder, zip_name):
    """Compress every file below ``source_folder`` into a zip in ``dest_folder``.

    Args:
        source_folder: Directory whose files are archived recursively.
            Entries are stored under their path relative to this directory.
        dest_folder: Directory that receives the archive. It is created
            when it does not exist yet. An empty string means the current
            working directory.
        zip_name: File name of the archive.

    Returns:
        None. If ``source_folder`` does not exist, a message is printed
        and nothing is created.
    """
    import zipfile
    import os

    # Make sure the source folder exists
    if not os.path.exists(source_folder):
        print(f"{source_folder} does not exist")
        return

    # Create the destination on demand instead of silently producing nothing
    if dest_folder and not os.path.isdir(dest_folder):
        os.makedirs(dest_folder, exist_ok=True)

    zip_file = os.path.join(dest_folder, zip_name)
    zip_abs = os.path.abspath(zip_file)

    with zipfile.ZipFile(zip_file, 'w', zipfile.ZIP_DEFLATED) as zipf:
        # Walk through the source folder and add files to the zip file
        for foldername, _subfolders, filenames in os.walk(source_folder):
            for filename in filenames:
                filepath = os.path.join(foldername, filename)
                # When the archive lives inside the source tree, the walk
                # finds the half-written archive; never add it to itself.
                if os.path.abspath(filepath) == zip_abs:
                    continue
                zipf.write(filepath, arcname=os.path.relpath(filepath, source_folder))

    # Move the archive directly into dest_folder if zip_name placed it in a
    # sub-directory. Paths are compared in normalized form so a trailing
    # slash on dest_folder does not cause a rename onto itself.
    final_path = os.path.join(dest_folder, os.path.basename(zip_file))
    if os.path.abspath(final_path) != zip_abs:
        os.rename(zip_file, final_path)
        zip_file = final_path

    print(f"Zip file created at {zip_file}")
763
+