valeriaWong committed on
Commit
0f28564
·
1 Parent(s): 3f635bc

feat(读文章写摘要):支持pdf文件批量阅读及总结 #101

Browse files
crazy_functions/批量总结PDF文档.py ADDED
@@ -0,0 +1,74 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from predict import predict_no_ui
2
+ from toolbox import CatchException, report_execption, write_results_to_file, predict_no_ui_but_counting_down
3
+ fast_debug = False
4
+
5
+
6
def 解析PDF(file_manifest, project_folder, top_p, temperature, chatbot, history, systemPromptTxt):
    """Summarize each PDF in ``file_manifest`` and then ask for an overall abstract.

    This is a generator. Every ``yield`` hands ``(chatbot, history, status)``
    back to the caller so the conversation view can be refreshed between steps.

    Args:
        file_manifest: Paths of the PDF files to read, processed in order.
        project_folder: Root folder; file names sent to the model are made
            relative to it.
        top_p: Forwarded unchanged to ``predict_no_ui_but_counting_down``.
        temperature: Forwarded unchanged to ``predict_no_ui_but_counting_down``.
        chatbot: List of ``(shown_prompt, reply)`` tuples; appended to and
            updated in place.
        history: Flat list of alternating prompts and replies; appended to in
            place and finally passed to ``write_results_to_file``.
        systemPromptTxt: Accepted but not used by this function.

    Yields:
        ``(chatbot, history, status)`` tuples, plus whatever
        ``predict_no_ui_but_counting_down`` yields while a request is pending.
    """
    import os
    import time

    import fitz  # PyMuPDF

    def read_page(page):
        # PyMuPDF renamed Page.getText to Page.get_text; prefer the new name
        # and fall back to the legacy one on older installations.
        reader = getattr(page, "get_text", None) or page.getText
        return reader()

    print('begin analysis on:', file_manifest)
    total = len(file_manifest)
    for index, fp in enumerate(file_manifest):
        with fitz.open(fp) as doc:
            file_content = "".join(read_page(page) for page in doc)
        print(file_content)

        # Only the very first request carries the instruction preamble.
        prefix = "接下来请你逐文件分析下面的论文文件,概括其内容" if index == 0 else ""
        i_say = prefix + f'请对下面的文章片段用中文做一个概述,文件名是{os.path.relpath(fp, project_folder)},文章内容是 ```{file_content}```'
        # The user sees a short 1-based progress label instead of the full text.
        i_say_show_user = prefix + f'[{index + 1}/{total}] 请对下面的文章片段做一个概述: {os.path.abspath(fp)}'
        chatbot.append((i_say_show_user, "[Local Message] waiting gpt response."))
        print('[1] yield chatbot, history')
        yield chatbot, history, '正常'

        if fast_debug:
            # No request is made in fast-debug mode, so there is no reply
            # (gpt_say) to record for this file.
            continue

        msg = '正常'
        # Each file is summarized on its own (empty history) so that earlier
        # documents do not inflate the request.
        gpt_say = yield from predict_no_ui_but_counting_down(
            i_say, i_say_show_user, chatbot, top_p, temperature, history=[])

        print('[2] end gpt req')
        chatbot[-1] = (i_say_show_user, gpt_say)
        history.append(i_say_show_user)
        history.append(gpt_say)
        print('[3] yield chatbot, history')
        yield chatbot, history, msg
        print('[4] next')
        time.sleep(2)  # presumably spaces out consecutive requests; confirm

    all_file = ', '.join(os.path.relpath(fp, project_folder) for fp in file_manifest)
    i_say = f'根据以上你自己的分析,对全文进行概括,用学术性语言写一段中文摘要,然后再写一段英文摘要(包括{all_file})。'
    chatbot.append((i_say, "[Local Message] waiting gpt response."))
    yield chatbot, history, '正常'

    if fast_debug:
        # Nothing was requested, so there is nothing to record or write out.
        return

    msg = '正常'
    # The final request carries the accumulated per-file summaries as history.
    gpt_say = yield from predict_no_ui_but_counting_down(
        i_say, i_say, chatbot, top_p, temperature, history=history)

    chatbot[-1] = (i_say, gpt_say)
    history.append(i_say)
    history.append(gpt_say)
    yield chatbot, history, msg
    res = write_results_to_file(history)
    chatbot.append(("完成了吗?", res))
    yield chatbot, history, msg
52
+
53
+
54
@CatchException
def 批量总结PDF文档(txt, top_p, temperature, chatbot, history, systemPromptTxt, WEB_PORT):
    """Collect every PDF under the folder given in ``txt`` and summarize them.

    This is a generator yielding ``(chatbot, history, status)`` tuples. It is
    wrapped with ``CatchException``, the same decorator applied to the sibling
    entry point ``读文章写摘要``.

    Args:
        txt: Path typed into the input box; must exist on the local file system.
        top_p: Forwarded unchanged to ``解析PDF``.
        temperature: Forwarded unchanged to ``解析PDF``.
        chatbot: Conversation list, updated in place.
        history: Ignored on entry; replaced by a fresh empty list.
        systemPromptTxt: Forwarded unchanged to ``解析PDF``.
        WEB_PORT: Accepted but not used by this function.
    """
    import glob
    import os

    history = []  # start from an empty history so the request does not overflow

    if not os.path.exists(txt):
        if txt == "":
            txt = '空空如也的输入栏'
        report_execption(chatbot, history, a=f"解析项目: {txt}", b=f"找不到本地项目或无权访问: {txt}")
        yield chatbot, history, '正常'
        return
    project_folder = txt

    # Only PDFs are collected: every entry is opened with the PDF reader in
    # 解析PDF, and the "nothing found" message below refers to .pdf files only.
    file_manifest = glob.glob(f'{project_folder}/**/*.pdf', recursive=True)
    if not file_manifest:
        report_execption(chatbot, history, a=f"解析项目: {txt}", b=f"找不到任何.pdf文件: {txt}")
        yield chatbot, history, '正常'
        return

    yield from 解析PDF(file_manifest, project_folder, top_p, temperature, chatbot, history, systemPromptTxt)
74
+
crazy_functions/读文章写摘要.py CHANGED
@@ -48,54 +48,8 @@ def 解析Paper(file_manifest, project_folder, top_p, temperature, chatbot, hist
48
  yield chatbot, history, msg
49
 
50
 
51
- def 解析PDF(file_manifest, project_folder, top_p, temperature, chatbot, history, systemPromptTxt):
52
- import time, glob, os, codecs, fitz
53
- print('begin analysis on:', file_manifest)
54
- for index, fp in enumerate(file_manifest):
55
- with fitz.open(fp) as doc:
56
- file_content = ""
57
- for page in doc:
58
- file_content += page.getText()
59
- print(file_content)
60
-
61
- prefix = "接下来请你逐文件分析下面的论文文件,概括其内容" if index==0 else ""
62
- i_say = prefix + f'请对下面的文章片段用中文做一个概述,文件名是{os.path.relpath(fp, project_folder)},文章内容是 ```{file_content}```'
63
- i_say_show_user = prefix + f'[{index}/{len(file_manifest)}] 请对下面的文章片段做一个概述: {os.path.abspath(fp)}'
64
- chatbot.append((i_say_show_user, "[Local Message] waiting gpt response."))
65
- print('[1] yield chatbot, history')
66
- yield chatbot, history, '正常'
67
-
68
- if not fast_debug:
69
- msg = '正常'
70
- # ** gpt request **
71
- gpt_say = yield from predict_no_ui_but_counting_down(i_say, i_say_show_user, chatbot, top_p, temperature, history=[]) # 带超时倒计时
72
-
73
- print('[2] end gpt req')
74
- chatbot[-1] = (i_say_show_user, gpt_say)
75
- history.append(i_say_show_user); history.append(gpt_say)
76
- print('[3] yield chatbot, history')
77
- yield chatbot, history, msg
78
- print('[4] next')
79
- if not fast_debug: time.sleep(2)
80
-
81
- all_file = ', '.join([os.path.relpath(fp, project_folder) for index, fp in enumerate(file_manifest)])
82
- i_say = f'根据以上你自己的分析,对全文进行概括,用学术性语言写一段中文摘要,然后再写一段英文摘要(包括{all_file})。'
83
- chatbot.append((i_say, "[Local Message] waiting gpt response."))
84
- yield chatbot, history, '正常'
85
-
86
- if not fast_debug:
87
- msg = '正常'
88
- # ** gpt request **
89
- gpt_say = yield from predict_no_ui_but_counting_down(i_say, i_say, chatbot, top_p, temperature, history=history) # 带超时倒计时
90
-
91
- chatbot[-1] = (i_say, gpt_say)
92
- history.append(i_say); history.append(gpt_say)
93
- yield chatbot, history, msg
94
- res = write_results_to_file(history)
95
- chatbot.append(("完成了吗?", res))
96
- yield chatbot, history, msg
97
-
98
 
 
99
  def 读文章写摘要(txt, top_p, temperature, chatbot, history, systemPromptTxt, WEB_PORT):
100
  history = [] # 清空历史,以免输入溢出
101
  import glob, os
@@ -106,15 +60,11 @@ def 读文章写摘要(txt, top_p, temperature, chatbot, history, systemPromptTx
106
  report_execption(chatbot, history, a = f"解析项目: {txt}", b = f"找不到本地项目或无权访问: {txt}")
107
  yield chatbot, history, '正常'
108
  return
109
- file_manifest = [f for f in glob.glob(f'{project_folder}/**/*.tex', recursive=True)] + \
110
- [f for f in glob.glob(f'{project_folder}/**/*.pdf', recursive=True)] # + \
111
  # [f for f in glob.glob(f'{project_folder}/**/*.cpp', recursive=True)] + \
112
  # [f for f in glob.glob(f'{project_folder}/**/*.c', recursive=True)]
113
  if len(file_manifest) == 0:
114
- report_execption(chatbot, history, a = f"解析项目: {txt}", b = f"找不到任何.tex或.pdf文件: {txt}")
115
  yield chatbot, history, '正常'
116
  return
117
- if '.pdf' in file_manifest[0]:
118
- yield from 解析PDF(file_manifest, project_folder, top_p, temperature, chatbot, history, systemPromptTxt)
119
- else:
120
- yield from 解析Paper(file_manifest, project_folder, top_p, temperature, chatbot, history, systemPromptTxt)
 
48
  yield chatbot, history, msg
49
 
50
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
51
 
52
+ @CatchException
53
  def 读文章写摘要(txt, top_p, temperature, chatbot, history, systemPromptTxt, WEB_PORT):
54
  history = [] # 清空历史,以免输入溢出
55
  import glob, os
 
60
  report_execption(chatbot, history, a = f"解析项目: {txt}", b = f"找不到本地项目或无权访问: {txt}")
61
  yield chatbot, history, '正常'
62
  return
63
+ file_manifest = [f for f in glob.glob(f'{project_folder}/**/*.tex', recursive=True)] # + \
 
64
  # [f for f in glob.glob(f'{project_folder}/**/*.cpp', recursive=True)] + \
65
  # [f for f in glob.glob(f'{project_folder}/**/*.c', recursive=True)]
66
  if len(file_manifest) == 0:
67
+ report_execption(chatbot, history, a = f"解析项目: {txt}", b = f"找不到任何.tex文件: {txt}")
68
  yield chatbot, history, '正常'
69
  return
70
+ yield from 解析Paper(file_manifest, project_folder, top_p, temperature, chatbot, history, systemPromptTxt)
 
 
 
functional_crazy.py CHANGED
@@ -1,6 +1,7 @@
1
 
2
  def get_crazy_functionals():
3
  from crazy_functions.读文章写摘要 import 读文章写摘要
 
4
  from crazy_functions.生成函数注释 import 批量生成函数注释
5
  from crazy_functions.解析项目源代码 import 解析项目本身
6
  from crazy_functions.解析项目源代码 import 解析一个Python项目
@@ -28,6 +29,10 @@ def get_crazy_functionals():
28
  "Color": "stop", # 按钮颜色
29
  "Function": 读文章写摘要
30
  },
 
 
 
 
31
  "[实验] 批量生成函数注释(配合input输入框)": {
32
  "Color": "stop", # 按钮颜色
33
  "Function": 批量生成函数注释
 
1
 
2
  def get_crazy_functionals():
3
  from crazy_functions.读文章写摘要 import 读文章写摘要
4
+ from crazy_functions.批量总结PDF文档 import 批量总结PDF文档
5
  from crazy_functions.生成函数注释 import 批量生成函数注释
6
  from crazy_functions.解析项目源代码 import 解析项目本身
7
  from crazy_functions.解析项目源代码 import 解析一个Python项目
 
29
  "Color": "stop", # 按钮颜色
30
  "Function": 读文章写摘要
31
  },
32
+ "[实验] 批量总结pdf文档并生成双语摘要(配合input输入框)": {
33
+ "Color": "stop", # 按钮颜色
34
+ "Function": 批量总结PDF文档
35
+ },
36
  "[实验] 批量生成函数注释(配合input输入框)": {
37
  "Color": "stop", # 按钮颜色
38
  "Function": 批量生成函数注释