Luke committed on
Commit
633cf99
·
1 Parent(s): 44eb855

no message

Browse files
Files changed (2) hide show
  1. Preprocess/preprocessImg.py +19 -1
  2. app.py +72 -44
Preprocess/preprocessImg.py CHANGED
@@ -3,6 +3,24 @@ import numpy as np
3
  from PIL import Image, ImageEnhance
4
 
5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6
  # 方案一
7
  def preprocess_image001(image):
8
  # 將影像轉換為 NumPy 數組
@@ -18,7 +36,7 @@ def preprocess_image001(image):
18
  denoised = cv2.fastNlMeansDenoising(binary, None, 30, 7, 21)
19
  return Image.fromarray(denoised)
20
 
21
-
22
  def preprocess_image002(image):
23
  # 將 PIL Image 轉換為 numpy array
24
  image_np = np.array(image)
 
3
  from PIL import Image, ImageEnhance
4
 
5
 
6
+ def PreprocessImg(image):
7
+ if image is None:
8
+ raise ValueError("尚未上傳圖片!")
9
+
10
+ # 方案一
11
+ pre_img_001 = preprocess_image001(image)
12
+ # 方案二
13
+ pre_img_002 = preprocess_image002(image)
14
+ # 方案三
15
+ pre_img_003 = preprocess_image003(image)
16
+ # 方案四
17
+ pre_img_004 = preprocess_image004(image)
18
+ # 方案五
19
+ pre_img_005 = preprocess_image005(image)
20
+
21
+ return pre_img_001, pre_img_002, pre_img_003, pre_img_004, pre_img_005
22
+
23
+
24
  # 方案一
25
  def preprocess_image001(image):
26
  # 將影像轉換為 NumPy 數組
 
36
  denoised = cv2.fastNlMeansDenoising(binary, None, 30, 7, 21)
37
  return Image.fromarray(denoised)
38
 
39
+ # 方案二
40
  def preprocess_image002(image):
41
  # 將 PIL Image 轉換為 numpy array
42
  image_np = np.array(image)
app.py CHANGED
@@ -2,57 +2,70 @@ import os
2
  import gradio as gr
3
  from Plan.AiLLM import llm_recognition
4
  from Plan.pytesseractOCR import ocr_recognition
5
- from Preprocess.preprocessImg import (
6
- preprocess_image001, preprocess_image002, preprocess_image003,
7
- preprocess_image004, preprocess_image005
8
- )
9
 
10
  # 取得所有語言清單
11
  languages = os.popen('tesseract --list-langs').read().split('\n')[1:-1]
12
 
13
 
14
- def preprocess_and_ocr(image, valid_type, language):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
15
  # 方案一
16
- pre_img_001 = preprocess_image001(image)
17
- ocr_result_001 = ocr_recognition(pre_img_001, valid_type, language)
18
  # 方案二
19
- pre_img_002 = preprocess_image002(image)
20
- ocr_result_002 = ocr_recognition(pre_img_002, valid_type, language)
21
  # 方案三
22
- pre_img_003 = preprocess_image003(image)
23
- ocr_result_003 = ocr_recognition(pre_img_003, valid_type, language)
24
  # 方案四
25
- pre_img_004 = preprocess_image004(image)
26
- ocr_result_004 = ocr_recognition(pre_img_004, valid_type, language)
27
  # 方案五
28
- pre_img_005 = preprocess_image005(image)
29
- ocr_result_005 = ocr_recognition(pre_img_005, valid_type, language)
 
30
 
31
- return (pre_img_001, pre_img_002, pre_img_003, pre_img_004, pre_img_005,
32
- ocr_result_001, ocr_result_002, ocr_result_003, ocr_result_004, ocr_result_005)
33
 
 
 
 
 
 
34
 
35
- def preprocess_and_llm(image, valid_type, language):
36
  # 方案一
37
- pre_img_001 = preprocess_image001(image)
38
- llm_result_001 = llm_recognition(pre_img_001, valid_type, language)
39
  # 方案二
40
- pre_img_002 = preprocess_image002(image)
41
- llm_result_002 = llm_recognition(pre_img_002, valid_type, language)
42
  # 方案三
43
- pre_img_003 = preprocess_image003(image)
44
- llm_result_003 = llm_recognition(pre_img_003, valid_type, language)
45
  # 方案四
46
- pre_img_004 = preprocess_image004(image)
47
- llm_result_004 = llm_recognition(pre_img_004, valid_type, language)
48
  # 方案五
49
- pre_img_005 = preprocess_image005(image)
50
- llm_result_005 = llm_recognition(pre_img_005, valid_type, language)
51
 
52
- return (pre_img_001, pre_img_002, pre_img_003, pre_img_004, pre_img_005,
53
- llm_result_001, llm_result_002, llm_result_003, llm_result_004, llm_result_005)
54
 
55
 
 
56
  with gr.Blocks() as demo:
57
  with gr.Row():
58
  image_input = gr.Image(type="pil", label="上傳圖片")
@@ -60,8 +73,17 @@ with gr.Blocks() as demo:
60
  language_dropdown = gr.Dropdown(choices=languages, value="chi_tra", label="語言")
61
 
62
  with gr.Row():
63
- ocr_button = gr.Button("使用 OCR")
64
- llm_button = gr.Button("使用 AI LLM")
 
 
 
 
 
 
 
 
 
65
 
66
  with gr.Row():
67
  preprocess_output_001 = gr.Image(type="pil", label="預處理後的圖片-方案一")
@@ -85,17 +107,23 @@ with gr.Blocks() as demo:
85
  ocr_output_005 = gr.JSON(label="OCR-005-解析結果")
86
  llm_output_005 = gr.JSON(label="AiLLM-005-解析結果")
87
 
88
- ocr_button.click(preprocess_and_ocr, inputs=[image_input, validation_type, language_dropdown],
89
- outputs=[
90
- preprocess_output_001, preprocess_output_002, preprocess_output_003, preprocess_output_004,
91
- preprocess_output_005,
92
- ocr_output_001, ocr_output_002, ocr_output_003, ocr_output_004, ocr_output_005
93
- ])
94
- llm_button.click(preprocess_and_llm, inputs=[image_input, validation_type, language_dropdown],
95
- outputs=[
96
- preprocess_output_001, preprocess_output_002, preprocess_output_003, preprocess_output_004,
97
- preprocess_output_005,
98
- llm_output_001, llm_output_002, llm_output_003, llm_output_004, llm_output_005
99
- ])
 
 
 
 
 
 
100
 
101
  demo.launch(share=False)
 
2
  import gradio as gr
3
  from Plan.AiLLM import llm_recognition
4
  from Plan.pytesseractOCR import ocr_recognition
5
+ from Preprocess.preprocessImg import PreprocessImg
 
 
 
6
 
7
  # 取得所有語言清單
8
  languages = os.popen('tesseract --list-langs').read().split('\n')[1:-1]
9
 
10
 
11
+ # 預處理圖片
12
+ def preprocess_image(image):
13
+ if image is None:
14
+ gr.Warning("尚未上傳圖片!")
15
+ raise ValueError("尚未上傳圖片!")
16
+ preprocessed_images = PreprocessImg(image)
17
+ return (
18
+ preprocessed_images,
19
+ True,
20
+ preprocessed_images[0],
21
+ preprocessed_images[1],
22
+ preprocessed_images[2],
23
+ preprocessed_images[3],
24
+ preprocessed_images[4]
25
+ )
26
+
27
+
28
+ # pytesseract OCR
29
+ def Basic_ocr(valid_type, language, preprocessed_images, finish_pre_img):
30
+ if not finish_pre_img:
31
+ gr.Warning("請先完成圖像預處理!")
32
+ raise ValueError("請先完成圖像預處理!")
33
+
34
  # 方案一
35
+ ocr_result_001 = ocr_recognition(preprocessed_images[0], valid_type, language)
 
36
  # 方案二
37
+ ocr_result_002 = ocr_recognition(preprocessed_images[1], valid_type, language)
 
38
  # 方案三
39
+ ocr_result_003 = ocr_recognition(preprocessed_images[2], valid_type, language)
 
40
  # 方案四
41
+ ocr_result_004 = ocr_recognition(preprocessed_images[3], valid_type, language)
 
42
  # 方案五
43
+ ocr_result_005 = ocr_recognition(preprocessed_images[4], valid_type, language)
44
+
45
+ return ocr_result_001, ocr_result_002, ocr_result_003, ocr_result_004, ocr_result_005
46
 
 
 
47
 
48
+ # AI LLM OCR
49
+ def AiLLM_ocr(valid_type, language, preprocessed_images, finish_pre_img):
50
+ if not finish_pre_img:
51
+ gr.Warning("請先完成圖像預處理!")
52
+ raise ValueError("請先完成圖像預處理!")
53
 
 
54
  # 方案一
55
+ llm_result_001 = llm_recognition(preprocessed_images[0], valid_type, language)
 
56
  # 方案二
57
+ llm_result_002 = llm_recognition(preprocessed_images[1], valid_type, language)
 
58
  # 方案三
59
+ llm_result_003 = llm_recognition(preprocessed_images[2], valid_type, language)
 
60
  # 方案四
61
+ llm_result_004 = llm_recognition(preprocessed_images[3], valid_type, language)
 
62
  # 方案五
63
+ llm_result_005 = llm_recognition(preprocessed_images[4], valid_type, language)
 
64
 
65
+ return llm_result_001, llm_result_002, llm_result_003, llm_result_004, llm_result_005
 
66
 
67
 
68
+ # VIEW
69
  with gr.Blocks() as demo:
70
  with gr.Row():
71
  image_input = gr.Image(type="pil", label="上傳圖片")
 
73
  language_dropdown = gr.Dropdown(choices=languages, value="chi_tra", label="語言")
74
 
75
  with gr.Row():
76
+ preImg_button = gr.Button("圖片預先處理")
77
+
78
+ with gr.Row():
79
+ with gr.Column():
80
+ ocr_button = gr.Button("使用 Pytesseract OCR 辨識")
81
+ gr.Markdown(
82
+ "<div style='display: flex;justify-content: center;align-items: center;background-color: red;font-weight: bold;text-decoration: underline;font-size: 20px;'>Package: Google Pytesseract</div>")
83
+ with gr.Column():
84
+ llm_button = gr.Button("使用 AI LLM 模型辨識")
85
+ gr.Markdown(
86
+ "<div style='display: flex;justify-content: center;align-items: center;background-color: red;font-weight: bold;text-decoration: underline;font-size: 20px;'>Package:Bert-base-chinese</div>")
87
 
88
  with gr.Row():
89
  preprocess_output_001 = gr.Image(type="pil", label="預處理後的圖片-方案一")
 
107
  ocr_output_005 = gr.JSON(label="OCR-005-解析結果")
108
  llm_output_005 = gr.JSON(label="AiLLM-005-解析結果")
109
 
110
+ # 定義狀態
111
+ finish_pre_img_state = gr.State(False)
112
+ preprocessed_images_state = gr.State([])
113
+
114
+ # 預先處理圖片 按鈕
115
+ preImg_button.click(preprocess_image, inputs=[image_input],
116
+ outputs=[preprocessed_images_state, finish_pre_img_state,
117
+ preprocess_output_001, preprocess_output_002,
118
+ preprocess_output_003, preprocess_output_004,
119
+ preprocess_output_005])
120
+ # pytesseract 按鈕
121
+ ocr_button.click(Basic_ocr, inputs=[validation_type, language_dropdown,
122
+ preprocessed_images_state, finish_pre_img_state],
123
+ outputs=[ocr_output_001, ocr_output_002, ocr_output_003, ocr_output_004, ocr_output_005])
124
+ # AI LLM 按鈕
125
+ llm_button.click(AiLLM_ocr, inputs=[validation_type, language_dropdown,
126
+ preprocessed_images_state, finish_pre_img_state],
127
+ outputs=[llm_output_001, llm_output_002, llm_output_003, llm_output_004, llm_output_005])
128
 
129
  demo.launch(share=False)