Luke committed
Commit 633cf99 · 1 Parent(s): 44eb855
no message

Files changed:
- Preprocess/preprocessImg.py +19 -1
- app.py +72 -44
Preprocess/preprocessImg.py
CHANGED
@@ -3,6 +3,24 @@ import numpy as np
 from PIL import Image, ImageEnhance


+def PreprocessImg(image):
+    if image is None:
+        raise ValueError("尚未上傳圖片!")
+
+    # Option 1
+    pre_img_001 = preprocess_image001(image)
+    # Option 2
+    pre_img_002 = preprocess_image002(image)
+    # Option 3
+    pre_img_003 = preprocess_image003(image)
+    # Option 4
+    pre_img_004 = preprocess_image004(image)
+    # Option 5
+    pre_img_005 = preprocess_image005(image)
+
+    return pre_img_001, pre_img_002, pre_img_003, pre_img_004, pre_img_005
+
+
 # Option 1
 def preprocess_image001(image):
     # Convert the image to a NumPy array
@@ -18,7 +36,7 @@ def preprocess_image001(image):
     denoised = cv2.fastNlMeansDenoising(binary, None, 30, 7, 21)
     return Image.fromarray(denoised)

-
+# Option 2
 def preprocess_image002(image):
     # Convert the PIL Image to a numpy array
     image_np = np.array(image)
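
For context, the visible tail of preprocess_image001 (a `binary` array denoised with cv2.fastNlMeansDenoising and returned as a PIL image) suggests a pipeline roughly like the sketch below. This is an illustration only, not the repository's implementation: the function name preprocess_option_sketch is made up, and the grayscale conversion and Otsu thresholding steps are assumptions that are not part of this diff.

import cv2
import numpy as np
from PIL import Image

def preprocess_option_sketch(image):
    # Convert the PIL image to a NumPy array (RGB)
    image_np = np.array(image)
    # Assumed steps: grayscale, then Otsu binarisation, producing the
    # `binary` array that the lines visible in the diff go on to denoise
    gray = cv2.cvtColor(image_np, cv2.COLOR_RGB2GRAY)
    _, binary = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
    # These two lines mirror the diff: non-local-means denoising, back to PIL
    denoised = cv2.fastNlMeansDenoising(binary, None, 30, 7, 21)
    return Image.fromarray(denoised)
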
app.py
CHANGED
@@ -2,57 +2,70 @@ import os
 import gradio as gr
 from Plan.AiLLM import llm_recognition
 from Plan.pytesseractOCR import ocr_recognition
-from Preprocess.preprocessImg import (
-    preprocess_image001, preprocess_image002, preprocess_image003,
-    preprocess_image004, preprocess_image005
-)
+from Preprocess.preprocessImg import PreprocessImg

 # Get the list of all available languages
 languages = os.popen('tesseract --list-langs').read().split('\n')[1:-1]


-
+# Preprocess the image
+def preprocess_image(image):
+    if image is None:
+        gr.Warning("尚未上傳圖片!")
+        raise ValueError("尚未上傳圖片!")
+    preprocessed_images = PreprocessImg(image)
+    return (
+        preprocessed_images,
+        True,
+        preprocessed_images[0],
+        preprocessed_images[1],
+        preprocessed_images[2],
+        preprocessed_images[3],
+        preprocessed_images[4]
+    )
+
+
+# pytesseract OCR
+def Basic_ocr(valid_type, language, preprocessed_images, finish_pre_img):
+    if not finish_pre_img:
+        gr.Warning("請先完成圖像預處理!")
+        raise ValueError("請先完成圖像預處理!")
+
     # Option 1
-
-    ocr_result_001 = ocr_recognition(pre_img_001, valid_type, language)
+    ocr_result_001 = ocr_recognition(preprocessed_images[0], valid_type, language)
     # Option 2
-
-    ocr_result_002 = ocr_recognition(pre_img_002, valid_type, language)
+    ocr_result_002 = ocr_recognition(preprocessed_images[1], valid_type, language)
     # Option 3
-
-    ocr_result_003 = ocr_recognition(pre_img_003, valid_type, language)
+    ocr_result_003 = ocr_recognition(preprocessed_images[2], valid_type, language)
     # Option 4
-
-    ocr_result_004 = ocr_recognition(pre_img_004, valid_type, language)
+    ocr_result_004 = ocr_recognition(preprocessed_images[3], valid_type, language)
     # Option 5
-
-
+    ocr_result_005 = ocr_recognition(preprocessed_images[4], valid_type, language)
+
+    return ocr_result_001, ocr_result_002, ocr_result_003, ocr_result_004, ocr_result_005

-    return (pre_img_001, pre_img_002, pre_img_003, pre_img_004, pre_img_005,
-            ocr_result_001, ocr_result_002, ocr_result_003, ocr_result_004, ocr_result_005)

+# AI LLM OCR
+def AiLLM_ocr(valid_type, language, preprocessed_images, finish_pre_img):
+    if not finish_pre_img:
+        gr.Warning("請先完成圖像預處理!")
+        raise ValueError("請先完成圖像預處理!")

-def preprocess_and_llm(image, valid_type, language):
     # Option 1
-
-    llm_result_001 = llm_recognition(pre_img_001, valid_type, language)
+    llm_result_001 = llm_recognition(preprocessed_images[0], valid_type, language)
     # Option 2
-
-    llm_result_002 = llm_recognition(pre_img_002, valid_type, language)
+    llm_result_002 = llm_recognition(preprocessed_images[1], valid_type, language)
     # Option 3
-
-    llm_result_003 = llm_recognition(pre_img_003, valid_type, language)
+    llm_result_003 = llm_recognition(preprocessed_images[2], valid_type, language)
     # Option 4
-
-    llm_result_004 = llm_recognition(pre_img_004, valid_type, language)
+    llm_result_004 = llm_recognition(preprocessed_images[3], valid_type, language)
     # Option 5
-
-    llm_result_005 = llm_recognition(pre_img_005, valid_type, language)
+    llm_result_005 = llm_recognition(preprocessed_images[4], valid_type, language)

-    return (
-        llm_result_001, llm_result_002, llm_result_003, llm_result_004, llm_result_005)
+    return llm_result_001, llm_result_002, llm_result_003, llm_result_004, llm_result_005


+# VIEW
 with gr.Blocks() as demo:
     with gr.Row():
         image_input = gr.Image(type="pil", label="上傳圖片")
@@ -60,8 +73,17 @@ with gr.Blocks() as demo:
         language_dropdown = gr.Dropdown(choices=languages, value="chi_tra", label="語言")

     with gr.Row():
-
-
+        preImg_button = gr.Button("圖片預先處理")
+
+    with gr.Row():
+        with gr.Column():
+            ocr_button = gr.Button("使用 Pytesseract OCR 辨識")
+            gr.Markdown(
+                "<div style='display: flex;justify-content: center;align-items: center;background-color: red;font-weight: bold;text-decoration: underline;font-size: 20px;'>Package: Google Pytesseract</div>")
+        with gr.Column():
+            llm_button = gr.Button("使用 AI LLM 模型辨識")
+            gr.Markdown(
+                "<div style='display: flex;justify-content: center;align-items: center;background-color: red;font-weight: bold;text-decoration: underline;font-size: 20px;'>Package:Bert-base-chinese</div>")

     with gr.Row():
         preprocess_output_001 = gr.Image(type="pil", label="預處理後的圖片-方案一")
@@ -85,17 +107,23 @@ with gr.Blocks() as demo:
         ocr_output_005 = gr.JSON(label="OCR-005-解析結果")
         llm_output_005 = gr.JSON(label="AiLLM-005-解析結果")

-
-
-
-
-
-
-
-
-
-
-
-
+    # Define states
+    finish_pre_img_state = gr.State(False)
+    preprocessed_images_state = gr.State([])
+
+    # Image preprocessing button
+    preImg_button.click(preprocess_image, inputs=[image_input],
+                        outputs=[preprocessed_images_state, finish_pre_img_state,
+                                 preprocess_output_001, preprocess_output_002,
+                                 preprocess_output_003, preprocess_output_004,
+                                 preprocess_output_005])
+    # pytesseract button
+    ocr_button.click(Basic_ocr, inputs=[validation_type, language_dropdown,
+                                        preprocessed_images_state, finish_pre_img_state],
+                     outputs=[ocr_output_001, ocr_output_002, ocr_output_003, ocr_output_004, ocr_output_005])
+    # AI LLM button
+    llm_button.click(AiLLM_ocr, inputs=[validation_type, language_dropdown,
+                                        preprocessed_images_state, finish_pre_img_state],
+                     outputs=[llm_output_001, llm_output_002, llm_output_003, llm_output_004, llm_output_005])

 demo.launch(share=False)
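
The app.py change replaces the old one-shot preprocess-and-recognize functions with a two-step flow: the preprocess button stores its five outputs in a gr.State alongside a finish_pre_img_state flag, and the OCR and LLM buttons read that state and refuse to run until preprocessing has happened. Below is a minimal, self-contained sketch of that wiring pattern with made-up function and component names and placeholder processing; it illustrates the gr.State hand-off, not the repository's actual code.

import gradio as gr

def do_preprocess(text):
    # Stand-in for PreprocessImg: produce a list of variants plus a "done" flag
    if not text:
        raise gr.Error("Nothing to preprocess")
    results = [text.lower(), text.upper()]
    return results, True, results[0], results[1]

def do_recognize(results, finished):
    # Stand-in for Basic_ocr / AiLLM_ocr: guard on the flag, then consume the state
    if not finished:
        raise gr.Error("Run preprocessing first")
    return {"variants": results}

with gr.Blocks() as demo:
    text_in = gr.Textbox(label="input")
    pre_btn = gr.Button("preprocess")
    rec_btn = gr.Button("recognize")
    out_a = gr.Textbox(label="variant A")
    out_b = gr.Textbox(label="variant B")
    json_out = gr.JSON(label="result")

    # State objects carry the intermediate results between the two clicks
    finished_state = gr.State(False)
    results_state = gr.State([])

    pre_btn.click(do_preprocess, inputs=[text_in],
                  outputs=[results_state, finished_state, out_a, out_b])
    rec_btn.click(do_recognize, inputs=[results_state, finished_state],
                  outputs=[json_out])

demo.launch()

Keeping the preprocessed images in gr.State means each recognition click reuses the cached variants instead of re-running the preprocessing pipeline, which is the main point of splitting the buttons in this commit.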