File size: 9,515 Bytes
b9e018b 0347dd6 e651999 0347dd6 e651999 0347dd6 e651999 633cf99 b9e018b e651999 633cf99 0347dd6 633cf99 03b6d75 633cf99 03b6d75 633cf99 68e1313 633cf99 68e1313 633cf99 68e1313 633cf99 03b6d75 e651999 633cf99 0347dd6 e651999 03b6d75 633cf99 03b6d75 633cf99 68e1313 633cf99 68e1313 633cf99 68e1313 633cf99 e651999 633cf99 b9e018b 0347dd6 633cf99 b9e018b e651999 0347dd6 2792f82 633cf99 0347dd6 2792f82 633cf99 2792f82 633cf99 2792f82 0347dd6 2792f82 e651999 68e1313 03b6d75 68e1313 0347dd6 68e1313 03b6d75 68e1313 0347dd6 68e1313 0347dd6 68e1313 0347dd6 68e1313 0347dd6 e651999 633cf99 e651999 0347dd6 e651999 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 |
import os
import subprocess
from datetime import datetime
import gradio as gr
from Plan.AiLLM import llm_recognition
from Plan.pytesseractJsOCR import pytesseractJs_recognition
from Plan.pytesseractOCR import ocr_recognition
from Preprocess.preprocessImg import PreprocessImg
# Collect the list of languages reported by the Tesseract command-line tool.
def parse_tesseract_langs(listing):
    """Return the language codes found in ``tesseract --list-langs`` output.

    The first line of the listing is treated as a header and dropped. Blank
    lines are ignored, so the result does not depend on whether the output
    ends with a newline.

    Args:
        listing: Raw text captured from the command's standard output.

    Returns:
        A list of non-empty, whitespace-stripped lines after the first one.
    """
    return [line.strip() for line in listing.splitlines()[1:] if line.strip()]


def list_tesseract_langs():
    """Run ``tesseract --list-langs`` and return the parsed language codes.

    The command is started from an argument list (no shell) and its pipes are
    closed by ``subprocess.run``. If the executable cannot be started, an
    empty list is returned instead of raising, so importing this module still
    succeeds.
    """
    try:
        completed = subprocess.run(
            ['tesseract', '--list-langs'],
            capture_output=True,
            text=True,
            check=False,
        )
    except OSError:
        return []
    return parse_tesseract_langs(completed.stdout)


languages = list_tesseract_langs()
# Preprocess the uploaded image.
def preprocess_image(image):
    """Run ``PreprocessImg`` on the uploaded image and unpack its results.

    Args:
        image: The uploaded image, or ``None`` when nothing was uploaded.

    Returns:
        A 7-tuple: the full ``PreprocessImg`` result, the flag ``True``, and
        then elements 0 through 4 of that result, in order.

    Raises:
        ValueError: If ``image`` is ``None``; ``gr.Warning`` is called with
            the same message first.
    """
    if image is not None:
        variants = PreprocessImg(image)
        # Index each of the five variants explicitly so a short result still
        # fails with IndexError.
        return (variants, True, *(variants[idx] for idx in range(5)))

    gr.Warning("尚未上傳圖片!")
    raise ValueError("尚未上傳圖片!")
# pytesseract OCR
def Basic_ocr(valid_type, language, preprocessed_images, finish_pre_img):
    """Run ``ocr_recognition`` on each of the five preprocessed images.

    Args:
        valid_type: Forwarded unchanged to ``ocr_recognition``.
        language: Forwarded unchanged to ``ocr_recognition``.
        preprocessed_images: Indexable collection; elements 0 through 4 are used.
        finish_pre_img: Truthy once preprocessing has been performed.

    Returns:
        A 5-tuple of ``ocr_recognition`` results, one per image, in index order.

    Raises:
        ValueError: If ``finish_pre_img`` is falsy; ``gr.Warning`` is called
            with the same message first.
    """
    if finish_pre_img:
        # Plans one through five: one recognition pass per preprocessed image,
        # evaluated in order.
        return tuple(
            ocr_recognition(preprocessed_images[plan], valid_type, language)
            for plan in range(5)
        )

    gr.Warning("請先執行圖像預處理,再進行分析!")
    raise ValueError("請先執行圖像預處理,再進行分析!")
# AI LLM OCR
def AiLLM_ocr(valid_type, language, preprocessed_images, finish_pre_img):
    """Run ``llm_recognition`` on each of the five preprocessed images.

    Args:
        valid_type: Forwarded unchanged to ``llm_recognition``.
        language: Forwarded unchanged to ``llm_recognition``.
        preprocessed_images: Indexable collection; elements 0 through 4 are used.
        finish_pre_img: Truthy once preprocessing has been performed.

    Returns:
        A 5-tuple of ``llm_recognition`` results, one per image, in index order.

    Raises:
        ValueError: If ``finish_pre_img`` is falsy; ``gr.Warning`` is called
            with the same message first.
    """
    if not finish_pre_img:
        gr.Warning("請先執行圖像預處理,再進行分析!")
        raise ValueError("請先執行圖像預處理,再進行分析!")

    # Plans one through five, processed sequentially.
    llm_results = []
    for plan_index in range(5):
        source_image = preprocessed_images[plan_index]
        llm_results.append(llm_recognition(source_image, valid_type, language))
    return tuple(llm_results)
def pytesseractJs_ocr(valid_type, language, preprocessed_images, finish_pre_img):
    """Save the preprocessed images to disk and run ``pytesseractJs_recognition`` on five of them.

    Every image in ``preprocessed_images`` is written as
    ``preprocessed_image_<i>.png`` into a per-call directory named
    ``TempFile/<YYYYmmdd_HHMMSS>/``. The first five saved files are then
    passed, in order, to ``pytesseractJs_recognition`` together with the
    output file names ``out_pytesseractJs_result_1.txt`` through
    ``out_pytesseractJs_result_5.txt``.

    Args:
        valid_type: Forwarded unchanged to ``pytesseractJs_recognition``.
        language: Forwarded unchanged to ``pytesseractJs_recognition``.
        preprocessed_images: Iterable of objects exposing ``save(path)``;
            at least five are required.
        finish_pre_img: Truthy once preprocessing has been performed.

    Returns:
        A 5-tuple of ``pytesseractJs_recognition`` results in plan order.

    Raises:
        ValueError: If ``finish_pre_img`` is falsy; ``gr.Warning`` is called
            with the same message first.
        IndexError: If fewer than five images were supplied.
    """
    if not finish_pre_img:
        gr.Warning("請先執行圖像預處理,再進行分析!")
        raise ValueError("請先執行圖像預處理,再進行分析!")

    # The trailing slash is kept on purpose: the directory is passed on as a
    # plain string (presumably concatenated with the file name downstream --
    # verify against pytesseractJs_recognition).
    temp_path = 'TempFile/' + datetime.now().strftime('%Y%m%d_%H%M%S') + '/'
    # exist_ok=True replaces the earlier exists()/makedirs() pair, which could
    # fail if another call created the same second-resolution directory in
    # between the check and the creation.
    os.makedirs(temp_path, exist_ok=True)

    # Persist every preprocessed image before any recognition starts.
    image_files = []
    for i, image in enumerate(preprocessed_images):
        filename = temp_path + f'preprocessed_image_{i}.png'
        image.save(filename)
        image_files.append(filename)

    # Plans one through five: result files are numbered from 1.
    return tuple(
        pytesseractJs_recognition(
            valid_type,
            image_files[plan],
            temp_path,
            f'out_pytesseractJs_result_{plan + 1}.txt',
            language,
        )
        for plan in range(5)
    )
# VIEW
# Builds the Gradio UI: an image upload with two dropdowns, four action
# buttons with banner captions, five result rows, and the click handlers that
# connect the buttons to the functions defined earlier in this file.
# NOTE(review): indentation was lost in this copy of the file, so the exact
# nesting of the Row/Column contexts below cannot be confirmed from here --
# verify against the original before re-indenting.
with gr.Blocks() as demo:
# Input area: uploaded image (PIL) plus validation-type and language dropdowns.
with gr.Row():
image_input = gr.Image(type="pil", label="上傳圖片")
with gr.Column():
validation_type = gr.Dropdown(choices=["全文分析", "身分證正面", "身分證反面"], value='全文分析',
label="驗證類別")
# Choices come from the module-level `languages` list; the preselected value is "chi_tra".
language_dropdown = gr.Dropdown(choices=languages, value="chi_tra", label="語言")
# Preprocessing button with its banner caption.
with gr.Row():
with gr.Column():
preImg_button = gr.Button("圖片預先處理")
gr.Markdown(
"<div style='display: flex;justify-content: center;align-items: center;background-color: #ffdf00;font-weight: bold;text-decoration: underline;font-size: 20px;'>多模態預處理圖像</div>")
# One button and banner caption per recognition backend.
with gr.Row():
with gr.Column():
ocr_button = gr.Button("使用 Pytesseract OCR 辨識")
gr.Markdown(
"<div style='display: flex;justify-content: center;align-items: center;background-color: #ffdf00;font-weight: bold;text-decoration: underline;font-size: 20px;'>Package: Google Pytesseract</div>")
with gr.Column():
llm_button = gr.Button("使用 AI LLM 模型辨識")
gr.Markdown(
"<div style='display: flex;justify-content: center;align-items: center;background-color: #ffdf00;font-weight: bold;text-decoration: underline;font-size: 20px;'>Package:Bert-base-chinese</div>")
with gr.Column():
pytesseractJS_button = gr.Button("使用 PytesseractJS 模型辨識")
gr.Markdown(
"<div style='display: flex;justify-content: center;align-items: center;background-color: #ffdf00;font-weight: bold;text-decoration: underline;font-size: 20px;'>Package:PytesseractJS</div>")
# Result rows for plans one through five: each row holds the preprocessed
# image followed by one JSON output per backend.
with gr.Row():
preprocess_output_001 = gr.Image(type="pil", label="預處理後的圖片-方案一")
ocr_output_001 = gr.JSON(label="OCR-001-解析結果")
llm_output_001 = gr.JSON(label="AiLLM-001-解析結果")
pytesseractJS_output_001 = gr.JSON(label="PytesseractJS-001-解析結果")
with gr.Row():
preprocess_output_002 = gr.Image(type="pil", label="預處理後的圖片-方案二")
ocr_output_002 = gr.JSON(label="OCR-002-解析結果")
llm_output_002 = gr.JSON(label="AiLLM-002-解析結果")
pytesseractJS_output_002 = gr.JSON(label="PytesseractJS-002-解析結果")
with gr.Row():
preprocess_output_003 = gr.Image(type="pil", label="預處理後的圖片-方案三")
ocr_output_003 = gr.JSON(label="OCR-003-解析結果")
llm_output_003 = gr.JSON(label="AiLLM-003-解析結果")
pytesseractJS_output_003 = gr.JSON(label="PytesseractJS-003-解析結果")
with gr.Row():
preprocess_output_004 = gr.Image(type="pil", label="預處理後的圖片-方案四")
ocr_output_004 = gr.JSON(label="OCR-004-解析結果")
llm_output_004 = gr.JSON(label="AiLLM-004-解析結果")
pytesseractJS_output_004 = gr.JSON(label="PytesseractJS-004-解析結果")
with gr.Row():
preprocess_output_005 = gr.Image(type="pil", label="預處理後的圖片-方案五")
ocr_output_005 = gr.JSON(label="OCR-005-解析結果")
llm_output_005 = gr.JSON(label="AiLLM-005-解析結果")
pytesseractJS_output_005 = gr.JSON(label="PytesseractJS-005-解析結果")
# Define state: a "preprocessing finished" flag (initially False) and the
# preprocessed images (initially an empty list).
finish_pre_img_state = gr.State(False)
preprocessed_images_state = gr.State([])
# "Preprocess image" button: preprocess_image's seven return values fill the
# two state objects and the five preview images, in that order.
preImg_button.click(preprocess_image, inputs=[image_input],
outputs=[preprocessed_images_state, finish_pre_img_state,
preprocess_output_001, preprocess_output_002,
preprocess_output_003, preprocess_output_004,
preprocess_output_005])
# pytesseract button: Basic_ocr's five results go to the OCR JSON outputs.
ocr_button.click(Basic_ocr, inputs=[validation_type, language_dropdown,
preprocessed_images_state, finish_pre_img_state],
outputs=[ocr_output_001, ocr_output_002, ocr_output_003, ocr_output_004, ocr_output_005])
# AI LLM button: AiLLM_ocr's five results go to the AiLLM JSON outputs.
llm_button.click(AiLLM_ocr, inputs=[validation_type, language_dropdown,
preprocessed_images_state, finish_pre_img_state],
outputs=[llm_output_001, llm_output_002, llm_output_003, llm_output_004, llm_output_005])
# PytesseractJS button: pytesseractJs_ocr's five results go to the PytesseractJS JSON outputs.
pytesseractJS_button.click(pytesseractJs_ocr, inputs=[validation_type, language_dropdown,
preprocessed_images_state, finish_pre_img_state],
outputs=[pytesseractJS_output_001, pytesseractJS_output_002, pytesseractJS_output_003,
pytesseractJS_output_004, pytesseractJS_output_005])
# Start the app; share is explicitly set to False.
demo.launch(share=False)
|