Luke
committed on
Commit
•
0347dd6
1
Parent(s):
633cf99
no message
Browse files
- Plan/pytesseractJsOCR.py +18 -0
- Preprocess/preprocessImg.py +1 -0
- app.py +81 -10
- package-lock.json +119 -0
- package.json +5 -0
- pytesseractJsOCR.js +20 -0
- requirements.txt +3 -1
Plan/pytesseractJsOCR.py
ADDED
@@ -0,0 +1,18 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import subprocess
|
2 |
+
|
3 |
+
from IdentifyModel.cardModel import parse_id_card
|
4 |
+
from Plan.AiLLM import extract_entities
|
5 |
+
|
6 |
+
|
7 |
+
def pytesseractJs_recognition(validation_type, image, temp_path, file_name, language):
    """Run the Node.js Tesseract script on one image and parse the text it writes.

    The script ``pytesseractJsOCR.js`` is invoked as
    ``node pytesseractJsOCR.js <image> <language> <temp_path + file_name>``.
    The text file it produces is then read back, passed through
    ``extract_entities`` and handed to ``parse_id_card``.

    Args:
        validation_type: Forwarded unchanged to ``parse_id_card``.
        image: Path of the image file handed to the Node script.
        temp_path: Directory prefix for the output file. It is concatenated
            directly with ``file_name``, so it must already end with a path
            separator.
        file_name: Name of the text file the Node script writes.
        language: Tesseract language code passed to the Node script.

    Returns:
        Whatever ``parse_id_card`` returns on success. On any failure the
        exception message is returned as a string instead of being raised
        (callers render the value directly).
    """
    output_path = temp_path + file_name
    try:
        # Run the JavaScript OCR script, passing the language as an argument.
        # Decode the captured output as UTF-8 explicitly so non-ASCII OCR text
        # cannot raise a decode error under a non-UTF-8 locale.
        completed = subprocess.run(
            ['node', 'pytesseractJsOCR.js', image, language, output_path],
            capture_output=True,
            text=True,
            encoding='utf-8',
            errors='replace',
        )
        # Surface a failed run explicitly instead of falling through to a
        # misleading "file not found" when the output file was never written.
        if completed.returncode != 0:
            raise RuntimeError(
                f'node exited with status {completed.returncode}: '
                f'{(completed.stderr or "").strip()}'
            )
        # Node's fs.writeFileSync writes strings as UTF-8 by default, so read
        # the file back as UTF-8 rather than with the platform default codec.
        with open(output_path, 'r', encoding='utf-8') as file:
            out_ocr_text = file.read()
        entities = extract_entities(out_ocr_text)
        return parse_id_card(out_ocr_text, validation_type, entities)
    except Exception as e:
        # Deliberate best-effort boundary: report the error text to the caller.
        return str(e)
|
Preprocess/preprocessImg.py
CHANGED
@@ -36,6 +36,7 @@ def preprocess_image001(image):
|
|
36 |
denoised = cv2.fastNlMeansDenoising(binary, None, 30, 7, 21)
|
37 |
return Image.fromarray(denoised)
|
38 |
|
|
|
39 |
# 方案二
|
40 |
def preprocess_image002(image):
|
41 |
# 將 PIL Image 轉換為 numpy array
|
|
|
36 |
denoised = cv2.fastNlMeansDenoising(binary, None, 30, 7, 21)
|
37 |
return Image.fromarray(denoised)
|
38 |
|
39 |
+
|
40 |
# 方案二
|
41 |
def preprocess_image002(image):
|
42 |
# 將 PIL Image 轉換為 numpy array
|
app.py
CHANGED
@@ -1,6 +1,11 @@
|
|
1 |
import os
|
|
|
|
|
|
|
2 |
import gradio as gr
|
|
|
3 |
from Plan.AiLLM import llm_recognition
|
|
|
4 |
from Plan.pytesseractOCR import ocr_recognition
|
5 |
from Preprocess.preprocessImg import PreprocessImg
|
6 |
|
@@ -28,8 +33,8 @@ def preprocess_image(image):
|
|
28 |
# pytesseract OCR
|
29 |
def Basic_ocr(valid_type, language, preprocessed_images, finish_pre_img):
|
30 |
if not finish_pre_img:
|
31 |
-
gr.Warning("
|
32 |
-
raise ValueError("
|
33 |
|
34 |
# 方案一
|
35 |
ocr_result_001 = ocr_recognition(preprocessed_images[0], valid_type, language)
|
@@ -48,8 +53,8 @@ def Basic_ocr(valid_type, language, preprocessed_images, finish_pre_img):
|
|
48 |
# AI LLM OCR
|
49 |
def AiLLM_ocr(valid_type, language, preprocessed_images, finish_pre_img):
|
50 |
if not finish_pre_img:
|
51 |
-
gr.Warning("
|
52 |
-
raise ValueError("
|
53 |
|
54 |
# 方案一
|
55 |
llm_result_001 = llm_recognition(preprocessed_images[0], valid_type, language)
|
@@ -65,17 +70,65 @@ def AiLLM_ocr(valid_type, language, preprocessed_images, finish_pre_img):
|
|
65 |
return llm_result_001, llm_result_002, llm_result_003, llm_result_004, llm_result_005
|
66 |
|
67 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
68 |
# VIEW
|
69 |
with gr.Blocks() as demo:
|
70 |
with gr.Row():
|
71 |
image_input = gr.Image(type="pil", label="上傳圖片")
|
72 |
-
|
73 |
-
|
74 |
-
|
75 |
-
|
76 |
-
preImg_button = gr.Button("圖片預先處理")
|
77 |
-
|
78 |
with gr.Row():
|
|
|
|
|
|
|
|
|
79 |
with gr.Column():
|
80 |
ocr_button = gr.Button("使用 Pytesseract OCR 辨識")
|
81 |
gr.Markdown(
|
@@ -84,28 +137,40 @@ with gr.Blocks() as demo:
|
|
84 |
llm_button = gr.Button("使用 AI LLM 模型辨識")
|
85 |
gr.Markdown(
|
86 |
"<div style='display: flex;justify-content: center;align-items: center;background-color: red;font-weight: bold;text-decoration: underline;font-size: 20px;'>Package:Bert-base-chinese</div>")
|
|
|
|
|
|
|
|
|
87 |
|
88 |
with gr.Row():
|
89 |
preprocess_output_001 = gr.Image(type="pil", label="預處理後的圖片-方案一")
|
90 |
ocr_output_001 = gr.JSON(label="OCR-001-解析結果")
|
91 |
llm_output_001 = gr.JSON(label="AiLLM-001-解析結果")
|
|
|
|
|
92 |
with gr.Row():
|
93 |
preprocess_output_002 = gr.Image(type="pil", label="預處理後的圖片-方案二")
|
94 |
ocr_output_002 = gr.JSON(label="OCR-002-解析結果")
|
95 |
llm_output_002 = gr.JSON(label="AiLLM-002-解析結果")
|
|
|
96 |
|
97 |
with gr.Row():
|
98 |
preprocess_output_003 = gr.Image(type="pil", label="預處理後的圖片-方案三")
|
99 |
ocr_output_003 = gr.JSON(label="OCR-003-解析結果")
|
100 |
llm_output_003 = gr.JSON(label="AiLLM-003-解析結果")
|
|
|
|
|
101 |
with gr.Row():
|
102 |
preprocess_output_004 = gr.Image(type="pil", label="預處理後的圖片-方案四")
|
103 |
ocr_output_004 = gr.JSON(label="OCR-004-解析結果")
|
104 |
llm_output_004 = gr.JSON(label="AiLLM-004-解析結果")
|
|
|
|
|
105 |
with gr.Row():
|
106 |
preprocess_output_005 = gr.Image(type="pil", label="預處理後的圖片-方案五")
|
107 |
ocr_output_005 = gr.JSON(label="OCR-005-解析結果")
|
108 |
llm_output_005 = gr.JSON(label="AiLLM-005-解析結果")
|
|
|
109 |
|
110 |
# 定義狀態
|
111 |
finish_pre_img_state = gr.State(False)
|
@@ -126,4 +191,10 @@ with gr.Blocks() as demo:
|
|
126 |
preprocessed_images_state, finish_pre_img_state],
|
127 |
outputs=[llm_output_001, llm_output_002, llm_output_003, llm_output_004, llm_output_005])
|
128 |
|
|
|
|
|
|
|
|
|
|
|
|
|
129 |
demo.launch(share=False)
|
|
|
1 |
import os
|
2 |
+
import subprocess
|
3 |
+
from datetime import datetime
|
4 |
+
|
5 |
import gradio as gr
|
6 |
+
|
7 |
from Plan.AiLLM import llm_recognition
|
8 |
+
from Plan.pytesseractJsOCR import pytesseractJs_recognition
|
9 |
from Plan.pytesseractOCR import ocr_recognition
|
10 |
from Preprocess.preprocessImg import PreprocessImg
|
11 |
|
|
|
33 |
# pytesseract OCR
|
34 |
def Basic_ocr(valid_type, language, preprocessed_images, finish_pre_img):
|
35 |
if not finish_pre_img:
|
36 |
+
gr.Warning("請先執行圖像預處理,再進行分析!")
|
37 |
+
raise ValueError("請先執行圖像預處理,再進行分析!")
|
38 |
|
39 |
# 方案一
|
40 |
ocr_result_001 = ocr_recognition(preprocessed_images[0], valid_type, language)
|
|
|
53 |
# AI LLM OCR
|
54 |
def AiLLM_ocr(valid_type, language, preprocessed_images, finish_pre_img):
|
55 |
if not finish_pre_img:
|
56 |
+
gr.Warning("請先執行圖像預處理,再進行分析!")
|
57 |
+
raise ValueError("請先執行圖像預處理,再進行分析!")
|
58 |
|
59 |
# 方案一
|
60 |
llm_result_001 = llm_recognition(preprocessed_images[0], valid_type, language)
|
|
|
70 |
return llm_result_001, llm_result_002, llm_result_003, llm_result_004, llm_result_005
|
71 |
|
72 |
|
73 |
+
def pytesseractJs_ocr(valid_type, language, preprocessed_images, finish_pre_img):
    """Run the Node.js Tesseract OCR over the first five preprocessed images.

    Every image in ``preprocessed_images`` is saved as a PNG inside a fresh
    timestamped directory under ``TempFile/``. The first five saved files are
    then each passed to ``pytesseractJs_recognition``, one per preprocessing
    plan.

    Args:
        valid_type: Validation category forwarded to ``pytesseractJs_recognition``.
        language: OCR language code forwarded to ``pytesseractJs_recognition``.
        preprocessed_images: Sequence of image objects exposing
            ``save(filename)`` (presumably PIL images; confirm against the
            preprocessing step). At least five are required.
        finish_pre_img: Truthy once image preprocessing has been performed.

    Returns:
        A 5-tuple with the recognition result for plans one to five, in order.

    Raises:
        ValueError: If preprocessing has not been run yet.
        IndexError: If fewer than five preprocessed images are supplied.
    """
    if not finish_pre_img:
        gr.Warning("請先執行圖像預處理,再進行分析!")
        raise ValueError("請先執行圖像預處理,再進行分析!")

    # Include microseconds so two requests arriving within the same second do
    # not share a directory and overwrite each other's files.
    temp_path = 'TempFile/' + datetime.now().strftime('%Y%m%d_%H%M%S_%f') + '/'
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(temp_path, exist_ok=True)

    # Persist every preprocessed image so the Node script can read it by path.
    image_files = []
    for i, image in enumerate(preprocessed_images):
        filename = temp_path + f'preprocessed_image_{i}.png'
        image.save(filename)
        image_files.append(filename)

    # Plans one to five: one output text file and one recognition per image.
    results = []
    for plan_no in range(1, 6):
        file_name = f'out_pytesseractJs_result_{plan_no}.txt'
        results.append(
            pytesseractJs_recognition(valid_type, image_files[plan_no - 1], temp_path, file_name, language)
        )

    return tuple(results)
|
117 |
+
|
118 |
+
|
119 |
# VIEW
|
120 |
with gr.Blocks() as demo:
|
121 |
with gr.Row():
|
122 |
image_input = gr.Image(type="pil", label="上傳圖片")
|
123 |
+
with gr.Column():
|
124 |
+
validation_type = gr.Dropdown(choices=["全文分析", "身分證正面", "身分證反面"], value='全文分析',
|
125 |
+
label="驗證類別")
|
126 |
+
language_dropdown = gr.Dropdown(choices=languages, value="chi_tra", label="語言")
|
|
|
|
|
127 |
with gr.Row():
|
128 |
+
with gr.Column():
|
129 |
+
preImg_button = gr.Button("圖片預先處理")
|
130 |
+
gr.Markdown(
|
131 |
+
"<div style='display: flex;justify-content: center;align-items: center;background-color: red;font-weight: bold;text-decoration: underline;font-size: 20px;'>多模態預處理圖像</div>")
|
132 |
with gr.Column():
|
133 |
ocr_button = gr.Button("使用 Pytesseract OCR 辨識")
|
134 |
gr.Markdown(
|
|
|
137 |
llm_button = gr.Button("使用 AI LLM 模型辨識")
|
138 |
gr.Markdown(
|
139 |
"<div style='display: flex;justify-content: center;align-items: center;background-color: red;font-weight: bold;text-decoration: underline;font-size: 20px;'>Package:Bert-base-chinese</div>")
|
140 |
+
with gr.Column():
|
141 |
+
pytesseractJS_button = gr.Button("使用 PytesseractJS 模型辨識")
|
142 |
+
gr.Markdown(
|
143 |
+
"<div style='display: flex;justify-content: center;align-items: center;background-color: red;font-weight: bold;text-decoration: underline;font-size: 20px;'>Package:PytesseractJS</div>")
|
144 |
|
145 |
with gr.Row():
|
146 |
preprocess_output_001 = gr.Image(type="pil", label="預處理後的圖片-方案一")
|
147 |
ocr_output_001 = gr.JSON(label="OCR-001-解析結果")
|
148 |
llm_output_001 = gr.JSON(label="AiLLM-001-解析結果")
|
149 |
+
pytesseractJS_output_001 = gr.JSON(label="PytesseractJS-001-解析結果")
|
150 |
+
|
151 |
with gr.Row():
|
152 |
preprocess_output_002 = gr.Image(type="pil", label="預處理後的圖片-方案二")
|
153 |
ocr_output_002 = gr.JSON(label="OCR-002-解析結果")
|
154 |
llm_output_002 = gr.JSON(label="AiLLM-002-解析結果")
|
155 |
+
pytesseractJS_output_002 = gr.JSON(label="PytesseractJS-002-解析結果")
|
156 |
|
157 |
with gr.Row():
|
158 |
preprocess_output_003 = gr.Image(type="pil", label="預處理後的圖片-方案三")
|
159 |
ocr_output_003 = gr.JSON(label="OCR-003-解析結果")
|
160 |
llm_output_003 = gr.JSON(label="AiLLM-003-解析結果")
|
161 |
+
pytesseractJS_output_003 = gr.JSON(label="PytesseractJS-003-解析結果")
|
162 |
+
|
163 |
with gr.Row():
|
164 |
preprocess_output_004 = gr.Image(type="pil", label="預處理後的圖片-方案四")
|
165 |
ocr_output_004 = gr.JSON(label="OCR-004-解析結果")
|
166 |
llm_output_004 = gr.JSON(label="AiLLM-004-解析結果")
|
167 |
+
pytesseractJS_output_004 = gr.JSON(label="PytesseractJS-004-解析結果")
|
168 |
+
|
169 |
with gr.Row():
|
170 |
preprocess_output_005 = gr.Image(type="pil", label="預處理後的圖片-方案五")
|
171 |
ocr_output_005 = gr.JSON(label="OCR-005-解析結果")
|
172 |
llm_output_005 = gr.JSON(label="AiLLM-005-解析結果")
|
173 |
+
pytesseractJS_output_005 = gr.JSON(label="PytesseractJS-005-解析結果")
|
174 |
|
175 |
# 定義狀態
|
176 |
finish_pre_img_state = gr.State(False)
|
|
|
191 |
preprocessed_images_state, finish_pre_img_state],
|
192 |
outputs=[llm_output_001, llm_output_002, llm_output_003, llm_output_004, llm_output_005])
|
193 |
|
194 |
+
# pytesseract 按鈕
|
195 |
+
pytesseractJS_button.click(pytesseractJs_ocr, inputs=[validation_type, language_dropdown,
|
196 |
+
preprocessed_images_state, finish_pre_img_state],
|
197 |
+
outputs=[pytesseractJS_output_001, pytesseractJS_output_002, pytesseractJS_output_003,
|
198 |
+
pytesseractJS_output_004, pytesseractJS_output_005])
|
199 |
+
|
200 |
demo.launch(share=False)
|
package-lock.json
ADDED
@@ -0,0 +1,119 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"name": "OCR-2",
|
3 |
+
"lockfileVersion": 3,
|
4 |
+
"requires": true,
|
5 |
+
"packages": {
|
6 |
+
"": {
|
7 |
+
"dependencies": {
|
8 |
+
"tesseract.js": "^5.1.0"
|
9 |
+
}
|
10 |
+
},
|
11 |
+
"node_modules/bmp-js": {
|
12 |
+
"version": "0.1.0",
|
13 |
+
"resolved": "https://registry.npmjs.org/bmp-js/-/bmp-js-0.1.0.tgz",
|
14 |
+
"integrity": "sha512-vHdS19CnY3hwiNdkaqk93DvjVLfbEcI8mys4UjuWrlX1haDmroo8o4xCzh4wD6DGV6HxRCyauwhHRqMTfERtjw=="
|
15 |
+
},
|
16 |
+
"node_modules/idb-keyval": {
|
17 |
+
"version": "6.2.1",
|
18 |
+
"resolved": "https://registry.npmjs.org/idb-keyval/-/idb-keyval-6.2.1.tgz",
|
19 |
+
"integrity": "sha512-8Sb3veuYCyrZL+VBt9LJfZjLUPWVvqn8tG28VqYNFCo43KHcKuq+b4EiXGeuaLAQWL2YmyDgMp2aSpH9JHsEQg=="
|
20 |
+
},
|
21 |
+
"node_modules/is-electron": {
|
22 |
+
"version": "2.2.2",
|
23 |
+
"resolved": "https://registry.npmjs.org/is-electron/-/is-electron-2.2.2.tgz",
|
24 |
+
"integrity": "sha512-FO/Rhvz5tuw4MCWkpMzHFKWD2LsfHzIb7i6MdPYZ/KW7AlxawyLkqdy+jPZP1WubqEADE3O4FUENlJHDfQASRg=="
|
25 |
+
},
|
26 |
+
"node_modules/is-url": {
|
27 |
+
"version": "1.2.4",
|
28 |
+
"resolved": "https://registry.npmjs.org/is-url/-/is-url-1.2.4.tgz",
|
29 |
+
"integrity": "sha512-ITvGim8FhRiYe4IQ5uHSkj7pVaPDrCTkNd3yq3cV7iZAcJdHTUMPMEHcqSOy9xZ9qFenQCvi+2wjH9a1nXqHww=="
|
30 |
+
},
|
31 |
+
"node_modules/node-fetch": {
|
32 |
+
"version": "2.7.0",
|
33 |
+
"resolved": "https://registry.npmjs.org/node-fetch/-/node-fetch-2.7.0.tgz",
|
34 |
+
"integrity": "sha512-c4FRfUm/dbcWZ7U+1Wq0AwCyFL+3nt2bEw05wfxSz+DWpWsitgmSgYmy2dQdWyKC1694ELPqMs/YzUSNozLt8A==",
|
35 |
+
"dependencies": {
|
36 |
+
"whatwg-url": "^5.0.0"
|
37 |
+
},
|
38 |
+
"engines": {
|
39 |
+
"node": "4.x || >=6.0.0"
|
40 |
+
},
|
41 |
+
"peerDependencies": {
|
42 |
+
"encoding": "^0.1.0"
|
43 |
+
},
|
44 |
+
"peerDependenciesMeta": {
|
45 |
+
"encoding": {
|
46 |
+
"optional": true
|
47 |
+
}
|
48 |
+
}
|
49 |
+
},
|
50 |
+
"node_modules/opencollective-postinstall": {
|
51 |
+
"version": "2.0.3",
|
52 |
+
"resolved": "https://registry.npmjs.org/opencollective-postinstall/-/opencollective-postinstall-2.0.3.tgz",
|
53 |
+
"integrity": "sha512-8AV/sCtuzUeTo8gQK5qDZzARrulB3egtLzFgteqB2tcT4Mw7B8Kt7JcDHmltjz6FOAHsvTevk70gZEbhM4ZS9Q==",
|
54 |
+
"bin": {
|
55 |
+
"opencollective-postinstall": "index.js"
|
56 |
+
}
|
57 |
+
},
|
58 |
+
"node_modules/regenerator-runtime": {
|
59 |
+
"version": "0.13.11",
|
60 |
+
"resolved": "https://registry.npmjs.org/regenerator-runtime/-/regenerator-runtime-0.13.11.tgz",
|
61 |
+
"integrity": "sha512-kY1AZVr2Ra+t+piVaJ4gxaFaReZVH40AKNo7UCX6W+dEwBo/2oZJzqfuN1qLq1oL45o56cPaTXELwrTh8Fpggg=="
|
62 |
+
},
|
63 |
+
"node_modules/tesseract.js": {
|
64 |
+
"version": "5.1.0",
|
65 |
+
"resolved": "https://registry.npmjs.org/tesseract.js/-/tesseract.js-5.1.0.tgz",
|
66 |
+
"integrity": "sha512-2fH9pqWdS2C6ue/3OoGg91Wtv7Rt/1atYu/g0Q1SGFrowEW/kIBkG361hLienHsWe4KWEjxOJBrCQYpIBWG6WA==",
|
67 |
+
"hasInstallScript": true,
|
68 |
+
"dependencies": {
|
69 |
+
"bmp-js": "^0.1.0",
|
70 |
+
"idb-keyval": "^6.2.0",
|
71 |
+
"is-electron": "^2.2.2",
|
72 |
+
"is-url": "^1.2.4",
|
73 |
+
"node-fetch": "^2.6.9",
|
74 |
+
"opencollective-postinstall": "^2.0.3",
|
75 |
+
"regenerator-runtime": "^0.13.3",
|
76 |
+
"tesseract.js-core": "^5.1.0",
|
77 |
+
"wasm-feature-detect": "^1.2.11",
|
78 |
+
"zlibjs": "^0.3.1"
|
79 |
+
}
|
80 |
+
},
|
81 |
+
"node_modules/tesseract.js-core": {
|
82 |
+
"version": "5.1.0",
|
83 |
+
"resolved": "https://registry.npmjs.org/tesseract.js-core/-/tesseract.js-core-5.1.0.tgz",
|
84 |
+
"integrity": "sha512-D4gc5ET1DF/sDayF/eVmHgVGo7nqVC2e3d7uVgVOSAk4NOcmUqvJRTj8etqEmI/2390ZkXCRiDMxTD1RFYyp1g=="
|
85 |
+
},
|
86 |
+
"node_modules/tr46": {
|
87 |
+
"version": "0.0.3",
|
88 |
+
"resolved": "https://registry.npmjs.org/tr46/-/tr46-0.0.3.tgz",
|
89 |
+
"integrity": "sha512-N3WMsuqV66lT30CrXNbEjx4GEwlow3v6rr4mCcv6prnfwhS01rkgyFdjPNBYd9br7LpXV1+Emh01fHnq2Gdgrw=="
|
90 |
+
},
|
91 |
+
"node_modules/wasm-feature-detect": {
|
92 |
+
"version": "1.6.2",
|
93 |
+
"resolved": "https://registry.npmjs.org/wasm-feature-detect/-/wasm-feature-detect-1.6.2.tgz",
|
94 |
+
"integrity": "sha512-4dnaZ+Fq/q+BbMlTIfaNS851i+0zmHzui++NUZdskESRu3xwB6g6x2FnGvBdWtpijqO5yuj1l+EUTJGc4S4DKg=="
|
95 |
+
},
|
96 |
+
"node_modules/webidl-conversions": {
|
97 |
+
"version": "3.0.1",
|
98 |
+
"resolved": "https://registry.npmjs.org/webidl-conversions/-/webidl-conversions-3.0.1.tgz",
|
99 |
+
"integrity": "sha512-2JAn3z8AR6rjK8Sm8orRC0h/bcl/DqL7tRPdGZ4I1CjdF+EaMLmYxBHyXuKL849eucPFhvBoxMsflfOb8kxaeQ=="
|
100 |
+
},
|
101 |
+
"node_modules/whatwg-url": {
|
102 |
+
"version": "5.0.0",
|
103 |
+
"resolved": "https://registry.npmjs.org/whatwg-url/-/whatwg-url-5.0.0.tgz",
|
104 |
+
"integrity": "sha512-saE57nupxk6v3HY35+jzBwYa0rKSy0XR8JSxZPwgLr7ys0IBzhGviA1/TUGJLmSVqs8pb9AnvICXEuOHLprYTw==",
|
105 |
+
"dependencies": {
|
106 |
+
"tr46": "~0.0.3",
|
107 |
+
"webidl-conversions": "^3.0.0"
|
108 |
+
}
|
109 |
+
},
|
110 |
+
"node_modules/zlibjs": {
|
111 |
+
"version": "0.3.1",
|
112 |
+
"resolved": "https://registry.npmjs.org/zlibjs/-/zlibjs-0.3.1.tgz",
|
113 |
+
"integrity": "sha512-+J9RrgTKOmlxFSDHo0pI1xM6BLVUv+o0ZT9ANtCxGkjIVCCUdx9alUF8Gm+dGLKbkkkidWIHFDZHDMpfITt4+w==",
|
114 |
+
"engines": {
|
115 |
+
"node": "*"
|
116 |
+
}
|
117 |
+
}
|
118 |
+
}
|
119 |
+
}
|
package.json
ADDED
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"dependencies": {
|
3 |
+
"tesseract.js": "^5.1.0"
|
4 |
+
}
|
5 |
+
}
|
pytesseractJsOCR.js
ADDED
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
const Tesseract = require('tesseract.js');
|
2 |
+
const fs = require('fs');
|
3 |
+
|
4 |
+
// Command-line OCR helper.
// Usage: node pytesseractJsOCR.js <image> <lang> <outputFile>
// Recognizes the text in <image> using Tesseract language <lang> and writes
// the recognized text to <outputFile>.
const image = process.argv[2];
const lang = process.argv[3];
const saveFilePath = process.argv[4];

// Fail fast with a clear message instead of letting recognition fail on
// undefined arguments.
if (!image || !lang || !saveFilePath) {
  console.error('Usage: node pytesseractJsOCR.js <image> <lang> <outputFile>');
  process.exit(2);
}

Tesseract.recognize(
  image,
  lang,
  {
    // Forward the library's log messages to stdout.
    logger: m => console.log(m)
  }
).then(({ data: { text } }) => {
  console.log(text);
  fs.writeFileSync(saveFilePath, text);
}).catch(err => {
  console.error(err);
  // Signal failure through the exit status so callers can detect it. Set it
  // before writing, so the status is non-zero even if the write itself throws.
  process.exitCode = 1;
  // The rejection value is not guaranteed to be an Error instance; fall back
  // to its string form rather than writing "Error: undefined".
  const message = (err && err.message) || String(err);
  // Still record the error in the output file, as existing readers expect.
  fs.writeFileSync(saveFilePath, 'Error: ' + message);
});
|
requirements.txt
CHANGED
@@ -5,4 +5,6 @@ Pillow
|
|
5 |
torch
|
6 |
huggingface-hub
|
7 |
opencv-python
|
8 |
-
numpy
|
|
|
|
|
|
5 |
torch
|
6 |
huggingface-hub
|
7 |
opencv-python
|
8 |
+
numpy
|
9 |
+
pyppeteer
|
10 |
+
playwright
|