Spaces:

truong-xuan-linh
/

vietnamese-ocr

Build error

App Files Files Community

truong-xuan-linh committed on Sep 7, 2023

Commit

03484ca

1 Parent(s): a244fb6

inmit:

Browse files

Files changed (11) hide show

.github/workflows/main.yml +1 -1
.gitignore +4 -0
README.md +11 -0
app.py +53 -0
config/config.yml +1 -0
requirements.txt +13 -0
src/OCR.py +95 -0
src/setup.py +13 -0
storage/.keep +0 -0
storage/Roboto-Black.ttf +0 -0
storage/linhai.jpeg +0 -0

.github/workflows/main.yml CHANGED Viewed

@@ -17,4 +17,4 @@ jobs:
       - name: Push to hub
         env:
           HF_TOKEN: ${{ secrets.HF_TOKEN }}
-        run: git push --force https://truong-xuan-linh:[email protected]/spaces/truong-xuan-linh/vietnamese-ocr master

       - name: Push to hub
         env:
           HF_TOKEN: ${{ secrets.HF_TOKEN }}
+        run: git push --force https://truong-xuan-linh:[email protected]/spaces/truong-xuan-linh/vietnamese-ocr main

.gitignore ADDED Viewed

	@@ -0,0 +1,4 @@

+*.ipynb*
+log
+__pycache__
+*test*

README.md CHANGED Viewed

	@@ -1 +1,12 @@











1	# vietnamese_ocr

+---
+title: Vietnamese Ocr
+emoji: 🌍
+colorFrom: red
+colorTo: green
+sdk: streamlit
+sdk_version: 1.26.0
+app_file: app.py
+pinned: false
+---
 # vietnamese_ocr

app.py ADDED Viewed

	@@ -0,0 +1,53 @@

+import streamlit as st
+from PIL import Image
+#Trick to not init function multitime
+if "ocr_detector" not in st.session_state:
+    print("INIT MODEL")
+    from src.setup import Setup
+    Setup().ocr_model_downloader()
+    from src.OCR import OCRDetector
+    st.session_state.ocr_detector = OCRDetector()
+    print("DONE INIT MODEL")
+st.set_page_config(page_title="Vietnamese OCR", layout="wide", page_icon = "./storage/linhai.jpeg")
+hide_menu_style = """
+<style>
+footer {visibility: hidden;}
+</style>
+"""
+st.markdown(hide_menu_style, unsafe_allow_html= True)
+st.markdown(
+    """
+    <style>
+    [data-testid="stSidebar"][aria-expanded="true"] > div:first-child{
+        width: 400px;
+    }
+    [data-testid="stSidebar"][aria-expanded="false"] > div:first-child{
+        margin-left: -400px;
+    }
+    """,
+    unsafe_allow_html=True,
+)
+st.markdown("<h2 style='text-align: center; color: grey;'>Input: Image </h2>", unsafe_allow_html=True)
+st.markdown("<h2 style='text-align: center; color: grey;'>Output: The Vietnamese or English text in the image (if any).</h2>", unsafe_allow_html=True)
+left_col, right_col = st.columns(2)
+#LEFT COLUMN
+upload_image = left_col.file_uploader("Choose an image file", type=["jpg", "jpeg", "png", "webp", ])
+if left_col.button("OCR Detect"):
+    image, texts, boxes = st.session_state.ocr_detector.text_detector(upload_image, is_local=True)
+    left_col.write("**RESULTS:** ")
+    left_col.write(texts)
+    #RIGHT COLUMN
+    visualize_image = st.session_state.ocr_detector.visualize_ocr(image, texts, boxes)
+    right_col.write("**ORIGIN IMAGE:** ")
+    right_col.image(image)
+    right_col.write("**OCR IMAGE:** ")
+    right_col.image(visualize_image)

config/config.yml ADDED Viewed

	@@ -0,0 +1 @@


1	+ ocr_model: https://drive.google.com/uc?id=1-Cdr1MAztczfMxpkekn0wIiZsY8NAnJS

requirements.txt ADDED Viewed

	@@ -0,0 +1,13 @@

+#common
+opencv-python>=4.1.1
+numpy<=1.20.0
+torch>=1.8.0
+torchvision
+unidecode
+Pillow==9.4.0
+PyYAML>=5.3.1
+gdown==4.4.0
+paddlepaddle>=2.3.1
+paddleocr>=2.5.0.3
+vietocr>=0.3.8
+streamlit==1.26.0

src/OCR.py ADDED Viewed

	@@ -0,0 +1,95 @@

+from paddleocr import PaddleOCR
+from vietocr.tool.config import Cfg
+from vietocr.tool.predictor import Predictor
+import cv2
+import requests
+import unidecode
+import numpy as np
+from PIL import Image, ImageFont, ImageDraw
+class OCRDetector:
+    def __init__(self) -> None:
+        self.paddle_ocr = PaddleOCR(lang='en', use_angle_cls=False)
+        # config['weights'] = './weights/transformerocr.pth'
+        self.config = Cfg.load_config_from_name('vgg_transformer')
+        self.config['weights'] = "./storage/ocr_model.pth"
+        self.config['cnn']['pretrained']=False
+        self.config['device'] =  "cpu"
+        self.config['predictor']['beamsearch']=False
+        self.viet_ocr = Predictor(self.config)
+    def find_box(self, image):
+        '''Xác định box dựa vào mô hình paddle_ocr'''
+        result = self.paddle_ocr.ocr(image, cls = False)
+        result = result[0]
+        # Extracting detected components
+        boxes = [res[0] for res in result]
+        texts = [{"text": res[1][0], "score": res[1][1]} for res in result]
+        # scores = [res[1][1] for res in result]
+        return boxes, texts
+    def vietnamese_text(self, boxes, image):
+        '''Xác định text dựa vào mô hình viet_ocr'''
+        texts = []
+        for box in boxes:
+            A = box[0]
+            B = box[1]
+            C = box[2]
+            D = box[3]
+            y1 = min(A[1], B[1])
+            y1 = int(max(0, y1 - max(0, 10 - abs(A[1] - B[1]))))
+            y2 = max(C[1], D[1])
+            y2 = int(y2 + max(0, 10 - abs(A[1] - B[1])))
+            x1 = int(max(0, min(A[0], D[0]) ))
+            x2 = int(max(B[0], C[0]) )
+            cut_image = image[y1:y2, x1:x2]
+            cut_image = Image.fromarray(np.uint8(cut_image))
+            text, score = self.viet_ocr.predict(cut_image, return_prob=True)
+            texts.append({"text": text,
+                          "score": score})
+        return texts
+    #Merge
+    def text_detector(self, image_path, is_local=False):
+        if is_local:
+            image = Image.open(image_path).convert("RGB")
+        else:
+            image = Image.open(requests.get(image_path, stream=True).raw).convert("RGB")
+        image = np.array(image)
+        boxes, paddle_texts = self.find_box(image)
+        if not boxes:
+            return image, None, None
+        viet_texts = self.vietnamese_text(boxes, image)
+        results_texts = []
+        for i, viet_txt in enumerate(viet_texts):
+            if viet_txt["text"] != unidecode.unidecode(viet_txt["text"]):
+                results_texts.append(viet_txt)
+            else:
+                results_texts.append(paddle_texts[i])
+        if results_texts != []:
+            return image, results_texts, boxes
+        else:
+            return image, None, None
+    def visualize_ocr(self, image, texts, boxes):
+        if not texts:
+            return image
+        img = image.copy()
+        for box, text in zip(boxes, texts):
+            (x1, y1), (x2, y2), (x3, y3), (x4, y4) = box
+            h = y3 - y1
+            scl = max(h//1000,1)
+            font = ImageFont.truetype("./storage/Roboto-Black.ttf", 15*scl)
+            img = cv2.rectangle(img, (int(x1), int(y1)), (int(x3), int(y3)), (0, 255, 0), 1)
+            img_pil = Image.fromarray(img)
+            draw = ImageDraw.Draw(img_pil)
+            draw.text((int(x1), int(y1-h-3)), text["text"], font = font, fill = (51, 51, 255))
+            img = np.array(img_pil)
+            # img = cv2.putText(img, text["text"], (int(x1), int(y1)-3), cv2.FONT_HERSHEY_SIMPLEX, 0.3, (255,0,0), 1)
+        return img

src/setup.py ADDED Viewed

	@@ -0,0 +1,13 @@

+import os
+import yaml
+class Setup():
+    def __init__(self) -> None:
+        self.config = yaml.load(open("./config/config.yml"), yaml.loader.SafeLoader)
+        self.ocr_model = self.config["ocr_model"]
+    def ocr_model_downloader(self) -> None:
+        os.system("python -m pip install gdown --upgrade")
+        import gdown
+        if "ocr_model.pth" not in os.listdir(("./storage")):
+            gdown.download(self.ocr_model, "./storage/ocr_model.pth", quiet=False)

storage/.keep ADDED Viewed

File without changes

storage/Roboto-Black.ttf ADDED Viewed

Binary file (168 kB). View file

storage/linhai.jpeg ADDED Viewed