import gradio as gr
import cv2
import pytesseract
import numpy as np

# Set the Tesseract OCR executable path (if needed).
# pytesseract.pytesseract.tesseract_cmd = r"/path/to/tesseract"


def _to_gray(image):
    """Return a single-channel (grayscale) version of *image*.

    The Gradio image input configured below hands over arrays in RGB(A)
    channel order, so the RGB conversion codes are used here instead of
    OpenCV's native BGR ones (BGR2GRAY would swap the red/blue weights).
    """
    if image.ndim == 2:
        # Already single-channel.
        return image
    channels = image.shape[2]
    if channels == 1:
        return image[:, :, 0]
    if channels == 4:
        return cv2.cvtColor(image, cv2.COLOR_RGBA2GRAY)
    return cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)


def _find_paper_quad(binary):
    """Return a 4-point contour approximating the sheet of paper, or None.

    Runs Canny edge detection on *binary*, then inspects the five largest
    contours (by area) and returns the first one whose polygonal
    approximation has exactly four vertices.
    """
    edges = cv2.Canny(binary, 50, 150, apertureSize=3)
    # Index [-2] picks the contour list on both OpenCV 3.x (three return
    # values) and OpenCV 4.x (two return values).
    contours = cv2.findContours(
        edges, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE
    )[-2]
    candidates = sorted(contours, key=cv2.contourArea, reverse=True)[:5]

    for contour in candidates:
        epsilon = 0.02 * cv2.arcLength(contour, True)
        approx = cv2.approxPolyDP(contour, epsilon, True)
        if len(approx) == 4:
            return approx
    return None


def _order_corners(pts):
    """Order four (x, y) points as top-left, top-right, bottom-right, bottom-left."""
    rect = np.zeros((4, 2), dtype="float32")

    # Top-left has the smallest x + y, bottom-right the largest.
    sums = pts.sum(axis=1)
    rect[0] = pts[np.argmin(sums)]
    rect[2] = pts[np.argmax(sums)]

    # Top-right has the smallest y - x, bottom-left the largest.
    diffs = np.diff(pts, axis=1)
    rect[1] = pts[np.argmin(diffs)]
    rect[3] = pts[np.argmax(diffs)]
    return rect


def _warp_to_rect(gray, quad):
    """Perspective-warp *gray* so that *quad* fills an upright rectangle.

    Returns the warped image, or None when the quadrilateral is degenerate
    (target width or height below 2 pixels), in which case no meaningful
    transform exists.
    """
    rect = _order_corners(quad.reshape(4, 2))
    tl, tr, br, bl = rect

    # Target size: the longer of each pair of opposite edges.
    max_width = max(int(np.linalg.norm(br - bl)), int(np.linalg.norm(tr - tl)))
    max_height = max(int(np.linalg.norm(tr - br)), int(np.linalg.norm(tl - bl)))
    if max_width < 2 or max_height < 2:
        return None

    dst = np.array(
        [
            [0, 0],
            [max_width - 1, 0],
            [max_width - 1, max_height - 1],
            [0, max_height - 1],
        ],
        dtype="float32",
    )
    matrix = cv2.getPerspectiveTransform(rect, dst)
    return cv2.warpPerspective(gray, matrix, (max_width, max_height))


def preprocess_image(image):
    """Convert *image* to grayscale and deskew the sheet of paper if found.

    Args:
        image: Image array in RGB/RGBA channel order (or already
            single-channel), as supplied by the Gradio image input.

    Returns:
        The perspective-corrected grayscale image when a four-sided contour
        is detected; otherwise the plain grayscale image.
    """
    gray = _to_gray(image)

    # Adaptive thresholding gives cleaner edges under uneven lighting.
    thresh = cv2.adaptiveThreshold(
        gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 11, 2
    )

    quad = _find_paper_quad(thresh)
    if quad is None:
        # No quadrilateral found: fall back to the grayscale image.
        return gray

    warped = _warp_to_rect(gray, quad)
    return gray if warped is None else warped


def ocr_process(image):
    """Run preprocessing and Tesseract OCR on *image* and return the text.

    Returns an empty string when no image was provided (*image* is None),
    instead of failing inside OpenCV.
    """
    if image is None:
        return ""

    processed_image = preprocess_image(image)
    return pytesseract.image_to_string(processed_image, lang='eng')


# Build the Gradio interface.
iface = gr.Interface(
    fn=ocr_process,
    inputs=gr.Image(type="numpy"),
    outputs="text",
    title="轻量级 OCR 应用",
    description="上传带角度的纸张图片,自动校正并提取文字"
)

iface.launch()