import os
import re
from pathlib import Path

import cv2
import torch
import gradio as gr
from PIL import Image
from pdf2image import convert_from_path
from transformers import DonutProcessor, VisionEncoderDecoderModel

# YOLOv7 helpers (expected to live alongside this script, as in the YOLOv7 repo).
from models.experimental import attempt_load
from utils.datasets import LoadImage
from utils.general import check_img_size, non_max_suppression, scale_coords, set_logging
from utils.torch_utils import select_device
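
# Pipeline overview (as wired up below):
#   1. check_image() turns the uploaded file (PDF or image) into PIL images.
#   2. crop() runs a YOLOv7 detector over those images and returns the cropped regions.
#   3. get_attributes() feeds the relevant crop to a fine-tuned Donut model and
#      returns the extracted fields as a JSON-like string.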

# Hugging Face access token, read from the 'key' environment variable and used
# below to authenticate when downloading the fine-tuned Donut checkpoint.
key = str(os.environ.get('key'))

def check_image(image):
    """Return a list of PIL images from the uploaded file.

    PDFs are rasterised page by page; anything pdf2image cannot handle
    is opened directly as an image.
    """
    try:
        images = convert_from_path(Path(image.name), fmt="jpeg", size=(960, 1280))
        return images
    except Exception:
        return [Image.open(image)]


def crop(files='',
         weights='yolov7.pt',
         classes=None,
         imgsz=640,
         device='',
         conf_thres=0.25,
         iou_thres=0.45,
         augment=False,
         agnostic_nms=False):
    """Run a YOLOv7 detector over the given images and return the crops.

    Returns a list of [PIL.Image, class_id] pairs, one per detection.
    """
    set_logging()
    device = select_device(device)
    half = device.type != 'cpu'  # half precision only supported on CUDA

    # Load the model and make sure the inference size is a multiple of the stride.
    model = attempt_load(weights, map_location=device)
    stride = int(model.stride.max())
    imgsz = check_img_size(imgsz, s=stride)

    if half:
        model.half()

    dataset = LoadImage(files=files, img_size=imgsz, stride=stride)

    # Class names, unwrapping DataParallel if necessary.
    names = model.module.names if hasattr(model, 'module') else model.names

    # Warm up the model with a dummy forward pass on GPU.
    if device.type != 'cpu':
        model(torch.zeros(1, 3, imgsz, imgsz).to(device).type_as(next(model.parameters())))

    list_cropobj = []
    for img, img0s in dataset:
        img = torch.from_numpy(img).to(device)
        img = img.half() if half else img.float()
        img /= 255.0  # 0-255 -> 0.0-1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        with torch.no_grad():
            pred = model(img, augment=augment)[0]

        pred = non_max_suppression(pred, conf_thres, iou_thres, classes=classes, agnostic=agnostic_nms)

        for det in pred:
            if len(det):
                # Rescale boxes from the inference size back to the original image size.
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4], img0s.shape).round()

                for *xyxy, conf, cls in reversed(det):
                    cropobj_bgr = img0s[int(xyxy[1]):int(xyxy[3]), int(xyxy[0]):int(xyxy[2])]
                    cropobj_rgb = cv2.cvtColor(cropobj_bgr, cv2.COLOR_BGR2RGB)
                    list_cropobj.append([Image.fromarray(cropobj_rgb), int(cls)])

    return list_cropobj
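
# Note: crop() returns [PIL.Image, class_id] pairs; get_attributes() below keeps only
# detections with class id 1, which the custom weights ("best.pt") appear to use for
# the document/ID region ("cedula") that Donut then parses.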


def get_attributes(input_img):
    """Extract document fields from an uploaded image or PDF with Donut."""
    access_token = key
    processor = DonutProcessor.from_pretrained("AA-supply/donut-finetuned-lic-crop", use_auth_token=access_token)
    model = VisionEncoderDecoderModel.from_pretrained("AA-supply/donut-finetuned-lic-crop", use_auth_token=access_token)

    device = "cuda" if torch.cuda.is_available() else "cpu"
    model.eval()
    model.to(device)

    # Convert the upload to PIL images, run YOLOv7 and keep the first crop of class 1.
    images = check_image(input_img)
    images = crop(weights="best.pt", files=images)
    image_cedula = [img[0] for img in images if img[1] == 1][0]

    pixel_values = processor(image_cedula, return_tensors="pt").pixel_values
    pixel_values = pixel_values.to(device)

    # Donut is prompted with the task start token and decodes the fields autoregressively.
    task_prompt = "<s_cord-v2>"
    decoder_input_ids = processor.tokenizer(task_prompt, add_special_tokens=False, return_tensors="pt").input_ids
    decoder_input_ids = decoder_input_ids.to(device)

    outputs = model.generate(
        pixel_values,
        decoder_input_ids=decoder_input_ids,
        max_length=model.decoder.config.max_position_embeddings,
        early_stopping=True,
        pad_token_id=processor.tokenizer.pad_token_id,
        eos_token_id=processor.tokenizer.eos_token_id,
        use_cache=True,
        num_beams=1,
        bad_words_ids=[[processor.tokenizer.unk_token_id]],
        return_dict_in_generate=True,
    )

    # Strip special tokens and the leading task token, then convert to a dict.
    seq = processor.batch_decode(outputs.sequences)[0]
    seq = seq.replace(processor.tokenizer.eos_token, "").replace(processor.tokenizer.pad_token, "")
    seq = re.sub(r"<.*?>", "", seq, count=1).strip()
    seq = processor.token2json(seq)

    return str(seq)


# Gradio app: file upload in, extracted fields out.
demo = gr.Interface(get_attributes, "file", "label")
demo.launch()