File size: 5,400 Bytes
7a5ba6e 2e7b875 94af0fd 2e7b875 94af0fd 2e7b875 7a5ba6e 2e7b875 94af0fd 7a5ba6e 94af0fd 7a5ba6e 94af0fd 7a5ba6e 4a788b6 2e7b875 94af0fd 7a5ba6e 2e7b875 94af0fd |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 |
import gradio as gr
import re
import os
from pdf2image import convert_from_path
from transformers import DonutProcessor, VisionEncoderDecoderModel
import torch
from PIL import Image
from pathlib import Path
from models.experimental import attempt_load
from utils.datasets import LoadImage
from utils.general import check_img_size, non_max_suppression, scale_coords, set_logging
from utils.torch_utils import select_device
import cv2
# Required system dependency (poppler, used by pdf2image): sudo apt-get install poppler-utils
# Access token read from the ``key`` environment variable; it is later passed
# as ``use_auth_token`` when the Donut processor/model are downloaded.
# The lookup is intentionally NOT wrapped in ``str()``: that would turn a
# missing variable into the literal string "None", which would then be used
# as a (bogus) credential instead of meaning "no token".
key = os.environ.get('key')
def check_image(image):
    """Turn an uploaded file into a list of images.

    The upload is first treated as a PDF: ``image.name`` is handed to
    ``convert_from_path`` with JPEG output at size ``(960, 1280)``. If that
    attempt fails for any reason, the upload is opened directly with
    ``Image.open`` and returned as a one-element list.

    Args:
        image: Uploaded file. ``image.name`` is used as the path for the PDF
            attempt; the object itself is passed to ``Image.open`` in the
            fallback.

    Returns:
        list: The result of ``convert_from_path``, or ``[Image.open(image)]``
        when the PDF conversion fails.
    """
    try:
        return convert_from_path(Path(image.name), fmt="jpeg", size=(960, 1280))
    except Exception:
        # Deliberate best-effort fallback: anything that is not a convertible
        # PDF is assumed to be a plain image. ``Exception`` (rather than a
        # bare ``except``) lets KeyboardInterrupt / SystemExit propagate.
        return [Image.open(image)]
def crop(files='',  # input handed to LoadImage
         weights='yolov7.pt',  # model.pt path(s)
         classes=None,  # filter by class: --class 0, or --class 0 2 3
         imgsz=640,  # inference size (pixels)
         device='',  # cuda device, i.e. 0 or 0,1,2,3 or cpu
         conf_thres=0.25,  # object confidence threshold
         iou_thres=0.45,  # IOU threshold for NMS
         augment=False,  # augmented inference
         agnostic_nms=False):  # class-agnostic NMS
    """Run the detector over ``files`` and return one crop per detection.

    The model is loaded from ``weights``, every item produced by
    ``LoadImage`` is run through it, non-max suppression is applied, and each
    surviving box is cut out of the original image.

    Args:
        files: Input passed unchanged to ``LoadImage(files=...)``.
        weights: Checkpoint path passed to ``attempt_load``.
        classes: Optional class filter forwarded to ``non_max_suppression``.
        imgsz: Requested inference size; adjusted by ``check_img_size`` to
            fit the model stride.
        device: Device string forwarded to ``select_device``.
        conf_thres: Confidence threshold for NMS.
        iou_thres: IoU threshold for NMS.
        augment: Forwarded to the model call as ``augment``.
        agnostic_nms: Forwarded to NMS as ``agnostic``.

    Returns:
        list: ``[PIL.Image, int]`` pairs, the cropped region (converted from
        BGR to RGB) and the integer class id of the detection. Boxes with
        zero area after rounding are skipped.
    """
    # Initialize
    set_logging()
    device = select_device(device)
    half = device.type != 'cpu'  # half precision only supported on CUDA

    # Load model
    model = attempt_load(weights, map_location=device)  # load FP32 model
    stride = int(model.stride.max())  # model stride
    imgsz = check_img_size(imgsz, s=stride)  # check img_size
    if half:
        model.half()  # to FP16

    # Set Dataloader
    dataset = LoadImage(files=files, img_size=imgsz, stride=stride)

    # Warm-up pass on non-CPU devices (run once)
    if device.type != 'cpu':
        model(torch.zeros(1, 3, imgsz, imgsz).to(device).type_as(next(model.parameters())))

    list_cropobj = []
    for img, img0s in dataset:
        img = torch.from_numpy(img).to(device)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)  # add the batch dimension

        # Inference
        with torch.no_grad():  # Calculating gradients would cause a GPU memory leak
            pred = model(img, augment=augment)[0]

        # Apply NMS
        pred = non_max_suppression(pred, conf_thres, iou_thres, classes=classes, agnostic=agnostic_nms)

        # Process detections
        for det in pred:  # detections per image
            if not len(det):
                continue
            # Rescale boxes from img_size to img0s size
            det[:, :4] = scale_coords(img.shape[2:], det[:, :4], img0s.shape).round()

            for *xyxy, _conf, cls in reversed(det):
                # Crop the original image to the box coordinates
                x1, y1, x2, y2 = (int(v) for v in xyxy)
                cropobj_bgr = img0s[y1:y2, x1:x2]
                if cropobj_bgr.size == 0:
                    # A box that collapsed to zero width/height after rounding
                    # yields an empty array, which cv2.cvtColor rejects.
                    continue
                cropobj_rgb = cv2.cvtColor(cropobj_bgr, cv2.COLOR_BGR2RGB)
                list_cropobj.append([Image.fromarray(cropobj_rgb), int(cls)])
    return list_cropobj
_DONUT_REPO = "AA-supply/donut-finetuned-lic-crop"
# Loaded (processor, model, device) triples keyed by access token, so the
# weights are fetched and moved to the device once instead of on every request.
_DONUT_CACHE = {}


def _load_donut(access_token):
    """Return the cached ``(processor, model, device)`` triple, loading it on first use."""
    if access_token not in _DONUT_CACHE:
        processor = DonutProcessor.from_pretrained(_DONUT_REPO, use_auth_token=access_token)
        model = VisionEncoderDecoderModel.from_pretrained(_DONUT_REPO, use_auth_token=access_token)
        device = "cuda" if torch.cuda.is_available() else "cpu"
        model.eval()
        model.to(device)
        _DONUT_CACHE[access_token] = (processor, model, device)
    return _DONUT_CACHE[access_token]


def get_attributes(input_img):
    """Extract the fields of the document found in an uploaded file.

    The upload is converted to images (``check_image``), the detector
    (``crop`` with ``best.pt``) cuts out the detected regions, and the first
    region with class label 1 is decoded by the Donut model.

    Args:
        input_img: Uploaded file as delivered by the Gradio ``"file"`` input.

    Returns:
        str: ``str()`` of the structure produced by ``processor.token2json``.

    Raises:
        IndexError: If the detector returns no region with class label 1.
    """
    processor, model, device = _load_donut(key)

    images = check_image(input_img)
    detections = crop(weights="best.pt", files=images)

    # Per the original note: label 0 is the ID card ("cedula") class, and
    # label 1 selects licences. Only the first label-1 crop is used.
    licence_image = next((img for img, label in detections if label == 1), None)
    if licence_image is None:
        raise IndexError("no region with class label 1 was detected in the uploaded file")

    pixel_values = processor(licence_image, return_tensors="pt").pixel_values
    pixel_values = pixel_values.to(device)
    print(pixel_values.size())

    # prepare decoder inputs
    task_prompt = "<s_cord-v2>"
    decoder_input_ids = processor.tokenizer(task_prompt, add_special_tokens=False, return_tensors="pt").input_ids
    decoder_input_ids = decoder_input_ids.to(device)

    # autoregressively generate sequence
    outputs = model.generate(
        pixel_values,
        decoder_input_ids=decoder_input_ids,
        max_length=model.decoder.config.max_position_embeddings,
        early_stopping=True,
        pad_token_id=processor.tokenizer.pad_token_id,
        eos_token_id=processor.tokenizer.eos_token_id,
        use_cache=True,
        num_beams=1,
        bad_words_ids=[[processor.tokenizer.unk_token_id]],
        return_dict_in_generate=True,
    )

    # turn into JSON
    seq = processor.batch_decode(outputs.sequences)[0]
    seq = seq.replace(processor.tokenizer.eos_token, "").replace(processor.tokenizer.pad_token, "")
    seq = re.sub(r"<.*?>", "", seq, count=1).strip()  # remove first task start token
    seq = processor.token2json(seq)
    return str(seq)
# Build the Gradio UI (one file upload in, one label out, handled by
# get_attributes) and start serving it.
demo = gr.Interface(fn=get_attributes, inputs="file", outputs="label")
demo.launch()
|