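"""Gradio demo: extract fields from an identity document (cédula / driver's licence).

Pipeline:
  1. check_image()    - render the pages of an uploaded PDF to images (pdf2image/poppler),
                        or open the upload directly if it is already an image.
  2. crop()           - detect and crop the document regions with a YOLOv7 model ("best.pt").
  3. get_attributes() - parse the licencia crop with a fine-tuned Donut model
                        ("AA-supply/donut-finetuned-lic-crop") and return the fields as JSON.
"""
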
import gradio as gr
import re
import os
from pdf2image import convert_from_path
from transformers import DonutProcessor, VisionEncoderDecoderModel
import torch
from PIL import Image
from pathlib import Path

from models.experimental import attempt_load
from utils.datasets import LoadImage
from utils.general import check_img_size, non_max_suppression, scale_coords, set_logging
from utils.torch_utils import select_device
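# The "models"/"utils" imports above come from the YOLOv7 codebase; this script assumes
# it runs inside a YOLOv7 checkout (or with that repo on PYTHONPATH). Note that LoadImage
# appears to be a loader added to utils/datasets.py for in-memory PIL images, since stock
# YOLOv7 ships LoadImages instead.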
import cv2
# Requires poppler-utils for pdf2image (e.g. sudo apt-get install poppler-utils).

key = str(os.environ.get('key'))  # Hugging Face access token, read from the 'key' secret/env var

def check_image(image):
    """Return a list of PIL images: the rendered pages if the upload is a PDF,
    otherwise the uploaded image itself."""
    try:
        images = convert_from_path(Path(image.name), fmt="jpeg", size=(960, 1280))
        return images
    except Exception:  # not a PDF (or poppler failed): treat the upload as a plain image
        return [Image.open(image)]

def crop(files='',               # input images
         weights='yolov7.pt',    # model.pt path(s)
         classes=None,           # filter by class: --class 0, or --class 0 2 3
         imgsz=640,              # inference size (pixels)
         device='',              # cuda device, i.e. 0 or 0,1,2,3 or cpu
         conf_thres=0.25,        # object confidence threshold
         iou_thres=0.45,         # IoU threshold for NMS
         augment=False,          # augmented inference
         agnostic_nms=False):    # class-agnostic NMS

    # Initialize
    set_logging()
    device = select_device(device)
    half = device.type != 'cpu'  # half precision only supported on CUDA

    # Load model
    model = attempt_load(weights, map_location=device)  # load FP32 model
    stride = int(model.stride.max())  # model stride
    imgsz = check_img_size(imgsz, s=stride)  # check img_size

    if half:
        model.half()  # to FP16

    # Set Dataloader
    dataset = LoadImage(files=files, img_size=imgsz, stride=stride)

    
    # Run inference
    if device.type != 'cpu':
        model(torch.zeros(1, 3, imgsz, imgsz).to(device).type_as(next(model.parameters())))  # run once
    list_cropobj = []
    for img, img0s in dataset:
        img = torch.from_numpy(img).to(device)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        # Inference
        with torch.no_grad():   # Calculating gradients would cause a GPU memory leak
            pred = model(img, augment=augment)[0]

        # Apply NMS
        pred = non_max_suppression(pred, conf_thres, iou_thres, classes=classes, agnostic=agnostic_nms)

        # Process detections
        for i, det in enumerate(pred):  # detections per image
            if len(det):
                # Rescale boxes from img_size to img0s size
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4], img0s.shape).round()

                # Write results
                for *xyxy, conf, cls in reversed(det):
                    # crop the detection out of the original BGR frame and convert to RGB
                    cropobj_bgr = img0s[int(xyxy[1]):int(xyxy[3]), int(xyxy[0]):int(xyxy[2])]
                    cropobj_rgb = cv2.cvtColor(cropobj_bgr, cv2.COLOR_BGR2RGB)
                    list_cropobj.append([Image.fromarray(cropobj_rgb), int(cls)])
                    
    return list_cropobj
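
# Example usage (hypothetical file name):
#   pages = check_image(open("document.pdf", "rb"))  # list of PIL images
#   crops = crop(weights="best.pt", files=pages)     # [[PIL crop, class index], ...]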


def get_attributes(input_img):
    access_token = key
    processor = DonutProcessor.from_pretrained("AA-supply/donut-finetuned-lic-crop", use_auth_token=access_token)
    model = VisionEncoderDecoderModel.from_pretrained("AA-supply/donut-finetuned-lic-crop", use_auth_token=access_token)
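    # NOTE: the Donut processor and model are re-loaded on every request; for a
    # long-running app, loading them once at module level would likely be faster.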

    device = "cuda" if torch.cuda.is_available() else "cpu"

    model.eval()
    model.to(device)
    
    images = check_image(input_img)
    images = crop(weights="best.pt", files= images)
    # img[1] holds the detected class index: 0 = cédula, 1 = licencia;
    # keep the first licencia crop (switch the comparison to 0 for cédulas).
    # This raises IndexError if no licencia was detected.
    image_licencia = [img[0] for img in images if img[1] == 1][0]

    pixel_values = processor(image_licencia, return_tensors="pt").pixel_values
    pixel_values = pixel_values.to(device)
    # prepare decoder inputs
    task_prompt = "<s_cord-v2>"  # Donut task start token (CORD-v2 document-parsing format)
    decoder_input_ids = processor.tokenizer(task_prompt, add_special_tokens=False, return_tensors="pt").input_ids
    decoder_input_ids = decoder_input_ids.to(device)

    # autoregressively generate sequence
    outputs = model.generate(
        pixel_values,
        decoder_input_ids=decoder_input_ids,
        max_length=model.decoder.config.max_position_embeddings,
        early_stopping=True,
        pad_token_id=processor.tokenizer.pad_token_id,
        eos_token_id=processor.tokenizer.eos_token_id,
        use_cache=True,
        num_beams=1,
        bad_words_ids=[[processor.tokenizer.unk_token_id]],
        return_dict_in_generate=True,
    )

    # turn into JSON
    seq = processor.batch_decode(outputs.sequences)[0]
    seq = seq.replace(processor.tokenizer.eos_token, "").replace(processor.tokenizer.pad_token, "")
    seq = re.sub(r"<.*?>", "", seq, count=1).strip()  # remove first task start token
    seq = processor.token2json(seq)

    return str(seq)

demo = gr.Interface(get_attributes, "file", "text")  # upload an image/PDF file; output the parsed fields as a JSON string
demo.launch()