import os
import re
from pathlib import Path

import cv2
import torch
import gradio as gr
from PIL import Image
from pdf2image import convert_from_path
from transformers import DonutProcessor, VisionEncoderDecoderModel

# YOLOv7 helpers (expected to live alongside this script, as in the YOLOv7 repo).
from models.experimental import attempt_load
from utils.datasets import LoadImage
from utils.general import check_img_size, non_max_suppression, scale_coords, set_logging
from utils.torch_utils import select_device
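
# Pipeline overview (as wired up below):
#   1. check_image() turns the uploaded file (PDF or image) into PIL images.
#   2. crop() runs a YOLOv7 detector over those images and returns the cropped regions.
#   3. get_attributes() feeds the relevant crop to a fine-tuned Donut model and
#      returns the extracted fields as a JSON-like string.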

# Hugging Face access token, read from the 'key' environment variable and used
# below to authenticate when downloading the fine-tuned Donut checkpoint.
key = str(os.environ.get('key'))

def check_image(image):
    """Return a list of PIL images from the uploaded file.

    PDFs are rasterised page by page; anything pdf2image cannot handle
    is opened directly as an image.
    """
    try:
        images = convert_from_path(Path(image.name), fmt="jpeg", size=(960, 1280))
        return images
    except Exception:
        return [Image.open(image)]


def crop(files='',
         weights='yolov7.pt',
         classes=None,
         imgsz=640,
         device='',
         conf_thres=0.25,
         iou_thres=0.45,
         augment=False,
         agnostic_nms=False):
    """Run a YOLOv7 detector over the given images and return the crops.

    Returns a list of [PIL.Image, class_id] pairs, one per detection.
    """
    set_logging()
    device = select_device(device)
    half = device.type != 'cpu'  # half precision only supported on CUDA

    # Load the model and make sure the inference size is a multiple of the stride.
    model = attempt_load(weights, map_location=device)
    stride = int(model.stride.max())
    imgsz = check_img_size(imgsz, s=stride)

    if half:
        model.half()

    dataset = LoadImage(files=files, img_size=imgsz, stride=stride)

    # Class names, unwrapping DataParallel if necessary.
    names = model.module.names if hasattr(model, 'module') else model.names

    # Warm up the model with a dummy forward pass on GPU.
    if device.type != 'cpu':
        model(torch.zeros(1, 3, imgsz, imgsz).to(device).type_as(next(model.parameters())))

    list_cropobj = []
    for img, img0s in dataset:
        img = torch.from_numpy(img).to(device)
        img = img.half() if half else img.float()
        img /= 255.0  # 0-255 -> 0.0-1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        with torch.no_grad():
            pred = model(img, augment=augment)[0]

        pred = non_max_suppression(pred, conf_thres, iou_thres, classes=classes, agnostic=agnostic_nms)

        for det in pred:
            if len(det):
                # Rescale boxes from the inference size back to the original image size.
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4], img0s.shape).round()

                for *xyxy, conf, cls in reversed(det):
                    cropobj_bgr = img0s[int(xyxy[1]):int(xyxy[3]), int(xyxy[0]):int(xyxy[2])]
                    cropobj_rgb = cv2.cvtColor(cropobj_bgr, cv2.COLOR_BGR2RGB)
                    list_cropobj.append([Image.fromarray(cropobj_rgb), int(cls)])

    return list_cropobj
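
# Note: crop() returns [PIL.Image, class_id] pairs; get_attributes() below keeps only
# detections with class id 1, which the custom weights ("best.pt") appear to use for
# the document/ID region ("cedula") that Donut then parses.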


def get_attributes(input_img):
    """Extract document fields from an uploaded image or PDF with Donut."""
    access_token = key
    processor = DonutProcessor.from_pretrained("AA-supply/donut-finetuned-lic-crop", use_auth_token=access_token)
    model = VisionEncoderDecoderModel.from_pretrained("AA-supply/donut-finetuned-lic-crop", use_auth_token=access_token)

    device = "cuda" if torch.cuda.is_available() else "cpu"
    model.eval()
    model.to(device)

    # Convert the upload to PIL images, run YOLOv7 and keep the first crop of class 1.
    images = check_image(input_img)
    images = crop(weights="best.pt", files=images)
    image_cedula = [img[0] for img in images if img[1] == 1][0]

    pixel_values = processor(image_cedula, return_tensors="pt").pixel_values
    pixel_values = pixel_values.to(device)

    # Donut is prompted with the task start token and decodes the fields autoregressively.
    task_prompt = "<s_cord-v2>"
    decoder_input_ids = processor.tokenizer(task_prompt, add_special_tokens=False, return_tensors="pt").input_ids
    decoder_input_ids = decoder_input_ids.to(device)

    outputs = model.generate(
        pixel_values,
        decoder_input_ids=decoder_input_ids,
        max_length=model.decoder.config.max_position_embeddings,
        early_stopping=True,
        pad_token_id=processor.tokenizer.pad_token_id,
        eos_token_id=processor.tokenizer.eos_token_id,
        use_cache=True,
        num_beams=1,
        bad_words_ids=[[processor.tokenizer.unk_token_id]],
        return_dict_in_generate=True,
    )

    # Strip special tokens and the leading task token, then convert to a dict.
    seq = processor.batch_decode(outputs.sequences)[0]
    seq = seq.replace(processor.tokenizer.eos_token, "").replace(processor.tokenizer.pad_token, "")
    seq = re.sub(r"<.*?>", "", seq, count=1).strip()
    seq = processor.token2json(seq)

    return str(seq)


# Gradio app: file upload in, extracted fields out.
demo = gr.Interface(get_attributes, "file", "label")
demo.launch()