Spaces:

itsyoboieltr
/

anpr

Build error

File size: 11,611 Bytes

6519fca

import os
import gradio as gr  # type: ignore
from paddleocr import PaddleOCR  # type: ignore
from ultralytics import YOLO  # type: ignore
from pathlib import Path
from deep_sort_realtime.deepsort_tracker import DeepSort  # type: ignore
import cv2  # type: ignore
import numpy as np
import re
from internetarchive import download  # type: ignore
from tqdm import trange

# Fetch the trained detector weights from the Internet Archive item "anpr_weights".
# NOTE(review): the model path below assumes the file lands in ./anpr_weights/ — confirm.
download("anpr_weights", files=["anpr.pt"], verbose=True)  # type: ignore

# Fetch the example images/video referenced by the gr.Examples sections of the UI.
download(
    "anpr_examples_202208",
    files=["test_image_1.jpg", "test_image_2.jpg", "test_image_3.jpeg", "test_video_1.mp4"],  # type: ignore
    verbose=True,
)

# Shared OCR engine: English language, angle classifier enabled, logging silenced.
paddle = PaddleOCR(lang="en", use_angle_cls=True, show_log=False)

# Shared YOLO detector loaded from the weights downloaded above.
model = YOLO(model="./anpr_weights/anpr.pt", task="detect")


def detect_plates(src, min_confidence=0.6):
    """Run the plate detector on ``src`` and keep the confident boxes.

    Args:
        src: Input forwarded unchanged to ``model.predict``.
        min_confidence: Detections whose confidence is below this value are
            dropped. Defaults to 0.6, the threshold that used to be
            hard-coded, so existing callers behave exactly as before.

    Returns:
        A list of dicts, one per kept detection, with keys ``"coords"``
        (``[x1, y1, x2, y2]`` as ints) and ``"det_conf"`` (float).
    """
    kept = []

    for prediction in model.predict(src, verbose=False):
        for box in prediction.boxes:
            confidence = box.conf.item()
            if confidence < min_confidence:
                continue
            # Flatten the box tensor to a plain list of four integer coordinates.
            corners = box.xyxy.view(1, 4).tolist()[0]
            kept.append(
                {
                    "coords": [int(value) for value in corners],
                    "det_conf": confidence,
                }
            )

    return kept


def crop(img, coords):
    """Return the region of ``img`` bounded by ``coords``.

    ``coords`` is indexed as ``[x1, y1, x2, y2]``; rows are taken from
    ``y1:y2`` and columns from ``x1:x2``.
    """
    rows = slice(coords[1], coords[3])
    cols = slice(coords[0], coords[2])
    return img[rows, cols]


def preprocess_image(src):
    """Turn a colour plate crop into a binarised image for OCR.

    Steps, in order: min-max normalisation to the 0..255 range, coloured
    non-local-means denoising, BGR-to-grayscale conversion, and an Otsu
    binary threshold. Returns the thresholded image.
    """
    height, width = src.shape[0], src.shape[1]

    # Stretch intensities to the full 0..255 range.
    stretched = cv2.normalize(
        src, np.zeros((height, width)), 0, 255, cv2.NORM_MINMAX
    )

    smoothed = cv2.fastNlMeansDenoisingColored(
        stretched, h=10, hColor=10, templateWindowSize=7, searchWindowSize=15
    )

    gray = cv2.cvtColor(smoothed, cv2.COLOR_BGR2GRAY)

    # cv2.threshold returns (threshold_value, image); only the image is needed.
    _, binary = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU)
    return binary


def _pick_best_ocr_candidate(results):
    """Return the ``(text, confidence)`` pair with the highest confidence, or None."""
    candidates = [
        candidate
        for image_result in (results or [])
        for candidate in (image_result or [])
    ]
    if not candidates:
        return None
    return max(candidates, key=lambda candidate: candidate[1])


def ocr_plate(src):
    """Read the plate text from a cropped plate image.

    Args:
        src: Cropped plate region (image array). ``None`` or a zero-size
            array yields an empty result instead of an OpenCV/OCR error.

    Returns:
        A dict with ``"plate"`` (the recognised text restricted to uppercase
        letters, digits, hyphens and spaces, with leading/trailing hyphens
        and spaces stripped) and ``"ocr_conf"`` (the recognition confidence,
        ``0.0`` when nothing could be read).
    """
    empty_result = {"plate": "", "ocr_conf": 0.0}

    # An empty crop (e.g. a box that collapsed after clamping) cannot be read.
    if src is None or src.size == 0:
        return empty_result

    # Preprocess the image for better OCR results
    preprocessed = preprocess_image(src)

    # Recognition only (det=False): the crop is already the plate region.
    # NOTE(review): assumes the result is a list (per image) of lists of
    # (text, confidence) pairs — confirm against the installed PaddleOCR.
    results = paddle.ocr(preprocessed, det=False, cls=True)

    # Pick the candidate with the highest confidence across all results.
    # The previous nested max() ranked by text and then took the first entry.
    best = _pick_best_ocr_candidate(results)
    if best is None:
        return empty_result
    plate_text, ocr_confidence = best

    # Filter out anything but uppercase letters, digits, hyphens and whitespace.
    # Also, remove hyphens and whitespace at the first and last positions.
    plate_text_filtered = re.sub(r"[^A-Z0-9- ]", "", plate_text).strip("- ")

    return {"plate": plate_text_filtered, "ocr_conf": ocr_confidence}


def ocr_plates(src, det_predictions):
    """OCR every detected plate in ``src``.

    For each detection, the region given by its ``"coords"`` is cropped out
    of ``src`` and passed to ``ocr_plate``. Results are returned in the same
    order as ``det_predictions``.
    """
    return [
        ocr_plate(crop(src, detection["coords"]))
        for detection in det_predictions
    ]


def plot_box(img, coords, label=None, color=(0, 150, 255), line_thickness=3):
    """Draw a bounding box, and optionally a filled label tag, on ``img`` in place.

    Args:
        img: Image to draw on; it is modified in place.
        coords: Box corners indexed as ``[x1, y1, x2, y2]``.
        label: Optional text drawn in a filled tag above the top-left corner.
            Nothing is drawn for ``None`` or an empty string.
        color: Colour used for the box outline and the tag background. The
            default is a tuple rather than a list so that no mutable object
            is shared between calls.
        line_thickness: Outline thickness; the font scale and font thickness
            are derived from it.
    """
    top_left = (int(coords[0]), int(coords[1]))
    bottom_right = (int(coords[2]), int(coords[3]))
    cv2.rectangle(
        img,
        top_left,
        bottom_right,
        color,
        thickness=line_thickness,
        lineType=cv2.LINE_AA,
    )

    if not label:
        return

    font_thickness = max(line_thickness - 1, 1)
    font_scale = line_thickness / 3
    text_width, text_height = cv2.getTextSize(
        label, 0, fontScale=font_scale, thickness=font_thickness
    )[0]

    # Filled tag sitting directly above the box's top-left corner.
    tag_corner = (top_left[0] + text_width, top_left[1] - text_height - 3)
    cv2.rectangle(img, top_left, tag_corner, color, -1, cv2.LINE_AA)
    cv2.putText(
        img,
        label,
        (top_left[0], top_left[1] - 2),
        0,
        font_scale,
        [225, 255, 255],
        thickness=font_thickness,
        lineType=cv2.LINE_AA,
    )


def get_plates(src):
    """Detect, read and annotate every number plate in ``src``.

    Runs detection and OCR, then draws each detection's box on ``src``
    (in place) with the recognised plate text as its label.

    Returns:
        A tuple ``(src, det_predictions, ocr_predictions)``.
    """
    detections = detect_plates(src)
    readings = ocr_plates(src, detections)

    # Annotate the image with one labelled box per detection.
    for detection, reading in zip(detections, readings):
        plot_box(src, detection["coords"], reading["plate"])

    return src, detections, readings


def predict_image(src):
    """Return ``src`` annotated with the detected plates (image-tab handler)."""
    # get_plates returns (image, detections, ocr results); only the image is shown.
    return get_plates(src)[0]


def predict_image_api(src):
    """Return the text of the first plate found in ``src`` (API handler).

    Returns:
        The ``"plate"`` string of the first OCR prediction, or an empty
        string when no plate was detected. Previously an image without any
        detection raised ``IndexError``.
    """
    _, _, ocr_predictions = get_plates(src)
    if not ocr_predictions:
        return ""
    return ocr_predictions[0]["plate"]


def pascal_voc_to_coco(x1y1x2y2):
    """Convert a ``[x1, y1, x2, y2]`` box to ``[x, y, width, height]``."""
    left, top, right, bottom = x1y1x2y2
    width = right - left
    height = bottom - top
    return [left, top, width, height]


def get_best_ocr(preds, rec_conf, ocr_res, track_id):
    """Reconcile a new OCR reading with the best one stored for ``track_id``.

    The first entry of ``preds`` whose ``"track_id"`` matches is considered.
    If the new reading is strictly more confident, the entry is updated in
    place; otherwise the stored text and confidence win. When no entry
    matches, the inputs are returned unchanged.

    Returns:
        A tuple ``(preds, confidence, text)`` holding the winning reading.
    """
    entry = next((item for item in preds if item["track_id"] == track_id), None)

    if entry is None:
        return preds, rec_conf, ocr_res

    if entry["ocr_conf"] < rec_conf:
        # New reading is better: remember it for this track.
        entry["ocr_conf"] = rec_conf
        entry["ocr_txt"] = ocr_res
        return preds, rec_conf, ocr_res

    # Stored reading is at least as good: report it instead.
    return preds, entry["ocr_conf"], entry["ocr_txt"]


def _compress_for_web(temp, output):
    """Re-encode ``temp`` into ``output`` with a two-pass H.264 ffmpeg run.

    The intermediate file and ffmpeg's two-pass log files are removed
    afterwards, whether or not encoding succeeded.

    Raises:
        FileNotFoundError: If the ``ffmpeg`` executable cannot be found.
        subprocess.CalledProcessError: If either ffmpeg pass exits non-zero.
    """
    import subprocess  # local import: keeps this block independent of file-level imports

    # Arguments are passed as a list (no shell), so file names containing
    # spaces or shell metacharacters are handled safely.
    base_cmd = [
        "ffmpeg", "-y", "-i", temp,
        "-c:v", "libx264",
        "-b:v", "5000k", "-minrate", "1000k", "-maxrate", "8000k",
    ]
    try:
        subprocess.run(
            base_cmd + ["-pass", "1", "-c:a", "aac", "-f", "mp4", os.devnull],
            check=True,
        )
        subprocess.run(
            base_cmd + ["-pass", "2", "-c:a", "aac", "-movflags", "faststart", output],
            check=True,
        )
    finally:
        # Best-effort cleanup, mirroring the previous `rm -rf` behaviour.
        for leftover in (temp, "ffmpeg2pass-0.log", "ffmpeg2pass-0.log.mbtree"):
            try:
                os.remove(leftover)
            except OSError:
                pass


def predict_video(src):
    """Detect, track and read number plates in a video file.

    Every frame is run through the detector; detections are fed to a
    DeepSort tracker, each confirmed track is OCR'd, and the most confident
    reading seen so far for a track is drawn on the frame. The annotated
    frames are written to an intermediate file that is then re-encoded with
    ffmpeg.

    Args:
        src: Path of the input video.

    Returns:
        The path of the annotated video, ``"<stem>_detected<suffix>"``,
        relative to the current working directory.

    Raises:
        ValueError: If the video cannot be opened.
        subprocess.CalledProcessError: If ffmpeg fails to encode the result.
    """
    output = f"{Path(src).stem}_detected{Path(src).suffix}"
    temp = f"{Path(output).stem}_temp{Path(output).suffix}"

    video = cv2.VideoCapture(src)
    if not video.isOpened():
        video.release()
        raise ValueError(f"Could not open video: {src}")

    export = None
    try:
        # Frame geometry comes back as floats; the writer needs integers.
        width = int(video.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT))
        fps = video.get(cv2.CAP_PROP_FPS)
        frames_total = int(video.get(cv2.CAP_PROP_FRAME_COUNT))

        export = cv2.VideoWriter(
            temp, cv2.VideoWriter_fourcc(*"mp4v"), fps, (width, height)
        )

        tracker = DeepSort()

        # Best OCR reading per track, plus a set for O(1) "seen before" checks.
        preds = []
        seen_track_ids = set()

        # Bounded by the reported frame count; also stops at the first failed read.
        for _ in trange(frames_total):
            ret, frame = video.read()
            if not ret:
                break

            det_predictions = detect_plates(frame)

            # The tracker is only updated on frames that contain detections.
            if det_predictions:
                # The tracker takes ([x, y, w, h], confidence, class) tuples.
                detections = [
                    (
                        pascal_voc_to_coco(det_prediction["coords"]),
                        det_prediction["det_conf"],
                        "number_plate",
                    )
                    for det_prediction in det_predictions
                ]

                tracks = tracker.update_tracks(detections, frame=frame)

                for track in tracks:
                    if not track.is_confirmed() or track.time_since_update > 1:
                        continue

                    # Track box as top-left/bottom-right ints, negatives clamped
                    # to 0 so slicing does not wrap around the frame.
                    bbox = [max(0, int(position)) for position in track.to_tlbr()]

                    plate_region = crop(frame, bbox)
                    if plate_region.size == 0:
                        # Box collapsed or lies outside the frame: nothing to read.
                        continue

                    ocr_prediction = ocr_plate(plate_region)
                    plate_text = ocr_prediction["plate"]
                    ocr_confidence = ocr_prediction["ocr_conf"]

                    if track.track_id not in seen_track_ids:
                        # First sighting of this track: store its reading.
                        seen_track_ids.add(track.track_id)
                        preds.append(
                            {
                                "track_id": track.track_id,
                                "ocr_txt": plate_text,
                                "ocr_conf": ocr_confidence,
                            }
                        )
                    else:
                        # Known track: keep whichever reading is more confident.
                        preds, ocr_confidence, plate_text = get_best_ocr(
                            preds,
                            ocr_confidence,
                            plate_text,
                            track.track_id,
                        )

                    plot_box(
                        frame,
                        bbox,
                        f"{str(track.track_id)}. {plate_text}",
                        color=[255, 150, 0],
                    )

            # Every frame is written, annotated or not.
            export.write(frame)
    finally:
        # Release capture and writer even if processing failed part-way.
        video.release()
        if export is not None:
            export.release()

    # Compress the video for smaller size and web compatibility.
    _compress_for_web(temp, output)
    return output


# Gradio UI: an "Image" tab and a "Video" tab, each with an input, an output
# and example files. The order of the statements below defines the layout.
with gr.Blocks() as demo:
    gr.Markdown('### <h3 align="center">Automatic Number Plate Recognition</h3>')
    gr.Markdown(
        "This AI was trained to detect and recognize number plates on vehicles."
    )
    with gr.Tabs():
        with gr.TabItem("Image"):
            with gr.Row():
                image_input = gr.Image()
                image_output = gr.Image()
                # Uploading an image shows the annotated image next to it.
                image_input.upload(
                    predict_image,
                    inputs=[image_input],
                    outputs=[image_output],
                )
            with gr.Row(visible=False):  # Prediction API
                # Hidden components that only exist to register
                # predict_image_api under the API name "predict".
                api_image_input = gr.Image()
                api_prediction_output = gr.Textbox()
                api_image_input.upload(
                    predict_image_api,
                    inputs=[api_image_input],
                    outputs=[api_prediction_output],
                    api_name="predict",
                )
            # Example images downloaded at start-up, run through predict_image.
            gr.Examples(
                [
                    ["./anpr_examples_202208/test_image_1.jpg"],
                    ["./anpr_examples_202208/test_image_2.jpg"],
                    ["./anpr_examples_202208/test_image_3.jpeg"],
                ],
                [image_input],
                [image_output],
                predict_image,
                cache_examples=True,
            )
        with gr.TabItem("Video"):
            with gr.Row():
                video_input = gr.Video(format="mp4")
                video_output = gr.Video(format="mp4")
                # Uploading a video shows the annotated video next to it.
                video_input.upload(
                    predict_video, inputs=[video_input], outputs=[video_output]
                )
            # Example video downloaded at start-up, run through predict_video.
            gr.Examples(
                [["./anpr_examples_202208/test_video_1.mp4"]],
                [video_input],
                [video_output],
                predict_video,
                cache_examples=True,
            )
    gr.Markdown("[@itsyoboieltr](https://github.com/itsyoboieltr)")

# Start the app as soon as the module is executed.
demo.launch()