import glob
import json
import os
import xml.etree.ElementTree as ET

import cv2

# from sklearn.externals import joblib
import joblib
import numpy as np
import pandas as pd

# from .variables import old_ocr_req_cols
# from .skew_correction import PageSkewWraper

# NOTE(review): neither constant is used in the visible part of this file;
# presumably a height/width ratio and a target width in pixels - confirm.
const_HW = 1.294117647
const_W = 600

# References
# https://www.forbes.com/sites/forbestechcouncil/2020/06/02/leveraging-technologies-to-align-realograms-and-planograms-for-grocery/?sh=506b8b78e86c
# https://stackoverflow.com/questions/39403183/python-opencv-sorting-contours
# http://devdoc.net/linux/OpenCV-3.2.0/da/d0c/tutorial_bounding_rects_circles.html
# https://stackoverflow.com/questions/10297713/find-contour-of-the-set-of-points-in-opencv
# https://stackoverflow.com/questions/16538774/dealing-with-contours-and-bounding-rectangle-in-opencv-2-4-python-2-7
# https://stackoverflow.com/questions/50308055/creating-bounding-boxes-for-contours
# https://stackoverflow.com/questions/57296398/how-can-i-get-better-results-of-bounding-box-using-find-contours-of-opencv
# http://amroamroamro.github.io/mexopencv/opencv/generalContours_demo1.html
# https://gist.github.com/bigsnarfdude/d811e31ee17495f82f10db12651ae82d
# http://man.hubwiz.com/docset/OpenCV.docset/Contents/Resources/Documents/da/d0c/tutorial_bounding_rects_circles.html
# https://www.analyticsvidhya.com/blog/2021/05/document-layout-detection-and-ocr-with-detectron2/
# https://colab.research.google.com/drive/1m6gaQF6Q4M0IaSjoo_4jWllKJjK-i6fw?usp=sharing#scrollTo=lEyl3wYKHAe1
# https://stackoverflow.com/questions/39403183/python-opencv-sorting-contours
# https://docs.opencv.org/2.4/doc/tutorials/imgproc/shapedescriptors/bounding_rects_circles/bounding_rects_circles.html
# https://www.pyimagesearch.com/2016/03/21/ordering-coordinates-clockwise-with-python-and-opencv/


def bucket_sort(df, colmn, ymax_col="ymax", ymin_col="ymin"):
    """Group consecutive rows of *df* into horizontal lines.

    Rows are walked in their current order. The first row of a line fixes
    the line's lower edge (its ``ymax``); every following row whose ``ymin``
    lies strictly above that edge joins the same line, and the first row
    that does not starts the next line. The edge is deliberately NOT grown
    by taller boxes encountered later in the line.

    Args:
        df: DataFrame holding at least the columns named in *colmn*. The
            caller is expected to have ordered it top-to-bottom already
            (``do_sorting`` sorts by ``ymin``/``xmin`` before calling).
        colmn: Names of the columns to carry into the result; must contain
            *ymax_col* and *ymin_col*. The list is not modified.
        ymax_col: Name of the column with the bottom edge of each box.
        ymin_col: Name of the column with the top edge of each box.

    Returns:
        A new DataFrame with the columns ``colmn + ["line_number"]`` rebuilt
        from a single NumPy array (so all columns share that array's dtype),
        where ``line_number`` is 1-based. *df* itself is left untouched.
    """
    # Work on private copies so neither the caller's list nor frame changes.
    columns = [*colmn, "line_number"]
    # copy=True guarantees a writable array even when pandas hands out
    # read-only views (copy-on-write mode).
    values = df.assign(line_number=0)[columns].to_numpy(copy=True)

    ymax_pos = columns.index(ymax_col)
    ymin_pos = columns.index(ymin_col)
    line_pos = columns.index("line_number")

    total = len(values)
    cursor = 0
    line_no = 0
    while cursor < total:
        line_no += 1
        line_bottom = values[cursor, ymax_pos]
        # The first box always belongs to the line it opens. Testing it
        # against its own ymin would never advance for a degenerate box
        # (ymax <= ymin) and the loop would not terminate.
        values[cursor, line_pos] = line_no
        cursor += 1
        while cursor < total and line_bottom > values[cursor, ymin_pos]:
            values[cursor, line_pos] = line_no
            cursor += 1

    return pd.DataFrame(values, columns=columns)


def do_sorting(df):
    """Return the boxes of *df* in reading order (line by line, left to right).

    The frame is sorted by ``ymin``/``xmin``, split into lines with
    ``bucket_sort`` and finally ordered by ``line_number`` then ``xmin``.

    Args:
        df: DataFrame with ``xmin``, ``ymin``, ``xmax`` and ``ymax`` columns.
            An existing ``line_number`` column is discarded and recomputed.
            The input frame is not modified.

    Returns:
        A new DataFrame with a fresh ``0..n-1`` index containing the input
        columns plus ``idx`` (the index label each row had in *df*) and an
        integer, 1-based ``line_number``.
    """
    ordered = df.sort_values(["ymin", "xmin"], ascending=True)
    ordered["idx"] = ordered.index

    if "line_number" in ordered.columns:
        print("line number removed")
        ordered = ordered.drop("line_number", axis=1)

    lines = bucket_sort(ordered, ["xmin", "ymin", "xmax", "ymax"])
    # bucket_sort keeps the row order, so the line numbers can be attached
    # positionally. Joining on "idx" instead would multiply rows whenever
    # the original index contains duplicate labels.
    ordered["line_number"] = lines["line_number"].to_numpy().astype(int)

    ordered = ordered.sort_values(["line_number", "xmin"], ascending=True)
    return ordered.reset_index(drop=True)


def _xml_int(parent, tag, position=None):
    """Read child *tag* of *parent* as an int, accepting float-formatted text.

    Falls back to the child at *position* when no child is named *tag*.
    Fractional values are truncated towards zero.
    """
    node = parent.find(tag)
    if node is None and position is not None:
        node = parent[position]
    return int(float(node.text))


def xml_to_csv(xml_file):
    """Load the bounding boxes of one annotation XML file into a DataFrame.

    Despite its name the function writes no CSV; it only builds the frame.
    The expected layout is a root element with ``filename``, ``size``
    (``width``/``height``) and any number of ``object`` elements, each with
    a ``name`` and a ``bndbox`` holding ``xmin``/``ymin``/``xmax``/``ymax``.

    Args:
        xml_file: Path or file object accepted by ``ElementTree.parse``.

    Returns:
        DataFrame with one row per ``object`` and the columns ``filename``,
        ``width``, ``height``, ``cls``, ``xmin``, ``ymin``, ``xmax``,
        ``ymax``. It is empty (but has those columns) when the file
        contains no objects.
    """
    # https://gist.github.com/rotemtam/88d9a4efae243fc77ed4a0f9917c8f6c
    # https://discuss.streamlit.io/t/unable-to-read-files-using-standard-file-uploader/2258/2
    root = ET.parse(xml_file).getroot()

    # Image-level fields are identical for every object: read them once.
    filename = root.find("filename").text
    size = root.find("size")
    width = _xml_int(size, "width", 0)
    height = _xml_int(size, "height", 1)

    rows = []
    for member in root.findall("object"):
        box = member.find("bndbox")
        rows.append(
            (
                filename,
                width,
                height,
                member.find("name").text,
                _xml_int(box, "xmin"),
                _xml_int(box, "ymin"),
                _xml_int(box, "xmax"),
                _xml_int(box, "ymax"),
            )
        )

    column_name = [
        "filename",
        "width",
        "height",
        "cls",
        "xmin",
        "ymin",
        "xmax",
        "ymax",
    ]
    return pd.DataFrame(rows, columns=column_name)
def _draw_labelled_boxes(img0, sorted_df, indexes, target_names, color):
    """Draw one labelled rectangle on *img0* per (line, position) pair."""
    for line_idx, pos_in_line in zip(*indexes):
        try:
            # line_idx is 0-based while the "line_number" column is 1-based.
            row = sorted_df[sorted_df["line_number"] == line_idx + 1].iloc[
                pos_in_line
            ]
            label = f'{target_names[row["cls"]]}'
            # Plain Python ints: OpenCV drawing calls can reject NumPy
            # scalars or floats as point coordinates.
            top_left = (int(row["xmin"]), int(row["ymin"]))
            bottom_right = (int(row["xmax"]), int(row["ymax"]))
            cv2.rectangle(img0, top_left, bottom_right, color, 3, cv2.LINE_8)
            cv2.putText(
                img0, label, top_left, cv2.FONT_HERSHEY_SIMPLEX, 0.5, color
            )
        except Exception as e:
            # Best effort by design: a pair that cannot be resolved or drawn
            # is reported and skipped so the remaining boxes still appear.
            print("Error: " + str(e))
            continue


def annotate_planogram_compliance(
    img0, sorted_df, correct_indexes, wrong_indexes, target_names
):
    """Draw compliant and non-compliant detections onto *img0*.

    Each index argument is a pair of parallel sequences
    ``(line_indexes, positions)``: ``line_indexes[i]`` is the 0-based line
    (matched against ``line_number - 1``) and ``positions[i]`` the 0-based
    position of the box inside that line.

    Args:
        img0: Image to draw on; it is modified in place.
        sorted_df: DataFrame with ``xmin``, ``ymin``, ``xmax``, ``ymax``,
            ``cls`` and ``line_number`` columns, ordered so that the
            position inside a line is meaningful.
        correct_indexes: Pairs drawn with colour ``(0, 255, 0)``.
        wrong_indexes: Pairs drawn with colour ``(0, 0, 255)``.
        target_names: Lookup from a row's ``cls`` value to its label text.

    Returns:
        The same *img0* object, with rectangles and labels drawn on it.
        Pairs that cannot be resolved or drawn are printed and skipped.
    """
    # NOTE(review): colours are presumably BGR (green / red) as used by
    # OpenCV images loaded with cv2.imread - confirm against the caller.
    _draw_labelled_boxes(
        img0, sorted_df, correct_indexes, target_names, (0, 255, 0)
    )
    _draw_labelled_boxes(
        img0, sorted_df, wrong_indexes, target_names, (0, 0, 255)
    )
    return img0