Spaces:

Yifeng-Liu
/

satellite-image-roofs-auto-annotation

Running

App Files Files Community

ownEyes committed on Oct 22, 2024

Commit

0a96ac9

1 Parent(s): 58d36e1

add project files

Browse files

Files changed (4) hide show

app.py +143 -0
img/example.jpg +0 -0
inference.py +315 -0
requirements.txt +150 -0

app.py ADDED Viewed

	@@ -0,0 +1,143 @@

+import gradio as gr
+from pathlib import Path
+import secrets
+import shutil
+from inference import detector, detector_processor, segment_predictor, ModelInference
+current_dir = Path(__file__).parent
def process_images(image_path, files, slider_value, request: gr.Request):
    """Run roof annotation on the uploaded image or folder and expose the results.

    A single image takes precedence over a folder upload. The COCO annotation
    file is written into a directory named after the session hash, under a
    random file name.

    Args:
        image_path: Path of the single uploaded image, or a falsy value.
        files: Paths uploaded through the folder button, or a falsy value.
        slider_value: Batch size used for folder inference.
        request: Gradio request; only ``session_hash`` is read.

    Returns:
        Updates for, in order: the upload button (hidden), the run button
        (hidden), the download button (pointing at the annotation file) and
        the annotated preview image.

    Raises:
        gr.Error: If nothing was uploaded or no roof could be annotated, so the
            user sees a readable message instead of an internal exception.
    """
    user_dir: Path = current_dir / str(request.session_hash)
    user_dir.mkdir(exist_ok=True)
    annotation_path = user_dir / f"{secrets.token_hex(nbytes=8)}_annotations.coco.json"
    class_names = list(inferencer.id2label.values())

    if image_path:
        print(f"Processing image: {image_path}")
        seg_detections, annotated_frame = inferencer.predict_one(image_path)
        # predict_one signals "nothing found" with (None, None); saving that
        # would fail deep inside the exporter.
        if seg_detections is None:
            raise gr.Error("No roofs could be annotated in the uploaded image.")
        inferencer.save_annotations([image_path], [seg_detections], class_names, annotation_path)
    elif files:
        print(f"Processing files: {files}")
        print(f"Batch size: {slider_value}")
        all_image_paths, all_results, annotated_frame, detector_failed_list, segmentor_failed_list = inferencer.predict_folder(files, slider_value)
        print(f"Detector failed list: {detector_failed_list}")
        print(f"Segmentor failed list: {segmentor_failed_list}")
        if not all_results:
            raise gr.Error("No roofs could be annotated in any of the uploaded images.")
        inferencer.save_annotations(all_image_paths, all_results, class_names, annotation_path)
    else:
        # Previously this path fell through and hit an unbound `annotated_frame`.
        raise gr.Error("Please upload an image or a folder of images first.")

    if annotated_frame is not None:
        # The frame is produced with OpenCV (BGR channel order) while gr.Image
        # interprets numpy arrays as RGB, so flip the channel axis for display.
        annotated_frame = annotated_frame[..., ::-1].copy()

    return [
        gr.UploadButton(visible=False),
        gr.Button("Run", visible=False),
        gr.DownloadButton("Download annotation results", value=annotation_path, label="Download", visible=True),
        gr.Image(value=annotated_frame, label="Annotated Image", visible=True),
    ]
def upload_file():
    """Put the UI into its "folder uploaded" state.

    The returned updates are wired, in order, to: the single-image input
    (cleared), the upload button (hidden), the batch-size slider (shown), the
    run button (shown), the download button (hidden) and the preview image
    (emptied but visible).
    """
    cleared_input = None
    upload_update = gr.UploadButton(visible=False)
    slider_update = gr.Slider(1, 6, step=1, label="Batch size", interactive=True, value=4, visible=True)
    run_update = gr.Button("Run", visible=True)
    download_update = gr.DownloadButton(visible=False)
    preview_update = gr.Image(value=None, label="Annotated Image", visible=True)
    return [cleared_input, upload_update, slider_update, run_update, download_update, preview_update]
def upload_image(imge_path):
    """Put the UI into its "single image uploaded" state.

    The argument is not used; it is only there because the event passes the
    image input along. The returned updates are wired, in order, to: the
    upload button (hidden), the batch-size slider (hidden), the run button
    (shown), the download button (hidden) and the preview image (emptied but
    visible).
    """
    upload_update = gr.UploadButton(visible=False)
    slider_update = gr.Slider(1, 6, step=1, label="Batch size", interactive=True, value=4, visible=False)
    run_update = gr.Button("Run", visible=True)
    download_update = gr.DownloadButton(visible=False)
    preview_update = gr.Image(value=None, label="Annotated Image", visible=True)
    return [upload_update, slider_update, run_update, download_update, preview_update]
def download_file():
    """Reset the UI after the download button has been clicked.

    The returned updates are wired, in order, to: the single-image input
    (cleared), the upload button (shown again), the batch-size slider
    (hidden), the run button (hidden), the download button (kept visible) and
    the preview image (cleared and hidden).
    """
    input_update = gr.Image(value=None)
    upload_update = gr.UploadButton(visible=True)
    slider_update = gr.Slider(1, 6, step=1, label="Batch size", interactive=True, value=4, visible=False)
    run_update = gr.Button("Run", visible=False)
    download_update = gr.DownloadButton(visible=True)
    preview_update = gr.Image(value=None, visible=False)
    return [input_update, upload_update, slider_update, run_update, download_update, preview_update]
def delete_directory(request: gr.Request):
    """Remove the directory named after this session's hash, if it exists."""
    session_dir = current_dir / str(request.session_hash)
    if not session_dir.exists():
        # Nothing was ever written for this session.
        return
    shutil.rmtree(session_dir)
def create_gradio_interface():
    """Build the Blocks app: header, input column, output column, example row and event wiring.

    Returns:
        The assembled ``gr.Blocks`` instance (not yet launched).
    """
    with gr.Blocks(theme=gr.themes.Monochrome(), delete_cache=(60, 3600)) as demo:
        gr.HTML("""
                <div style="text-align: center;">
                <h1>Satellite Image Roofs Auto Annotation</h1>
                <p>Powered by a <a href="https://huggingface.co/Yifeng-Liu/rt-detr-finetuned-for-satellite-image-roofs-detection" target="_blank">fine-tuned RT-DETR model</a> and Fast-SAM model.</p>
                <p>📤 Upload an image or a folder containing images.</p>
                <p>🖼️ Images are saved in a user-specific directory and deleted when the user closes the page.</p>
                <p>⚙️ Each user can upload files with a maximum file size of 200 MB.</p>
                </div>
            """)

        with gr.Row():
            # Left column: inputs and controls.
            with gr.Column(scale=1):
                img_input = gr.Image(
                    interactive=True,
                    sources=["upload", "clipboard"],
                    show_share_button=True,
                    type='filepath',
                    label="Upload a single image",
                )
                upload_button = gr.UploadButton("Upload a folder", file_count="directory")
                batch_slider = gr.Slider(1, 6, step=1, label="Batch size", interactive=True, value=4, visible=False)
                run_button = gr.Button("Run", visible=False)
            # Right column: results.
            with gr.Column(scale=1):
                img_output = gr.Image(label="Annotated Image", visible=False)
                download_button = gr.DownloadButton("Download annotation results", label="Download", visible=False)

        # Components refreshed by the state-switching callbacks; the "reset"
        # variant additionally clears the single-image input.
        panel_outputs = [upload_button, batch_slider, run_button, download_button, img_output]
        reset_outputs = [img_input] + panel_outputs

        with gr.Row():
            gr.Examples(
                examples=[["./img/example.jpg"]],
                inputs=[img_input],
                outputs=panel_outputs,
                fn=upload_image,
                run_on_click=True,
            )

        upload_button.upload(upload_file, None, reset_outputs)
        download_button.click(download_file, None, reset_outputs)
        run_button.click(
            process_images,
            [img_input, upload_button, batch_slider],
            [upload_button, run_button, download_button, img_output],
        )
        img_input.upload(upload_image, img_input, panel_outputs)
        # Registered on the page's unload event to remove the session directory.
        demo.unload(delete_directory)

    return demo
def inferencer_init():
    """Create the ModelInference wrapper around the models imported from ``inference``.

    Uses a single class (id 0 -> 'building') and a confidence threshold of 0.5.
    """
    return ModelInference(
        detector,
        detector_processor,
        segment_predictor,
        id2label={0: 'building'},
        CONFIDENCE_TRESHOLD=0.5,
    )
# Built once at import time; process_images reads this module-level instance.
inferencer = inferencer_init()

if __name__ == "__main__":
    # Upload size limit handed to Gradio: 200 MB.
    upload_limit = 200 * gr.FileSize.MB
    demo = create_gradio_interface()
    demo.launch(max_file_size=upload_limit)

img/example.jpg ADDED Viewed

inference.py ADDED Viewed

	@@ -0,0 +1,315 @@

+from transformers import AutoModelForObjectDetection, AutoImageProcessor
+from torch.utils.data import Dataset, DataLoader
+import os
+from tqdm import tqdm
+from PIL import Image
+from pathlib import Path
+from ultralytics.models.fastsam import FastSAMPredictor
+import supervision as sv
+import torch
+import numpy as np
+import cv2
+from typing import List, Tuple, Dict, Any, Optional
+from supervision.dataset.utils import approximate_mask_with_polygons
+from supervision.detection.utils import (
+    contains_holes,
+    contains_multiple_segments,
+)
# Hub repository from which both the detection model and its image processor are loaded.
_DETECTOR_REPO = "Yifeng-Liu/rt-detr-finetuned-for-satellite-image-roofs-detection"
detector = AutoModelForObjectDetection.from_pretrained(_DETECTOR_REPO)
detector_processor = AutoImageProcessor.from_pretrained(_DETECTOR_REPO)

# Settings handed to the FastSAM predictor.
overrides = {
    "conf": 0.25,
    "task": "segment",
    "mode": "predict",
    "model": "FastSAM-x.pt",
    "save": False,
}
segment_predictor = FastSAMPredictor(overrides=overrides)
+# IMG_FORMATS = {"bmp", "dng", "jpeg", "jpg", "mpo", "png", "tif", "tiff", "webp", "pfm"}  # image suffixes
class ImageInferenceDataset(Dataset):
    """Dataset that yields preprocessed images for inference (no labels or masks).

    Args:
        image_paths: Sequence of image file paths. No filtering is done here,
            so every entry must be readable by PIL.
        image_processor: Callable invoked as
            ``image_processor(images=..., return_tensors="pt")`` whose result
            exposes a ``"pixel_values"`` tensor with a leading batch dimension.
    """

    def __init__(self, image_paths: List[str], image_processor):
        self.image_processor = image_processor
        # Materialise once so len() and indexing work for any iterable input.
        self.image_files = list(image_paths)

    def __len__(self) -> int:
        return len(self.image_files)

    def __getitem__(self, idx: int) -> Tuple[torch.Tensor, str, Tuple[int, int]]:
        """Load and preprocess the image at ``idx``.

        Args:
            idx: Index into the list of image paths.

        Returns:
            A ``(pixel_values, image_path, orig_size)`` tuple where
            ``orig_size`` is ``(height, width)`` of the image on disk.
        """
        image_path = self.image_files[idx]
        with Image.open(image_path) as img:
            # PIL's ``size`` is (width, height); the detector's post-processing
            # takes target sizes as (height, width), so store that order.
            orig_size = (img.height, img.width)
            rgb = img.convert("RGB")  # normalise palette/greyscale/alpha inputs
            processed_img = self.image_processor(images=rgb, return_tensors="pt")["pixel_values"].squeeze(0)
        return processed_img, str(image_path), orig_size
def collate_fn_inference(batch: List[Tuple[torch.Tensor, str]]) -> dict:
    """Merge dataset items into one padded batch for the detector.

    Args:
        batch: Items produced by the dataset. Each item is indexed at
            positions 0, 1 and 2: processed image tensor, image path and
            original image size.

    Returns:
        dict with keys ``'pixel_values'`` and ``'pixel_mask'`` (taken from the
        module-level processor's ``pad`` output), ``'image_paths'`` and
        ``'orig_sizes'`` (plain lists, in batch order).
    """
    tensors, paths, sizes = [], [], []
    for item in batch:
        tensors.append(item[0])
        paths.append(item[1])
        sizes.append(item[2])

    # Images in a batch can differ in size; the processor pads them to a common shape.
    padded = detector_processor.pad(tensors, return_tensors="pt")

    collated = {}
    collated['pixel_values'] = padded['pixel_values']
    collated['pixel_mask'] = padded['pixel_mask']
    collated['image_paths'] = paths
    collated['orig_sizes'] = sizes
    return collated
class ModelInference:
    """Two-stage annotation pipeline: box detection, then box-prompted segmentation.

    Args:
        detector: Object-detection model; it is moved to CUDA when available.
        detector_processor: Image processor paired with ``detector`` (used for
            preprocessing and ``post_process_object_detection``).
        segment_predictor: Callable segmentation predictor exposing ``prompt``.
        id2label: Mapping from class id to the class name written to the output.
        CONFIDENCE_TRESHOLD: Minimum detector score for a box to be kept.
    """

    def __init__(self, detector, detector_processor, segment_predictor, id2label, CONFIDENCE_TRESHOLD):
        self.detector = detector
        self.detector_processor = detector_processor
        self.segment_predictor = segment_predictor
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.CONFIDENCE_TRESHOLD = CONFIDENCE_TRESHOLD
        self.id2label = id2label
        self.mask_annotator = sv.MaskAnnotator()
        self.detector.to(self.device)

    def _boxes_from_result(self, detector_result):
        """Return NMS-filtered xyxy boxes for one post-processed result, or None if it has no boxes."""
        if detector_result['boxes'].numel() == 0:
            return None
        detections = sv.Detections.from_transformers(transformers_results=detector_result)
        return detections.with_nms(threshold=0.5).xyxy.tolist()

    def _apply_class_names(self, seg_detections):
        """Replace the segmenter's generic 'object' class name with names from ``id2label``."""
        names = seg_detections.data.get('class_name')
        if names is None or seg_detections.class_id is None:
            return
        relabelled = [
            self.id2label.get(int(class_id), name) if name == 'object' else name
            for name, class_id in zip(names, seg_detections.class_id)
        ]
        # A str dtype sized from the data avoids truncating the longer names.
        seg_detections.data['class_name'] = np.array(relabelled, dtype=str)

    def _segment(self, image, boxes):
        """Segment ``image`` using ``boxes`` as prompts; return filtered detections or None if no mask."""
        with torch.no_grad():
            everything_results = self.segment_predictor(image)
            if everything_results[0].masks is None:
                return None
            bbox_results = self.segment_predictor.prompt(everything_results, boxes)[0]
        seg_detections = sv.Detections.from_ultralytics(bbox_results)
        seg_detections = self.filter_small_masks(seg_detections)
        self._apply_class_names(seg_detections)
        return seg_detections

    def predict_one(self, image_path):
        """Annotate a single image.

        Args:
            image_path: Path of the image to process.

        Returns:
            ``(seg_detections, annotated_frame)``, or ``(None, None)`` when the
            image cannot be read, no box is detected or no mask is generated.
            ``annotated_frame`` is in OpenCV's BGR channel order.
        """
        image = cv2.imread(str(image_path))
        if image is None:
            print(f"Could not read image: {image_path}")
            return None, None

        # cv2 loads BGR, whereas predict_folder feeds the detector RGB images
        # (via PIL); convert so both entry points see the same channel order.
        rgb_image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        with torch.no_grad():
            inputs = self.detector_processor(images=rgb_image, return_tensors='pt').to(self.device)
            outputs = self.detector(**inputs)

        # image.shape[:2] is (height, width).
        target_sizes = torch.tensor([image.shape[:2]]).to(self.device)
        results = self.detector_processor.post_process_object_detection(
            outputs=outputs,
            threshold=self.CONFIDENCE_TRESHOLD,
            target_sizes=target_sizes,
        )[0]

        boxes = self._boxes_from_result(results)
        if boxes is None:
            print("No bounding box detected")
            return None, None

        seg_detections = self._segment(image, boxes)
        if seg_detections is None:
            print("No segmentation mask generated")
            return None, None

        annotated_frame = self.mask_annotator.annotate(scene=image.copy(), detections=seg_detections)
        return seg_detections, annotated_frame

    def predict_folder(self, image_paths, batch_size=4):
        """Annotate many images, running the detector in batches.

        One failing image no longer discards the rest of its batch: each image
        is either appended to the results or recorded in a failure list.

        Args:
            image_paths: Paths of the images to process.
            batch_size: Number of images per detector forward pass.

        Returns:
            ``(all_image_paths, all_results, annotated_frame,
            detector_failed_list, segmentor_failed_list)``. The first two lists
            are aligned. ``annotated_frame`` is a BGR preview of the first
            successful image, or ``None`` when nothing succeeded.
        """
        dataset = ImageInferenceDataset(image_paths=image_paths, image_processor=self.detector_processor)
        # int(): the value may arrive from a UI slider as a float.
        dataloader = DataLoader(dataset, batch_size=int(batch_size), collate_fn=collate_fn_inference)

        detector_failed_list = []
        segmentor_failed_list = []
        all_image_paths = []
        all_results = []
        annotated_frame = None

        for batch in tqdm(dataloader):
            pixel_values = batch["pixel_values"].to(self.device)
            pixel_mask = batch["pixel_mask"].to(self.device)
            batch_paths = batch["image_paths"]
            orig_target_sizes = torch.tensor(batch["orig_sizes"], device=self.device)

            with torch.no_grad():
                outputs = self.detector(pixel_values=pixel_values, pixel_mask=pixel_mask)
            detector_results = self.detector_processor.post_process_object_detection(
                outputs,
                threshold=self.CONFIDENCE_TRESHOLD,
                target_sizes=orig_target_sizes,
            )

            for image_path, detector_result in zip(batch_paths, detector_results):
                boxes = self._boxes_from_result(detector_result)
                if boxes is None:
                    detector_failed_list.append(image_path)
                    continue

                image = cv2.imread(image_path)
                if image is None:
                    print(f"Could not read image: {image_path}")
                    segmentor_failed_list.append(image_path)
                    continue

                try:
                    seg_detections = self._segment(image, boxes)
                except Exception as e:
                    # Best effort by design: log, record the failure and move on.
                    print(f"An error occurred: {e}")
                    print(f"box: {boxes}")
                    print(f"image id: {image_path}")
                    segmentor_failed_list.append(image_path)
                    continue

                if seg_detections is None:
                    segmentor_failed_list.append(image_path)
                    continue

                all_image_paths.append(image_path)
                all_results.append(seg_detections)
                if annotated_frame is None:
                    # Preview only the first successful image.
                    annotated_frame = self.mask_annotator.annotate(scene=image.copy(), detections=seg_detections)

        return all_image_paths, all_results, annotated_frame, detector_failed_list, segmentor_failed_list

    def filter_small_masks(
        self,
        detections: "sv.Detections",
        min_image_area_percentage=0.002,
        max_image_area_percentage=0.80,
        approximation_percentage=0.75,
    ) -> "sv.Detections":
        """Drop masks that yield no polygon under the given area limits.

        Masks with holes or multiple segments are always kept. Any other mask
        is kept only if ``approximate_mask_with_polygons`` returns at least one
        polygon for it.
        NOTE(review): presumably this mirrors what the COCO exporter needs for
        polygon-encoded masks; confirm against supervision's exporter.

        Args:
            detections: Detections to filter; modified in place.
            min_image_area_percentage: Passed to the polygon approximation.
            max_image_area_percentage: Passed to the polygon approximation.
            approximation_percentage: Passed to the polygon approximation.

        Returns:
            The same ``detections`` object, filtered.
        """
        if detections.mask is None:
            # Nothing to inspect; leave the detections untouched.
            return detections

        valid_indices = []
        for i, mask in enumerate(detections.mask):
            has_complex_shape = contains_holes(mask) or contains_multiple_segments(mask)
            if not has_complex_shape and not approximate_mask_with_polygons(
                mask=mask,
                min_image_area_percentage=min_image_area_percentage,
                max_image_area_percentage=max_image_area_percentage,
                approximation_percentage=approximation_percentage,
            ):
                print(f"Skipping mask {i} due to structural issues")
                continue
            valid_indices.append(i)

        # An explicit integer dtype keeps an empty selection valid for indexing.
        keep = np.asarray(valid_indices, dtype=int)
        detections.xyxy = detections.xyxy[keep]
        detections.mask = detections.mask[keep]
        if detections.confidence is not None:
            detections.confidence = detections.confidence[keep]
        if detections.class_id is not None:
            detections.class_id = detections.class_id[keep]
        if 'class_name' in detections.data:
            detections.data['class_name'] = np.asarray(detections.data['class_name'])[keep]
        return detections

    def get_dict(
        self,
        image_paths: List[Any],
        detections: List[Any]
    ) -> Dict[str, Any]:
        """Map each image path to the detection at the same position.

        Raises:
            IndexError: If ``detections`` is shorter than ``image_paths``.
        """
        return {image_path: detections[idx] for idx, image_path in enumerate(image_paths)}

    def save_annotations(self,
                         image_paths,
                         detections,
                         class_names,
                         annotation_path,
                         MIN_IMAGE_AREA_PERCENTAGE=0.002,
                         MAX_IMAGE_AREA_PERCENTAGE=0.80,
                         APPROXIMATION_PERCENTAGE=0.75):
        """Write the detections to ``annotation_path`` as a COCO annotation file.

        Args:
            image_paths: Image paths, aligned with ``detections``.
            detections: One detections object per image; ``None`` entries
                (images that produced no result) are skipped.
            class_names: Class names stored in the dataset.
            annotation_path: Destination of the COCO JSON file.
            MIN_IMAGE_AREA_PERCENTAGE: Forwarded to ``as_coco``.
            MAX_IMAGE_AREA_PERCENTAGE: Forwarded to ``as_coco``.
            APPROXIMATION_PERCENTAGE: Forwarded to ``as_coco``.
        """
        detections_dict = {
            image_path: detection
            for image_path, detection in self.get_dict(image_paths, detections).items()
            if detection is not None
        }
        sv.DetectionDataset(
            classes=class_names,
            images=list(detections_dict),
            annotations=detections_dict
        ).as_coco(
            images_directory_path=None,
            annotations_path=annotation_path,
            min_image_area_percentage=MIN_IMAGE_AREA_PERCENTAGE,
            max_image_area_percentage=MAX_IMAGE_AREA_PERCENTAGE,
            approximation_percentage=APPROXIMATION_PERCENTAGE
        )

requirements.txt ADDED Viewed

	@@ -0,0 +1,150 @@

+absl-py==2.1.0
+aiofiles==23.2.1
+aiohappyeyeballs==2.4.0
+aiohttp==3.10.5
+aiosignal==1.3.1
+annotated-types==0.7.0
+anyio==4.4.0
+asttokens==2.4.1
+attrs==24.2.0
+certifi==2024.8.30
+charset-normalizer==3.3.2
+click==8.1.7
+coco_eval==0.0.4
+comm==0.2.2
+contourpy==1.3.0
+cycler==0.12.1
+datasets==2.21.0
+debugpy==1.8.2
+decorator==5.1.1
+defusedxml==0.7.1
+dill==0.3.8
+executing==2.0.1
+fastapi==0.113.0
+ffmpy==0.4.0
+filelock==3.13.1
+fonttools==4.53.1
+frozenlist==1.4.1
+fsspec==2024.2.0
+gradio==5.1.0
+gradio_client==1.4.0
+grpcio==1.66.1
+h11==0.14.0
+httpcore==1.0.5
+httpx==0.27.2
+huggingface-hub==0.26.0
+idna==3.8
+imageio==2.35.1
+importlib_resources==6.4.4
+ipykernel==6.29.4
+ipython==8.25.0
+ipywidgets==8.1.3
+jedi==0.19.1
+Jinja2==3.1.3
+jupyter_client==8.6.2
+jupyter_core==5.7.2
+jupyterlab_widgets==3.0.11
+kiwisolver==1.4.7
+lazy_loader==0.4
+lightning==2.4.0
+lightning-utilities==0.11.7
+Markdown==3.7
+markdown-it-py==3.0.0
+MarkupSafe==2.1.5
+matplotlib==3.9.2
+matplotlib-inline==0.1.7
+mdurl==0.1.2
+mpmath==1.3.0
+multidict==6.0.5
+multiprocess==0.70.16
+nest-asyncio==1.6.0
+networkx==3.2.1
+numpy==2.1.1
+nvidia-cublas-cu12==12.1.3.1
+nvidia-cuda-cupti-cu12==12.1.105
+nvidia-cuda-nvrtc-cu12==12.1.105
+nvidia-cuda-runtime-cu12==12.1.105
+nvidia-cudnn-cu12==9.1.0.70
+nvidia-cufft-cu12==11.0.2.54
+nvidia-curand-cu12==10.3.2.106
+nvidia-cusolver-cu12==11.4.5.107
+nvidia-cusparse-cu12==12.1.0.106
+nvidia-nccl-cu12==2.20.5
+nvidia-nvjitlink-cu12==12.1.105
+nvidia-nvtx-cu12==12.1.105
+opencv-python==4.10.0.84
+opencv-python-headless==4.10.0.84
+orjson==3.10.7
+packaging==24.1
+pandas==2.2.2
+parso==0.8.4
+pexpect==4.9.0
+pillow==10.4.0
+platformdirs==4.2.2
+prompt_toolkit==3.0.47
+protobuf==5.28.0
+psutil==6.0.0
+ptyprocess==0.7.0
+pure-eval==0.2.2
+py-cpuinfo==9.0.0
+pyarrow==17.0.0
+pycocotools==2.0.8
+pydantic==2.9.0
+pydantic_core==2.23.2
+pydub==0.25.1
+Pygments==2.18.0
+pyparsing==3.1.4
+python-dateutil==2.9.0.post0
+python-multipart==0.0.9
+pytorch-lightning==2.4.0
+pytz==2024.1
+PyYAML==6.0.2
+pyzmq==26.0.3
+regex==2024.7.24
+requests==2.32.3
+rich==13.8.0
+ruff==0.6.4
+safetensors==0.4.4
+scikit-image==0.24.0
+scipy==1.14.1
+seaborn==0.13.2
+semantic-version==2.10.0
+setuptools==69.5.1
+shapely==2.0.6
+shellingham==1.5.4
+six==1.16.0
+sniffio==1.3.1
+stack-data==0.6.3
+starlette==0.38.4
+supervision==0.23.0
+sympy==1.12
+tensorboard==2.17.1
+tensorboard-data-server==0.7.2
+tifffile==2024.8.30
+tokenizers==0.19.1
+tomlkit==0.12.0
+torch==2.4.0
+torch-geometric==2.6.0
+torch-tb-profiler==0.4.3
+torchaudio==2.4.0
+torchmetrics==1.4.1
+torchvision==0.19.0
+tornado==6.4.1
+tqdm==4.66.5
+traitlets==5.14.3
+transformers==4.44.2
+triton==3.0.0
+typer==0.12.5
+typing_extensions==4.9.0
+tzdata==2024.1
+ultralytics==8.2.85
+ultralytics-thop==2.0.6
+urllib3==2.2.2
+uvicorn==0.30.6
+wcwidth==0.2.13
+websockets==12.0
+Werkzeug==3.0.4
+wheel==0.43.0
+widgetsnbextension==4.0.11
+xxhash==3.5.0
+yarl==1.9.6