From a672551a989752c3b80cb49378be2603ab23b9bf Mon Sep 17 00:00:00 2001 From: Benjamin Cretois Date: Mon, 14 Oct 2024 15:48:49 +0200 Subject: [PATCH] [ADD] script to rasterize masks --- src/dataset/coco/coco_utils.py | 163 +++++++++++++++++++++++ src/dataset/{ => coco}/make_coco_json.py | 2 +- src/dataset/coco_utils.py | 102 -------------- src/dataset/rasterize_masks.py | 107 +++++++++++++++ 4 files changed, 271 insertions(+), 103 deletions(-) create mode 100644 src/dataset/coco/coco_utils.py rename src/dataset/{ => coco}/make_coco_json.py (98%) delete mode 100644 src/dataset/coco_utils.py create mode 100644 src/dataset/rasterize_masks.py diff --git a/src/dataset/coco/coco_utils.py b/src/dataset/coco/coco_utils.py new file mode 100644 index 0000000..d120d38 --- /dev/null +++ b/src/dataset/coco/coco_utils.py @@ -0,0 +1,163 @@ +import json +import os +from collections import defaultdict + +import numpy as np +import pycocotools.mask as maskUtils +from detectron2.data import DatasetCatalog, MetadataCatalog +from detectron2.structures import BoxMode +from PIL import Image +from pycocotools.coco import COCO +from shapely.geometry import Polygon + + +def split_coco(coco_dict, val_percentage=0.2): + # Extract images and annotations + images = coco_dict["images"] + annotations = coco_dict["annotations"] + + # Create a mapping from image_id to image metadata + image_id_to_image = {img["id"]: img for img in images} + + # Group annotations by image_id + image_to_annotations = defaultdict(list) + for annotation in annotations: + # Adjust category_id to be zero-indexed + annotation["category_id"] -= 1 + + # If bounding box is missing, compute it from the segmentation mask + if "bbox" not in annotation or not annotation["bbox"]: + if "segmentation" in annotation: + # Flatten the segmentation polygon and create a shapely Polygon + segmentation = annotation["segmentation"][ + 0 + ] # First polygon if multiple + poly = Polygon( + [ + (segmentation[i], segmentation[i + 1]) + for i in range(0, len(segmentation), 2) + ] + ) + + # Calculate bounding box (x_min, y_min, x_max, y_max) + x_min, y_min, x_max, y_max = poly.bounds + annotation["bbox"] = [ + x_min, + y_min, + x_max - x_min, + y_max - y_min, + ] # Convert to [x, y, width, height] + + # Add bbox_mode for Detectron2 + annotation["bbox_mode"] = BoxMode.XYWH_ABS + + # Add the annotation to the image_id group + image_to_annotations[annotation["image_id"]].append(annotation) + + # Create a list of restructured image-level dictionaries + dataset = [] + for image_id, image_data in image_id_to_image.items(): + image_dict = { + "file_name": image_data["file_name"], + "image_id": image_id, + "height": image_data["height"], + "width": image_data["width"], + "annotations": image_to_annotations.get(image_id, []), + } + dataset.append(image_dict) + + # Split the dataset into training and validation sets + split_index = int(len(dataset) * (1 - val_percentage)) + train_dataset = dataset[:split_index] + val_dataset = dataset[split_index:] + + with open("train_set.json", "w") as f: + json.dump(train_dataset, f, indent=4) + + return train_dataset, val_dataset + + +def register_coco(annotations_file, img_dir, val_percentage=0.2): + """ + Load the COCO dataset and split it in memory into training and validation sets. + + Args: + - annotations_file: Path to the COCO JSON annotations file. + - img_dir: Directory containing the images. + - val_percentage: Percentage of images to allocate to validation (default 20%). + + Returns: + - None + """ + # Load the COCO JSON in memory + with open(annotations_file, "r") as f: + coco_dict = json.load(f) + + # Split the dataset in memory + train_dict, val_dict = split_coco(coco_dict, val_percentage) + + # Register the training dataset + DatasetCatalog.register("my_segmentation_train", lambda: train_dict) + MetadataCatalog.get("my_segmentation_train").set( + thing_classes=[ + "innend\u00f8rs", + "parkering/sykkelstativ", + "asfalt/betong", + "gummifelt/kunstgress", + "sand/stein", + "gress", + "tr\u00e6r", + ] + ) + + # Register the validation dataset + DatasetCatalog.register("my_segmentation_val", lambda: val_dict) + MetadataCatalog.get("my_segmentation_val").set( + thing_classes=[ + "innend\u00f8rs", + "parkering/sykkelstativ", + "asfalt/betong", + "gummifelt/kunstgress", + "sand/stein", + "gress", + "tr\u00e6r", + ] + ) + + print("Datasets successfully registered!") + + +def convert_coco_to_masks(coco_annotation_path, output_mask_dir, output_image_dir): + # Load COCO annotations + coco = COCO(coco_annotation_path) + + # Get all image IDs + image_ids = coco.getImgIds() + + for img_id in image_ids: + # Load the corresponding image information + img_info = coco.loadImgs(img_id)[0] + img_filename = img_info["file_name"] + + # Load annotations (segmentations) for the image + ann_ids = coco.getAnnIds(imgIds=img_id) + anns = coco.loadAnns(ann_ids) + + # Create an empty mask + height, width = img_info["height"], img_info["width"] + mask = np.zeros((height, width), dtype=np.uint8) + + # For each annotation, fill in the mask with the class ID + for ann in anns: + category_id = ann["category_id"] + # Get the segmentation mask for the annotation + rle = coco.annToRLE(ann) + binary_mask = maskUtils.decode(rle) + + # Set the class ID in the mask + mask[binary_mask == 1] = category_id + + # Save the mask as a grayscale image + mask_img = Image.fromarray(mask) + mask_filename = os.path.splitext(img_filename)[0] + ".png" + mask_img.save(os.path.join(output_mask_dir, mask_filename)) diff --git a/src/dataset/make_coco_json.py b/src/dataset/coco/make_coco_json.py similarity index 98% rename from src/dataset/make_coco_json.py rename to src/dataset/coco/make_coco_json.py index d6f6805..5b17370 100644 --- a/src/dataset/make_coco_json.py +++ b/src/dataset/coco/make_coco_json.py @@ -53,7 +53,7 @@ def create_coco_annotations(gdf, output_dir, output_json, label_column="labelTek coco_output["images"].append( { "id": image_id, - "file_name": image_filename, + "file_name": image_path, "width": width, "height": height, } diff --git a/src/dataset/coco_utils.py b/src/dataset/coco_utils.py deleted file mode 100644 index b0d21f6..0000000 --- a/src/dataset/coco_utils.py +++ /dev/null @@ -1,102 +0,0 @@ -import json -import random - -from detectron2.data import DatasetCatalog, MetadataCatalog - - -def split_coco(coco_dict, val_percentage=0.2): - """ - Splits COCO dataset dictionary in memory into train and validation sets. - - Args: - - coco_dict: COCO dataset dictionary loaded in memory. - - val_percentage: Percentage of images to allocate to validation (default 20%). - - Returns: - - train_dict: Training dataset dictionary. - - val_dict: Validation dataset dictionary. - """ - images = coco_dict["images"] - annotations = coco_dict["annotations"] - - # Shuffle the images to ensure randomness - random.shuffle(images) - - # Split images into training and validation sets - val_size = int(len(images) * val_percentage) - val_images = images[:val_size] - train_images = images[val_size:] - - # Get image ids for train and val sets - val_image_ids = {img["id"] for img in val_images} - train_image_ids = {img["id"] for img in train_images} - - # Split annotations based on image ids - train_annotations = [ - ann for ann in annotations if ann["image_id"] in train_image_ids - ] - val_annotations = [ann for ann in annotations if ann["image_id"] in val_image_ids] - - # Create train and validation datasets in memory - train_dict = { - "images": train_images, - "annotations": train_annotations, - "categories": coco_dict["categories"], - } - val_dict = { - "images": val_images, - "annotations": val_annotations, - "categories": coco_dict["categories"], - } - - return train_dict, val_dict - - -def register_coco(annotations_file, img_dir, val_percentage=0.2): - """ - Load the COCO dataset and split it in memory into training and validation sets. - - Args: - - annotations_file: Path to the COCO JSON annotations file. - - img_dir: Directory containing the images. - - val_percentage: Percentage of images to allocate to validation (default 20%). - - Returns: - - None - """ - # Load the COCO JSON in memory - with open(annotations_file, "r") as f: - coco_dict = json.load(f) - - # Split the dataset in memory - train_dict, val_dict = split_coco(coco_dict, val_percentage) - - # Register the training dataset - DatasetCatalog.register("my_segmentation_train", lambda: train_dict) - MetadataCatalog.get("my_segmentation_train").set( - thing_classes=[ - "innend\u00f8rs", - "parkering/sykkelstativ", - "asfalt/betong", - "gummifelt/kunstgress", - "sand/stein", - "gress", - "tr\u00e6r", - ] - ) - - # Register the validation dataset - DatasetCatalog.register("my_segmentation_val", lambda: val_dict) - MetadataCatalog.get("my_segmentation_val").set( - thing_classes=[ - "innend\u00f8rs", - "parkering/sykkelstativ", - "asfalt/betong", - "gummifelt/kunstgress", - "sand/stein", - "gress", - "tr\u00e6r", - ] - ) - - print("Datasets successfully registered!") diff --git a/src/dataset/rasterize_masks.py b/src/dataset/rasterize_masks.py new file mode 100644 index 0000000..38c9510 --- /dev/null +++ b/src/dataset/rasterize_masks.py @@ -0,0 +1,107 @@ +import os + +import geopandas as gpd +import numpy as np +import pandas as pd +import yaml +from PIL import Image +from rasterio.features import rasterize +from rasterio.transform import from_origin +from shapely.geometry import MultiPolygon, Polygon + + +def get_image_dimensions(image_path): + """ + Get the dimensions (width, height) of an image using PIL. + """ + with Image.open(image_path) as img: + width, height = img.size + return height, width + + +def rasterize_masks( + geojson_path, image_dir, output_mask_dir, label_column="labelTekst" +): + """ + Convert GeoJSON polygons to raster masks and save them as PNGs. + + Parameters: + - geojson_path: Path to the GeoJSON file with polygon geometries. + - image_dir: Directory containing the corresponding images. + - output_mask_dir: Directory to save the output rasterized masks. + - label_column: The column in the GeoDataFrame that contains the text labels. + - resolution: The resolution of the output mask (default is 1024x1024). + """ + # Load the GeoJSON data + gdf = gpd.read_file(geojson_path) + + # Factorize the text labels to get numeric labels + gdf["label_encoded"], _ = pd.factorize(gdf[label_column]) + + # Ensure the output directory exists + os.makedirs(output_mask_dir, exist_ok=True) + + # Loop through each unique 'id' or corresponding image + for image_id in gdf["id"].unique(): + # Get all geometries and corresponding numeric labels for this image + gdf_image = gdf[gdf["id"] == image_id] + + # Get the corresponding image file and extract dimensions + image_filename = f"image_{image_id}.png" + image_path = os.path.join(image_dir, image_filename) + if not os.path.exists(image_path): + print(f"Image not found: {image_path}") + continue + + # Get the original image dimensions + img_height, img_width = get_image_dimensions(image_path) + + # Prepare a list of (geometry, class_id) tuples and compute bounds + shapes = [] + x_min, y_min, x_max, y_max = gdf_image.total_bounds + + # Calculate pixel size based on image's geometry bounds and output resolution + pixel_size_x = (x_max - x_min) / img_width + pixel_size_y = (y_max - y_min) / img_height + + # Define the transform for the rasterization + transform = from_origin(x_min, y_max, pixel_size_x, pixel_size_y) + + # Prepare a list of (geometry, class_id) tuples for rasterization + for _, row in gdf_image.iterrows(): + geometry = row.geometry + if isinstance(geometry, Polygon): + # If it's a single Polygon, add it to shapes + shapes.append((geometry, row["label_encoded"])) + elif isinstance(geometry, MultiPolygon): + # If it's a MultiPolygon, loop through each Polygon + for poly in geometry.geoms: + shapes.append((poly, row["label_encoded"])) + + # Create an empty mask with the specified output size (resolution) + mask = np.zeros((img_height, img_width), dtype=np.uint8) + + # Rasterize the geometries into the mask + mask = rasterize( + shapes=shapes, + out_shape=(img_height, img_width), + transform=transform, + fill=0, + dtype="uint8", + ) + + # Save the mask as a grayscale PNG + mask_filename = f"mask_{image_id}.png" + mask_img = Image.fromarray(mask) + mask_img.save(os.path.join(output_mask_dir, mask_filename)) + + print(f"Saved mask: {mask_filename}") + + +if __name__ == "__main__": + with open("config.yaml") as f: + cfg = yaml.load(f, Loader=yaml.FullLoader) + + rasterize_masks( + cfg["MASK"], cfg["IMG_DIR"], cfg["MASKS_DIR"], label_column="labelTekst" + )