From 57797cad15f9f84c805c3b0cd7cd16a768de78d9 Mon Sep 17 00:00:00 2001
From: Christoffer
Date: Tue, 17 Oct 2023 15:26:15 +0200
Subject: [PATCH] reverting to old init

---
 darwin/dataset/local_dataset.py | 80 +++++++--------------------------
 1 file changed, 16 insertions(+), 64 deletions(-)

diff --git a/darwin/dataset/local_dataset.py b/darwin/dataset/local_dataset.py
index abf47e416..6973c1aee 100644
--- a/darwin/dataset/local_dataset.py
+++ b/darwin/dataset/local_dataset.py
@@ -123,42 +123,22 @@ def _validate_inputs(self, partition, split_type, annotation_type):
         if split_type not in ["random", "stratified"]:
             raise ValueError("split_type should be either 'random', 'stratified'")
         if annotation_type not in ["tag", "polygon", "bounding_box"]:
-            raise ValueError(
-                "annotation_type should be either 'tag', 'bounding_box', or 'polygon'"
-            )
+            raise ValueError("annotation_type should be either 'tag', 'bounding_box', or 'polygon'")
 
-    def _setup_annotations_and_images(
-        self,
-        release_path,
-        annotations_dir,
-        images_dir,
-        annotation_type,
-        split,
-        partition,
-        split_type,
-    ):
-        stems = build_stems(
-            release_path, annotations_dir, annotation_type, split, partition, split_type
-        )
+    def _setup_annotations_and_images(self, release_path, annotations_dir, images_dir, annotation_type, split, partition, split_type):
+        stems = build_stems(release_path, annotations_dir, annotation_type, split, partition, split_type)
         for stem in stems:
             annotation_path = annotations_dir / f"{stem}.json"
             images = [
                 image_path
                 for ext in SUPPORTED_IMAGE_EXTENSIONS
-                for image_path in [
-                    images_dir / f"{stem}{ext}",
-                    images_dir / f"{stem}{ext.upper()}",
-                ]
+                for image_path in [images_dir / f"{stem}{ext}", images_dir / f"{stem}{ext.upper()}"]
                 if image_path.exists()
             ]
             if len(images) < 1:
-                raise ValueError(
-                    f"Annotation ({annotation_path}) does not have a corresponding image"
-                )
+                raise ValueError(f"Annotation ({annotation_path}) does not have a corresponding image")
             if len(images) > 1:
-                raise ValueError(
-                    f"Image ({stem}) is present with multiple extensions. This is forbidden."
-                )
+                raise ValueError(f"Image ({stem}) is present with multiple extensions. This is forbidden.")
             self.images_path.append(images[0])
             self.annotations_path.append(annotation_path)
 
@@ -219,9 +199,7 @@ def get_height_and_width(self, index: int) -> Tuple[float, float]:
         parsed = parse_darwin_json(self.annotations_path[index], index)
         return parsed.image_height, parsed.image_width
 
-    def extend(
-        self, dataset: "LocalDataset", extend_classes: bool = False
-    ) -> "LocalDataset":
+    def extend(self, dataset: "LocalDataset", extend_classes: bool = False) -> "LocalDataset":
         """
         Extends the current dataset with another one.
@@ -315,12 +293,7 @@ def parse_json(self, index: int) -> Dict[str, Any]:
 
         # Filter out unused classes and annotations of a different type
         if self.classes is not None:
-            annotations = [
-                a
-                for a in annotations
-                if a.annotation_class.name in self.classes
-                and self.annotation_type_supported(a)
-            ]
+            annotations = [a for a in annotations if a.annotation_class.name in self.classes and self.annotation_type_supported(a)]
         return {
             "image_id": index,
             "image_path": str(self.images_path[index]),
@@ -335,21 +308,14 @@ def annotation_type_supported(self, annotation) -> bool:
             return annotation_type == "tag"
         elif self.annotation_type == "bounding_box":
             is_bounding_box = annotation_type == "bounding_box"
-            is_supported_polygon = (
-                annotation_type in ["polygon", "complex_polygon"]
-                and "bounding_box" in annotation.data
-            )
+            is_supported_polygon = annotation_type in ["polygon", "complex_polygon"] and "bounding_box" in annotation.data
             return is_bounding_box or is_supported_polygon
         elif self.annotation_type == "polygon":
             return annotation_type in ["polygon", "complex_polygon"]
         else:
-            raise ValueError(
-                "annotation_type should be either 'tag', 'bounding_box', or 'polygon'"
-            )
+            raise ValueError("annotation_type should be either 'tag', 'bounding_box', or 'polygon'")
 
-    def measure_mean_std(
-        self, multi_threaded: bool = True
-    ) -> Tuple[np.ndarray, np.ndarray]:
+    def measure_mean_std(self, multi_threaded: bool = True) -> Tuple[np.ndarray, np.ndarray]:
         """
         Computes mean and std of trained images, given the train loader.
 
@@ -372,9 +338,7 @@ def measure_mean_std(
             results = pool.map(self._return_mean, self.images_path)
             mean = np.sum(np.array(results), axis=0) / len(self.images_path)
             # Online image_classification deviation
-            results = pool.starmap(
-                self._return_std, [[item, mean] for item in self.images_path]
-            )
+            results = pool.starmap(self._return_std, [[item, mean] for item in self.images_path])
             std_sum = np.sum(np.array([item[0] for item in results]), axis=0)
             total_pixel_count = np.sum(np.array([item[1] for item in results]))
             std = np.sqrt(std_sum / total_pixel_count)
@@ -420,20 +384,14 @@ def _compute_weights(labels: List[int]) -> np.ndarray:
     @staticmethod
     def _return_mean(image_path: Path) -> np.ndarray:
         img = np.array(load_pil_image(image_path))
-        mean = np.array(
-            [np.mean(img[:, :, 0]), np.mean(img[:, :, 1]), np.mean(img[:, :, 2])]
-        )
+        mean = np.array([np.mean(img[:, :, 0]), np.mean(img[:, :, 1]), np.mean(img[:, :, 2])])
         return mean / 255.0
 
     # Loads an image with OpenCV and returns the channel wise std of the image.
     @staticmethod
     def _return_std(image_path: Path, mean: np.ndarray) -> Tuple[np.ndarray, float]:
         img = np.array(load_pil_image(image_path)) / 255.0
-        m2 = np.square(
-            np.array(
-                [img[:, :, 0] - mean[0], img[:, :, 1] - mean[1], img[:, :, 2] - mean[2]]
-            )
-        )
+        m2 = np.square(np.array([img[:, :, 0] - mean[0], img[:, :, 1] - mean[1], img[:, :, 2] - mean[2]]))
         return np.sum(np.sum(m2, axis=1), 1), m2.size / 3.0
 
     def __getitem__(self, index: int):
@@ -503,10 +461,7 @@ def build_stems(
     """
     if partition is None:
-        return (
-            str(e.relative_to(annotations_dir).parent / e.stem)
-            for e in sorted(annotations_dir.glob("**/*.json"))
-        )
+        return (str(e.relative_to(annotations_dir).parent / e.stem) for e in sorted(annotations_dir.glob("**/*.json")))
 
     if split_type == "random":
         split_filename = f"{split_type}_{partition}.txt"
@@ -519,7 +474,4 @@ def build_stems(
     if split_path.is_file():
         return (e.strip("\n\r") for e in split_path.open())
 
-    raise FileNotFoundError(
-        "could not find a dataset partition. "
-        "Split the dataset using `split_dataset()` from `darwin.dataset.split_manager`"
-    )
+    raise FileNotFoundError("could not find a dataset partition. " "Split the dataset using `split_dataset()` from `darwin.dataset.split_manager`")
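
Reviewer note (not part of the patch): the hunks above only collapse black-style
line wrapping back into single-line statements; no behaviour changes. To
sanity-check the pairing rule that _setup_annotations_and_images enforces (each
annotation stem must match exactly one image, in any supported extension and
either case), here is a minimal standalone sketch. The directory names, the
stem, and the trimmed extension list are assumptions for illustration, not
values from this patch.

    from pathlib import Path

    # Trimmed stand-in for darwin's SUPPORTED_IMAGE_EXTENSIONS (assumption).
    SUPPORTED_IMAGE_EXTENSIONS = [".png", ".jpg", ".jpeg"]

    def pair_annotation_with_image(annotations_dir: Path, images_dir: Path, stem: str) -> Path:
        """Mirrors the pairing logic: exactly one image per annotation stem."""
        annotation_path = annotations_dir / f"{stem}.json"
        images = [
            image_path
            for ext in SUPPORTED_IMAGE_EXTENSIONS
            for image_path in [images_dir / f"{stem}{ext}", images_dir / f"{stem}{ext.upper()}"]
            if image_path.exists()
        ]
        if len(images) < 1:
            raise ValueError(f"Annotation ({annotation_path}) does not have a corresponding image")
        if len(images) > 1:
            raise ValueError(f"Image ({stem}) is present with multiple extensions. This is forbidden.")
        return images[0]

    # Usage: expects ./annotations/foo.json and exactly one ./images/foo.<ext>.
    print(pair_annotation_with_image(Path("annotations"), Path("images"), "foo"))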
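Similarly, the measure_mean_std hunks keep the original two-pass statistics: a
per-image channel mean averaged across images, then per-image squared
deviations pooled over the total pixel count. A minimal single-threaded sketch
of the same arithmetic, assuming Pillow in place of darwin's load_pil_image and
a local images/ folder of RGB JPEGs:

    from pathlib import Path

    import numpy as np
    from PIL import Image  # assumption: Pillow stands in for load_pil_image

    def channel_mean(image_path: Path) -> np.ndarray:
        # Per-image channel mean scaled to [0, 1]; mirrors _return_mean.
        img = np.asarray(Image.open(image_path).convert("RGB"), dtype=np.float64)
        return img.reshape(-1, 3).mean(axis=0) / 255.0

    def channel_sq_dev(image_path: Path, mean: np.ndarray):
        # Per-image sum of squared deviations plus pixel count; mirrors _return_std.
        img = np.asarray(Image.open(image_path).convert("RGB"), dtype=np.float64) / 255.0
        sq = np.square(img - mean)  # broadcasts mean over the channel axis
        return sq.reshape(-1, 3).sum(axis=0), img.shape[0] * img.shape[1]

    paths = sorted(Path("images").glob("*.jpg"))  # assumption: sample folder
    mean = np.mean([channel_mean(p) for p in paths], axis=0)
    devs = [channel_sq_dev(p, mean) for p in paths]
    std = np.sqrt(sum(d for d, _ in devs) / sum(n for _, n in devs))
    print("mean:", mean, "std:", std)

As in the patched code, the mean weights every image equally regardless of its
resolution, while the std pools deviations over the total pixel count.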