From 57797cad15f9f84c805c3b0cd7cd16a768de78d9 Mon Sep 17 00:00:00 2001
From: Christoffer
Date: Tue, 17 Oct 2023 15:26:15 +0200
Subject: [PATCH] reverting to old init

---
 darwin/dataset/local_dataset.py | 80 +++++++--------------------------
 1 file changed, 16 insertions(+), 64 deletions(-)

diff --git a/darwin/dataset/local_dataset.py b/darwin/dataset/local_dataset.py
index abf47e416..6973c1aee 100644
--- a/darwin/dataset/local_dataset.py
+++ b/darwin/dataset/local_dataset.py
@@ -123,42 +123,22 @@ def _validate_inputs(self, partition, split_type, annotation_type):
         if split_type not in ["random", "stratified"]:
             raise ValueError("split_type should be either 'random', 'stratified'")
         if annotation_type not in ["tag", "polygon", "bounding_box"]:
-            raise ValueError(
-                "annotation_type should be either 'tag', 'bounding_box', or 'polygon'"
-            )
+            raise ValueError("annotation_type should be either 'tag', 'bounding_box', or 'polygon'")
 
-    def _setup_annotations_and_images(
-        self,
-        release_path,
-        annotations_dir,
-        images_dir,
-        annotation_type,
-        split,
-        partition,
-        split_type,
-    ):
-        stems = build_stems(
-            release_path, annotations_dir, annotation_type, split, partition, split_type
-        )
+    def _setup_annotations_and_images(self, release_path, annotations_dir, images_dir, annotation_type, split, partition, split_type):
+        stems = build_stems(release_path, annotations_dir, annotation_type, split, partition, split_type)
         for stem in stems:
             annotation_path = annotations_dir / f"{stem}.json"
             images = [
                 image_path
                 for ext in SUPPORTED_IMAGE_EXTENSIONS
-                for image_path in [
-                    images_dir / f"{stem}{ext}",
-                    images_dir / f"{stem}{ext.upper()}",
-                ]
+                for image_path in [images_dir / f"{stem}{ext}", images_dir / f"{stem}{ext.upper()}"]
                 if image_path.exists()
             ]
             if len(images) < 1:
-                raise ValueError(
-                    f"Annotation ({annotation_path}) does not have a corresponding image"
-                )
+                raise ValueError(f"Annotation ({annotation_path}) does not have a corresponding image")
             if len(images) > 1:
-                raise ValueError(
-                    f"Image ({stem}) is present with multiple extensions. This is forbidden."
-                )
+                raise ValueError(f"Image ({stem}) is present with multiple extensions. This is forbidden.")
             self.images_path.append(images[0])
             self.annotations_path.append(annotation_path)
 
@@ -219,9 +199,7 @@ def get_height_and_width(self, index: int) -> Tuple[float, float]:
         parsed = parse_darwin_json(self.annotations_path[index], index)
         return parsed.image_height, parsed.image_width
 
-    def extend(
-        self, dataset: "LocalDataset", extend_classes: bool = False
-    ) -> "LocalDataset":
+    def extend(self, dataset: "LocalDataset", extend_classes: bool = False) -> "LocalDataset":
         """
         Extends the current dataset with another one.
@@ -315,12 +293,7 @@ def parse_json(self, index: int) -> Dict[str, Any]:
 
         # Filter out unused classes and annotations of a different type
         if self.classes is not None:
-            annotations = [
-                a
-                for a in annotations
-                if a.annotation_class.name in self.classes
-                and self.annotation_type_supported(a)
-            ]
+            annotations = [a for a in annotations if a.annotation_class.name in self.classes and self.annotation_type_supported(a)]
         return {
             "image_id": index,
             "image_path": str(self.images_path[index]),
@@ -335,21 +308,14 @@ def annotation_type_supported(self, annotation) -> bool:
             return annotation_type == "tag"
         elif self.annotation_type == "bounding_box":
             is_bounding_box = annotation_type == "bounding_box"
-            is_supported_polygon = (
-                annotation_type in ["polygon", "complex_polygon"]
-                and "bounding_box" in annotation.data
-            )
+            is_supported_polygon = annotation_type in ["polygon", "complex_polygon"] and "bounding_box" in annotation.data
             return is_bounding_box or is_supported_polygon
         elif self.annotation_type == "polygon":
             return annotation_type in ["polygon", "complex_polygon"]
         else:
-            raise ValueError(
-                "annotation_type should be either 'tag', 'bounding_box', or 'polygon'"
-            )
+            raise ValueError("annotation_type should be either 'tag', 'bounding_box', or 'polygon'")
 
-    def measure_mean_std(
-        self, multi_threaded: bool = True
-    ) -> Tuple[np.ndarray, np.ndarray]:
+    def measure_mean_std(self, multi_threaded: bool = True) -> Tuple[np.ndarray, np.ndarray]:
         """
         Computes mean and std of trained images, given the train loader.
 
@@ -372,9 +338,7 @@ def measure_mean_std(
             results = pool.map(self._return_mean, self.images_path)
             mean = np.sum(np.array(results), axis=0) / len(self.images_path)
             # Online image_classification deviation
-            results = pool.starmap(
-                self._return_std, [[item, mean] for item in self.images_path]
-            )
+            results = pool.starmap(self._return_std, [[item, mean] for item in self.images_path])
             std_sum = np.sum(np.array([item[0] for item in results]), axis=0)
             total_pixel_count = np.sum(np.array([item[1] for item in results]))
             std = np.sqrt(std_sum / total_pixel_count)
@@ -420,20 +384,14 @@ def _compute_weights(labels: List[int]) -> np.ndarray:
     @staticmethod
     def _return_mean(image_path: Path) -> np.ndarray:
         img = np.array(load_pil_image(image_path))
-        mean = np.array(
-            [np.mean(img[:, :, 0]), np.mean(img[:, :, 1]), np.mean(img[:, :, 2])]
-        )
+        mean = np.array([np.mean(img[:, :, 0]), np.mean(img[:, :, 1]), np.mean(img[:, :, 2])])
         return mean / 255.0
 
     # Loads an image with OpenCV and returns the channel wise std of the image.
     @staticmethod
     def _return_std(image_path: Path, mean: np.ndarray) -> Tuple[np.ndarray, float]:
         img = np.array(load_pil_image(image_path)) / 255.0
-        m2 = np.square(
-            np.array(
-                [img[:, :, 0] - mean[0], img[:, :, 1] - mean[1], img[:, :, 2] - mean[2]]
-            )
-        )
+        m2 = np.square(np.array([img[:, :, 0] - mean[0], img[:, :, 1] - mean[1], img[:, :, 2] - mean[2]]))
         return np.sum(np.sum(m2, axis=1), 1), m2.size / 3.0
 
     def __getitem__(self, index: int):
@@ -503,10 +461,7 @@ def build_stems(
     """
     if partition is None:
-        return (
-            str(e.relative_to(annotations_dir).parent / e.stem)
-            for e in sorted(annotations_dir.glob("**/*.json"))
-        )
+        return (str(e.relative_to(annotations_dir).parent / e.stem) for e in sorted(annotations_dir.glob("**/*.json")))
 
     if split_type == "random":
         split_filename = f"{split_type}_{partition}.txt"
@@ -519,7 +474,4 @@ def build_stems(
     if split_path.is_file():
         return (e.strip("\n\r") for e in split_path.open())
 
-    raise FileNotFoundError(
-        "could not find a dataset partition. "
-        "Split the dataset using `split_dataset()` from `darwin.dataset.split_manager`"
-    )
+    raise FileNotFoundError("could not find a dataset partition. " "Split the dataset using `split_dataset()` from `darwin.dataset.split_manager`")
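
Reviewer note (not part of the patch): the hunks above only collapse black-style
line wrapping back into single-line statements; no behaviour changes. To
sanity-check the pairing rule that _setup_annotations_and_images enforces (each
annotation stem must match exactly one image, in any supported extension and
either case), here is a minimal standalone sketch. The directory names, the
stem, and the trimmed extension list are assumptions for illustration, not
values from this patch.

    from pathlib import Path

    # Trimmed stand-in for darwin's SUPPORTED_IMAGE_EXTENSIONS (assumption).
    SUPPORTED_IMAGE_EXTENSIONS = [".png", ".jpg", ".jpeg"]

    def pair_annotation_with_image(annotations_dir: Path, images_dir: Path, stem: str) -> Path:
        """Mirrors the pairing logic: exactly one image per annotation stem."""
        annotation_path = annotations_dir / f"{stem}.json"
        images = [
            image_path
            for ext in SUPPORTED_IMAGE_EXTENSIONS
            for image_path in [images_dir / f"{stem}{ext}", images_dir / f"{stem}{ext.upper()}"]
            if image_path.exists()
        ]
        if len(images) < 1:
            raise ValueError(f"Annotation ({annotation_path}) does not have a corresponding image")
        if len(images) > 1:
            raise ValueError(f"Image ({stem}) is present with multiple extensions. This is forbidden.")
        return images[0]

    # Usage: expects ./annotations/foo.json and exactly one ./images/foo.<ext>.
    print(pair_annotation_with_image(Path("annotations"), Path("images"), "foo"))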
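Similarly, the measure_mean_std hunks keep the original two-pass statistics: a
per-image channel mean averaged across images, then per-image squared
deviations pooled over the total pixel count. A minimal single-threaded sketch
of the same arithmetic, assuming Pillow in place of darwin's load_pil_image and
a local images/ folder of RGB JPEGs:

    from pathlib import Path

    import numpy as np
    from PIL import Image  # assumption: Pillow stands in for load_pil_image

    def channel_mean(image_path: Path) -> np.ndarray:
        # Per-image channel mean scaled to [0, 1]; mirrors _return_mean.
        img = np.asarray(Image.open(image_path).convert("RGB"), dtype=np.float64)
        return img.reshape(-1, 3).mean(axis=0) / 255.0

    def channel_sq_dev(image_path: Path, mean: np.ndarray):
        # Per-image sum of squared deviations plus pixel count; mirrors _return_std.
        img = np.asarray(Image.open(image_path).convert("RGB"), dtype=np.float64) / 255.0
        sq = np.square(img - mean)  # broadcasts mean over the channel axis
        return sq.reshape(-1, 3).sum(axis=0), img.shape[0] * img.shape[1]

    paths = sorted(Path("images").glob("*.jpg"))  # assumption: sample folder
    mean = np.mean([channel_mean(p) for p in paths], axis=0)
    devs = [channel_sq_dev(p, mean) for p in paths]
    std = np.sqrt(sum(d for d, _ in devs) / sum(n for _, n in devs))
    print("mean:", mean, "std:", std)

As in the patched code, the mean weights every image equally regardless of its
resolution, while the std pools deviations over the total pixel count.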