From db229fc3523b74acf1e31be2c15b0c4841c98f95 Mon Sep 17 00:00:00 2001
From: John Wilkie <john@v7labs.com>
Date: Fri, 27 Oct 2023 20:08:22 +0100
Subject: [PATCH 1/6] Fix handling of releases that contain folders but were
 pulled in a flat structure (WIP)

---
 darwin/dataset/local_dataset.py |   5 +-
 darwin/dataset/utils.py         |   9 +-
 darwin/utils/utils.py           | 320 +++++++++++++++++++++++++-------
 3 files changed, 258 insertions(+), 76 deletions(-)

diff --git a/darwin/dataset/local_dataset.py b/darwin/dataset/local_dataset.py
index d79ecae76..23aa6f87c 100644
--- a/darwin/dataset/local_dataset.py
+++ b/darwin/dataset/local_dataset.py
@@ -132,9 +132,12 @@ def _setup_annotations_and_images(
         split_type,
     ):
         # Find all the annotations and their corresponding images
+        with_folders = any([item.is_dir() for item in images_dir.iterdir()])
         for annotation_path in sorted(annotations_dir.glob("**/*.json")):
             darwin_json = stream_darwin_json(annotation_path)
-            image_path = get_image_path_from_stream(darwin_json, images_dir)
+            image_path = get_image_path_from_stream(
+                darwin_json, images_dir, with_folders
+            )
             if image_path.exists():
                 self.images_path.append(image_path)
                 self.annotations_path.append(annotation_path)
diff --git a/darwin/dataset/utils.py b/darwin/dataset/utils.py
index c11e3576a..5d295e3e3 100644
--- a/darwin/dataset/utils.py
+++ b/darwin/dataset/utils.py
@@ -230,7 +230,7 @@ def exhaust_generator(
 
     Exhausts the generator passed as parameter. Can be done multi threaded if desired.
     Creates and returns a coco record from the given annotation.
-    
+
     Uses ``BoxMode.XYXY_ABS`` from ``detectron2.structures`` if available, defaults to ``box_mode = 0``
     otherwise.
     Parameters
@@ -571,9 +571,10 @@ def _map_annotations_to_images(
     images_paths = []
     annotations_paths = []
     invalid_annotation_paths = []
+    with_folders = any([item.is_dir() for item in images_dir.iterdir()])
     for annotation_path in annotations_dir.glob("**/*.json"):
         darwin_json = stream_darwin_json(annotation_path)
-        image_path = get_image_path_from_stream(darwin_json, images_dir)
+        image_path = get_image_path_from_stream(darwin_json, images_dir, with_folders)
         if image_path.exists():
             images_paths.append(image_path)
             annotations_paths.append(annotation_path)
@@ -583,7 +584,9 @@ def _map_annotations_to_images(
                 invalid_annotation_paths.append(annotation_path)
                 continue
             else:
-                raise ValueError(f"Annotation ({annotation_path}) does not have a corresponding image")
+                raise ValueError(
+                    f"Annotation ({annotation_path}) does not have a corresponding image"
+                )
 
     return images_paths, annotations_paths, invalid_annotation_paths
 
diff --git a/darwin/utils/utils.py b/darwin/utils/utils.py
index 68f81a53f..4934b1c9b 100644
--- a/darwin/utils/utils.py
+++ b/darwin/utils/utils.py
@@ -216,7 +216,9 @@ def is_project_dir(project_path: Path) -> bool:
     return (project_path / "releases").exists() and (project_path / "images").exists()
 
 
-def get_progress_bar(array: List[dt.AnnotationFile], description: Optional[str] = None) -> Iterable[ProgressType]:
+def get_progress_bar(
+    array: List[dt.AnnotationFile], description: Optional[str] = None
+) -> Iterable[ProgressType]:
     """
     Get a rich a progress bar for the given list of annotation files.
 
@@ -265,7 +267,10 @@ def prompt(msg: str, default: Optional[str] = None) -> str:
 
 
 def find_files(
-    files: List[dt.PathLike], *, files_to_exclude: List[dt.PathLike] = [], recursive: bool = True
+    files: List[dt.PathLike],
+    *,
+    files_to_exclude: List[dt.PathLike] = [],
+    recursive: bool = True,
 ) -> List[Path]:
     """
     Retrieve a list of all files belonging to supported extensions. The exploration can be made
@@ -322,7 +327,9 @@ def secure_continue_request() -> bool:
 
 
 def persist_client_configuration(
-    client: "Client", default_team: Optional[str] = None, config_path: Optional[Path] = None
+    client: "Client",
+    default_team: Optional[str] = None,
+    config_path: Optional[Path] = None,
 ) -> Config:
     """
     Authenticate user against the server and creates a configuration file for him/her.
@@ -350,8 +357,14 @@ def persist_client_configuration(
         raise ValueError("Unable to get default team.")
 
     config: Config = Config(config_path)
-    config.set_team(team=team_config.slug, api_key=team_config.api_key, datasets_dir=team_config.datasets_dir)
-    config.set_global(api_endpoint=client.url, base_url=client.base_url, default_team=default_team)
+    config.set_team(
+        team=team_config.slug,
+        api_key=team_config.api_key,
+        datasets_dir=team_config.datasets_dir,
+    )
+    config.set_global(
+        api_endpoint=client.url, base_url=client.base_url, default_team=default_team
+    )
 
     return config
 
@@ -408,7 +421,9 @@ def attempt_decode(path: Path) -> dict:
             return data
         except Exception:
             continue
-    raise UnrecognizableFileEncoding(f"Unable to load file {path} with any encodings: {encodings}")
+    raise UnrecognizableFileEncoding(
+        f"Unable to load file {path} with any encodings: {encodings}"
+    )
 
 
 def load_data_from_file(path: Path) -> Tuple[dict, dt.AnnotationFileVersion]:
@@ -417,7 +432,9 @@ def load_data_from_file(path: Path) -> Tuple[dict, dt.AnnotationFileVersion]:
     return data, version
 
 
-def parse_darwin_json(path: Path, count: Optional[int] = None) -> Optional[dt.AnnotationFile]:
+def parse_darwin_json(
+    path: Path, count: Optional[int] = None
+) -> Optional[dt.AnnotationFile]:
     """
     Parses the given JSON file in v7's darwin proprietary format. Works for images, split frame
     videos (treated as images) and playback videos.
@@ -456,6 +473,7 @@ def parse_darwin_json(path: Path, count: Optional[int] = None) -> Optional[dt.An
         else:
             return _parse_darwin_image(path, data, count)
 
+
 def stream_darwin_json(path: Path) -> PersistentStreamingJSONObject:
     """
     Returns a Darwin JSON file as a persistent stream. This allows for parsing large files without
@@ -474,8 +492,11 @@ def stream_darwin_json(path: Path) -> PersistentStreamingJSONObject:
 
     with path.open() as infile:
         return json_stream.load(infile, persistent=True)
-    
-def get_image_path_from_stream(darwin_json: PersistentStreamingJSONObject, images_dir: Path) -> Path:
+
+
+def get_image_path_from_stream(
+    darwin_json: PersistentStreamingJSONObject, images_dir: Path, with_folders: bool
+) -> Path:
     """
     Returns the path to the image file associated with the given darwin json file (V1 or V2).
 
@@ -485,23 +506,46 @@ def get_image_path_from_stream(darwin_json: PersistentStreamingJSONObject, image
         A stream of the JSON file.
     images_dir : Path
         Path to the directory containing the images.
+    with_folders: Bool
+        Flag to determine if the release was pulled with or without folders.
 
     Returns
     -------
     Path
         Path to the image file.
     """
-    try:
-        return images_dir / (Path(darwin_json['item']['path'].lstrip('/\\'))) / Path(darwin_json['item']['name'])
-    except KeyError:
-        return images_dir / (Path(darwin_json['image']['path'].lstrip('/\\'))) / Path(darwin_json['image']['filename'])
+    if not with_folders:
+        try:
+            return images_dir / Path(darwin_json["item"]["name"])
+        except KeyError:
+            return images_dir / Path(darwin_json["image"]["filename"])
+    else:
+        try:
+            return (
+                images_dir
+                / (Path(darwin_json["item"]["path"].lstrip("/\\")))
+                / Path(darwin_json["item"]["name"])
+            )
+        except KeyError:
+            return (
+                images_dir
+                / (Path(darwin_json["image"]["path"].lstrip("/\\")))
+                / Path(darwin_json["image"]["filename"])
+            )
+
 
 def _parse_darwin_v2(path: Path, data: Dict[str, Any]) -> dt.AnnotationFile:
     item = data["item"]
     item_source = item.get("source_info", {})
-    slots: List[dt.Slot] = list(filter(None, map(_parse_darwin_slot, item.get("slots", []))))
-    annotations: List[Union[dt.Annotation, dt.VideoAnnotation]] = _data_to_annotations(data)
-    annotation_classes: Set[dt.AnnotationClass] = set([annotation.annotation_class for annotation in annotations])
+    slots: List[dt.Slot] = list(
+        filter(None, map(_parse_darwin_slot, item.get("slots", [])))
+    )
+    annotations: List[Union[dt.Annotation, dt.VideoAnnotation]] = _data_to_annotations(
+        data
+    )
+    annotation_classes: Set[dt.AnnotationClass] = set(
+        [annotation.annotation_class for annotation in annotations]
+    )
 
     if len(slots) == 0:
         annotation_file = dt.AnnotationFile(
@@ -509,7 +553,9 @@ def _parse_darwin_v2(path: Path, data: Dict[str, Any]) -> dt.AnnotationFile:
             path=path,
             filename=item["name"],
             item_id=item.get("source_info", {}).get("item_id", None),
-            dataset_name=item.get("source_info", {}).get("dataset", {}).get("name", None),
+            dataset_name=item.get("source_info", {})
+            .get("dataset", {})
+            .get("name", None),
             annotation_classes=annotation_classes,
             annotations=annotations,
             is_video=False,
@@ -530,13 +576,17 @@ def _parse_darwin_v2(path: Path, data: Dict[str, Any]) -> dt.AnnotationFile:
             path=path,
             filename=item["name"],
             item_id=item.get("source_info", {}).get("item_id", None),
-            dataset_name=item.get("source_info", {}).get("dataset", {}).get("name", None),
+            dataset_name=item.get("source_info", {})
+            .get("dataset", {})
+            .get("name", None),
             annotation_classes=annotation_classes,
             annotations=annotations,
             is_video=slot.frame_urls is not None,
             image_width=slot.width,
             image_height=slot.height,
-            image_url=None if len(slot.source_files or []) == 0 else slot.source_files[0]["url"],
+            image_url=None
+            if len(slot.source_files or []) == 0
+            else slot.source_files[0]["url"],
             image_thumbnail_url=slot.thumbnail_url,
             workview_url=item_source.get("workview_url", None),
             seq=0,
@@ -565,14 +615,25 @@ def _parse_darwin_slot(data: Dict[str, Any]) -> dt.Slot:
     )
 
 
-def _parse_darwin_image(path: Path, data: Dict[str, Any], count: Optional[int]) -> dt.AnnotationFile:
-    annotations: List[Union[dt.Annotation, dt.VideoAnnotation]] = _data_to_annotations(data)
-    annotation_classes: Set[dt.AnnotationClass] = set([annotation.annotation_class for annotation in annotations])
+def _parse_darwin_image(
+    path: Path, data: Dict[str, Any], count: Optional[int]
+) -> dt.AnnotationFile:
+    annotations: List[Union[dt.Annotation, dt.VideoAnnotation]] = _data_to_annotations(
+        data
+    )
+    annotation_classes: Set[dt.AnnotationClass] = set(
+        [annotation.annotation_class for annotation in annotations]
+    )
 
     slot = dt.Slot(
         name=None,
         type="image",
-        source_files=[{"url": data["image"].get("url"), "file_name": _get_local_filename(data["image"])}],
+        source_files=[
+            {
+                "url": data["image"].get("url"),
+                "file_name": _get_local_filename(data["image"]),
+            }
+        ],
         thumbnail_url=data["image"].get("thumbnail_url"),
         width=data["image"].get("width"),
         height=data["image"].get("height"),
@@ -599,17 +660,30 @@ def _parse_darwin_image(path: Path, data: Dict[str, Any], count: Optional[int])
     return annotation_file
 
 
-def _parse_darwin_video(path: Path, data: Dict[str, Any], count: Optional[int]) -> dt.AnnotationFile:
-    annotations: List[Union[dt.Annotation, dt.VideoAnnotation]] = _data_to_annotations(data)
-    annotation_classes: Set[dt.AnnotationClass] = set([annotation.annotation_class for annotation in annotations])
+def _parse_darwin_video(
+    path: Path, data: Dict[str, Any], count: Optional[int]
+) -> dt.AnnotationFile:
+    annotations: List[Union[dt.Annotation, dt.VideoAnnotation]] = _data_to_annotations(
+        data
+    )
+    annotation_classes: Set[dt.AnnotationClass] = set(
+        [annotation.annotation_class for annotation in annotations]
+    )
 
     if "width" not in data["image"] or "height" not in data["image"]:
-        raise OutdatedDarwinJSONFormat("Missing width/height in video, please re-export")
+        raise OutdatedDarwinJSONFormat(
+            "Missing width/height in video, please re-export"
+        )
 
     slot = dt.Slot(
         name=None,
         type="video",
-        source_files=[{"url": data["image"].get("url"), "file_name": _get_local_filename(data["image"])}],
+        source_files=[
+            {
+                "url": data["image"].get("url"),
+                "file_name": _get_local_filename(data["image"]),
+            }
+        ],
         thumbnail_url=data["image"].get("thumbnail_url"),
         width=data["image"].get("width"),
         height=data["image"].get("height"),
@@ -645,23 +719,41 @@ def _parse_darwin_annotation(annotation: Dict[str, Any]) -> Optional[dt.Annotati
     main_annotation: Optional[dt.Annotation] = None
 
     # Darwin JSON 2.0 representation of complex polygons
-    if "polygon" in annotation and "paths" in annotation["polygon"] and len(annotation["polygon"]["paths"]) > 1:
+    if (
+        "polygon" in annotation
+        and "paths" in annotation["polygon"]
+        and len(annotation["polygon"]["paths"]) > 1
+    ):
         bounding_box = annotation.get("bounding_box")
         paths = annotation["polygon"]["paths"]
-        main_annotation = dt.make_complex_polygon(name, paths, bounding_box, slot_names=slot_names)
+        main_annotation = dt.make_complex_polygon(
+            name, paths, bounding_box, slot_names=slot_names
+        )
     # Darwin JSON 2.0 representation of simple polygons
-    elif "polygon" in annotation and "paths" in annotation["polygon"] and len(annotation["polygon"]["paths"]) == 1:
+    elif (
+        "polygon" in annotation
+        and "paths" in annotation["polygon"]
+        and len(annotation["polygon"]["paths"]) == 1
+    ):
         bounding_box = annotation.get("bounding_box")
         paths = annotation["polygon"]["paths"]
-        main_annotation = dt.make_polygon(name, paths[0], bounding_box, slot_names=slot_names)
+        main_annotation = dt.make_polygon(
+            name, paths[0], bounding_box, slot_names=slot_names
+        )
     # Darwin JSON 1.0 representation of complex and simple polygons
     elif "polygon" in annotation:
         bounding_box = annotation.get("bounding_box")
         if "additional_paths" in annotation["polygon"]:
-            paths = [annotation["polygon"]["path"]] + annotation["polygon"]["additional_paths"]
-            main_annotation = dt.make_complex_polygon(name, paths, bounding_box, slot_names=slot_names)
+            paths = [annotation["polygon"]["path"]] + annotation["polygon"][
+                "additional_paths"
+            ]
+            main_annotation = dt.make_complex_polygon(
+                name, paths, bounding_box, slot_names=slot_names
+            )
         else:
-            main_annotation = dt.make_polygon(name, annotation["polygon"]["path"], bounding_box, slot_names=slot_names)
+            main_annotation = dt.make_polygon(
+                name, annotation["polygon"]["path"], bounding_box, slot_names=slot_names
+            )
     # Darwin JSON 1.0 representation of complex polygons
     elif "complex_polygon" in annotation:
         bounding_box = annotation.get("bounding_box")
@@ -673,42 +765,72 @@ def _parse_darwin_annotation(annotation: Dict[str, Any]) -> Optional[dt.Annotati
         if "additional_paths" in annotation["complex_polygon"]:
             paths.extend(annotation["complex_polygon"]["additional_paths"])
 
-        main_annotation = dt.make_complex_polygon(name, paths, bounding_box, slot_names=slot_names)
+        main_annotation = dt.make_complex_polygon(
+            name, paths, bounding_box, slot_names=slot_names
+        )
     elif "bounding_box" in annotation:
         bounding_box = annotation["bounding_box"]
         main_annotation = dt.make_bounding_box(
-            name, bounding_box["x"], bounding_box["y"], bounding_box["w"], bounding_box["h"], slot_names=slot_names
+            name,
+            bounding_box["x"],
+            bounding_box["y"],
+            bounding_box["w"],
+            bounding_box["h"],
+            slot_names=slot_names,
         )
     elif "tag" in annotation:
         main_annotation = dt.make_tag(name, slot_names=slot_names)
     elif "line" in annotation:
-        main_annotation = dt.make_line(name, annotation["line"]["path"], slot_names=slot_names)
+        main_annotation = dt.make_line(
+            name, annotation["line"]["path"], slot_names=slot_names
+        )
     elif "keypoint" in annotation:
         main_annotation = dt.make_keypoint(
-            name, annotation["keypoint"]["x"], annotation["keypoint"]["y"], slot_names=slot_names
+            name,
+            annotation["keypoint"]["x"],
+            annotation["keypoint"]["y"],
+            slot_names=slot_names,
         )
     elif "ellipse" in annotation:
-        main_annotation = dt.make_ellipse(name, annotation["ellipse"], slot_names=slot_names)
+        main_annotation = dt.make_ellipse(
+            name, annotation["ellipse"], slot_names=slot_names
+        )
     elif "cuboid" in annotation:
-        main_annotation = dt.make_cuboid(name, annotation["cuboid"], slot_names=slot_names)
+        main_annotation = dt.make_cuboid(
+            name, annotation["cuboid"], slot_names=slot_names
+        )
     elif "skeleton" in annotation:
-        main_annotation = dt.make_skeleton(name, annotation["skeleton"]["nodes"], slot_names=slot_names)
+        main_annotation = dt.make_skeleton(
+            name, annotation["skeleton"]["nodes"], slot_names=slot_names
+        )
     elif "table" in annotation:
         main_annotation = dt.make_table(
-            name, annotation["table"]["bounding_box"], annotation["table"]["cells"], slot_names=slot_names
+            name,
+            annotation["table"]["bounding_box"],
+            annotation["table"]["cells"],
+            slot_names=slot_names,
         )
     elif "string" in annotation:
-        main_annotation = dt.make_string(name, annotation["string"]["sources"], slot_names=slot_names)
+        main_annotation = dt.make_string(
+            name, annotation["string"]["sources"], slot_names=slot_names
+        )
     elif "graph" in annotation:
         main_annotation = dt.make_graph(
-            name, annotation["graph"]["nodes"], annotation["graph"]["edges"], slot_names=slot_names
+            name,
+            annotation["graph"]["nodes"],
+            annotation["graph"]["edges"],
+            slot_names=slot_names,
         )
     elif "mask" in annotation:
         main_annotation = dt.make_mask(name, slot_names=slot_names)
     elif "raster_layer" in annotation:
         raster_layer = annotation["raster_layer"]
         main_annotation = dt.make_raster_layer(
-            name, raster_layer["mask_annotation_ids_mapping"], raster_layer["total_pixels"], raster_layer["dense_rle"], slot_names=slot_names
+            name,
+            raster_layer["mask_annotation_ids_mapping"],
+            raster_layer["total_pixels"],
+            raster_layer["dense_rle"],
+            slot_names=slot_names,
         )
 
     if not main_annotation:
@@ -718,19 +840,29 @@ def _parse_darwin_annotation(annotation: Dict[str, Any]) -> Optional[dt.Annotati
     if "id" in annotation:
         main_annotation.id = annotation["id"]
     if "instance_id" in annotation:
-        main_annotation.subs.append(dt.make_instance_id(annotation["instance_id"]["value"]))
+        main_annotation.subs.append(
+            dt.make_instance_id(annotation["instance_id"]["value"])
+        )
     if "attributes" in annotation:
         main_annotation.subs.append(dt.make_attributes(annotation["attributes"]))
     if "text" in annotation:
         main_annotation.subs.append(dt.make_text(annotation["text"]["text"]))
     if "inference" in annotation:
-        main_annotation.subs.append(dt.make_opaque_sub("inference", annotation["inference"]))
+        main_annotation.subs.append(
+            dt.make_opaque_sub("inference", annotation["inference"])
+        )
     if "directional_vector" in annotation:
-        main_annotation.subs.append(dt.make_opaque_sub("directional_vector", annotation["directional_vector"]))
+        main_annotation.subs.append(
+            dt.make_opaque_sub("directional_vector", annotation["directional_vector"])
+        )
     if "measures" in annotation:
-        main_annotation.subs.append(dt.make_opaque_sub("measures", annotation["measures"]))
+        main_annotation.subs.append(
+            dt.make_opaque_sub("measures", annotation["measures"])
+        )
     if "auto_annotate" in annotation:
-        main_annotation.subs.append(dt.make_opaque_sub("auto_annotate", annotation["auto_annotate"]))
+        main_annotation.subs.append(
+            dt.make_opaque_sub("auto_annotate", annotation["auto_annotate"])
+        )
 
     if annotation.get("annotators") is not None:
         main_annotation.annotators = _parse_annotators(annotation["annotators"])
@@ -784,7 +916,9 @@ def _parse_darwin_raster_annotation(annotation: dict) -> Optional[dt.Annotation]
     slot_names: Optional[List[str]] = parse_slot_names(annotation)
 
     if not id or not name or not raster_layer:
-        raise ValueError("Raster annotation must have an 'id', 'name' and 'raster_layer' field")
+        raise ValueError(
+            "Raster annotation must have an 'id', 'name' and 'raster_layer' field"
+        )
 
     dense_rle, mask_annotation_ids_mapping, total_pixels = (
         raster_layer.get("dense_rle", None),
@@ -835,9 +969,14 @@ def _parse_darwin_mask_annotation(annotation: dict) -> Optional[dt.Annotation]:
 
 def _parse_annotators(annotators: List[Dict[str, Any]]) -> List[dt.AnnotationAuthor]:
     if not (hasattr(annotators, "full_name") or not hasattr(annotators, "email")):
-        raise AttributeError("JSON file must contain annotators with 'full_name' and 'email' fields")
+        raise AttributeError(
+            "JSON file must contain annotators with 'full_name' and 'email' fields"
+        )
 
-    return [dt.AnnotationAuthor(annotator["full_name"], annotator["email"]) for annotator in annotators]
+    return [
+        dt.AnnotationAuthor(annotator["full_name"], annotator["email"])
+        for annotator in annotators
+    ]
 
 
 def split_video_annotation(annotation: dt.AnnotationFile) -> List[dt.AnnotationFile]:
@@ -870,9 +1009,13 @@ def split_video_annotation(annotation: dt.AnnotationFile) -> List[dt.AnnotationF
     frame_annotations = []
     for i, frame_url in enumerate(annotation.frame_urls):
         annotations = [
-            a.frames[i] for a in annotation.annotations if isinstance(a, dt.VideoAnnotation) and i in a.frames
+            a.frames[i]
+            for a in annotation.annotations
+            if isinstance(a, dt.VideoAnnotation) and i in a.frames
         ]
-        annotation_classes: Set[dt.AnnotationClass] = set([annotation.annotation_class for annotation in annotations])
+        annotation_classes: Set[dt.AnnotationClass] = set(
+            [annotation.annotation_class for annotation in annotations]
+        )
         filename: str = f"{Path(annotation.filename).stem}/{i:07d}.png"
         frame_annotations.append(
             dt.AnnotationFile(
@@ -956,7 +1099,9 @@ def convert_polygons_to_sequences(
     else:
         list_polygons = cast(List[dt.Polygon], [polygons])
 
-    if not isinstance(list_polygons[0], list) or not isinstance(list_polygons[0][0], dict):
+    if not isinstance(list_polygons[0], list) or not isinstance(
+        list_polygons[0][0], dict
+    ):
         raise ValueError("Unknown input format")
 
     sequences: List[List[Union[int, float]]] = []
@@ -964,8 +1109,8 @@ def convert_polygons_to_sequences(
         path: List[Union[int, float]] = []
         for point in polygon:
             # Clip coordinates to the image size
-            x = max(min(point["x"], width -1) if width else point["x"], 0)
-            y = max(min(point["y"], height -1) if height else point["y"], 0)
+            x = max(min(point["x"], width - 1) if width else point["x"], 0)
+            y = max(min(point["y"], height - 1) if height else point["y"], 0)
             if rounding:
                 path.append(round(x))
                 path.append(round(y))
@@ -983,7 +1128,9 @@ def convert_polygons_to_sequences(
     details="Do not use.",
 )
 def convert_sequences_to_polygons(
-    sequences: List[Union[List[int], List[float]]], height: Optional[int] = None, width: Optional[int] = None
+    sequences: List[Union[List[int], List[float]]],
+    height: Optional[int] = None,
+    width: Optional[int] = None,
 ) -> Dict[str, List[dt.Polygon]]:
     """
     Converts a list of polygons, encoded as a list of dictionaries of into a list of nd.arrays
@@ -1095,7 +1242,9 @@ def convert_bounding_box_to_xyxy(box: dt.BoundingBox) -> List[float]:
     return [box["x"], box["y"], x2, y2]
 
 
-def convert_polygons_to_mask(polygons: List, height: int, width: int, value: Optional[int] = 1) -> np.ndarray:
+def convert_polygons_to_mask(
+    polygons: List, height: int, width: int, value: Optional[int] = 1
+) -> np.ndarray:
     """
     Converts a list of polygons, encoded as a list of dictionaries into an ``nd.array`` mask.
 
@@ -1139,7 +1288,7 @@ def chunk(items: List[Any], size: int) -> Iterator[Any]:
         A chunk of the of the given size.
     """
     for i in range(0, len(items), size):
-        yield items[i:i + size]
+        yield items[i : i + size]
 
 
 def is_unix_like_os() -> bool:
@@ -1189,31 +1338,58 @@ def _parse_version(data: dict) -> dt.AnnotationFileVersion:
     return dt.AnnotationFileVersion(int(major), int(minor), suffix)
 
 
-def _data_to_annotations(data: Dict[str, Any]) -> List[Union[dt.Annotation, dt.VideoAnnotation]]:
+def _data_to_annotations(
+    data: Dict[str, Any]
+) -> List[Union[dt.Annotation, dt.VideoAnnotation]]:
     raw_image_annotations = filter(
         lambda annotation: (
-            ("frames" not in annotation) and ("raster_layer" not in annotation) and ("mask" not in annotation)
+            ("frames" not in annotation)
+            and ("raster_layer" not in annotation)
+            and ("mask" not in annotation)
         ),
         data["annotations"],
     )
-    raw_video_annotations = filter(lambda annotation: "frames" in annotation, data["annotations"])
-    raw_raster_annotations = filter(lambda annotation: "raster_layer" in annotation, data["annotations"])
-    raw_mask_annotations = filter(lambda annotation: "mask" in annotation, data["annotations"])
-    image_annotations: List[dt.Annotation] = list(filter(None, map(_parse_darwin_annotation, raw_image_annotations)))
+    raw_video_annotations = filter(
+        lambda annotation: "frames" in annotation, data["annotations"]
+    )
+    raw_raster_annotations = filter(
+        lambda annotation: "raster_layer" in annotation, data["annotations"]
+    )
+    raw_mask_annotations = filter(
+        lambda annotation: "mask" in annotation, data["annotations"]
+    )
+    image_annotations: List[dt.Annotation] = list(
+        filter(None, map(_parse_darwin_annotation, raw_image_annotations))
+    )
     video_annotations: List[dt.VideoAnnotation] = list(
         filter(None, map(_parse_darwin_video_annotation, raw_video_annotations))
     )
     raster_annotations: List[dt.Annotation] = list(
         filter(None, map(_parse_darwin_raster_annotation, raw_raster_annotations))
     )
-    mask_annotations: List[dt.Annotation] = list(filter(None, map(_parse_darwin_mask_annotation, raw_mask_annotations)))
+    mask_annotations: List[dt.Annotation] = list(
+        filter(None, map(_parse_darwin_mask_annotation, raw_mask_annotations))
+    )
 
-    return [*image_annotations, *video_annotations, *raster_annotations, *mask_annotations]
+    return [
+        *image_annotations,
+        *video_annotations,
+        *raster_annotations,
+        *mask_annotations,
+    ]
 
 
 def _supported_schema_versions() -> Dict[Tuple[int, int, str], str]:
-    return {(2, 0, ""): "https://darwin-public.s3.eu-west-1.amazonaws.com/darwin_json/2.0/schema.json"}
+    return {
+        (
+            2,
+            0,
+            "",
+        ): "https://darwin-public.s3.eu-west-1.amazonaws.com/darwin_json/2.0/schema.json"
+    }
 
 
 def _default_schema(version: dt.AnnotationFileVersion) -> Optional[str]:
-    return _supported_schema_versions().get((version.major, version.minor, version.suffix))
+    return _supported_schema_versions().get(
+        (version.major, version.minor, version.suffix)
+    )

From 9c9ed4e38b64e167161f7f6580a4e04882f18abc Mon Sep 17 00:00:00 2001
From: John Wilkie <john@v7labs.com>
Date: Sat, 28 Oct 2023 16:32:44 +0100
Subject: [PATCH 2/6] Fixing tests (WIP)

---
 darwin/dataset/local_dataset.py | 14 +++++++++-
 darwin/dataset/utils.py         | 10 ++++++-
 darwin/utils/utils.py           | 49 ++++++++++++++++++++++++++-------
 3 files changed, 61 insertions(+), 12 deletions(-)

diff --git a/darwin/dataset/local_dataset.py b/darwin/dataset/local_dataset.py
index 23aa6f87c..fd3b82440 100644
--- a/darwin/dataset/local_dataset.py
+++ b/darwin/dataset/local_dataset.py
@@ -8,6 +8,7 @@
 from darwin.dataset.utils import get_classes, get_release_path, load_pil_image
 from darwin.utils import (
     SUPPORTED_IMAGE_EXTENSIONS,
+    get_darwin_json_version,
     get_image_path_from_stream,
     parse_darwin_json,
     stream_darwin_json,
@@ -131,12 +132,23 @@ def _setup_annotations_and_images(
         partition,
         split_type,
     ):
+        # Determine if the release is V1 or V2 JSON
+        json_version = get_darwin_json_version(annotations_dir)
+
+        #
+        annotation_files = list(annotations_dir.glob("**/*.json"))
+
+        for annotation_file in annotation_files:
+            with open(annotation_file, "r") as file:
+                data_str = file.read()
+                print(data_str)
+
         # Find all the annotations and their corresponding images
         with_folders = any([item.is_dir() for item in images_dir.iterdir()])
         for annotation_path in sorted(annotations_dir.glob("**/*.json")):
             darwin_json = stream_darwin_json(annotation_path)
             image_path = get_image_path_from_stream(
-                darwin_json, images_dir, with_folders
+                darwin_json, images_dir, with_folders, json_version
             )
             if image_path.exists():
                 self.images_path.append(image_path)
diff --git a/darwin/dataset/utils.py b/darwin/dataset/utils.py
index 5d295e3e3..2a74399e6 100644
--- a/darwin/dataset/utils.py
+++ b/darwin/dataset/utils.py
@@ -17,6 +17,7 @@
     SUPPORTED_EXTENSIONS,
     SUPPORTED_VIDEO_EXTENSIONS,
     attempt_decode,
+    get_darwin_json_version,
     get_image_path_from_stream,
     is_unix_like_os,
     parse_darwin_json,
@@ -568,13 +569,20 @@ def _map_annotations_to_images(
     Raises:
         ValueError: If there are inconsistencies with the annotations and images.
     """
+
     images_paths = []
     annotations_paths = []
     invalid_annotation_paths = []
+
+    # Determine if the release is V1 or V2 JSON
+    json_version = get_darwin_json_version(annotations_dir)
+
     with_folders = any([item.is_dir() for item in images_dir.iterdir()])
     for annotation_path in annotations_dir.glob("**/*.json"):
         darwin_json = stream_darwin_json(annotation_path)
-        image_path = get_image_path_from_stream(darwin_json, images_dir, with_folders)
+        image_path = get_image_path_from_stream(
+            darwin_json, images_dir, with_folders, json_version
+        )
         if image_path.exists():
             images_paths.append(image_path)
             annotations_paths.append(annotation_path)
diff --git a/darwin/utils/utils.py b/darwin/utils/utils.py
index 4934b1c9b..6cd83997b 100644
--- a/darwin/utils/utils.py
+++ b/darwin/utils/utils.py
@@ -495,10 +495,14 @@ def stream_darwin_json(path: Path) -> PersistentStreamingJSONObject:
 
 
 def get_image_path_from_stream(
-    darwin_json: PersistentStreamingJSONObject, images_dir: Path, with_folders: bool
+    darwin_json: PersistentStreamingJSONObject,
+    images_dir: Path,
+    with_folders: bool,
+    json_version: str,
 ) -> Path:
     """
-    Returns the path to the image file associated with the given darwin json file (V1 or V2).
+    Returns the path to the image file associated with the given darwin json file.
+    Compatible with V1 & V2 Darwin JSON, as well as releases in folders and flat structures.
 
     Parameters
     ----------
@@ -506,27 +510,32 @@ def get_image_path_from_stream(
         A stream of the JSON file.
     images_dir : Path
         Path to the directory containing the images.
-    with_folders: Bool
+    with_folders: bool
         Flag to determine if the release was pulled with or without folders.
+    json_version: str
+        String representing the version of the Darwin JSON
 
     Returns
     -------
     Path
         Path to the image file.
     """
-    if not with_folders:
-        try:
+    if json_version == "2.0":
+        if not with_folders:
             return images_dir / Path(darwin_json["item"]["name"])
-        except KeyError:
-            return images_dir / Path(darwin_json["image"]["filename"])
-    else:
-        try:
+        else:
             return (
                 images_dir
                 / (Path(darwin_json["item"]["path"].lstrip("/\\")))
                 / Path(darwin_json["item"]["name"])
             )
-        except KeyError:
+    else:
+        if not with_folders:
+            try:
+                return images_dir / Path(darwin_json["image"]["filename"])
+            except Exception:
+                pass
+        else:
             return (
                 images_dir
                 / (Path(darwin_json["image"]["path"].lstrip("/\\")))
@@ -534,6 +543,26 @@ def get_image_path_from_stream(
             )
 
 
+def get_darwin_json_version(annotations_dir: Path) -> str:
+    """
+    Returns the Darwin JSON version ("2.0" or "1.0") read from the first ``*.json`` file found in ``annotations_dir``.
+
+    Parameters
+    ----------
+    annotations_dir : Path
+        Path to the directory containing the annotation files.
+
+    Returns
+    -------
+    str
+        A str representing the Darwin JSON version.
+    """
+    with open(next(annotations_dir.glob("*.json")), "r") as file:
+        data_str = file.read()
+        data = json.loads(data_str)
+        return "2.0" if "version" in data and data["version"] == "2.0" else "1.0"
+
+
 def _parse_darwin_v2(path: Path, data: Dict[str, Any]) -> dt.AnnotationFile:
     item = data["item"]
     item_source = item.get("source_info", {})

From 5a5d231e1066938f15b9031c67f35a8ac4536168 Mon Sep 17 00:00:00 2001
From: John Wilkie <john@v7labs.com>
Date: Tue, 21 Nov 2023 09:50:37 +0000
Subject: [PATCH 3/6] WIP

---
 darwin/utils/utils.py | 26 ++------------------------
 1 file changed, 2 insertions(+), 24 deletions(-)

diff --git a/darwin/utils/utils.py b/darwin/utils/utils.py
index 9dad81636..70df00bd8 100644
--- a/darwin/utils/utils.py
+++ b/darwin/utils/utils.py
@@ -540,6 +540,8 @@ def get_image_path_from_stream(
                 / Path(darwin_json["image"]["filename"])
             )
 
+        # WIP: Implementing this with regex instead of streaming
+
 
 def get_darwin_json_version(annotations_dir: Path) -> str:
     """
@@ -570,15 +572,9 @@ def _parse_darwin_v2(path: Path, data: Dict[str, Any]) -> dt.AnnotationFile:
     annotations: List[Union[dt.Annotation, dt.VideoAnnotation]] = _data_to_annotations(
         data
     )
-<<<<<<< HEAD
     annotation_classes: Set[dt.AnnotationClass] = set(
         [annotation.annotation_class for annotation in annotations]
     )
-=======
-    annotation_classes: Set[dt.AnnotationClass] = {
-        annotation.annotation_class for annotation in annotations
-    }
->>>>>>> master
 
     if len(slots) == 0:
         annotation_file = dt.AnnotationFile(
@@ -654,15 +650,9 @@ def _parse_darwin_image(
     annotations: List[Union[dt.Annotation, dt.VideoAnnotation]] = _data_to_annotations(
         data
     )
-<<<<<<< HEAD
     annotation_classes: Set[dt.AnnotationClass] = set(
         [annotation.annotation_class for annotation in annotations]
     )
-=======
-    annotation_classes: Set[dt.AnnotationClass] = {
-        annotation.annotation_class for annotation in annotations
-    }
->>>>>>> master
 
     slot = dt.Slot(
         name=None,
@@ -705,15 +695,9 @@ def _parse_darwin_video(
     annotations: List[Union[dt.Annotation, dt.VideoAnnotation]] = _data_to_annotations(
         data
     )
-<<<<<<< HEAD
     annotation_classes: Set[dt.AnnotationClass] = set(
         [annotation.annotation_class for annotation in annotations]
     )
-=======
-    annotation_classes: Set[dt.AnnotationClass] = {
-        annotation.annotation_class for annotation in annotations
-    }
->>>>>>> master
 
     if "width" not in data["image"] or "height" not in data["image"]:
         raise OutdatedDarwinJSONFormat(
@@ -1058,15 +1042,9 @@ def split_video_annotation(annotation: dt.AnnotationFile) -> List[dt.AnnotationF
             for a in annotation.annotations
             if isinstance(a, dt.VideoAnnotation) and i in a.frames
         ]
-<<<<<<< HEAD
         annotation_classes: Set[dt.AnnotationClass] = set(
             [annotation.annotation_class for annotation in annotations]
         )
-=======
-        annotation_classes: Set[dt.AnnotationClass] = {
-            annotation.annotation_class for annotation in annotations
-        }
->>>>>>> master
         filename: str = f"{Path(annotation.filename).stem}/{i:07d}.png"
         frame_annotations.append(
             dt.AnnotationFile(

From 69a935e3cada8554e552d2fd245029bd8dbbb312 Mon Sep 17 00:00:00 2001
From: John Wilkie <john@v7labs.com>
Date: Tue, 21 Nov 2023 11:09:01 +0000
Subject: [PATCH 4/6] Except JSON Streaming error & load JSON as normal

---
 darwin/dataset/local_dataset.py |  2 +-
 darwin/dataset/utils.py         |  2 +-
 darwin/utils/utils.py           | 44 ++++++++++++++++++---------------
 3 files changed, 26 insertions(+), 22 deletions(-)

diff --git a/darwin/dataset/local_dataset.py b/darwin/dataset/local_dataset.py
index fd3b82440..5610caeb4 100644
--- a/darwin/dataset/local_dataset.py
+++ b/darwin/dataset/local_dataset.py
@@ -148,7 +148,7 @@ def _setup_annotations_and_images(
         for annotation_path in sorted(annotations_dir.glob("**/*.json")):
             darwin_json = stream_darwin_json(annotation_path)
             image_path = get_image_path_from_stream(
-                darwin_json, images_dir, with_folders, json_version
+                darwin_json, images_dir, with_folders, json_version, annotation_path
             )
             if image_path.exists():
                 self.images_path.append(image_path)
diff --git a/darwin/dataset/utils.py b/darwin/dataset/utils.py
index 2a74399e6..e4d43c180 100644
--- a/darwin/dataset/utils.py
+++ b/darwin/dataset/utils.py
@@ -581,7 +581,7 @@ def _map_annotations_to_images(
     for annotation_path in annotations_dir.glob("**/*.json"):
         darwin_json = stream_darwin_json(annotation_path)
         image_path = get_image_path_from_stream(
-            darwin_json, images_dir, with_folders, json_version
+            darwin_json, images_dir, with_folders, json_version, annotation_path
         )
         if image_path.exists():
             images_paths.append(image_path)
diff --git a/darwin/utils/utils.py b/darwin/utils/utils.py
index 70df00bd8..214ebdb5d 100644
--- a/darwin/utils/utils.py
+++ b/darwin/utils/utils.py
@@ -497,6 +497,7 @@ def get_image_path_from_stream(
     images_dir: Path,
     with_folders: bool,
     json_version: str,
+    annotation_path: Path,
 ) -> Path:
     """
     Returns the path to the image file associated with the given darwin json file.
@@ -518,29 +519,32 @@ def get_image_path_from_stream(
     Path
         Path to the image file.
     """
-    if json_version == "2.0":
-        if not with_folders:
-            return images_dir / Path(darwin_json["item"]["name"])
+    try:
+        if json_version == "2.0":
+            if not with_folders:
+                return images_dir / Path(darwin_json["item"]["name"])
+            else:
+                return (
+                    images_dir
+                    / (Path(darwin_json["item"]["path"].lstrip("/\\")))
+                    / Path(darwin_json["item"]["name"])
+                )
         else:
-            return (
-                images_dir
-                / (Path(darwin_json["item"]["path"].lstrip("/\\")))
-                / Path(darwin_json["item"]["name"])
-            )
-    else:
-        if not with_folders:
-            try:
+            if not with_folders:
                 return images_dir / Path(darwin_json["image"]["filename"])
-            except Exception:
-                pass
+            else:
+                return (
+                    images_dir
+                    / (Path(darwin_json["image"]["path"].lstrip("/\\")))
+                    / Path(darwin_json["image"]["filename"])
+                )
+    except OSError as e:
+        # Load in the JSON as normal
+        darwin_json = parse_darwin_json(path=annotation_path)
+        if not with_folders:
+            return images_dir / Path(darwin_json.filename)
         else:
-            return (
-                images_dir
-                / (Path(darwin_json["image"]["path"].lstrip("/\\")))
-                / Path(darwin_json["image"]["filename"])
-            )
-
-        # WIP: Implementing this with regex instead of streaming
+            return images_dir / Path(darwin_json.full_path.lstrip("/\\"))
 
 
 def get_darwin_json_version(annotations_dir: Path) -> str:

From 84a5b050356d610b99118c81f0dbe7430185c407 Mon Sep 17 00:00:00 2001
From: John Wilkie <john@v7labs.com>
Date: Tue, 21 Nov 2023 11:12:03 +0000
Subject: [PATCH 5/6] Fixed small typing bug in old darwin move_to_stage method

---
 darwin/client.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/darwin/client.py b/darwin/client.py
index b87bf371d..064c10180 100644
--- a/darwin/client.py
+++ b/darwin/client.py
@@ -908,7 +908,7 @@ def move_to_stage(
         dataset_slug: str,
         team_slug: str,
         filters: Dict[str, UnknownType],
-        stage_id: int,
+        stage_id: str,
     ) -> None:
         """
         Moves the given items to the specified stage

From 76db70fbab8f878528e90d9ca71e3c44deaef04b Mon Sep 17 00:00:00 2001
From: John Wilkie <john@v7labs.com>
Date: Mon, 11 Dec 2023 15:03:47 +0000
Subject: [PATCH 6/6] Undid change included in PY-641

---
 darwin/client.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/darwin/client.py b/darwin/client.py
index 064c10180..b87bf371d 100644
--- a/darwin/client.py
+++ b/darwin/client.py
@@ -908,7 +908,7 @@ def move_to_stage(
         dataset_slug: str,
         team_slug: str,
         filters: Dict[str, UnknownType],
-        stage_id: str,
+        stage_id: int,
     ) -> None:
         """
         Moves the given items to the specified stage