Commit cbe3c53: Merge branch 'master' into io-1496

Authored by JBWilkie on Oct 18, 2023 · 2 parents 08e657e + 677329f
Showing 74 changed files with 1,506 additions and 546 deletions.
9 changes: 1 addition & 8 deletions .vscode/settings.json
@@ -13,18 +13,11 @@
"editor.insertSpaces": true,
"editor.tabSize": 2
},
"python.formatting.blackPath": "black",
"python.formatting.provider": "none",
"python.formatting.blackArgs": [
"-l 120"
],
"python.linting.mypyEnabled": true,
"isort.args": [
"--profile",
"black"
],
"python.analysis.autoImportCompletions": true,
"python.testing.pytestEnabled": true,
"python.linting.enabled": true,
"python.analysis.typeCheckingMode": "basic"
"python.analysis.typeCheckingMode": "basic",
}
7 changes: 6 additions & 1 deletion README.md
@@ -47,6 +47,9 @@ To run tests, first install the `test` extra package
```
pip install darwin-py[test]
```
### Development

See our development and QA environment installation recommendations [here](docs/DEV.md)

---

@@ -167,7 +170,9 @@ Dataset example-team/test:0.1 downloaded at /directory/choosen/at/authentication
The framework is designed to be usable as a standalone python library.
Usage can be inferred from looking at the operations performed in `darwin/cli_functions.py`.
A minimal example to download a dataset is provided below and a more extensive one can be found in
[darwin_demo.py](https://github.com/v7labs/darwin-py/blob/master/darwin_demo.py).

[./darwin_demo.py](https://github.com/v7labs/darwin-py/blob/master/darwin_demo.py).


```python
from darwin.client import Client
# ... (rest of the example collapsed in the diff view)
```
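
For orientation, here is a self-contained sketch of what such a minimal download script can look like. It is an illustration rather than a copy of `darwin_demo.py`; the `Client.local()`, `get_remote_dataset()`, `get_release()`, and `pull()` calls are assumed entry points of the darwin-py client.

```python
from darwin.client import Client

# Assumed flow, not the exact darwin_demo.py script:
# authenticate with the locally stored credentials (created by `darwin authenticate`)
client = Client.local()

# Look up a remote dataset by its "team-slug/dataset-slug" identifier
dataset = client.get_remote_dataset("example-team/test")

# Download the latest release of the dataset to the local datasets directory
release = dataset.get_release()
dataset.pull(release=release)
```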
13 changes: 7 additions & 6 deletions darwin/dataset/download_manager.py
@@ -94,7 +94,7 @@ def download_all_images_from_annotations(

# Verify that the image is not already present in the images folder
unfiltered_files = images_path.rglob(f"*") if use_folders else images_path.glob(f"*")
existing_images = {image.stem: image for image in unfiltered_files if is_image_extension_allowed(image.suffix)}
existing_images = {image for image in unfiltered_files if is_image_extension_allowed(image.suffix)}

annotations_to_download_path = []
for annotation_path in annotations_path.glob(f"*.{annotation_format}"):
@@ -103,11 +103,11 @@
continue

if not force_replace:
# Check collisions on image filename and json filename on the system
if annotation.filename in existing_images:
continue
if sanitize_filename(annotation_path.stem) in existing_images:
# Check the planned path for the image against the existing images
planned_image_path = images_path / Path(annotation.remote_path.lstrip('/\\')).resolve().absolute() / Path(annotation.filename)
if planned_image_path in existing_images:
continue

annotations_to_download_path.append(annotation_path)
if len(annotation.slots) > 1:
force_slots = True
@@ -119,10 +119,11 @@
if remove_extra:
# Removes existing images for which there is no corresponding annotation
annotations_downloaded_stem = [a.stem for a in annotations_path.glob(f"*.{annotation_format}")]
for existing_image in existing_images.values():
for existing_image in existing_images:
if existing_image.stem not in annotations_downloaded_stem:
print(f"Removing {existing_image} as there is no corresponding annotation")
existing_image.unlink()

# Create the generator with the partial functions
download_functions: List = []
for annotation_path in annotations_to_download_path:
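
The changes to `download_manager.py` above replace the stem-based collision check with a comparison of the full path each image would be written to. A simplified sketch of that idea (not the exact expression used in the source):

```python
from pathlib import Path
from typing import Set

# Simplified illustration: an image is skipped only if a file already exists
# at the exact location it would be downloaded to (folder structure included).
def already_downloaded(images_path: Path, remote_path: str, filename: str,
                       existing_images: Set[Path]) -> bool:
    planned_image_path = images_path / remote_path.lstrip("/\\") / filename
    return planned_image_path in existing_images
```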
3 changes: 2 additions & 1 deletion darwin/dataset/remote_dataset.py
@@ -353,7 +353,8 @@ def pull(
for error in errors:
self.console.print(f"\t - {error}")

downloaded_file_count = len([f for f in self.local_images_path.rglob("*") if f.is_file()])
downloaded_file_count = len([f for f in self.local_images_path.rglob("*") if f.is_file() and not f.name.startswith('.')])

console.print(f"Total file count after download completed {str(downloaded_file_count)}.")

return None, count
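
The updated file count in `pull` above now ignores hidden files (for example `.DS_Store`), so bookkeeping files do not inflate the reported total. A minimal sketch of the same filter in isolation:

```python
from pathlib import Path

# Count only real downloaded assets, ignoring hidden files such as .DS_Store.
def count_downloaded_files(local_images_path: Path) -> int:
    return len([
        f for f in local_images_path.rglob("*")
        if f.is_file() and not f.name.startswith(".")
    ])
```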
4 changes: 2 additions & 2 deletions darwin/exporter/formats/darwin.py
@@ -47,15 +47,15 @@ def build_image_annotation(annotation_file: dt.AnnotationFile) -> Dict[str, Any]
}
"""
annotations: List[Dict[str, Any]] = []
print(annotations)
for annotation in annotation_file.annotations:
payload = {
annotation.annotation_class.annotation_type: _build_annotation_data(annotation),
"name": annotation.annotation_class.name,
}

if (
annotation.annotation_class.annotation_type == "complex_polygon"
or annotation.annotation_class.annotation_type == "polygon"
annotation.annotation_class.annotation_type == "complex_polygon" or annotation.annotation_class.annotation_type == "polygon"
) and "bounding_box" in annotation.data:
payload["bounding_box"] = annotation.data["bounding_box"]

35 changes: 21 additions & 14 deletions darwin/exporter/formats/darwin_1_0.py
@@ -1,5 +1,5 @@
from pathlib import Path
from typing import Any, Dict, Iterable, List, Union
from typing import Iterable, List, Union

import orjson as json

@@ -34,14 +34,11 @@ def export(annotation_files: Iterable[AnnotationFile], output_dir: Path) -> None


def _export_file(annotation_file: AnnotationFile, _: int, output_dir: Path) -> None:

try:
filename = annotation_file.path.parts[-1]
output_file_path = (output_dir / filename).with_suffix(".json")
except Exception as e:
raise ExportException_CouldNotAssembleOutputPath(
f"Could not export file {annotation_file.path} to {output_dir}"
) from e
raise ExportException_CouldNotAssembleOutputPath(f"Could not export file {annotation_file.path} to {output_dir}") from e

try:
output: DictFreeForm = _build_json(annotation_file)
@@ -50,9 +47,7 @@ def _export_file(annotation_file: AnnotationFile, _: int, output_dir: Path) -> None

try:
with open(output_file_path, "w") as f:
op = json.dumps(output, option=json.OPT_INDENT_2 | json.OPT_SERIALIZE_NUMPY | json.OPT_NON_STR_KEYS).decode(
"utf-8"
)
op = json.dumps(output, option=json.OPT_INDENT_2 | json.OPT_SERIALIZE_NUMPY | json.OPT_NON_STR_KEYS).decode("utf-8")
f.write(op)
except Exception as e:
raise ExportException_CouldNotWriteFile(f"Could not write output for {annotation_file.path}") from e
@@ -170,12 +165,24 @@ def _build_image_annotation(annotation: Annotation, skip_slots: bool = False) ->


def _build_legacy_annotation_data(annotation_class: AnnotationClass, data: DictFreeForm) -> DictFreeForm:
if annotation_class.annotation_type == "complex_polygon":
data["path"] = data["paths"]
del data["paths"]
return {"complex_polygon": data}
else:
return {annotation_class.annotation_type: data}
v1_data = {}
polygon_annotation_mappings = {"complex_polygon": "paths", "polygon": "path"}

if annotation_class.annotation_type in polygon_annotation_mappings:
key = polygon_annotation_mappings[annotation_class.annotation_type]
v1_data[annotation_class.annotation_type] = {"path": data.get(key)}

elif annotation_class.annotation_type == "tag":
v1_data["tag"] = {}

elif annotation_class.annotation_type == "bounding_box":
v1_data[annotation_class.annotation_type] = data

if "bounding_box" in data and annotation_class.annotation_type != "bounding_box":
# Polygons and complex polygons usually have attached bounding_box annotations
v1_data["bounding_box"] = data["bounding_box"]

return v1_data


def _build_metadata(annotation_file: AnnotationFile) -> DictFreeForm:
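
To illustrate the new mapping in `_build_legacy_annotation_data`, here is a hedged example of its expected input and output for a polygon. The data shapes are assumptions based on the keys the function reads (`path`, `paths`, `bounding_box`) and on the `AnnotationClass` dataclass from `darwin.datatypes`.

```python
from darwin.datatypes import AnnotationClass
from darwin.exporter.formats.darwin_1_0 import _build_legacy_annotation_data

# Hypothetical polygon data in the 2.0 shape: vertices under "path",
# plus the bounding box that polygons usually carry.
polygon_class = AnnotationClass(name="outline", annotation_type="polygon")
data = {
    "path": [{"x": 0, "y": 0}, {"x": 10, "y": 0}, {"x": 10, "y": 10}],
    "bounding_box": {"x": 0, "y": 0, "w": 10, "h": 10},
}

v1_payload = _build_legacy_annotation_data(polygon_class, data)
# Expected result:
# {
#     "polygon": {"path": [{"x": 0, "y": 0}, {"x": 10, "y": 0}, {"x": 10, "y": 10}]},
#     "bounding_box": {"x": 0, "y": 0, "w": 10, "h": 10},
# }
```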
51 changes: 39 additions & 12 deletions darwin/future/core/client.py
@@ -1,7 +1,7 @@
from __future__ import annotations

from pathlib import Path
from typing import Any, Callable, Dict, List, Optional, Union, overload
from typing import Callable, Dict, Optional, overload
from urllib.parse import urlparse

import requests
@@ -41,7 +41,10 @@ def validate_base_url(cls, v: str) -> str:
if not v.endswith("/"):
v += "/"
check = urlparse(v)
assert check.scheme in {"http", "https"}, "base_url must start with http or https"
assert check.scheme in {
"http",
"https",
}, "base_url must start with http or https"
assert check.netloc, "base_url must contain a domain"
return v

@@ -136,7 +139,9 @@ def __init__(self, config: DarwinConfig, retries: Optional[Retry] = None) -> None
self.config = config
self.session = requests.Session()
if not retries:
retries = Retry(total=3, backoff_factor=0.2, status_forcelist=[500, 502, 503, 504])
retries = Retry(
total=3, backoff_factor=0.2, status_forcelist=[500, 502, 503, 504]
)
self._setup_session(retries)
self._mappings = {
"get": self.session.get,
@@ -153,20 +158,32 @@ def _setup_session(self, retries: Retry) -> None:

@property
def headers(self) -> Dict[str, str]:
http_headers: Dict[str, str] = {"Content-Type": "application/json", "Accept": "application/json"}
http_headers: Dict[str, str] = {
"Content-Type": "application/json",
"Accept": "application/json",
}
if self.config.api_key:
http_headers["Authorization"] = f"ApiKey {self.config.api_key}"
return http_headers

@overload
def _generic_call(self, method: Callable[[str], requests.Response], endpoint: str) -> dict:
def _generic_call(
self, method: Callable[[str], requests.Response], endpoint: str
) -> dict:
...

@overload
def _generic_call(self, method: Callable[[str, dict], requests.Response], endpoint: str, payload: dict) -> dict:
def _generic_call(
self,
method: Callable[[str, dict], requests.Response],
endpoint: str,
payload: dict,
) -> dict:
...

def _generic_call(self, method: Callable, endpoint: str, payload: Optional[dict] = None) -> JSONType:
def _generic_call(
self, method: Callable, endpoint: str, payload: Optional[dict] = None
) -> JSONType:
endpoint = self._sanitize_endpoint(endpoint)
url = self.config.api_endpoint + endpoint
if payload is not None:
@@ -179,24 +196,34 @@ def _generic_call(self, method: Callable, endpoint: str, payload: Optional[dict]

return response.json()

def _contain_qs_and_endpoint(self, endpoint: str, query_string: Optional[QueryString] = None) -> str:
def _contain_qs_and_endpoint(
self, endpoint: str, query_string: Optional[QueryString] = None
) -> str:
if not query_string:
return endpoint

assert "?" not in endpoint
return endpoint + str(query_string)

def get(self, endpoint: str, query_string: Optional[QueryString] = None) -> JSONType:
return self._generic_call(self.session.get, self._contain_qs_and_endpoint(endpoint, query_string))
def get(
self, endpoint: str, query_string: Optional[QueryString] = None
) -> JSONType:
return self._generic_call(
self.session.get, self._contain_qs_and_endpoint(endpoint, query_string)
)

def put(self, endpoint: str, data: dict) -> JSONType:
return self._generic_call(self.session.put, endpoint, data)

def post(self, endpoint: str, data: dict) -> JSONType:
return self._generic_call(self.session.post, endpoint, data)

def delete(self, endpoint: str, query_string: Optional[QueryString] = None) -> JSONType:
return self._generic_call(self.session.delete, self._contain_qs_and_endpoint(endpoint, query_string))
def delete(
self, endpoint: str, query_string: Optional[QueryString] = None
) -> JSONType:
return self._generic_call(
self.session.delete, self._contain_qs_and_endpoint(endpoint, query_string)
)

def patch(self, endpoint: str, data: dict) -> JSONType:
return self._generic_call(self.session.patch, endpoint, data)
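
The default `Retry(total=3, backoff_factor=0.2, status_forcelist=[500, 502, 503, 504])` shown above is a standard urllib3 retry configuration. A minimal sketch of how such an object is typically mounted on a `requests.Session`; the actual `_setup_session` body is collapsed in this diff, so treat this as an assumption about its shape:

```python
import requests
from requests.adapters import HTTPAdapter
from urllib3.util.retry import Retry

# Retry transient server failures (5xx) up to 3 times with a small backoff.
retries = Retry(total=3, backoff_factor=0.2, status_forcelist=[500, 502, 503, 504])

session = requests.Session()
adapter = HTTPAdapter(max_retries=retries)
session.mount("http://", adapter)
session.mount("https://", adapter)
```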
8 changes: 4 additions & 4 deletions darwin/future/core/datasets/__init__.py
@@ -1,4 +1,4 @@
from darwin.future.core.datasets.create_dataset import *
from darwin.future.core.datasets.get_dataset import *
from darwin.future.core.datasets.list_datasets import *
from darwin.future.core.datasets.remove_dataset import *
from darwin.future.core.datasets.create_dataset import create_dataset
from darwin.future.core.datasets.get_dataset import get_dataset
from darwin.future.core.datasets.list_datasets import list_datasets
from darwin.future.core.datasets.remove_dataset import remove_dataset
4 changes: 3 additions & 1 deletion darwin/future/core/datasets/remove_dataset.py
@@ -4,7 +4,9 @@
from darwin.future.exceptions import DatasetNotFound


def remove_dataset(api_client: ClientCore, id: int, team_slug: Optional[str] = None) -> int:
def remove_dataset(
api_client: ClientCore, id: int, team_slug: Optional[str] = None
) -> int:
"""
Removes the dataset with the given id for the given team
4 changes: 2 additions & 2 deletions darwin/future/core/items/__init__.py
@@ -1,2 +1,2 @@
from darwin.future.core.items.get import *
from darwin.future.core.items.move_items import *
from darwin.future.core.items.get import get_item_ids, get_item_ids_stage
from darwin.future.core.items.move_items import move_items_to_stage
25 changes: 19 additions & 6 deletions darwin/future/core/items/get.py
@@ -5,7 +5,9 @@
from darwin.future.core.types.common import QueryString


def get_item_ids(api_client: ClientCore, team_slug: str, dataset_id: Union[str, int]) -> List[UUID]:
def get_item_ids(
api_client: ClientCore, team_slug: str, dataset_id: Union[str, int]
) -> List[UUID]:
"""
Returns a list of item ids for the dataset
@@ -26,15 +28,24 @@ def get_item_ids(api_client: ClientCore, team_slug: str, dataset_id: Union[str,

response = api_client.get(
f"/v2/teams/{team_slug}/items/ids",
QueryString({"not_statuses": "archived,error", "sort[id]": "desc", "dataset_ids": str(dataset_id)}),
QueryString(
{
"not_statuses": "archived,error",
"sort[id]": "desc",
"dataset_ids": str(dataset_id),
}
),
)
assert type(response) == dict
assert isinstance(response, dict)
uuids = [UUID(uuid) for uuid in response["item_ids"]]
return uuids


def get_item_ids_stage(
api_client: ClientCore, team_slug: str, dataset_id: Union[int, str], stage_id: Union[UUID, str]
api_client: ClientCore,
team_slug: str,
dataset_id: Union[int, str],
stage_id: Union[UUID, str],
) -> List[UUID]:
"""
Returns a list of item ids for the stage
@@ -57,8 +68,10 @@ def get_item_ids_stage(
"""
response = api_client.get(
f"/v2/teams/{team_slug}/items/ids",
QueryString({"workflow_stage_ids": str(stage_id), "dataset_ids": str(dataset_id)}),
QueryString(
{"workflow_stage_ids": str(stage_id), "dataset_ids": str(dataset_id)}
),
)
assert type(response) == dict
assert isinstance(response, dict)
uuids = [UUID(uuid) for uuid in response["item_ids"]]
return uuids
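
A hedged usage sketch of the two helpers above, assuming an already-configured `ClientCore` instance (construction is omitted because it depends on `DarwinConfig`):

```python
from uuid import UUID

from darwin.future.core.client import ClientCore
from darwin.future.core.items import get_item_ids, get_item_ids_stage

def summarize_stage(client: ClientCore, team_slug: str, dataset_id: int, stage_id: UUID) -> None:
    # All item ids in the dataset (archived/errored items are excluded by the endpoint query).
    all_ids = get_item_ids(client, team_slug, dataset_id)
    # Item ids currently sitting in one workflow stage.
    stage_ids = get_item_ids_stage(client, team_slug, dataset_id, stage_id)
    print(f"{len(stage_ids)} of {len(all_ids)} items are in stage {stage_id}")
```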
7 changes: 6 additions & 1 deletion darwin/future/core/items/move_items.py
@@ -6,7 +6,12 @@


def move_items_to_stage(
api_client: ClientCore, team_slug: str, workflow_id: UUID, dataset_id: int, stage_id: UUID, item_ids: List[UUID]
api_client: ClientCore,
team_slug: str,
workflow_id: UUID,
dataset_id: int,
stage_id: UUID,
item_ids: List[UUID],
) -> JSONType:
"""
Moves a list of items to a stage
5 changes: 3 additions & 2 deletions darwin/future/core/team/get_team.py
@@ -1,7 +1,6 @@
from typing import List, Optional, Tuple

from darwin.future.core.client import ClientCore
from darwin.future.core.types.common import JSONType
from darwin.future.data_objects.team import TeamCore, TeamMemberCore


@@ -13,7 +12,9 @@ def get_team(client: ClientCore, team_slug: Optional[str] = None) -> TeamCore:
return TeamCore.parse_obj(response)


def get_team_members(client: ClientCore) -> Tuple[List[TeamMemberCore], List[Exception]]:
def get_team_members(
client: ClientCore,
) -> Tuple[List[TeamMemberCore], List[Exception]]:
response = client.get("/memberships")
members = []
errors = []
2 changes: 1 addition & 1 deletion darwin/future/core/types/__init__.py
@@ -1 +1 @@
from .common import *
from .common import JSONType, QueryString, TeamSlug
(Diffs for the remaining changed files are not included in this view.)
