Commit cbe3c53: Merge branch 'master' into io-1496

Authored by JBWilkie on Oct 18, 2023 · 2 parents 08e657e + 677329f
Showing 74 changed files with 1,506 additions and 546 deletions.
9 changes: 1 addition & 8 deletions .vscode/settings.json
@@ -13,18 +13,11 @@
"editor.insertSpaces": true,
"editor.tabSize": 2
},
"python.formatting.blackPath": "black",
"python.formatting.provider": "none",
"python.formatting.blackArgs": [
"-l 120"
],
"python.linting.mypyEnabled": true,
"isort.args": [
"--profile",
"black"
],
"python.analysis.autoImportCompletions": true,
"python.testing.pytestEnabled": true,
"python.linting.enabled": true,
"python.analysis.typeCheckingMode": "basic"
"python.analysis.typeCheckingMode": "basic",
}
7 changes: 6 additions & 1 deletion README.md
@@ -47,6 +47,9 @@ To run tests, first install the `test` extra package
```
pip install darwin-py[test]
```
### Development

See our development and QA environment installation recommendations [here](docs/DEV.md)

---

@@ -167,7 +170,9 @@ Dataset example-team/test:0.1 downloaded at /directory/choosen/at/authentication
The framework is designed to be usable as a standalone python library.
Usage can be inferred from looking at the operations performed in `darwin/cli_functions.py`.
A minimal example to download a dataset is provided below and a more extensive one can be found in
[darwin_demo.py](https://github.com/v7labs/darwin-py/blob/master/darwin_demo.py).

[./darwin_demo.py](https://github.com/v7labs/darwin-py/blob/master/darwin_demo.py).


```python
from darwin.client import Client
# ... (rest of the example collapsed in the diff view)
```
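
For orientation, here is a self-contained sketch of what such a minimal download script can look like. It is an illustration rather than a copy of `darwin_demo.py`; the `Client.local()`, `get_remote_dataset()`, `get_release()`, and `pull()` calls are assumed entry points of the darwin-py client.

```python
from darwin.client import Client

# Assumed flow, not the exact darwin_demo.py script:
# authenticate with the locally stored credentials (created by `darwin authenticate`)
client = Client.local()

# Look up a remote dataset by its "team-slug/dataset-slug" identifier
dataset = client.get_remote_dataset("example-team/test")

# Download the latest release of the dataset to the local datasets directory
release = dataset.get_release()
dataset.pull(release=release)
```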
13 changes: 7 additions & 6 deletions darwin/dataset/download_manager.py
@@ -94,7 +94,7 @@ def download_all_images_from_annotations(

# Verify that the image is not already present in the images folder
unfiltered_files = images_path.rglob(f"*") if use_folders else images_path.glob(f"*")
existing_images = {image.stem: image for image in unfiltered_files if is_image_extension_allowed(image.suffix)}
existing_images = {image for image in unfiltered_files if is_image_extension_allowed(image.suffix)}

annotations_to_download_path = []
for annotation_path in annotations_path.glob(f"*.{annotation_format}"):
@@ -103,11 +103,11 @@
continue

if not force_replace:
# Check collisions on image filename and json filename on the system
if annotation.filename in existing_images:
continue
if sanitize_filename(annotation_path.stem) in existing_images:
# Check the planned path for the image against the existing images
planned_image_path = images_path / Path(annotation.remote_path.lstrip('/\\')).resolve().absolute() / Path(annotation.filename)
if planned_image_path in existing_images:
continue

annotations_to_download_path.append(annotation_path)
if len(annotation.slots) > 1:
force_slots = True
@@ -119,10 +119,11 @@
if remove_extra:
# Removes existing images for which there is no corresponding annotation
annotations_downloaded_stem = [a.stem for a in annotations_path.glob(f"*.{annotation_format}")]
for existing_image in existing_images.values():
for existing_image in existing_images:
if existing_image.stem not in annotations_downloaded_stem:
print(f"Removing {existing_image} as there is no corresponding annotation")
existing_image.unlink()

# Create the generator with the partial functions
download_functions: List = []
for annotation_path in annotations_to_download_path:
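
The changes to `download_manager.py` above replace the stem-based collision check with a comparison of the full path each image would be written to. A simplified sketch of that idea (not the exact expression used in the source):

```python
from pathlib import Path
from typing import Set

# Simplified illustration: an image is skipped only if a file already exists
# at the exact location it would be downloaded to (folder structure included).
def already_downloaded(images_path: Path, remote_path: str, filename: str,
                       existing_images: Set[Path]) -> bool:
    planned_image_path = images_path / remote_path.lstrip("/\\") / filename
    return planned_image_path in existing_images
```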
3 changes: 2 additions & 1 deletion darwin/dataset/remote_dataset.py
@@ -353,7 +353,8 @@ def pull(
for error in errors:
self.console.print(f"\t - {error}")

downloaded_file_count = len([f for f in self.local_images_path.rglob("*") if f.is_file()])
downloaded_file_count = len([f for f in self.local_images_path.rglob("*") if f.is_file() and not f.name.startswith('.')])

console.print(f"Total file count after download completed {str(downloaded_file_count)}.")

return None, count
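
The updated file count in `pull` above now ignores hidden files (for example `.DS_Store`), so bookkeeping files do not inflate the reported total. A minimal sketch of the same filter in isolation:

```python
from pathlib import Path

# Count only real downloaded assets, ignoring hidden files such as .DS_Store.
def count_downloaded_files(local_images_path: Path) -> int:
    return len([
        f for f in local_images_path.rglob("*")
        if f.is_file() and not f.name.startswith(".")
    ])
```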
4 changes: 2 additions & 2 deletions darwin/exporter/formats/darwin.py
@@ -47,15 +47,15 @@ def build_image_annotation(annotation_file: dt.AnnotationFile) -> Dict[str, Any]
}
"""
annotations: List[Dict[str, Any]] = []
print(annotations)
for annotation in annotation_file.annotations:
payload = {
annotation.annotation_class.annotation_type: _build_annotation_data(annotation),
"name": annotation.annotation_class.name,
}

if (
annotation.annotation_class.annotation_type == "complex_polygon"
or annotation.annotation_class.annotation_type == "polygon"
annotation.annotation_class.annotation_type == "complex_polygon" or annotation.annotation_class.annotation_type == "polygon"
) and "bounding_box" in annotation.data:
payload["bounding_box"] = annotation.data["bounding_box"]

35 changes: 21 additions & 14 deletions darwin/exporter/formats/darwin_1_0.py
@@ -1,5 +1,5 @@
from pathlib import Path
from typing import Any, Dict, Iterable, List, Union
from typing import Iterable, List, Union

import orjson as json

@@ -34,14 +34,11 @@ def export(annotation_files: Iterable[AnnotationFile], output_dir: Path) -> None


def _export_file(annotation_file: AnnotationFile, _: int, output_dir: Path) -> None:

try:
filename = annotation_file.path.parts[-1]
output_file_path = (output_dir / filename).with_suffix(".json")
except Exception as e:
raise ExportException_CouldNotAssembleOutputPath(
f"Could not export file {annotation_file.path} to {output_dir}"
) from e
raise ExportException_CouldNotAssembleOutputPath(f"Could not export file {annotation_file.path} to {output_dir}") from e

try:
output: DictFreeForm = _build_json(annotation_file)
@@ -50,9 +47,7 @@ def _export_file(annotation_file: AnnotationFile, _: int, output_dir: Path) -> None

try:
with open(output_file_path, "w") as f:
op = json.dumps(output, option=json.OPT_INDENT_2 | json.OPT_SERIALIZE_NUMPY | json.OPT_NON_STR_KEYS).decode(
"utf-8"
)
op = json.dumps(output, option=json.OPT_INDENT_2 | json.OPT_SERIALIZE_NUMPY | json.OPT_NON_STR_KEYS).decode("utf-8")
f.write(op)
except Exception as e:
raise ExportException_CouldNotWriteFile(f"Could not write output for {annotation_file.path}") from e
@@ -170,12 +165,24 @@ def _build_image_annotation(annotation: Annotation, skip_slots: bool = False) ->


def _build_legacy_annotation_data(annotation_class: AnnotationClass, data: DictFreeForm) -> DictFreeForm:
if annotation_class.annotation_type == "complex_polygon":
data["path"] = data["paths"]
del data["paths"]
return {"complex_polygon": data}
else:
return {annotation_class.annotation_type: data}
v1_data = {}
polygon_annotation_mappings = {"complex_polygon": "paths", "polygon": "path"}

if annotation_class.annotation_type in polygon_annotation_mappings:
key = polygon_annotation_mappings[annotation_class.annotation_type]
v1_data[annotation_class.annotation_type] = {"path": data.get(key)}

elif annotation_class.annotation_type == "tag":
v1_data["tag"] = {}

elif annotation_class.annotation_type == "bounding_box":
v1_data[annotation_class.annotation_type] = data

if "bounding_box" in data and annotation_class.annotation_type != "bounding_box":
# Polygons and complex polygons usually have attached bounding_box annotations
v1_data["bounding_box"] = data["bounding_box"]

return v1_data


def _build_metadata(annotation_file: AnnotationFile) -> DictFreeForm:
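
To illustrate the new mapping in `_build_legacy_annotation_data`, here is a hedged example of its expected input and output for a polygon. The data shapes are assumptions based on the keys the function reads (`path`, `paths`, `bounding_box`) and on the `AnnotationClass` dataclass from `darwin.datatypes`.

```python
from darwin.datatypes import AnnotationClass
from darwin.exporter.formats.darwin_1_0 import _build_legacy_annotation_data

# Hypothetical polygon data in the 2.0 shape: vertices under "path",
# plus the bounding box that polygons usually carry.
polygon_class = AnnotationClass(name="outline", annotation_type="polygon")
data = {
    "path": [{"x": 0, "y": 0}, {"x": 10, "y": 0}, {"x": 10, "y": 10}],
    "bounding_box": {"x": 0, "y": 0, "w": 10, "h": 10},
}

v1_payload = _build_legacy_annotation_data(polygon_class, data)
# Expected result:
# {
#     "polygon": {"path": [{"x": 0, "y": 0}, {"x": 10, "y": 0}, {"x": 10, "y": 10}]},
#     "bounding_box": {"x": 0, "y": 0, "w": 10, "h": 10},
# }
```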
51 changes: 39 additions & 12 deletions darwin/future/core/client.py
@@ -1,7 +1,7 @@
from __future__ import annotations

from pathlib import Path
from typing import Any, Callable, Dict, List, Optional, Union, overload
from typing import Callable, Dict, Optional, overload
from urllib.parse import urlparse

import requests
@@ -41,7 +41,10 @@ def validate_base_url(cls, v: str) -> str:
if not v.endswith("/"):
v += "/"
check = urlparse(v)
assert check.scheme in {"http", "https"}, "base_url must start with http or https"
assert check.scheme in {
"http",
"https",
}, "base_url must start with http or https"
assert check.netloc, "base_url must contain a domain"
return v

@@ -136,7 +139,9 @@ def __init__(self, config: DarwinConfig, retries: Optional[Retry] = None) -> None
self.config = config
self.session = requests.Session()
if not retries:
retries = Retry(total=3, backoff_factor=0.2, status_forcelist=[500, 502, 503, 504])
retries = Retry(
total=3, backoff_factor=0.2, status_forcelist=[500, 502, 503, 504]
)
self._setup_session(retries)
self._mappings = {
"get": self.session.get,
@@ -153,20 +158,32 @@ def _setup_session(self, retries: Retry) -> None:

@property
def headers(self) -> Dict[str, str]:
http_headers: Dict[str, str] = {"Content-Type": "application/json", "Accept": "application/json"}
http_headers: Dict[str, str] = {
"Content-Type": "application/json",
"Accept": "application/json",
}
if self.config.api_key:
http_headers["Authorization"] = f"ApiKey {self.config.api_key}"
return http_headers

@overload
def _generic_call(self, method: Callable[[str], requests.Response], endpoint: str) -> dict:
def _generic_call(
self, method: Callable[[str], requests.Response], endpoint: str
) -> dict:
...

@overload
def _generic_call(self, method: Callable[[str, dict], requests.Response], endpoint: str, payload: dict) -> dict:
def _generic_call(
self,
method: Callable[[str, dict], requests.Response],
endpoint: str,
payload: dict,
) -> dict:
...

def _generic_call(self, method: Callable, endpoint: str, payload: Optional[dict] = None) -> JSONType:
def _generic_call(
self, method: Callable, endpoint: str, payload: Optional[dict] = None
) -> JSONType:
endpoint = self._sanitize_endpoint(endpoint)
url = self.config.api_endpoint + endpoint
if payload is not None:
@@ -179,24 +196,34 @@ def _generic_call(self, method: Callable, endpoint: str, payload: Optional[dict]

return response.json()

def _contain_qs_and_endpoint(self, endpoint: str, query_string: Optional[QueryString] = None) -> str:
def _contain_qs_and_endpoint(
self, endpoint: str, query_string: Optional[QueryString] = None
) -> str:
if not query_string:
return endpoint

assert "?" not in endpoint
return endpoint + str(query_string)

def get(self, endpoint: str, query_string: Optional[QueryString] = None) -> JSONType:
return self._generic_call(self.session.get, self._contain_qs_and_endpoint(endpoint, query_string))
def get(
self, endpoint: str, query_string: Optional[QueryString] = None
) -> JSONType:
return self._generic_call(
self.session.get, self._contain_qs_and_endpoint(endpoint, query_string)
)

def put(self, endpoint: str, data: dict) -> JSONType:
return self._generic_call(self.session.put, endpoint, data)

def post(self, endpoint: str, data: dict) -> JSONType:
return self._generic_call(self.session.post, endpoint, data)

def delete(self, endpoint: str, query_string: Optional[QueryString] = None) -> JSONType:
return self._generic_call(self.session.delete, self._contain_qs_and_endpoint(endpoint, query_string))
def delete(
self, endpoint: str, query_string: Optional[QueryString] = None
) -> JSONType:
return self._generic_call(
self.session.delete, self._contain_qs_and_endpoint(endpoint, query_string)
)

def patch(self, endpoint: str, data: dict) -> JSONType:
return self._generic_call(self.session.patch, endpoint, data)
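
The default `Retry(total=3, backoff_factor=0.2, status_forcelist=[500, 502, 503, 504])` shown above is a standard urllib3 retry configuration. A minimal sketch of how such an object is typically mounted on a `requests.Session`; the actual `_setup_session` body is collapsed in this diff, so treat this as an assumption about its shape:

```python
import requests
from requests.adapters import HTTPAdapter
from urllib3.util.retry import Retry

# Retry transient server failures (5xx) up to 3 times with a small backoff.
retries = Retry(total=3, backoff_factor=0.2, status_forcelist=[500, 502, 503, 504])

session = requests.Session()
adapter = HTTPAdapter(max_retries=retries)
session.mount("http://", adapter)
session.mount("https://", adapter)
```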
8 changes: 4 additions & 4 deletions darwin/future/core/datasets/__init__.py
@@ -1,4 +1,4 @@
from darwin.future.core.datasets.create_dataset import *
from darwin.future.core.datasets.get_dataset import *
from darwin.future.core.datasets.list_datasets import *
from darwin.future.core.datasets.remove_dataset import *
from darwin.future.core.datasets.create_dataset import create_dataset
from darwin.future.core.datasets.get_dataset import get_dataset
from darwin.future.core.datasets.list_datasets import list_datasets
from darwin.future.core.datasets.remove_dataset import remove_dataset
4 changes: 3 additions & 1 deletion darwin/future/core/datasets/remove_dataset.py
@@ -4,7 +4,9 @@
from darwin.future.exceptions import DatasetNotFound


def remove_dataset(api_client: ClientCore, id: int, team_slug: Optional[str] = None) -> int:
def remove_dataset(
api_client: ClientCore, id: int, team_slug: Optional[str] = None
) -> int:
"""
Removes the dataset with the given id for the given team
4 changes: 2 additions & 2 deletions darwin/future/core/items/__init__.py
@@ -1,2 +1,2 @@
from darwin.future.core.items.get import *
from darwin.future.core.items.move_items import *
from darwin.future.core.items.get import get_item_ids, get_item_ids_stage
from darwin.future.core.items.move_items import move_items_to_stage
25 changes: 19 additions & 6 deletions darwin/future/core/items/get.py
@@ -5,7 +5,9 @@
from darwin.future.core.types.common import QueryString


def get_item_ids(api_client: ClientCore, team_slug: str, dataset_id: Union[str, int]) -> List[UUID]:
def get_item_ids(
api_client: ClientCore, team_slug: str, dataset_id: Union[str, int]
) -> List[UUID]:
"""
Returns a list of item ids for the dataset
@@ -26,15 +28,24 @@ def get_item_ids(api_client: ClientCore, team_slug: str, dataset_id: Union[str,

response = api_client.get(
f"/v2/teams/{team_slug}/items/ids",
QueryString({"not_statuses": "archived,error", "sort[id]": "desc", "dataset_ids": str(dataset_id)}),
QueryString(
{
"not_statuses": "archived,error",
"sort[id]": "desc",
"dataset_ids": str(dataset_id),
}
),
)
assert type(response) == dict
assert isinstance(response, dict)
uuids = [UUID(uuid) for uuid in response["item_ids"]]
return uuids


def get_item_ids_stage(
api_client: ClientCore, team_slug: str, dataset_id: Union[int, str], stage_id: Union[UUID, str]
api_client: ClientCore,
team_slug: str,
dataset_id: Union[int, str],
stage_id: Union[UUID, str],
) -> List[UUID]:
"""
Returns a list of item ids for the stage
@@ -57,8 +68,10 @@ def get_item_ids_stage(
"""
response = api_client.get(
f"/v2/teams/{team_slug}/items/ids",
QueryString({"workflow_stage_ids": str(stage_id), "dataset_ids": str(dataset_id)}),
QueryString(
{"workflow_stage_ids": str(stage_id), "dataset_ids": str(dataset_id)}
),
)
assert type(response) == dict
assert isinstance(response, dict)
uuids = [UUID(uuid) for uuid in response["item_ids"]]
return uuids
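
A hedged usage sketch of the two helpers above, assuming an already-configured `ClientCore` instance (construction is omitted because it depends on `DarwinConfig`):

```python
from uuid import UUID

from darwin.future.core.client import ClientCore
from darwin.future.core.items import get_item_ids, get_item_ids_stage

def summarize_stage(client: ClientCore, team_slug: str, dataset_id: int, stage_id: UUID) -> None:
    # All item ids in the dataset (archived/errored items are excluded by the endpoint query).
    all_ids = get_item_ids(client, team_slug, dataset_id)
    # Item ids currently sitting in one workflow stage.
    stage_ids = get_item_ids_stage(client, team_slug, dataset_id, stage_id)
    print(f"{len(stage_ids)} of {len(all_ids)} items are in stage {stage_id}")
```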
7 changes: 6 additions & 1 deletion darwin/future/core/items/move_items.py
@@ -6,7 +6,12 @@


def move_items_to_stage(
api_client: ClientCore, team_slug: str, workflow_id: UUID, dataset_id: int, stage_id: UUID, item_ids: List[UUID]
api_client: ClientCore,
team_slug: str,
workflow_id: UUID,
dataset_id: int,
stage_id: UUID,
item_ids: List[UUID],
) -> JSONType:
"""
Moves a list of items to a stage
5 changes: 3 additions & 2 deletions darwin/future/core/team/get_team.py
@@ -1,7 +1,6 @@
from typing import List, Optional, Tuple

from darwin.future.core.client import ClientCore
from darwin.future.core.types.common import JSONType
from darwin.future.data_objects.team import TeamCore, TeamMemberCore


@@ -13,7 +12,9 @@ def get_team(client: ClientCore, team_slug: Optional[str] = None) -> TeamCore:
return TeamCore.parse_obj(response)


def get_team_members(client: ClientCore) -> Tuple[List[TeamMemberCore], List[Exception]]:
def get_team_members(
client: ClientCore,
) -> Tuple[List[TeamMemberCore], List[Exception]]:
response = client.get("/memberships")
members = []
errors = []
2 changes: 1 addition & 1 deletion darwin/future/core/types/__init__.py
@@ -1 +1 @@
from .common import *
from .common import JSONType, QueryString, TeamSlug
(Diffs for the remaining changed files are not included in this view.)
