Skip to content

Commit

Permalink
[IO-1278][IO-1277] Core Items/Folders apis (#691)
Browse files Browse the repository at this point in the history
* basics

* get folders

* linting

* tests

* linting

* camel case fields

* line length

* line length

* linting changes

* linting changes

* import sorting

* assert not required

* merge master

* linting
  • Loading branch information
Nathanjp91 authored Oct 20, 2023
1 parent b0953bf commit f5992b4
Show file tree
Hide file tree
Showing 27 changed files with 359 additions and 55 deletions.
88 changes: 88 additions & 0 deletions darwin/future/core/items/get.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,11 @@
from typing import List, Union
from uuid import UUID

from pydantic import parse_obj_as

from darwin.future.core.client import ClientCore
from darwin.future.core.types.common import QueryString
from darwin.future.data_objects.item import Folder, Item


def get_item_ids(
Expand Down Expand Up @@ -75,3 +78,88 @@ def get_item_ids_stage(
assert isinstance(response, dict)
uuids = [UUID(uuid) for uuid in response["item_ids"]]
return uuids


def get_item(
    api_client: ClientCore,
    team_slug: str,
    item_id: Union[UUID, str],
    params: QueryString = QueryString({}),
) -> Item:
    """
    Fetch a single item from the Darwin API.

    Parameters
    ----------
    api_client: ClientCore
        The client to use for the request
    team_slug: str
        The slug of the team the item belongs to
    item_id: Union[UUID, str]
        The id or slug of the item to fetch
    params: QueryString
        Optional query parameters to append to the request

    Returns
    -------
    Item
        The item parsed from the API response
    """
    endpoint = f"/v2/teams/{team_slug}/items/{item_id}"
    payload = api_client.get(endpoint, params)
    assert isinstance(payload, dict)
    item = parse_obj_as(Item, payload)
    return item


def list_items(
    api_client: ClientCore,
    team_slug: str,
    params: QueryString,
) -> List[Item]:
    """
    List the items belonging to the dataset(s) named in ``params``.

    Parameters
    ----------
    api_client: ClientCore
        The client to use for the request
    team_slug: str
        The slug of the team to list items for
    params: QueryString
        Query parameters; must contain a ``dataset_ids`` entry

    Returns
    -------
    List[Item]
        The items returned by the API
    """
    assert "dataset_ids" in params.value, "dataset_ids must be provided"
    payload = api_client.get(f"/v2/teams/{team_slug}/items", params)
    assert isinstance(payload, dict)
    raw_items = payload["items"]
    return parse_obj_as(List[Item], raw_items)


def list_folders(
    api_client: ClientCore,
    team_slug: str,
    params: QueryString,
) -> List[Folder]:
    """
    List the folders for a team, filtered by the dataset(s) named in ``params``.

    Parameters
    ----------
    api_client: ClientCore
        The client to use for the request
    team_slug: str
        The slug of the team to list folders for
    params: QueryString
        Query parameters; must contain a ``dataset_ids`` entry

    Returns
    -------
    List[Folder]
        The folders returned by the API
    """
    assert "dataset_ids" in params.value, "dataset_ids must be provided"
    payload = api_client.get(f"/v2/teams/{team_slug}/items/folders", params)
    assert isinstance(payload, dict)
    assert "folders" in payload
    raw_folders = payload["folders"]
    return parse_obj_as(List[Folder], raw_folders)
7 changes: 4 additions & 3 deletions darwin/future/core/team/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
# Can't import * in this module because of a circular import problem specific to teams
# The TeamCore module can instantiate from a client, but the client needs to use the team backend module
# to request the object for team. To circumvent this there's a get_raw method in this module that returns
# the raw team object, which is then passed to the TeamCore module, but if we import * here it introduces the
# The TeamCore module can instantiate from a client, but the client needs to use the
# team backend module to request the object for team. To circumvent this there's a
# get_raw method in this module that returns the raw team object, which is then passed
# to the TeamCore module, but if we import * here it introduces the
# circular import problem.
3 changes: 1 addition & 2 deletions darwin/future/core/types/common.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
from typing import Any, Dict, List, Union


from darwin.future.data_objects import validators as darwin_validators
from darwin.future.data_objects.typing import UnknownType

Expand Down Expand Up @@ -39,7 +38,7 @@ class QueryString:

value: Dict[str, str]

def dict_check(cls, value: UnknownType) -> Dict[str, str]:
def dict_check(self, value: UnknownType) -> Dict[str, str]:
assert isinstance(value, dict)
assert all(isinstance(k, str) and isinstance(v, str) for k, v in value.items())
return value
Expand Down
8 changes: 4 additions & 4 deletions darwin/future/core/types/query.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,8 +68,8 @@ def filter_attr(self, attr: Any) -> bool: # type: ignore
def _from_dict(cls, d: Dict[str, Any]) -> QueryFilter: # type: ignore
if "name" not in d or "param" not in d:
raise InvalidQueryFilter(
f"args must be a QueryFilter or a dict with 'name' and 'param' keys, "
f"got {d}"
"args must be a QueryFilter or a dict with 'name' and 'param' keys,"
f" got {d}"
)
modifier = Modifier(d["modifier"]) if "modifier" in d else None
return QueryFilter(name=d["name"], param=str(d["param"]), modifier=modifier)
Expand All @@ -91,8 +91,8 @@ def _from_arg(cls, arg: object) -> QueryFilter:
return cls._from_dict(arg)
else:
raise InvalidQueryFilter(
f"args must be a QueryFilter or a dict with 'name' and 'param' keys, "
f"got {arg}"
"args must be a QueryFilter or a dict with 'name' and 'param' keys,"
f" got {arg}"
)

@classmethod
Expand Down
4 changes: 0 additions & 4 deletions darwin/future/core/utils/pathutils.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,6 @@


def attempt_open(path: Path) -> dict:
# TODO: Refactor this to be some sort of map method. Mypy doesn't like generic callables
# and will need to be typed
# reader: yaml.safe_load if path.suffix.lower() == ".yaml" else json.loads
# map_reader = {".yaml": yaml.safe_load, ".json": json.loads}
try:
if "yaml" in path.suffix.lower():
return open_yaml(path)
Expand Down
1 change: 0 additions & 1 deletion darwin/future/core/workflows/list_workflows.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
from typing import List, Optional, Tuple


from darwin.future.core.client import ClientCore
from darwin.future.data_objects.workflow import WorkflowCore, WorkflowListValidator

Expand Down
78 changes: 78 additions & 0 deletions darwin/future/data_objects/item.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
# @see: GraphotateWeb.Schemas.DatasetsV2.ItemRegistration.ExistingItem
from typing import Dict, List, Literal, Optional, Union
from uuid import UUID

from pydantic import Field, validator

from darwin.datatypes import NumberLike
from darwin.future.data_objects.pydantic_base import DefaultDarwin
from darwin.future.data_objects.typing import UnknownType

ItemFrameRate = Union[NumberLike, Literal["native"]]


def validate_no_slashes(v: "UnknownType") -> str:
    """
    Validate that ``v`` is a non-empty string not starting with a slash.

    Parameters
    ----------
    v: UnknownType
        The value to validate

    Returns
    -------
    str
        The validated string, unchanged

    Raises
    ------
    AssertionError
        If ``v`` is not a string, is empty, or starts with a slash
    """
    assert isinstance(v, str), "Must be a string"
    assert len(v) > 0, "cannot be empty"
    # Bug fix: the original wrote r"^[^/].*$".find(v) == -1, which calls
    # str.find on the pattern LITERAL (searching for v inside the pattern
    # text) rather than matching v against the regex — so values such as
    # "/foo" were accepted. Check the intended condition directly.
    assert not v.startswith("/"), "cannot start with a slash"

    return v


class ItemSlot(DefaultDarwin):
    """A single slot (file) belonging to a dataset item.

    Mirrors the backend registration schema referenced below; the pydantic
    validators enforce the constraints the API expects.
    """

    # GraphotateWeb.Schemas.DatasetsV2.ItemRegistration.ExistingSlot

    # Required fields
    slot_name: str  # non-empty, enforced by validate_slot_name
    file_name: str

    # Optional fields
    storage_key: Optional[str]  # must not start with "/", see validate_storage_key
    as_frames: Optional[bool]
    extract_views: Optional[bool]
    fps: Optional[ItemFrameRate] = Field(None, alias="fps")
    metadata: Optional[Dict[str, UnknownType]] = Field({}, alias="metadata")
    tags: Optional[Union[List[str], Dict[str, str]]] = Field(None, alias="tags")
    type: Literal["image", "video", "pdf", "dicom"] = Field(..., alias="type")

    @validator("slot_name")
    def validate_slot_name(cls, v: UnknownType) -> str:
        """Require slot_name to be a non-empty string."""
        assert isinstance(v, str), "slot_name must be a string"
        assert len(v) > 0, "slot_name cannot be empty"
        return v

    @validator("storage_key")
    def validate_storage_key(cls, v: UnknownType) -> str:
        """Delegate to the shared no-leading-slash string check."""
        return validate_no_slashes(v)

    @validator("fps")
    def validate_fps(cls, v: UnknownType) -> ItemFrameRate:
        """Require fps to be a non-negative number or the string 'native'.

        NOTE(review): the messages say "positive"/"greater than 0" but the
        check accepts 0 — confirm whether fps == 0 is intended to be valid.
        """
        assert isinstance(v, (int, float, str)), "fps must be a number or 'native'"
        if isinstance(v, (int, float)):
            assert v >= 0.0, "fps must be a positive number"
        if isinstance(v, str):
            assert v == "native", "fps must be 'native' or a number greater than 0"
        return v


class Item(DefaultDarwin):
    """A dataset item as returned by the V2 items API.

    An item is a named entry in a dataset composed of one or more slots
    (files); see ItemSlot for the per-file schema.
    """

    name: str  # must not start with "/", enforced by validate_name
    path: str
    archived: bool
    dataset_id: int
    id: UUID
    layout: Dict[str, UnknownType]
    slots: List[ItemSlot]
    processing_status: str
    priority: int

    @validator("name")
    def validate_name(cls, v: UnknownType) -> str:
        """Delegate to the shared no-leading-slash string check."""
        return validate_no_slashes(v)


class Folder(DefaultDarwin):
    """A folder within a dataset, with item counts for the current filter.

    filtered_item_count is the number of items under the active query
    filters; unfiltered_item_count is the total under this path.
    """

    dataset_id: int
    filtered_item_count: int
    path: str
    unfiltered_item_count: int
5 changes: 4 additions & 1 deletion darwin/future/data_objects/release.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,10 @@


class ReleaseCore(DefaultDarwin):
"""A class to manage all the information around a release on the darwin platform, including validation
"""
A class to manage all the information around a release on the darwin platform
including validation
Attributes
----------
name : str
Expand Down
4 changes: 3 additions & 1 deletion darwin/future/data_objects/team.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,9 @@ class TeamMemberCore(DefaultDarwin):


class TeamCore(DefaultDarwin):
"""A class to manage all the information around a Team on the darwin platform, including validation
"""
A class to manage all the information around a Team on the darwin platform
including validation
Attributes
----------
Expand Down
10 changes: 6 additions & 4 deletions darwin/future/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,14 @@ class DarwinException(Exception):
"""
Generic Darwin exception.
Used to differentiate from errors that originate in our code, and those that originate in
third-party libraries.
Used to differentiate from errors that originate in our code, and those that
originate in third-party libraries.
Extends `Exception` and adds a `parent_exception` field to store the original exception.
Extends `Exception` and adds a `parent_exception` field to store the original
exception.
Also has a `combined_exceptions` field to store a list of exceptions that were combined into
Also has a `combined_exceptions` field to store a list of exceptions that were
combined into
"""

parent_exception: Optional[Exception] = None
Expand Down
19 changes: 16 additions & 3 deletions darwin/future/meta/objects/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,13 @@


class Dataset(MetaBase[DatasetCore]):
"""Dataset Meta object. Facilitates the creation of Query objects, lazy loading of sub fields
"""
Dataset Meta object. Facilitates the creation of Query objects, lazy loading of
sub fields
Args:
MetaBase (Dataset): Generic MetaBase object expanded by Dataset core object return type
MetaBase (Dataset): Generic MetaBase object expanded by Dataset core object
return type
Returns:
_type_: DatasetMeta
Expand Down Expand Up @@ -126,5 +129,15 @@ def upload_files(
preserve_folders: bool = False,
verbose: bool = False,
) -> Dataset:
upload_data(self._element.name, files, files_to_exclude, fps, path, frames, extract_views, preserve_folders, verbose) # type: ignore
upload_data(
self._element.name,
files, # type: ignore
files_to_exclude,
fps,
path,
frames,
extract_views,
preserve_folders,
verbose,
)
return self
20 changes: 11 additions & 9 deletions darwin/future/meta/objects/team.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,19 +14,21 @@


class Team(MetaBase[TeamCore]):
"""Team Meta object. Facilitates the creation of Query objects, lazy loading of
sub fields like members unlike other MetaBase objects, does not extend the
__next__ function because it is not iterable. This is because Team is linked to
api key and only one team can be returned, but stores a list of teams for
consistency. This does mean however that to access the underlying team object,
you must access the first element of the list team = client.team[0]
"""
Team Meta object. Facilitates the creation of Query objects, lazy loading of sub
fields like members unlike other MetaBase objects, does not extend the __next__
function because it is not iterable. This is because Team is linked to api key and
only one team can be returned, but stores a list of teams for consistency. This
does mean however that to access the underlying team object, you must access the
first element of the list
team = client.team[0]
Args:
MetaBase (Team): Generic MetaBase object expanded by Team core object
return type
MetaBase (Team): Generic MetaBase object expanded by Team core object return
type
Returns:
_type_: TeamMeta
Team: Team object
"""

def __init__(self, client: ClientCore, team: Optional[TeamCore] = None) -> None:
Expand Down
12 changes: 11 additions & 1 deletion darwin/future/meta/objects/workflow.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,17 @@ def upload_files(
auto_push: bool = True,
) -> Workflow:
assert self._element.dataset is not None
upload_data(self.datasets[0].name, files, files_to_exclude, fps, path, frames, extract_views, preserve_folders, verbose) # type: ignore
upload_data(
self.datasets[0].name,
files, # type: ignore
files_to_exclude,
fps,
path,
frames,
extract_views,
preserve_folders,
verbose,
)
if auto_push:
self.push_from_dataset_stage()
return self
6 changes: 4 additions & 2 deletions darwin/future/meta/queries/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,8 +37,10 @@ def _collect(self) -> List[Dataset]:
def _execute_filters(
self, datasets: List[Dataset], filter: QueryFilter
) -> List[Dataset]:
"""Executes filtering on the local list of datasets, applying special logic for role filtering
otherwise calls the parent method for general filtering on the values of the datasets
"""
Executes filtering on the local list of datasets, applying special logic for
role filtering otherwise calls the parent method for general filtering on the
values of the datasets
Parameters
----------
Expand Down
Loading

0 comments on commit f5992b4

Please sign in to comment.