Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[IO-1278][IO-1277] Core Items/Folders apis #691

Merged
merged 16 commits into from
Oct 20, 2023
Merged
88 changes: 88 additions & 0 deletions darwin/future/core/items/get.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,11 @@
from typing import List, Union
from uuid import UUID

from pydantic import parse_obj_as

from darwin.future.core.client import ClientCore
from darwin.future.core.types.common import QueryString
from darwin.future.data_objects.item import Folder, Item


def get_item_ids(
Expand Down Expand Up @@ -75,3 +78,88 @@ def get_item_ids_stage(
assert isinstance(response, dict)
uuids = [UUID(uuid) for uuid in response["item_ids"]]
return uuids


def get_item(
    api_client: ClientCore,
    team_slug: str,
    item_id: Union[UUID, str],
    params: QueryString = QueryString({}),
) -> Item:
    """
    Fetches a single item of a team and parses it into an ``Item``

    Parameters
    ----------
    api_client: ClientCore
        The client to use for the request
    team_slug: str
        The slug of the team, interpolated into the request path
    item_id: Union[UUID, str]
        The id of the item to get, interpolated into the request path as-is
    params: QueryString
        Query parameters forwarded with the request, empty by default

    Returns
    -------
    Item
        The item parsed from the response body

    Raises
    ------
    AssertionError
        If the response is not a dict
    """
    endpoint = f"/v2/teams/{team_slug}/items/{item_id}"
    payload = api_client.get(endpoint, params)
    assert isinstance(payload, dict)
    item = parse_obj_as(Item, payload)
    return item


def list_items(
    api_client: ClientCore,
    team_slug: str,
    params: QueryString,
) -> List[Item]:
    """
    Lists the items of a team that match the given query parameters

    Parameters
    ----------
    api_client: ClientCore
        The client to use for the request
    team_slug: str
        The slug of the team to get items for
    params: QueryString
        Query parameters forwarded with the request; must contain "dataset_ids"

    Returns
    -------
    List[Item]
        The items parsed from the "items" entry of the response

    Raises
    ------
    AssertionError
        If "dataset_ids" is missing from params or the response is not a dict
    KeyError
        If the response has no "items" entry
    """
    has_dataset_filter = "dataset_ids" in params.value
    assert has_dataset_filter, "dataset_ids must be provided"
    payload = api_client.get(f"/v2/teams/{team_slug}/items", params)
    assert isinstance(payload, dict)
    raw_items = payload["items"]
    return parse_obj_as(List[Item], raw_items)


def list_folders(
    api_client: ClientCore,
    team_slug: str,
    params: QueryString,
) -> List[Folder]:
    """
    Lists the folders of a team that match the given query parameters

    Parameters
    ----------
    api_client: ClientCore
        The client to use for the request
    team_slug: str
        The slug of the team to get folders for
    params: QueryString
        Query parameters forwarded with the request; must contain "dataset_ids"

    Returns
    -------
    List[Folder]
        The folders parsed from the "folders" entry of the response

    Raises
    ------
    AssertionError
        If "dataset_ids" is missing from params, the response is not a dict,
        or the response has no "folders" entry
    """
    has_dataset_filter = "dataset_ids" in params.value
    assert has_dataset_filter, "dataset_ids must be provided"
    endpoint = f"/v2/teams/{team_slug}/items/folders"
    payload = api_client.get(endpoint, params)
    assert isinstance(payload, dict)
    assert "folders" in payload
    raw_folders = payload["folders"]
    return parse_obj_as(List[Folder], raw_folders)
7 changes: 4 additions & 3 deletions darwin/future/core/team/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
# Can't import * in this module because of a circular import problem specific to teams
# The TeamCore module can instantiate from a client, but the client needs to use the team backend module
# to request the object for team. To circumvent this there's a get_raw method in this module that returns
# the raw team object, which is then passed to the TeamCore module, but if we import * here it introduces the
# The TeamCore module can instantiate from a client, but the client needs to use the
# team backend module to request the object for team. To circumvent this there's a
# get_raw method in this module that returns the raw team object, which is then passed
# to the TeamCore module, but if we import * here it introduces the
# circular import problem.
3 changes: 1 addition & 2 deletions darwin/future/core/types/common.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
from typing import Any, Dict, List, Union


from darwin.future.data_objects import validators as darwin_validators
from darwin.future.data_objects.typing import UnknownType

Expand Down Expand Up @@ -39,7 +38,7 @@ class QueryString:

value: Dict[str, str]

def dict_check(cls, value: UnknownType) -> Dict[str, str]:
def dict_check(self, value: UnknownType) -> Dict[str, str]:
assert isinstance(value, dict)
assert all(isinstance(k, str) and isinstance(v, str) for k, v in value.items())
return value
Expand Down
16 changes: 5 additions & 11 deletions darwin/future/core/types/query.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,7 @@

from abc import ABC, abstractmethod
from enum import Enum
from typing import (
Any,
Callable,
Dict,
Generic,
List,
Optional,
TypeVar,
)
from typing import Any, Callable, Dict, Generic, List, Optional, TypeVar

from darwin.future.core.client import ClientCore
from darwin.future.exceptions import (
Expand Down Expand Up @@ -76,7 +68,8 @@ def filter_attr(self, attr: Any) -> bool: # type: ignore
def _from_dict(cls, d: Dict[str, Any]) -> QueryFilter: # type: ignore
if "name" not in d or "param" not in d:
raise InvalidQueryFilter(
f"args must be a QueryFilter or a dict with 'name' and 'param' keys, got {d}"
"args must be a QueryFilter or a dict with 'name' and 'param' keys,"
f" got {d}"
)
modifier = Modifier(d["modifier"]) if "modifier" in d else None
return QueryFilter(name=d["name"], param=str(d["param"]), modifier=modifier)
Expand All @@ -98,7 +91,8 @@ def _from_arg(cls, arg: object) -> QueryFilter:
return cls._from_dict(arg)
else:
raise InvalidQueryFilter(
f"args must be a QueryFilter or a dict with 'name' and 'param' keys, got {arg}"
"args must be a QueryFilter or a dict with 'name' and 'param' keys,"
f" got {arg}"
)

@classmethod
Expand Down
4 changes: 0 additions & 4 deletions darwin/future/core/utils/pathutils.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,6 @@


def attempt_open(path: Path) -> dict:
# TODO: Refactor this to be some sort of map method. Mypy doesn't like generic callables
# and will need to be typed
# reader: yaml.safe_load if path.suffix.lower() == ".yaml" else json.loads
# map_reader = {".yaml": yaml.safe_load, ".json": json.loads}
try:
if "yaml" in path.suffix.lower():
return open_yaml(path)
Expand Down
1 change: 0 additions & 1 deletion darwin/future/core/workflows/list_workflows.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
from typing import List, Optional, Tuple


from darwin.future.core.client import ClientCore
from darwin.future.data_objects.workflow import WorkflowCore, WorkflowListValidator

Expand Down
78 changes: 78 additions & 0 deletions darwin/future/data_objects/item.py
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I like this stuff especially 😉

Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
# @see: GraphotateWeb.Schemas.DatasetsV2.ItemRegistration.ExistingItem
from typing import Dict, List, Literal, Optional, Union
from uuid import UUID

from pydantic import Field, validator

from darwin.datatypes import NumberLike
from darwin.future.data_objects.pydantic_base import DefaultDarwin
from darwin.future.data_objects.typing import UnknownType

ItemFrameRate = Union[NumberLike, Literal["native"]]


def validate_no_slashes(v: UnknownType) -> str:
    """
    Validates that a value is a non-empty string that does not start with a slash

    Parameters
    ----------
    v: UnknownType
        The value to validate

    Returns
    -------
    str
        The value, unchanged

    Raises
    ------
    AssertionError
        If the value is not a string, is empty, or starts with "/"
    """
    assert isinstance(v, str), "Must be a string"
    assert len(v) > 0, "cannot be empty"
    # Only a leading slash is rejected; slashes elsewhere in the value are allowed.
    assert not v.startswith("/"), "cannot start with a slash"

    return v


class ItemSlot(DefaultDarwin):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is the main one I've changed in my branch

# GraphotateWeb.Schemas.DatasetsV2.ItemRegistration.ExistingSlot

# Required fields
slot_name: str
file_name: str

# Optional fields
storage_key: Optional[str]
as_frames: Optional[bool]
extract_views: Optional[bool]
fps: Optional[ItemFrameRate] = Field(None, alias="fps")
metadata: Optional[Dict[str, UnknownType]] = Field({}, alias="metadata")
tags: Optional[Union[List[str], Dict[str, str]]] = Field(None, alias="tags")
type: Literal["image", "video", "pdf", "dicom"] = Field(..., alias="type")

@validator("slot_name")
def validate_slot_name(cls, v: UnknownType) -> str:
    """Validates that ``slot_name`` is a non-empty string and returns it unchanged"""
    is_text = isinstance(v, str)
    assert is_text, "slot_name must be a string"
    assert len(v) >= 1, "slot_name cannot be empty"
    return v

@validator("storage_key")
def validate_storage_key(cls, v: UnknownType) -> str:
    """Validates ``storage_key`` by delegating to ``validate_no_slashes``"""
    checked_key = validate_no_slashes(v)
    return checked_key

@validator("fps")
def validate_fps(cls, v: UnknownType) -> ItemFrameRate:
    """
    Validates that ``fps`` is either the string "native" or a non-negative number

    Note that zero passes the numeric check (``>= 0.0``) even though the error
    message speaks of a positive number.
    """
    if isinstance(v, str):
        # The only accepted string is the literal "native".
        assert v == "native", "fps must be 'native' or a number greater than 0"
        return v
    assert isinstance(v, (int, float)), "fps must be a number or 'native'"
    assert v >= 0.0, "fps must be a positive number"
    return v


class Item(DefaultDarwin):
name: str
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

And this is where we'll have to figure it out between us.

path: str
archived: bool
dataset_id: int
id: UUID
layout: Dict[str, UnknownType]
slots: List[ItemSlot]
processing_status: str
priority: int

@validator("name")
def validate_name(cls, v: UnknownType) -> str:
    """Validates ``name`` by delegating to ``validate_no_slashes``"""
    checked_name = validate_no_slashes(v)
    return checked_name


class Folder(DefaultDarwin):
    """
    Data object describing a folder: its dataset, its path and two item counts
    """

    # NOTE(review): field meanings below are inferred from the field names only;
    # confirm against the API schema.
    # presumably the id of the dataset the folder belongs to
    dataset_id: int
    # presumably the number of items in the folder that match the query filters
    filtered_item_count: int
    # presumably the folder's path within the dataset
    path: str
    # presumably the number of items in the folder regardless of filters
    unfiltered_item_count: int
5 changes: 4 additions & 1 deletion darwin/future/data_objects/release.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,10 @@


class ReleaseCore(DefaultDarwin):
"""A class to manage all the information around a release on the darwin platform, including validation
"""
A class to manage all the information around a release on the darwin platform
including validation

Attributes
----------
name : str
Expand Down
4 changes: 3 additions & 1 deletion darwin/future/data_objects/team.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,9 @@ class TeamMemberCore(DefaultDarwin):


class TeamCore(DefaultDarwin):
"""A class to manage all the information around a Team on the darwin platform, including validation
"""
A class to manage all the information around a Team on the darwin platform
including validation

Attributes
----------
Expand Down
10 changes: 6 additions & 4 deletions darwin/future/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,14 @@ class DarwinException(Exception):
"""
Generic Darwin exception.

Used to differentiate from errors that originate in our code, and those that originate in
third-party libraries.
Used to differentiate from errors that originate in our code, and those that
originate in third-party libraries.

Extends `Exception` and adds a `parent_exception` field to store the original exception.
Extends `Exception` and adds a `parent_exception` field to store the original
exception.

Also has a `combined_exceptions` field to store a list of exceptions that were combined into
Also has a `combined_exceptions` field to store a list of exceptions that were
combined into this one.
"""

parent_exception: Optional[Exception] = None
Expand Down
19 changes: 16 additions & 3 deletions darwin/future/meta/objects/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,13 @@


class Dataset(MetaBase[DatasetCore]):
"""Dataset Meta object. Facilitates the creation of Query objects, lazy loading of sub fields
"""
Dataset Meta object. Facilitates the creation of Query objects, lazy loading of
sub fields

Args:
MetaBase (Dataset): Generic MetaBase object expanded by Dataset core object return type
MetaBase (Dataset): Generic MetaBase object expanded by Dataset core object
return type

Returns:
_type_: DatasetMeta
Expand Down Expand Up @@ -126,5 +129,15 @@ def upload_files(
preserve_folders: bool = False,
verbose: bool = False,
) -> Dataset:
upload_data(self._element.name, files, files_to_exclude, fps, path, frames, extract_views, preserve_folders, verbose) # type: ignore
upload_data(
self._element.name,
files, # type: ignore
files_to_exclude,
fps,
path,
frames,
extract_views,
preserve_folders,
verbose,
)
return self
1 change: 1 addition & 0 deletions darwin/future/meta/objects/stage.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,4 +69,5 @@ def edges(self) -> List[List[UUID]]:
return [
[edge.id, edge.source_stage_id, edge.target_stage_id]
for edge in self._element.edges
if edge.source_stage_id and edge.target_stage_id
]
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@JBWilkie source_stage_id and target_stage_id are optional so they can be None. Which means this comprehension without the if check actually returns a List[List[UUID | None]]. Let me know if this change doesn't make sense though and you want the None's

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can't see a use case for why the Nones would be needed but you never know

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I actually changed this so that it returns the WFEdgeCore items instead of the IDs within them, not sure why I didn't do this originally

This means we don't need to worry about this if check

21 changes: 14 additions & 7 deletions darwin/future/meta/objects/team.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,17 +14,21 @@


class Team(MetaBase[TeamCore]):
"""Team Meta object. Facilitates the creation of Query objects, lazy loading of sub fields like members
unlike other MetaBase objects, does not extend the __next__ function because it is not iterable. This is because
Team is linked to api key and only one team can be returned, but stores a list of teams for consistency. This
does mean however that to access the underlying team object, you must access the first element of the list
"""
Team Meta object. Facilitates the creation of Query objects and lazy loading of
sub fields like members. Unlike other MetaBase objects, it does not extend the
__next__ function because it is not iterable. This is because Team is linked to
an api key and only one team can be returned, but it stores a list of teams for
consistency. This does mean, however, that to access the underlying team object
you must access the first element of the list:
team = client.team[0]

Args:
MetaBase (Team): Generic MetaBase object expanded by Team core object return type
MetaBase (Team): Generic MetaBase object expanded by Team core object return
type

Returns:
_type_: TeamMeta
Team: Team object
"""

def __init__(self, client: ClientCore, team: Optional[TeamCore] = None) -> None:
Expand Down Expand Up @@ -57,7 +61,10 @@ def workflows(self) -> WorkflowQuery:
return WorkflowQuery(self.client, meta_params={"team_slug": self.slug})

def __str__(self) -> str:
return f"TeamMeta(name='{self.name}', slug='{self.slug}', id='{self.id}' - {len(self._element.members if self._element.members else [])} members)"
return (
f"TeamMeta(name='{self.name}', slug='{self.slug}', id='{self.id}' -"
f" {len(self._element.members if self._element.members else [])} members)"
)

@classmethod
def delete_dataset(
Expand Down
Loading