Skip to content

Commit

Permalink
[IO-1278][IO-1277] Core Items/Folders apis (#691)
Browse files Browse the repository at this point in the history
* basics

* get folders

* linting

* tests

* linting

* camel case fields

* line length

* line length

* linting changes

* linting changes

* import sorting

* assert not required

* merge master

* linting
  • Loading branch information
Nathanjp91 authored Oct 20, 2023
1 parent b0953bf commit f5992b4
Show file tree
Hide file tree
Showing 27 changed files with 359 additions and 55 deletions.
88 changes: 88 additions & 0 deletions darwin/future/core/items/get.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,11 @@
from typing import List, Union
from uuid import UUID

from pydantic import parse_obj_as

from darwin.future.core.client import ClientCore
from darwin.future.core.types.common import QueryString
from darwin.future.data_objects.item import Folder, Item


def get_item_ids(
Expand Down Expand Up @@ -75,3 +78,88 @@ def get_item_ids_stage(
assert isinstance(response, dict)
uuids = [UUID(uuid) for uuid in response["item_ids"]]
return uuids


def get_item(
    api_client: ClientCore,
    team_slug: str,
    item_id: Union[UUID, str],
    params: QueryString = QueryString({}),
) -> Item:
    """
    Fetch a single item from the Darwin API.

    Parameters
    ----------
    api_client: ClientCore
        The client to use for the request
    team_slug: str
        The slug of the team the item belongs to
    item_id: Union[UUID, str]
        The id or slug of the item to fetch
    params: QueryString
        Optional query parameters to append to the request

    Returns
    -------
    Item
        The item parsed from the API response
    """
    endpoint = f"/v2/teams/{team_slug}/items/{item_id}"
    payload = api_client.get(endpoint, params)
    assert isinstance(payload, dict)
    item = parse_obj_as(Item, payload)
    return item


def list_items(
    api_client: ClientCore,
    team_slug: str,
    params: QueryString,
) -> List[Item]:
    """
    List the items belonging to the dataset(s) named in ``params``.

    Parameters
    ----------
    api_client: ClientCore
        The client to use for the request
    team_slug: str
        The slug of the team to list items for
    params: QueryString
        Query parameters; must contain a ``dataset_ids`` entry

    Returns
    -------
    List[Item]
        The items returned by the API
    """
    assert "dataset_ids" in params.value, "dataset_ids must be provided"
    payload = api_client.get(f"/v2/teams/{team_slug}/items", params)
    assert isinstance(payload, dict)
    raw_items = payload["items"]
    return parse_obj_as(List[Item], raw_items)


def list_folders(
    api_client: ClientCore,
    team_slug: str,
    params: QueryString,
) -> List[Folder]:
    """
    List the folders for a team, filtered by the dataset(s) named in ``params``.

    Parameters
    ----------
    api_client: ClientCore
        The client to use for the request
    team_slug: str
        The slug of the team to list folders for
    params: QueryString
        Query parameters; must contain a ``dataset_ids`` entry

    Returns
    -------
    List[Folder]
        The folders returned by the API
    """
    assert "dataset_ids" in params.value, "dataset_ids must be provided"
    payload = api_client.get(f"/v2/teams/{team_slug}/items/folders", params)
    assert isinstance(payload, dict)
    assert "folders" in payload
    raw_folders = payload["folders"]
    return parse_obj_as(List[Folder], raw_folders)
7 changes: 4 additions & 3 deletions darwin/future/core/team/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
# Can't import * in this module because of a circular import problem specific to teams
# The TeamCore module can instantiate from a client, but the client needs to use the team backend module
# to request the object for team. To circumvent this there's a get_raw method in this module that returns
# the raw team object, which is then passed to the TeamCore module, but if we import * here it introduces the
# The TeamCore module can instantiate from a client, but the client needs to use the
# team backend module to request the object for team. To circumvent this there's a
# get_raw method in this module that returns the raw team object, which is then passed
# to the TeamCore module, but if we import * here it introduces the
# circular import problem.
3 changes: 1 addition & 2 deletions darwin/future/core/types/common.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
from typing import Any, Dict, List, Union


from darwin.future.data_objects import validators as darwin_validators
from darwin.future.data_objects.typing import UnknownType

Expand Down Expand Up @@ -39,7 +38,7 @@ class QueryString:

value: Dict[str, str]

def dict_check(cls, value: UnknownType) -> Dict[str, str]:
def dict_check(self, value: UnknownType) -> Dict[str, str]:
assert isinstance(value, dict)
assert all(isinstance(k, str) and isinstance(v, str) for k, v in value.items())
return value
Expand Down
8 changes: 4 additions & 4 deletions darwin/future/core/types/query.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,8 +68,8 @@ def filter_attr(self, attr: Any) -> bool: # type: ignore
def _from_dict(cls, d: Dict[str, Any]) -> QueryFilter: # type: ignore
if "name" not in d or "param" not in d:
raise InvalidQueryFilter(
f"args must be a QueryFilter or a dict with 'name' and 'param' keys, "
f"got {d}"
"args must be a QueryFilter or a dict with 'name' and 'param' keys,"
f" got {d}"
)
modifier = Modifier(d["modifier"]) if "modifier" in d else None
return QueryFilter(name=d["name"], param=str(d["param"]), modifier=modifier)
Expand All @@ -91,8 +91,8 @@ def _from_arg(cls, arg: object) -> QueryFilter:
return cls._from_dict(arg)
else:
raise InvalidQueryFilter(
f"args must be a QueryFilter or a dict with 'name' and 'param' keys, "
f"got {arg}"
"args must be a QueryFilter or a dict with 'name' and 'param' keys,"
f" got {arg}"
)

@classmethod
Expand Down
4 changes: 0 additions & 4 deletions darwin/future/core/utils/pathutils.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,6 @@


def attempt_open(path: Path) -> dict:
# TODO: Refactor this to be some sort of map method. Mypy doesn't like generic callables
# and will need to be typed
# reader: yaml.safe_load if path.suffix.lower() == ".yaml" else json.loads
# map_reader = {".yaml": yaml.safe_load, ".json": json.loads}
try:
if "yaml" in path.suffix.lower():
return open_yaml(path)
Expand Down
1 change: 0 additions & 1 deletion darwin/future/core/workflows/list_workflows.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
from typing import List, Optional, Tuple


from darwin.future.core.client import ClientCore
from darwin.future.data_objects.workflow import WorkflowCore, WorkflowListValidator

Expand Down
78 changes: 78 additions & 0 deletions darwin/future/data_objects/item.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
# @see: GraphotateWeb.Schemas.DatasetsV2.ItemRegistration.ExistingItem
from typing import Dict, List, Literal, Optional, Union
from uuid import UUID

from pydantic import Field, validator

from darwin.datatypes import NumberLike
from darwin.future.data_objects.pydantic_base import DefaultDarwin
from darwin.future.data_objects.typing import UnknownType

ItemFrameRate = Union[NumberLike, Literal["native"]]


def validate_no_slashes(v: "UnknownType") -> str:
    """
    Validate that ``v`` is a non-empty string not starting with a slash.

    Parameters
    ----------
    v: UnknownType
        The value to validate

    Returns
    -------
    str
        The validated string, unchanged

    Raises
    ------
    AssertionError
        If ``v`` is not a string, is empty, or starts with a slash
    """
    assert isinstance(v, str), "Must be a string"
    assert len(v) > 0, "cannot be empty"
    # Bug fix: the original wrote r"^[^/].*$".find(v) == -1, which calls
    # str.find on the pattern LITERAL (searching for v inside the pattern
    # text) rather than matching v against the regex — so values such as
    # "/foo" were accepted. Check the intended condition directly.
    assert not v.startswith("/"), "cannot start with a slash"

    return v


class ItemSlot(DefaultDarwin):
    """A single slot (file) belonging to a dataset item.

    Mirrors the backend registration schema referenced below; the pydantic
    validators enforce the constraints the API expects.
    """

    # GraphotateWeb.Schemas.DatasetsV2.ItemRegistration.ExistingSlot

    # Required fields
    slot_name: str  # non-empty, enforced by validate_slot_name
    file_name: str

    # Optional fields
    storage_key: Optional[str]  # must not start with "/", see validate_storage_key
    as_frames: Optional[bool]
    extract_views: Optional[bool]
    fps: Optional[ItemFrameRate] = Field(None, alias="fps")
    metadata: Optional[Dict[str, UnknownType]] = Field({}, alias="metadata")
    tags: Optional[Union[List[str], Dict[str, str]]] = Field(None, alias="tags")
    type: Literal["image", "video", "pdf", "dicom"] = Field(..., alias="type")

    @validator("slot_name")
    def validate_slot_name(cls, v: UnknownType) -> str:
        """Require slot_name to be a non-empty string."""
        assert isinstance(v, str), "slot_name must be a string"
        assert len(v) > 0, "slot_name cannot be empty"
        return v

    @validator("storage_key")
    def validate_storage_key(cls, v: UnknownType) -> str:
        """Delegate to the shared no-leading-slash string check."""
        return validate_no_slashes(v)

    @validator("fps")
    def validate_fps(cls, v: UnknownType) -> ItemFrameRate:
        """Require fps to be a non-negative number or the string 'native'.

        NOTE(review): the messages say "positive"/"greater than 0" but the
        check accepts 0 — confirm whether fps == 0 is intended to be valid.
        """
        assert isinstance(v, (int, float, str)), "fps must be a number or 'native'"
        if isinstance(v, (int, float)):
            assert v >= 0.0, "fps must be a positive number"
        if isinstance(v, str):
            assert v == "native", "fps must be 'native' or a number greater than 0"
        return v


class Item(DefaultDarwin):
    """A dataset item as returned by the V2 items API.

    An item is a named entry in a dataset composed of one or more slots
    (files); see ItemSlot for the per-file schema.
    """

    name: str  # must not start with "/", enforced by validate_name
    path: str
    archived: bool
    dataset_id: int
    id: UUID
    layout: Dict[str, UnknownType]
    slots: List[ItemSlot]
    processing_status: str
    priority: int

    @validator("name")
    def validate_name(cls, v: UnknownType) -> str:
        """Delegate to the shared no-leading-slash string check."""
        return validate_no_slashes(v)


class Folder(DefaultDarwin):
    """A folder within a dataset, with item counts for the current filter.

    filtered_item_count is the number of items under the active query
    filters; unfiltered_item_count is the total under this path.
    """

    dataset_id: int
    filtered_item_count: int
    path: str
    unfiltered_item_count: int
5 changes: 4 additions & 1 deletion darwin/future/data_objects/release.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,10 @@


class ReleaseCore(DefaultDarwin):
"""A class to manage all the information around a release on the darwin platform, including validation
"""
A class to manage all the information around a release on the darwin platform
including validation
Attributes
----------
name : str
Expand Down
4 changes: 3 additions & 1 deletion darwin/future/data_objects/team.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,9 @@ class TeamMemberCore(DefaultDarwin):


class TeamCore(DefaultDarwin):
"""A class to manage all the information around a Team on the darwin platform, including validation
"""
A class to manage all the information around a Team on the darwin platform
including validation
Attributes
----------
Expand Down
10 changes: 6 additions & 4 deletions darwin/future/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,14 @@ class DarwinException(Exception):
"""
Generic Darwin exception.
Used to differentiate from errors that originate in our code, and those that originate in
third-party libraries.
Used to differentiate from errors that originate in our code, and those that
originate in third-party libraries.
Extends `Exception` and adds a `parent_exception` field to store the original exception.
Extends `Exception` and adds a `parent_exception` field to store the original
exception.
Also has a `combined_exceptions` field to store a list of exceptions that were combined into
Also has a `combined_exceptions` field to store a list of exceptions that were
combined into
"""

parent_exception: Optional[Exception] = None
Expand Down
19 changes: 16 additions & 3 deletions darwin/future/meta/objects/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,13 @@


class Dataset(MetaBase[DatasetCore]):
"""Dataset Meta object. Facilitates the creation of Query objects, lazy loading of sub fields
"""
Dataset Meta object. Facilitates the creation of Query objects, lazy loading of
sub fields
Args:
MetaBase (Dataset): Generic MetaBase object expanded by Dataset core object return type
MetaBase (Dataset): Generic MetaBase object expanded by Dataset core object
return type
Returns:
_type_: DatasetMeta
Expand Down Expand Up @@ -126,5 +129,15 @@ def upload_files(
preserve_folders: bool = False,
verbose: bool = False,
) -> Dataset:
upload_data(self._element.name, files, files_to_exclude, fps, path, frames, extract_views, preserve_folders, verbose) # type: ignore
upload_data(
self._element.name,
files, # type: ignore
files_to_exclude,
fps,
path,
frames,
extract_views,
preserve_folders,
verbose,
)
return self
20 changes: 11 additions & 9 deletions darwin/future/meta/objects/team.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,19 +14,21 @@


class Team(MetaBase[TeamCore]):
"""Team Meta object. Facilitates the creation of Query objects, lazy loading of
sub fields like members unlike other MetaBase objects, does not extend the
__next__ function because it is not iterable. This is because Team is linked to
api key and only one team can be returned, but stores a list of teams for
consistency. This does mean however that to access the underlying team object,
you must access the first element of the list team = client.team[0]
"""
Team Meta object. Facilitates the creation of Query objects, lazy loading of sub
fields like members unlike other MetaBase objects, does not extend the __next__
function because it is not iterable. This is because Team is linked to api key and
only one team can be returned, but stores a list of teams for consistency. This
does mean however that to access the underlying team object, you must access the
first element of the list
team = client.team[0]
Args:
MetaBase (Team): Generic MetaBase object expanded by Team core object
return type
MetaBase (Team): Generic MetaBase object expanded by Team core object return
type
Returns:
_type_: TeamMeta
Team: Team object
"""

def __init__(self, client: ClientCore, team: Optional[TeamCore] = None) -> None:
Expand Down
12 changes: 11 additions & 1 deletion darwin/future/meta/objects/workflow.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,17 @@ def upload_files(
auto_push: bool = True,
) -> Workflow:
assert self._element.dataset is not None
upload_data(self.datasets[0].name, files, files_to_exclude, fps, path, frames, extract_views, preserve_folders, verbose) # type: ignore
upload_data(
self.datasets[0].name,
files, # type: ignore
files_to_exclude,
fps,
path,
frames,
extract_views,
preserve_folders,
verbose,
)
if auto_push:
self.push_from_dataset_stage()
return self
6 changes: 4 additions & 2 deletions darwin/future/meta/queries/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,8 +37,10 @@ def _collect(self) -> List[Dataset]:
def _execute_filters(
self, datasets: List[Dataset], filter: QueryFilter
) -> List[Dataset]:
"""Executes filtering on the local list of datasets, applying special logic for role filtering
otherwise calls the parent method for general filtering on the values of the datasets
"""
Executes filtering on the local list of datasets, applying special logic for
role filtering otherwise calls the parent method for general filtering on the
values of the datasets
Parameters
----------
Expand Down
Loading

0 comments on commit f5992b4

Please sign in to comment.