-
Notifications
You must be signed in to change notification settings - Fork 41
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[IO-1746] Introduction of Pagination queries and changes to item_ids endpoint #707
Changes from 14 commits
df0bd12
633f040
6236927
d8d614b
1ea5a9f
f762cca
feb3c46
aafae1f
680ce1d
386838c
a38cf7d
d506e53
c5acd04
0974fb1
5707c97
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -9,7 +9,10 @@ | |
|
||
|
||
def get_item_ids( | ||
api_client: ClientCore, team_slug: str, dataset_id: Union[str, int] | ||
api_client: ClientCore, | ||
team_slug: str, | ||
dataset_id: Union[str, int], | ||
params: QueryString = QueryString({}), | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I should probably refactor |
||
) -> List[UUID]: | ||
""" | ||
Returns a list of item ids for the dataset | ||
|
@@ -28,16 +31,14 @@ def get_item_ids( | |
List[UUID] | ||
A list of item ids | ||
""" | ||
|
||
response = api_client.get( | ||
f"/v2/teams/{team_slug}/items/ids", | ||
f"/v2/teams/{team_slug}/items/list_ids", | ||
QueryString( | ||
{ | ||
"not_statuses": "archived,error", | ||
"sort[id]": "desc", | ||
"dataset_ids": str(dataset_id), | ||
} | ||
), | ||
) | ||
+ params, | ||
) | ||
assert isinstance(response, dict) | ||
uuids = [UUID(uuid) for uuid in response["item_ids"]] | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,3 +1,5 @@ | ||
from __future__ import annotations | ||
|
||
from typing import Any, Dict, List, Union | ||
|
||
from darwin.future.data_objects import validators as darwin_validators | ||
|
@@ -83,3 +85,6 @@ def __init__(self, value: Dict[str, str]) -> None: | |
|
||
def __str__(self) -> str: | ||
return "?" + "&".join(f"{k}={v}" for k, v in self.value.items()) | ||
|
||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Useful for combining parameters in the core functions when required for pagination which I treat seperately There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yeah, nice addition actually. Gotta love a good dunder. |
||
def __add__(self, other: QueryString) -> QueryString: | ||
return QueryString({**self.value, **other.value}) |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -5,6 +5,7 @@ | |
from typing import Any, Callable, Dict, Generic, List, Optional, TypeVar | ||
|
||
from darwin.future.core.client import ClientCore | ||
from darwin.future.data_objects.page import Page | ||
from darwin.future.exceptions import ( | ||
InvalidQueryFilter, | ||
InvalidQueryModifier, | ||
|
@@ -29,8 +30,9 @@ class Modifier(Enum): | |
|
||
|
||
class QueryFilter(DefaultDarwin): | ||
"""Basic query filter with a name and a parameter | ||
|
||
""" | ||
Basic query filter with a name and a parameter | ||
Modifiers are for client side filtering only, and are not passed to the API | ||
Attributes | ||
---------- | ||
name: str | ||
|
@@ -104,6 +106,12 @@ def _from_kwarg(cls, key: str, value: str) -> QueryFilter: | |
modifier = None | ||
return QueryFilter(name=key, param=value, modifier=modifier) | ||
|
||
def to_dict(self, ignore_modifier: bool = True) -> Dict[str, str]: | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Mostly so they can be swapped in and out of QueryStrings for Paginated endpoints so that .where() syntax still works, for now ignores modifiers. I think this is not the most elegant solution though as eventually I think we'll want to probably do both server side query filter and client side once collected |
||
d = {self.name: self.param} | ||
if self.modifier is not None and not ignore_modifier: | ||
d["modifier"] = self.modifier.value | ||
return d | ||
|
||
|
||
class Query(Generic[T], ABC): | ||
""" | ||
|
@@ -154,17 +162,16 @@ def __init__( | |
self.meta_params: dict = meta_params or {} | ||
self.client = client | ||
self.filters = filters or [] | ||
self.results: Optional[List[T]] = None | ||
self._changed_since_last: bool = True | ||
self.results: dict[int, T] = {} | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is the core change that allows for Pagination, but all existing functions have been swapped over to preserve the outputs There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Ah ok, so you went with the int hashtable style approach. 👍🏻 |
||
self._changed_since_last: bool = False | ||
|
||
def filter(self, filter: QueryFilter) -> Query[T]: | ||
return self + filter | ||
|
||
def __add__(self, filter: QueryFilter) -> Query[T]: | ||
self._changed_since_last = True | ||
return self.__class__( | ||
self.client, filters=[*self.filters, filter], meta_params=self.meta_params | ||
) | ||
self.filters.append(filter) | ||
return self | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I like this better, it's much clearer and nicer to read for devs, but is the functionality not slightly different? Calling I assume, basically, that the difference doesn't matter? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. it is slightly different yeah, it is modification in place rather than re-allocation, but I think that's a much more sensible approach here as most people won't be using the add dunder functionality except us anyway as internal functions, and it the modification in place saves time and memory. |
||
|
||
def __sub__(self, filter: QueryFilter) -> Query[T]: | ||
self._changed_since_last = True | ||
|
@@ -186,16 +193,16 @@ def __isub__(self, filter: QueryFilter) -> Query[T]: | |
|
||
def __len__(self) -> int: | ||
if not self.results: | ||
self.results = list(self._collect()) | ||
return len(self.results) | ||
self.results = {**self.results, **self._collect()} | ||
return len(self.results.keys()) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'm confident you've done this for a reason, but with I might be missing something here, or maybe it's a STFU to mypy or similar? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. nah you're right that's a simpler call and have pushed a fix |
||
|
||
def __iter__(self) -> Query[T]: | ||
self.n = 0 | ||
return self | ||
|
||
def __next__(self) -> T: | ||
if not self.results: | ||
self.results = list(self._collect()) | ||
self.collect() | ||
if self.n < len(self.results): | ||
result = self.results[self.n] | ||
self.n += 1 | ||
|
@@ -205,12 +212,12 @@ def __next__(self) -> T: | |
|
||
def __getitem__(self, index: int) -> T: | ||
if not self.results: | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Does this now need to also take into account There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. self.collect() already accounts for it, which is a wrapper that runs the _collect function that has to be written per meta class but does all the _changed_since_last and other logic |
||
self.results = list(self._collect()) | ||
self.results = {**self.results, **self._collect()} | ||
return self.results[index] | ||
|
||
def __setitem__(self, index: int, value: T) -> None: | ||
if not self.results: | ||
self.results = list(self._collect()) | ||
self.results = {**self.results, **self._collect()} | ||
self.results[index] = value | ||
|
||
def where(self, *args: object, **kwargs: str) -> Query[T]: | ||
|
@@ -222,18 +229,21 @@ def where(self, *args: object, **kwargs: str) -> Query[T]: | |
|
||
def collect(self, force: bool = False) -> List[T]: | ||
if force or self._changed_since_last: | ||
self.results = [] | ||
self.results = self._collect() | ||
self.results = {} | ||
self.results = {**self.results, **self._collect()} | ||
self._changed_since_last = False | ||
return self.results | ||
return self._unwrap(self.results) | ||
|
||
def _unwrap(self, results: Dict[int, T]) -> List[T]: | ||
return list(results.values()) | ||
|
||
@abstractmethod | ||
def _collect(self) -> List[T]: | ||
def _collect(self) -> Dict[int, T]: | ||
raise NotImplementedError("Not implemented") | ||
|
||
def collect_one(self) -> T: | ||
if not self.results: | ||
self.results = list(self.collect()) | ||
self.results = {**self.results, **self._collect()} | ||
if len(self.results) == 0: | ||
raise ResultsNotFound("No results found") | ||
if len(self.results) > 1: | ||
|
@@ -242,12 +252,71 @@ def collect_one(self) -> T: | |
|
||
def first(self) -> T: | ||
if not self.results: | ||
self.results = list(self.collect()) | ||
self.results = {**self.results, **self._collect()} | ||
if len(self.results) == 0: | ||
raise ResultsNotFound("No results found") | ||
|
||
return self.results[0] | ||
|
||
def _generic_execute_filter(self, objects: List[T], filter: QueryFilter) -> List[T]: | ||
return [ | ||
m for m in objects if filter.filter_attr(getattr(m._element, filter.name)) | ||
] | ||
|
||
|
||
class PaginatedQuery(Query[T]): | ||
def __init__( | ||
self, | ||
client: ClientCore, | ||
filters: List[QueryFilter] | None = None, | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can we use this style of typing union whilst still meeting our version needs? It was introduced in 3.10. I'm happy if other versions will just ignore this though, because frankly it looks a significant amount nicer. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I believe we can if we use the from future import annotations at the top, which I do anyway for some imports around classes that return themselves. |
||
meta_params: Param | None = None, | ||
page: Page | None = None, | ||
): | ||
super().__init__(client, filters, meta_params) | ||
self.page = page or Page() | ||
self.completed = False | ||
|
||
def collect(self, force: bool = False) -> List[T]: | ||
if force or self._changed_since_last: | ||
self.page = Page() | ||
self.completed = False | ||
if self.completed: | ||
return self._unwrap(self.results) | ||
new_results = self._collect() | ||
self.results = {**self.results, **new_results} | ||
if len(new_results) < self.page.size or len(new_results) == 0: | ||
self.completed = True | ||
else: | ||
self.page.increment() | ||
return self._unwrap(self.results) | ||
|
||
def collect_all(self, force: bool = False) -> List[T]: | ||
if force: | ||
self.page = Page() | ||
self.completed = False | ||
self.results = {} | ||
while not self.completed: | ||
self.collect() | ||
return self._unwrap(self.results) | ||
|
||
def __getitem__(self, index: int) -> T: | ||
if index not in self.results: | ||
temp_page = self.page | ||
self.page = self.page.get_required_page(index) | ||
self.collect() | ||
self.page = temp_page | ||
return super().__getitem__(index) | ||
|
||
def __next__(self) -> T: | ||
if not self.completed and self.n not in self.results: | ||
self.collect() | ||
if self.completed and self.n not in self.results: | ||
raise StopIteration | ||
result = self.results[self.n] | ||
self.n += 1 | ||
return result | ||
|
||
def __len__(self) -> int: | ||
if not self.completed: | ||
self.collect_all() | ||
return len(self.results.keys()) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Same Q as before |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,51 @@ | ||
from __future__ import annotations | ||
|
||
from math import floor | ||
|
||
from pydantic import NonNegativeInt, PositiveInt | ||
|
||
from darwin.future.core.types.common import QueryString | ||
from darwin.future.data_objects.pydantic_base import DefaultDarwin | ||
|
||
|
||
def must_be_positive(v: int) -> int: | ||
if v is not None and v < 0: | ||
raise ValueError("Value must be positive") | ||
return v | ||
|
||
|
||
class Page(DefaultDarwin): | ||
offset: NonNegativeInt = 0 | ||
size: PositiveInt = 500 | ||
|
||
def get_required_page(self, item_index: int) -> Page: | ||
""" | ||
Get the page that contains the item at the specified index | ||
|
||
Args: | ||
item_index (int): The index of the item | ||
|
||
Returns: | ||
Page: The page that contains the item | ||
""" | ||
assert self.size is not None | ||
required_offset = floor(item_index / self.size) * self.size | ||
return Page(offset=required_offset, size=self.size) | ||
|
||
def to_query_string(self) -> QueryString: | ||
""" | ||
Generate a query string from the page object, some fields are not included if they are None, | ||
and certain fields are renamed. Outgoing and incoming query strings are different and require | ||
dropping certain fields | ||
|
||
Returns: | ||
QueryString: Outgoing query string | ||
""" | ||
qs_dict = {"page[offset]": str(self.offset), "page[size]": str(self.size)} | ||
return QueryString(qs_dict) | ||
|
||
def increment(self) -> None: | ||
""" | ||
Increment the page offset by the page size | ||
""" | ||
self.offset += self.size |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Totally unsure why I even had this overload, it didn't matter for the typing.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Possibly just experimenting with overloads etc. and got left in?