Skip to content

Commit

Permalink
Add detector metadata (#148)
Browse files Browse the repository at this point in the history
* initial commit

* Automatically reformatting code

* add metadata field to the detector model

* Automatically reformatting code

* small clean up

* make pylint happy

* wip

* add metadata field to detector serializer and api spec

* address pr feedback

---------

Co-authored-by: Auto-format Bot <[email protected]>
  • Loading branch information
blaise-muhirwa and Auto-format Bot authored Jan 18, 2024
1 parent c4d3f07 commit bb54236
Show file tree
Hide file tree
Showing 11 changed files with 119 additions and 5 deletions.
1 change: 1 addition & 0 deletions generated/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,7 @@ with openapi_client.ApiClient(configuration) as api_client:
group_name="group_name_example",
confidence_threshold=0.9,
pipeline_config="pipeline_config_example",
metadata="metadata_example",
) # DetectorCreationInput |

try:
Expand Down
1 change: 1 addition & 0 deletions generated/docs/Detector.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ Name | Type | Description | Notes
**query** | **str** | A question about the image. | [readonly]
**group_name** | **str** | Which group should this detector be part of? | [readonly]
**confidence_threshold** | **float** | If the detector&#39;s prediction is below this confidence threshold, send the image query for human review. | [optional] if omitted the server will use the default value of 0.9
**metadata** | **{str: (bool, date, datetime, dict, float, int, list, str, none_type)}, none_type** | A dictionary of custom key/value metadata to associate with the detector (limited to 1KB). | [optional]
**any string name** | **bool, date, datetime, dict, float, int, list, str, none_type** | any string name can be used but the value must be the correct type | [optional]

[[Back to Model list]](../README.md#documentation-for-models) [[Back to API list]](../README.md#documentation-for-api-endpoints) [[Back to README]](../README.md)
Expand Down
1 change: 1 addition & 0 deletions generated/docs/DetectorCreationInput.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ Name | Type | Description | Notes
**group_name** | **str** | Which group should this detector be part of? | [optional]
**confidence_threshold** | **float** | If the detector&#39;s prediction is below this confidence threshold, send the image query for human review. | [optional] if omitted the server will use the default value of 0.9
**pipeline_config** | **str, none_type** | (Advanced usage) Configuration to instantiate a specific prediction pipeline. | [optional]
**metadata** | **str, none_type** | A dictionary of custom key/value metadata to associate with the detector (limited to 1KB). | [optional]
**any string name** | **bool, date, datetime, dict, float, int, list, str, none_type** | any string name can be used but the value must be the correct type | [optional]

[[Back to Model list]](../README.md#documentation-for-models) [[Back to API list]](../README.md#documentation-for-api-endpoints) [[Back to README]](../README.md)
Expand Down
1 change: 1 addition & 0 deletions generated/docs/DetectorsApi.md
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ with openapi_client.ApiClient(configuration) as api_client:
group_name="group_name_example",
confidence_threshold=0.9,
pipeline_config="pipeline_config_example",
metadata="metadata_example",
) # DetectorCreationInput |

# example passing only required values which don't have defaults set
Expand Down
10 changes: 8 additions & 2 deletions generated/model.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# generated by datamodel-codegen:
# filename: public-api.yaml
# timestamp: 2023-11-09T05:00:29+00:00
# timestamp: 2024-01-16T18:09:48+00:00

from __future__ import annotations

Expand Down Expand Up @@ -33,6 +33,9 @@ class DetectorCreationInput(BaseModel):
pipeline_config: Optional[constr(max_length=8192)] = Field(
None, description="(Advanced usage) Configuration to instantiate a specific prediction pipeline."
)
metadata: Optional[str] = Field(
None, description="A dictionary of custom key/value metadata to associate with the detector (limited to 1KB)."
)


class DetectorTypeEnum(Enum):
Expand Down Expand Up @@ -60,6 +63,9 @@ class Detector(BaseModel):
"If the detector's prediction is below this confidence threshold, send the image query for human review."
),
)
metadata: Optional[Dict[str, Any]] = Field(
None, description="A dictionary of custom key/value metadata to associate with the detector (limited to 1KB)."
)


class ImageQuery(BaseModel):
Expand Down Expand Up @@ -87,4 +93,4 @@ class PaginatedImageQueryList(BaseModel):
count: Optional[int] = Field(None, example=123)
next: Optional[AnyUrl] = Field(None, example="http://api.example.org/accounts/?page=4")
previous: Optional[AnyUrl] = Field(None, example="http://api.example.org/accounts/?page=2")
results: Optional[List[ImageQuery]] = None
results: Optional[List[ImageQuery]] = None
7 changes: 7 additions & 0 deletions generated/openapi_client/model/detector.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,10 @@ def openapi_types():
"query": (str,), # noqa: E501
"group_name": (str,), # noqa: E501
"confidence_threshold": (float,), # noqa: E501
"metadata": (
{str: (bool, date, datetime, dict, float, int, list, str, none_type)},
none_type,
), # noqa: E501
}

@cached_property
Expand All @@ -136,6 +140,7 @@ def discriminator():
"query": "query", # noqa: E501
"group_name": "group_name", # noqa: E501
"confidence_threshold": "confidence_threshold", # noqa: E501
"metadata": "metadata", # noqa: E501
}

read_only_vars = {
Expand Down Expand Up @@ -193,6 +198,7 @@ def _from_openapi_data(cls, id, type, created_at, name, query, group_name, *args
through its discriminator because we passed in
_visited_composed_classes = (Animal,)
confidence_threshold (float): If the detector's prediction is below this confidence threshold, send the image query for human review.. [optional] if omitted the server will use the default value of 0.9 # noqa: E501
metadata ({str: (bool, date, datetime, dict, float, int, list, str, none_type)}, none_type): A dictionary of custom key/value metadata to associate with the detector (limited to 1KB).. [optional] # noqa: E501
"""

_check_type = kwargs.pop("_check_type", True)
Expand Down Expand Up @@ -287,6 +293,7 @@ def __init__(self, name, *args, **kwargs): # noqa: E501
through its discriminator because we passed in
_visited_composed_classes = (Animal,)
confidence_threshold (float): If the detector's prediction is below this confidence threshold, send the image query for human review.. [optional] if omitted the server will use the default value of 0.9 # noqa: E501
metadata ({str: (bool, date, datetime, dict, float, int, list, str, none_type)}, none_type): A dictionary of custom key/value metadata to associate with the detector (limited to 1KB).. [optional] # noqa: E501
"""

_check_type = kwargs.pop("_check_type", True)
Expand Down
7 changes: 7 additions & 0 deletions generated/openapi_client/model/detector_creation_input.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,10 @@ def openapi_types():
str,
none_type,
), # noqa: E501
"metadata": (
str,
none_type,
), # noqa: E501
}

@cached_property
Expand All @@ -126,6 +130,7 @@ def discriminator():
"group_name": "group_name", # noqa: E501
"confidence_threshold": "confidence_threshold", # noqa: E501
"pipeline_config": "pipeline_config", # noqa: E501
"metadata": "metadata", # noqa: E501
}

read_only_vars = {}
Expand Down Expand Up @@ -175,6 +180,7 @@ def _from_openapi_data(cls, name, query, *args, **kwargs): # noqa: E501
group_name (str): Which group should this detector be part of?. [optional] # noqa: E501
confidence_threshold (float): If the detector's prediction is below this confidence threshold, send the image query for human review.. [optional] if omitted the server will use the default value of 0.9 # noqa: E501
pipeline_config (str, none_type): (Advanced usage) Configuration to instantiate a specific prediction pipeline.. [optional] # noqa: E501
metadata (str, none_type): A dictionary of custom key/value metadata to associate with the detector (limited to 1KB).. [optional] # noqa: E501
"""

_check_type = kwargs.pop("_check_type", True)
Expand Down Expand Up @@ -270,6 +276,7 @@ def __init__(self, name, query, *args, **kwargs): # noqa: E501
group_name (str): Which group should this detector be part of?. [optional] # noqa: E501
confidence_threshold (float): If the detector's prediction is below this confidence threshold, send the image query for human review.. [optional] if omitted the server will use the default value of 0.9 # noqa: E501
pipeline_config (str, none_type): (Advanced usage) Configuration to instantiate a specific prediction pipeline.. [optional] # noqa: E501
metadata (str, none_type): A dictionary of custom key/value metadata to associate with the detector (limited to 1KB).. [optional] # noqa: E501
"""

_check_type = kwargs.pop("_check_type", True)
Expand Down
12 changes: 12 additions & 0 deletions spec/public-api.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -265,6 +265,12 @@ components:
description:
If the detector's prediction is below this confidence threshold,
send the image query for human review.
metadata:
type: object
nullable: true
description:
A dictionary of custom key/value metadata to associate with the detector
(limited to 1KB). This is encoded as a URL-safe, base64-encoded JSON string.
required:
- created_at
- group_name
Expand Down Expand Up @@ -303,6 +309,12 @@ components:
nullable: true
description: (Advanced usage) Configuration to instantiate a specific prediction pipeline.
maxLength: 8192
metadata:
type: string
nullable: true
description:
A dictionary of custom key/value metadata to associate with the detector
(limited to 1KB). This is encoded as a URL-safe, base64-encoded JSON string.
required:
# TODO: make name optional - that's how the web version is going.
- name
Expand Down
13 changes: 13 additions & 0 deletions src/groundlight/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -187,6 +187,7 @@ def create_detector(
*,
confidence_threshold: Optional[float] = None,
pipeline_config: Optional[str] = None,
metadata: Union[dict, str, None] = None,
) -> Detector:
"""
Create a new detector with a given name and query
Expand All @@ -199,13 +200,19 @@ def create_detector(
:param pipeline_config: the pipeline config
:param metadata: A dictionary or JSON string of custom key/value metadata to associate with
the detector (limited to 1KB). You can retrieve this metadata later by calling
`get_detector()`.
:return: Detector
"""
detector_creation_input = DetectorCreationInput(name=name, query=query)
if confidence_threshold is not None:
detector_creation_input.confidence_threshold = confidence_threshold
if pipeline_config is not None:
detector_creation_input.pipeline_config = pipeline_config
if metadata is not None:
detector_creation_input.metadata = str(url_encode_dict(metadata, name="metadata", size_limit_bytes=1024))
obj = self.detectors_api.create_detector(detector_creation_input)
return Detector.parse_obj(obj.to_dict())

Expand All @@ -216,6 +223,7 @@ def get_or_create_detector(
*,
confidence_threshold: Optional[float] = None,
pipeline_config: Optional[str] = None,
metadata: Union[dict, str, None] = None,
) -> Detector:
"""
Tries to look up the detector by name. If a detector with that name, query, and
Expand All @@ -230,6 +238,10 @@ def get_or_create_detector(
:param pipeline_config: the pipeline config
:param metadata: A dictionary or JSON string of custom key/value metadata to associate with
the detector (limited to 1KB). You can retrieve this metadata later by calling
`get_detector()`.
:return: Detector
"""
try:
Expand All @@ -241,6 +253,7 @@ def get_or_create_detector(
query=query,
confidence_threshold=confidence_threshold,
pipeline_config=pipeline_config,
metadata=metadata,
)

# TODO: We may soon allow users to update the retrieved detector's fields.
Expand Down
3 changes: 1 addition & 2 deletions src/groundlight/encodings.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
import base64
import json
import sys
from typing import Dict, Optional, Union


Expand Down Expand Up @@ -34,7 +33,7 @@ def url_encode_dict(maybe_dict: Union[Dict, str], name: str, size_limit_bytes: O
data_json = json.dumps(maybe_dict)

if size_limit_bytes is not None:
size_bytes = sys.getsizeof(data_json)
size_bytes = len(data_json)
if size_bytes > size_limit_bytes:
raise ValueError(f"`{name}` is too large: {size_bytes} bytes > {size_limit_bytes} bytes limit.")

Expand Down
68 changes: 67 additions & 1 deletion test/integration/test_groundlight.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@
# ruff: noqa: F403,F405
# pylint: disable=wildcard-import,unused-wildcard-import,redefined-outer-name,import-outside-toplevel
import json
import random
import string
import time
from datetime import datetime
from typing import Any, Dict, Optional, Union
Expand All @@ -28,6 +30,27 @@ def is_valid_display_result(result: Any) -> bool:
return True


def generate_random_dict(target_size_bytes=1024, key_length=8, value_length=10) -> Dict[str, str]:
    """
    Generate a random dictionary whose JSON encoding is as large as possible without
    exceeding `target_size_bytes`.

    :param target_size_bytes: upper bound, in bytes, on the UTF-8 encoded `json.dumps` output.
    :param key_length: number of characters in each random key (lowercase letters and digits).
    :param value_length: number of characters in each random value (letters and digits).
    :return: a dictionary of random string keys and values. It is empty when not even a
        single key/value pair fits within `target_size_bytes`.

    NOTE: the loop only ends once a *new* key overflows the limit, so the key space
    (36 ** key_length) must be large enough to fill the requested size.
    """
    key_chars = string.ascii_lowercase + string.digits
    value_chars = string.ascii_letters + string.digits

    random_dict: Dict[str, str] = {}
    while True:
        key = "".join(random.choice(key_chars) for _ in range(key_length))
        value = "".join(random.choice(value_chars) for _ in range(value_length))
        is_new_key = key not in random_dict
        random_dict[key] = value

        # Measure the real encoded size instead of estimating the per-pair overhead:
        # json.dumps adds four quotes plus ": " and ", " for every pair, so a fixed
        # guess is easy to get wrong and lets the result overshoot the target.
        if len(json.dumps(random_dict).encode("utf-8")) > target_size_bytes:
            if is_new_key:
                # Roll back the pair that pushed the encoding over the limit.
                del random_dict[key]
            return random_dict


def is_valid_display_label(label: str) -> bool:
"""Is the image query result label valid to display to the user?."""
# NOTE: For now, we strictly only show UPPERCASE labels to the user.
Expand Down Expand Up @@ -252,7 +275,50 @@ def test_submit_image_query_with_human_review_param(gl: Groundlight, detector: D
assert is_valid_display_result(_image_query.result)


@pytest.mark.skip_for_edge_endpoint(reason="The edge-endpoint does not support passing metadata.")
@pytest.mark.skip_for_edge_endpoint(reason="The edge-endpoint does not support passing detector metadata.")
def test_create_detector_with_metadata(gl: Groundlight):
    """Metadata given to `create_detector` comes back on the new detector and on `get_detector`."""
    expected_metadata = generate_random_dict(target_size_bytes=200)

    # The timestamp keeps the detector name unique across test runs.
    created = gl.create_detector(
        name=f"Test {datetime.utcnow()}",
        query="Is there a dog?",
        metadata=expected_metadata,
    )
    assert created.metadata == expected_metadata

    fetched = gl.get_detector(id=created.id)
    assert fetched.metadata == expected_metadata


@pytest.mark.skip_for_edge_endpoint(reason="The edge-endpoint does not support passing detector metadata.")
def test_get_or_create_detector_with_metadata(gl: Groundlight):
    """A repeated `get_or_create_detector` call returns the same detector with its metadata intact."""
    # Both calls use identical arguments; the timestamped name guarantees the first one creates.
    request = {
        "name": f"Unique name {datetime.utcnow()}",
        "query": "Is there a dog?",
        "metadata": generate_random_dict(target_size_bytes=200),
    }

    first = gl.get_or_create_detector(**request)
    assert first.metadata == request["metadata"]

    second = gl.get_or_create_detector(**request)
    assert second.id == first.id
    assert second.metadata == request["metadata"]


@pytest.mark.skip_for_edge_endpoint(reason="The edge-endpoint does not support passing detector metadata.")
@pytest.mark.parametrize(
"metadata_list",
[
[generate_random_dict(target_size_bytes=3000)],
["this is not valid JSON"],
[""],
],
)
def test_create_detector_with_invalid_metadata(gl: Groundlight, metadata_list: Any):
    """Each entry of `metadata_list` must make `create_detector` raise TypeError or ValueError."""
    # The timestamp keeps the detector name unique across test runs.
    detector_kwargs = {"name": f"Test {datetime.utcnow()}", "query": "Is there a dog?"}
    expected_errors = (TypeError, ValueError)

    for bad_metadata in metadata_list:
        with pytest.raises(expected_errors):
            gl.create_detector(metadata=bad_metadata, **detector_kwargs)


@pytest.mark.skip_for_edge_endpoint(reason="The edge-endpoint does not support passing image query metadata.")
@pytest.mark.parametrize("metadata", [None, {}, {"a": 1}, '{"a": 1}'])
def test_submit_image_query_with_metadata(
gl: Groundlight, detector: Detector, image: str, metadata: Union[Dict, str, None]
Expand Down

0 comments on commit bb54236

Please sign in to comment.