From bb542361ef435bc715b2af7eaed63f1918bee61b Mon Sep 17 00:00:00 2001 From: blaise-muhirwa <135643310+blaise-muhirwa@users.noreply.github.com> Date: Thu, 18 Jan 2024 13:54:48 -0800 Subject: [PATCH] Add detector metadata (#148) * initial commit * Automatically reformatting code * add metadata field to the detector model * Automatically reformatting code * small clean up * make pylint happy * wip * add metadata field to detector serializer and api spec * address pr feedback --------- Co-authored-by: Auto-format Bot --- generated/README.md | 1 + generated/docs/Detector.md | 1 + generated/docs/DetectorCreationInput.md | 1 + generated/docs/DetectorsApi.md | 1 + generated/model.py | 10 ++- generated/openapi_client/model/detector.py | 7 ++ .../model/detector_creation_input.py | 7 ++ spec/public-api.yaml | 12 ++++ src/groundlight/client.py | 13 ++++ src/groundlight/encodings.py | 3 +- test/integration/test_groundlight.py | 68 ++++++++++++++++++- 11 files changed, 119 insertions(+), 5 deletions(-) diff --git a/generated/README.md b/generated/README.md index 57285fbe..2e9fb09e 100644 --- a/generated/README.md +++ b/generated/README.md @@ -81,6 +81,7 @@ with openapi_client.ApiClient(configuration) as api_client: group_name="group_name_example", confidence_threshold=0.9, pipeline_config="pipeline_config_example", + metadata="metadata_example", ) # DetectorCreationInput | try: diff --git a/generated/docs/Detector.md b/generated/docs/Detector.md index 638120c4..e2264d06 100644 --- a/generated/docs/Detector.md +++ b/generated/docs/Detector.md @@ -12,6 +12,7 @@ Name | Type | Description | Notes **query** | **str** | A question about the image. | [readonly] **group_name** | **str** | Which group should this detector be part of? | [readonly] **confidence_threshold** | **float** | If the detector's prediction is below this confidence threshold, send the image query for human review. 
| [optional] if omitted the server will use the default value of 0.9 +**metadata** | **{str: (bool, date, datetime, dict, float, int, list, str, none_type)}, none_type** | A dictionary of custom key/value metadata to associate with the detector (limited to 1KB). | [optional] **any string name** | **bool, date, datetime, dict, float, int, list, str, none_type** | any string name can be used but the value must be the correct type | [optional] [[Back to Model list]](../README.md#documentation-for-models) [[Back to API list]](../README.md#documentation-for-api-endpoints) [[Back to README]](../README.md) diff --git a/generated/docs/DetectorCreationInput.md b/generated/docs/DetectorCreationInput.md index e57b93a3..cd6c25b8 100644 --- a/generated/docs/DetectorCreationInput.md +++ b/generated/docs/DetectorCreationInput.md @@ -10,6 +10,7 @@ Name | Type | Description | Notes **group_name** | **str** | Which group should this detector be part of? | [optional] **confidence_threshold** | **float** | If the detector's prediction is below this confidence threshold, send the image query for human review. | [optional] if omitted the server will use the default value of 0.9 **pipeline_config** | **str, none_type** | (Advanced usage) Configuration to instantiate a specific prediction pipeline. | [optional] +**metadata** | **str, none_type** | A dictionary of custom key/value metadata to associate with the detector (limited to 1KB). 
| [optional] **any string name** | **bool, date, datetime, dict, float, int, list, str, none_type** | any string name can be used but the value must be the correct type | [optional] [[Back to Model list]](../README.md#documentation-for-models) [[Back to API list]](../README.md#documentation-for-api-endpoints) [[Back to README]](../README.md) diff --git a/generated/docs/DetectorsApi.md b/generated/docs/DetectorsApi.md index 8c13cee3..630b5697 100644 --- a/generated/docs/DetectorsApi.md +++ b/generated/docs/DetectorsApi.md @@ -54,6 +54,7 @@ with openapi_client.ApiClient(configuration) as api_client: group_name="group_name_example", confidence_threshold=0.9, pipeline_config="pipeline_config_example", + metadata="metadata_example", ) # DetectorCreationInput | # example passing only required values which don't have defaults set diff --git a/generated/model.py b/generated/model.py index 910ad20a..3246dc28 100644 --- a/generated/model.py +++ b/generated/model.py @@ -1,6 +1,6 @@ # generated by datamodel-codegen: # filename: public-api.yaml -# timestamp: 2023-11-09T05:00:29+00:00 +# timestamp: 2024-01-16T18:09:48+00:00 from __future__ import annotations @@ -33,6 +33,9 @@ class DetectorCreationInput(BaseModel): pipeline_config: Optional[constr(max_length=8192)] = Field( None, description="(Advanced usage) Configuration to instantiate a specific prediction pipeline." ) + metadata: Optional[str] = Field( + None, description="A dictionary of custom key/value metadata to associate with the detector (limited to 1KB)." + ) class DetectorTypeEnum(Enum): @@ -60,6 +63,9 @@ class Detector(BaseModel): "If the detector's prediction is below this confidence threshold, send the image query for human review." ), ) + metadata: Optional[Dict[str, Any]] = Field( + None, description="A dictionary of custom key/value metadata to associate with the detector (limited to 1KB)." 
+ ) class ImageQuery(BaseModel): @@ -87,4 +93,4 @@ class PaginatedImageQueryList(BaseModel): count: Optional[int] = Field(None, example=123) next: Optional[AnyUrl] = Field(None, example="http://api.example.org/accounts/?page=4") previous: Optional[AnyUrl] = Field(None, example="http://api.example.org/accounts/?page=2") - results: Optional[List[ImageQuery]] = None \ No newline at end of file + results: Optional[List[ImageQuery]] = None diff --git a/generated/openapi_client/model/detector.py b/generated/openapi_client/model/detector.py index 51e3c116..d1b629e5 100644 --- a/generated/openapi_client/model/detector.py +++ b/generated/openapi_client/model/detector.py @@ -122,6 +122,10 @@ def openapi_types(): "query": (str,), # noqa: E501 "group_name": (str,), # noqa: E501 "confidence_threshold": (float,), # noqa: E501 + "metadata": ( + {str: (bool, date, datetime, dict, float, int, list, str, none_type)}, + none_type, + ), # noqa: E501 } @cached_property @@ -136,6 +140,7 @@ def discriminator(): "query": "query", # noqa: E501 "group_name": "group_name", # noqa: E501 "confidence_threshold": "confidence_threshold", # noqa: E501 + "metadata": "metadata", # noqa: E501 } read_only_vars = { @@ -193,6 +198,7 @@ def _from_openapi_data(cls, id, type, created_at, name, query, group_name, *args through its discriminator because we passed in _visited_composed_classes = (Animal,) confidence_threshold (float): If the detector's prediction is below this confidence threshold, send the image query for human review.. [optional] if omitted the server will use the default value of 0.9 # noqa: E501 + metadata ({str: (bool, date, datetime, dict, float, int, list, str, none_type)}, none_type): A dictionary of custom key/value metadata to associate with the detector (limited to 1KB).. 
[optional] # noqa: E501 """ _check_type = kwargs.pop("_check_type", True) @@ -287,6 +293,7 @@ def __init__(self, name, *args, **kwargs): # noqa: E501 through its discriminator because we passed in _visited_composed_classes = (Animal,) confidence_threshold (float): If the detector's prediction is below this confidence threshold, send the image query for human review.. [optional] if omitted the server will use the default value of 0.9 # noqa: E501 + metadata ({str: (bool, date, datetime, dict, float, int, list, str, none_type)}, none_type): A dictionary of custom key/value metadata to associate with the detector (limited to 1KB).. [optional] # noqa: E501 """ _check_type = kwargs.pop("_check_type", True) diff --git a/generated/openapi_client/model/detector_creation_input.py b/generated/openapi_client/model/detector_creation_input.py index eee2ec9c..332df8cc 100644 --- a/generated/openapi_client/model/detector_creation_input.py +++ b/generated/openapi_client/model/detector_creation_input.py @@ -114,6 +114,10 @@ def openapi_types(): str, none_type, ), # noqa: E501 + "metadata": ( + str, + none_type, + ), # noqa: E501 } @cached_property @@ -126,6 +130,7 @@ def discriminator(): "group_name": "group_name", # noqa: E501 "confidence_threshold": "confidence_threshold", # noqa: E501 "pipeline_config": "pipeline_config", # noqa: E501 + "metadata": "metadata", # noqa: E501 } read_only_vars = {} @@ -175,6 +180,7 @@ def _from_openapi_data(cls, name, query, *args, **kwargs): # noqa: E501 group_name (str): Which group should this detector be part of?. [optional] # noqa: E501 confidence_threshold (float): If the detector's prediction is below this confidence threshold, send the image query for human review.. [optional] if omitted the server will use the default value of 0.9 # noqa: E501 pipeline_config (str, none_type): (Advanced usage) Configuration to instantiate a specific prediction pipeline.. 
[optional] # noqa: E501 + metadata (str, none_type): A dictionary of custom key/value metadata to associate with the detector (limited to 1KB).. [optional] # noqa: E501 """ _check_type = kwargs.pop("_check_type", True) @@ -270,6 +276,7 @@ def __init__(self, name, query, *args, **kwargs): # noqa: E501 group_name (str): Which group should this detector be part of?. [optional] # noqa: E501 confidence_threshold (float): If the detector's prediction is below this confidence threshold, send the image query for human review.. [optional] if omitted the server will use the default value of 0.9 # noqa: E501 pipeline_config (str, none_type): (Advanced usage) Configuration to instantiate a specific prediction pipeline.. [optional] # noqa: E501 + metadata (str, none_type): A dictionary of custom key/value metadata to associate with the detector (limited to 1KB).. [optional] # noqa: E501 """ _check_type = kwargs.pop("_check_type", True) diff --git a/spec/public-api.yaml b/spec/public-api.yaml index 7e7c2cb9..e4d0f34f 100644 --- a/spec/public-api.yaml +++ b/spec/public-api.yaml @@ -265,6 +265,12 @@ components: description: If the detector's prediction is below this confidence threshold, send the image query for human review. + metadata: + type: object + nullable: true + description: + A dictionary of custom key/value metadata to associate with the detector + (limited to 1KB). This is returned as a JSON object. required: - created_at - group_name @@ -303,6 +309,12 @@ components: nullable: true description: (Advanced usage) Configuration to instantiate a specific prediction pipeline. maxLength: 8192 + metadata: + type: string + nullable: true + description: + A dictionary of custom key/value metadata to associate with the detector + (limited to 1KB). This is encoded as a URL-safe, base64-encoded JSON string. required: # TODO: make name optional - that's how the web version is going.
- name diff --git a/src/groundlight/client.py b/src/groundlight/client.py index 2cfc283a..244a4c0d 100644 --- a/src/groundlight/client.py +++ b/src/groundlight/client.py @@ -187,6 +187,7 @@ def create_detector( *, confidence_threshold: Optional[float] = None, pipeline_config: Optional[str] = None, + metadata: Union[dict, str, None] = None, ) -> Detector: """ Create a new detector with a given name and query @@ -199,6 +200,10 @@ def create_detector( :param pipeline_config: the pipeline config + :param metadata: A dictionary or JSON string of custom key/value metadata to associate with + the detector (limited to 1KB). You can retrieve this metadata later by calling + `get_detector()`. + :return: Detector """ detector_creation_input = DetectorCreationInput(name=name, query=query) @@ -206,6 +211,8 @@ def create_detector( detector_creation_input.confidence_threshold = confidence_threshold if pipeline_config is not None: detector_creation_input.pipeline_config = pipeline_config + if metadata is not None: + detector_creation_input.metadata = str(url_encode_dict(metadata, name="metadata", size_limit_bytes=1024)) obj = self.detectors_api.create_detector(detector_creation_input) return Detector.parse_obj(obj.to_dict()) @@ -216,6 +223,7 @@ def get_or_create_detector( *, confidence_threshold: Optional[float] = None, pipeline_config: Optional[str] = None, + metadata: Union[dict, str, None] = None, ) -> Detector: """ Tries to look up the detector by name. If a detector with that name, query, and @@ -230,6 +238,10 @@ def get_or_create_detector( :param pipeline_config: the pipeline config + :param metadata: A dictionary or JSON string of custom key/value metadata to associate with + the detector (limited to 1KB). You can retrieve this metadata later by calling + `get_detector()`. 
+ :return: Detector """ try: @@ -241,6 +253,7 @@ def get_or_create_detector( query=query, confidence_threshold=confidence_threshold, pipeline_config=pipeline_config, + metadata=metadata, ) # TODO: We may soon allow users to update the retrieved detector's fields. diff --git a/src/groundlight/encodings.py b/src/groundlight/encodings.py index 36adbe3a..168a71bf 100644 --- a/src/groundlight/encodings.py +++ b/src/groundlight/encodings.py @@ -1,6 +1,5 @@ import base64 import json -import sys from typing import Dict, Optional, Union @@ -34,7 +33,7 @@ def url_encode_dict(maybe_dict: Union[Dict, str], name: str, size_limit_bytes: O data_json = json.dumps(maybe_dict) if size_limit_bytes is not None: - size_bytes = sys.getsizeof(data_json) + size_bytes = len(data_json) if size_bytes > size_limit_bytes: raise ValueError(f"`{name}` is too large: {size_bytes} bytes > {size_limit_bytes} bytes limit.") diff --git a/test/integration/test_groundlight.py b/test/integration/test_groundlight.py index 8f74d391..ee24e6c5 100644 --- a/test/integration/test_groundlight.py +++ b/test/integration/test_groundlight.py @@ -2,6 +2,8 @@ # ruff: noqa: F403,F405 # pylint: disable=wildcard-import,unused-wildcard-import,redefined-outer-name,import-outside-toplevel import json +import random +import string import time from datetime import datetime from typing import Any, Dict, Optional, Union @@ -28,6 +30,27 @@ def is_valid_display_result(result: Any) -> bool: return True +def generate_random_dict(target_size_bytes=1024, key_length=8, value_length=10) -> Dict[str, str]: + """ + Generate a random dictionary with an approximate size in bytes. 
+ """ + key_chars = string.ascii_lowercase + string.digits + value_chars = string.ascii_letters + string.digits + + random_dict: Dict[str, str] = {} + while len(json.dumps(random_dict).encode("utf-8")) < target_size_bytes: + key = "".join(random.choice(key_chars) for _ in range(key_length)) + value = "".join(random.choice(value_chars) for _ in range(value_length)) + random_dict[key] = value + + # Check if adding another pair would likely exceed the size + # The 4 only covers the quotes around the key and value; the ": " and ", " separators add ~4 more, so this is a rough check + if len(json.dumps(random_dict).encode("utf-8")) + key_length + value_length + 4 > target_size_bytes: + break + + return random_dict + + def is_valid_display_label(label: str) -> bool: """Is the image query result label valid to display to the user?.""" # NOTE: For now, we strictly only show UPPERCASE labels to the user. @@ -252,7 +275,50 @@ def test_submit_image_query_with_human_review_param(gl: Groundlight, detector: D assert is_valid_display_result(_image_query.result) -@pytest.mark.skip_for_edge_endpoint(reason="The edge-endpoint does not support passing metadata.") +@pytest.mark.skip_for_edge_endpoint(reason="The edge-endpoint does not support passing detector metadata.") +def test_create_detector_with_metadata(gl: Groundlight): + name = f"Test {datetime.utcnow()}" # Need a unique name + query = "Is there a dog?" + metadata = generate_random_dict(target_size_bytes=200) + detector = gl.create_detector(name=name, query=query, metadata=metadata) + assert detector.metadata == metadata + + retrieved_detector = gl.get_detector(id=detector.id) + assert retrieved_detector.metadata == metadata + + +@pytest.mark.skip_for_edge_endpoint(reason="The edge-endpoint does not support passing detector metadata.") +def test_get_or_create_detector_with_metadata(gl: Groundlight): + unique_name = f"Unique name {datetime.utcnow()}" + query = "Is there a dog?"
+ metadata = generate_random_dict(target_size_bytes=200) + detector = gl.get_or_create_detector(name=unique_name, query=query, metadata=metadata) + assert detector.metadata == metadata + + retrieved_detector = gl.get_or_create_detector(name=unique_name, query=query, metadata=metadata) + assert retrieved_detector.id == detector.id + assert retrieved_detector.metadata == metadata + + +@pytest.mark.skip_for_edge_endpoint(reason="The edge-endpoint does not support passing detector metadata.") +@pytest.mark.parametrize( + "metadata_list", + [ + [generate_random_dict(target_size_bytes=3000)], + ["this is not valid JSON"], + [""], + ], +) +def test_create_detector_with_invalid_metadata(gl: Groundlight, metadata_list: Any): + name = f"Test {datetime.utcnow()}" # Need a unique name + query = "Is there a dog?" + + for metadata in metadata_list: + with pytest.raises((TypeError, ValueError)): + gl.create_detector(name=name, query=query, metadata=metadata) + + +@pytest.mark.skip_for_edge_endpoint(reason="The edge-endpoint does not support passing image query metadata.") @pytest.mark.parametrize("metadata", [None, {}, {"a": 1}, '{"a": 1}']) def test_submit_image_query_with_metadata( gl: Groundlight, detector: Detector, image: str, metadata: Union[Dict, str, None]