From f8f32455924406568284ea660e65ffa4e855f3b4 Mon Sep 17 00:00:00 2001 From: Tyler Romero Date: Wed, 18 Oct 2023 14:16:03 -0700 Subject: [PATCH 1/9] Improve the plumbing for some of the ask* methods --- .../1-grabbing-images.md | 6 +- .../3-managing-confidence.md | 10 +++ src/groundlight/client.py | 74 ++++++++++++++----- 3 files changed, 70 insertions(+), 20 deletions(-) diff --git a/docs/docs/building-applications/1-grabbing-images.md b/docs/docs/building-applications/1-grabbing-images.md index b8a34f60..0f06f9af 100644 --- a/docs/docs/building-applications/1-grabbing-images.md +++ b/docs/docs/building-applications/1-grabbing-images.md @@ -4,12 +4,12 @@ sidebar_position: 1 # Grabbing Images -Groundlight's SDK accepts images in many popular formats, including PIL, OpenCV, and numpy arrays. +Groundlight's SDK accepts images in many popular formats, including PIL, OpenCV, and numpy arrays. ## PIL -The Groundlight SDK can accept PIL images directly in `submit_image_query`. Here's an example: +The Groundlight SDK can accept PIL images directly in `submit_image_query` (or its derivatives: `ask_ml`, `ask_confident`, `ask_async`). Here's an example: ```python from groundlight import Groundlight @@ -60,7 +60,7 @@ gl.submit_image_query(detector, np_img) Groundlight expects images in BGR order, because this is standard for OpenCV, which uses numpy arrays as image storage. (OpenCV uses BGR because it was originally developed decades ago for compatibility with the BGR color format used by many cameras and image processing hardware at the time of its creation.) Most other image libraries use RGB order, so if you are using images as numpy arrays which did not originate from OpenCV you likely need to reverse the channel order before sending the images to Groundlight. -Note this change was made in v0.8 of the Groundlight SDK - in previous versions, RGB order was expected. +Note this change was made in v0.8 of the Groundlight SDK - in previous versions, RGB order was expected. 
If you have an RGB array, you must reverse the channel order before sending it to Groundlight, like: diff --git a/docs/docs/building-applications/3-managing-confidence.md b/docs/docs/building-applications/3-managing-confidence.md index 757ce8bd..44c126c9 100644 --- a/docs/docs/building-applications/3-managing-confidence.md +++ b/docs/docs/building-applications/3-managing-confidence.md @@ -38,6 +38,16 @@ Higher confidence also requires more labels, which increases labor costs. ::: +If you want to submit image queries without waiting for confidence to reach your detector's threshold, use the +`ask_ml` or `ask_async` methods. `ask_ml` will wait for the first available prediction from an ML model, but will not wait +for confident results via escalation to a human reviewer. `ask_async` will return immediately with a placeholder response, +and the user can poll `get_image_query` to see when the results are ready. In both cases, image queries which are below the desired confidence level will still be escalated for further analysis, and the results are incorporated as training data to improve your ML model, but your code will not wait for that to happen. + + +```python notest continuation + +``` + Or if you want to execute `submit_image_query` as fast as possible, set `wait=0`. You will either get the ML results or a placeholder response if the ML model hasn't finished executing. Image queries which are below the desired confidence level will still be escalated for further analysis, and the results are incorporated as training data to improve your ML model, but your code will not wait for that to happen. 
```python notest continuation diff --git a/src/groundlight/client.py b/src/groundlight/client.py index c2c3b508..c1ecb9dc 100644 --- a/src/groundlight/client.py +++ b/src/groundlight/client.py @@ -1,3 +1,4 @@ +from functools import partial import logging import os import time @@ -304,9 +305,20 @@ def submit_image_query( # noqa: PLR0913 # pylint: disable=too-many-arguments, t Any pixel format will get converted to JPEG at high quality before sending to service. :type image: str or bytes or Image.Image or BytesIO or BufferedReader or np.ndarray - :param wait: How long to wait (in seconds) for a confident answer. + :param wait: How long to poll (in seconds) for a confident answer in this method. This is a client-side timeout. :type wait: float + :param patience_time: How long to wait (in seconds) for a confident answer for this image query. + The longer the patience_time, the more likely Groundlight will arrive at a confident answer. + Within patience_time, Groundlight will update ML predictions based on stronger findings, + and, additionally, Groundlight will prioritize human review of the image query if necessary. + This is a soft server-side timeout. If not set, use the detector's patience_time. + :type patience_time: float + + :param confidence_threshold: The confidence threshold to wait for. + If not set, use the detector's confidence threshold. + :type confidence_threshold: float + :param human_review: If `None` or `DEFAULT`, send the image query for human review only if the ML prediction is not confident. If set to `ALWAYS`, always send the image query for human review. @@ -375,8 +387,10 @@ def ask_confident( confidence_threshold: Optional[float] = None, wait: Optional[float] = None, ) -> ImageQuery: - """Evaluates an image with Groundlight waiting until an answer above the confidence threshold - of the detector is reached or the wait period has passed. 
+ """ + Evaluates an image with Groundlight waiting until an answer above the confidence threshold + of the detector is reached or the wait period has passed. + :param detector: the Detector object, or string id of a detector like `det_12345` :type detector: Detector or str @@ -405,6 +419,8 @@ def ask_confident( image, confidence_threshold=confidence_threshold, wait=wait, + patience_time=wait, + human_review=None, ) def ask_ml( @@ -412,6 +428,7 @@ def ask_ml( detector: Union[Detector, str], image: Union[str, bytes, Image.Image, BytesIO, BufferedReader, np.ndarray], wait: Optional[float] = None, + # human_review: Optional[str] = None, ) -> ImageQuery: """Evaluates an image with Groundlight, getting the first answer Groundlight can provide. :param detector: the Detector object, or string id of a detector like `det_12345` @@ -447,8 +464,9 @@ def ask_async( self, detector: Union[Detector, str], image: Union[str, bytes, Image.Image, BytesIO, BufferedReader, np.ndarray], + patience_time: Optional[float] = None, + confidence_threshold: Optional[float] = None, human_review: Optional[str] = None, - inspection_id: Optional[str] = None, ) -> ImageQuery: """ Convenience method for submitting an `ImageQuery` asynchronously. This is equivalent to calling @@ -469,6 +487,17 @@ def ask_async( :type image: str or bytes or Image.Image or BytesIO or BufferedReader or np.ndarray + :param patience_time: How long to wait (in seconds) for a confident answer for this image query. + The longer the patience_time, the more likely Groundlight will arrive at a confident answer. + Within patience_time, Groundlight will update ML predictions based on stronger findings, + and, additionally, Groundlight will prioritize human review of the image query if necessary. + This is a soft server-side timeout. If not set, use the detector's patience_time. + :type patience_time: float + + :param confidence_threshold: The confidence threshold to wait for. + If not set, use the detector's confidence threshold. 
+ :type confidence_threshold: float + :param human_review: If `None` or `DEFAULT`, send the image query for human review only if the ML prediction is not confident. If set to `ALWAYS`, always send the image query for human review. @@ -500,26 +529,35 @@ def ask_async( assert image_query.id is not None # Do not attempt to access the result of this query as the result for all async queries - # will be None. Your result is being computed asynchronously and will be available - # later + # will be None. Your result is being computed asynchronously and will be available later assert image_query.result is None - # retrieve the result later or on another machine by calling gl.get_image_query() - # with the id of the image_query above - image_query = gl.get_image_query(image_query.id) + # retrieve the result later or on another machine by calling gl.wait_for_confident_result() + # with the id of the image_query above. This will block until the result is available. + image_query = gl.wait_for_confident_result(image_query.id) # now the result will be available for your use assert image_query.result is not None + # alternatively, you can check if the result is available (without blocking) by calling + # gl.get_image_query() with the id of the image_query above. This will return the image_query, + # but the result may still be None. 
+ image_query = gl.get_image_query(image_query.id) """ return self.submit_image_query( - detector, image, wait=0, human_review=human_review, want_async=True, inspection_id=inspection_id + detector, + image, + wait=0, + patience_time=patience_time, + confidence_threshold=confidence_threshold, + human_review=human_review, + want_async=True ) def wait_for_confident_result( self, image_query: Union[ImageQuery, str], - confidence_threshold: float, + confidence_threshold: Optional[float] = None, timeout_sec: float = 30.0, ) -> ImageQuery: """ @@ -529,7 +567,8 @@ def wait_for_confident_result( :param image_query: An ImageQuery object to poll :type image_query: ImageQuery or str - :param confidence_threshold: The minimum confidence level required to return before the timeout. + :param confidence_threshold: The confidence threshold to wait for. + If not set, use the detector's confidence threshold. :type confidence_threshold: float :param timeout_sec: The maximum number of seconds to wait. @@ -538,10 +577,13 @@ def wait_for_confident_result( :return: ImageQuery :rtype: ImageQuery """ + if isinstance(image_query, str): + image_query: ImageQuery = self.get_image_query(image_query) - def confidence_above_thresh(iq): - return iq_is_confident(iq, confidence_threshold=confidence_threshold) + if confidence_threshold is None: + confidence_threshold = self.get_detector(image_query.detector_id).confidence_threshold + confidence_above_thresh = partial(iq_is_confident, confidence_threshold=confidence_threshold) return self._wait_for_result(image_query, condition=confidence_above_thresh, timeout_sec=timeout_sec) def wait_for_ml_result(self, image_query: Union[ImageQuery, str], timeout_sec: float = 30.0) -> ImageQuery: @@ -551,9 +593,6 @@ def wait_for_ml_result(self, image_query: Union[ImageQuery, str], timeout_sec: f :param image_query: An ImageQuery object to poll :type image_query: ImageQuery or str - :param confidence_threshold: The minimum confidence level required to return before 
the timeout. - :type confidence_threshold: float - :param timeout_sec: The maximum number of seconds to wait. :type timeout_sec: float @@ -623,6 +662,7 @@ def add_label(self, image_query: Union[ImageQuery, str], label: Union[Label, str else: image_query_id = str(image_query) # Some old imagequery id's started with "chk_" + # TODO: handle iqe if not image_query_id.startswith(("chk_", "iq_")): raise ValueError(f"Invalid image query id {image_query_id}") api_label = convert_display_label_to_internal(image_query_id, label) From 5b7c63ec46649fc4f1488b19b1c7bb4c14330b4d Mon Sep 17 00:00:00 2001 From: Tyler Romero Date: Wed, 18 Oct 2023 14:25:54 -0700 Subject: [PATCH 2/9] Small doc fixup --- docs/docs/building-applications/5-async-queries.md | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/docs/docs/building-applications/5-async-queries.md b/docs/docs/building-applications/5-async-queries.md index d14704c2..3507f66e 100644 --- a/docs/docs/building-applications/5-async-queries.md +++ b/docs/docs/building-applications/5-async-queries.md @@ -16,7 +16,7 @@ from time import sleep detector = gl.get_or_create_detector(name="your_detector_name", query="your_query") -cam = cv2.VideoCapture(0) # Initialize camera (0 is the default index) +cam = cv2.VideoCapture(0) # Initialize camera (0 is the default index) while True: _, image = cam.read() # Capture one frame from the camera @@ -35,7 +35,7 @@ from groundlight import Groundlight detector = gl.get_or_create_detector(name="your_detector_name", query="your_query") -image_query_id = db.get_next_image_query_id() +image_query_id = db.get_next_image_query_id() while image_query_id is not None: image_query = gl.get_image_query(id=image_query_id) # retrieve the image query from Groundlight @@ -67,4 +67,7 @@ result = image_query.result # This will always be 'None' as you asked asynchron image_query = gl.get_image_query(id=image_query.id) # Immediately retrieve the image query from Groundlight result = 
image_query.result # This will likely be 'UNCLEAR' as Groundlight is still processing your query + +image_query = gl.wait_for_confident_result(image_query.id) # Poll for a confident result from Groundlight +result = image_query.result ``` \ No newline at end of file From 35013f7ee0d1685f8bcba8316f60c7bbf995df17 Mon Sep 17 00:00:00 2001 From: Auto-format Bot Date: Wed, 18 Oct 2023 21:26:52 +0000 Subject: [PATCH 3/9] Automatically reformatting code --- src/groundlight/client.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/groundlight/client.py b/src/groundlight/client.py index c1ecb9dc..59435d55 100644 --- a/src/groundlight/client.py +++ b/src/groundlight/client.py @@ -1,7 +1,7 @@ -from functools import partial import logging import os import time +from functools import partial from io import BufferedReader, BytesIO from typing import Callable, Optional, Union @@ -551,7 +551,7 @@ def ask_async( patience_time=patience_time, confidence_threshold=confidence_threshold, human_review=human_review, - want_async=True + want_async=True, ) def wait_for_confident_result( From c661b67eeee3119f5b771c5ca8666ccb7430cf43 Mon Sep 17 00:00:00 2001 From: Tyler Romero Date: Wed, 18 Oct 2023 14:33:15 -0700 Subject: [PATCH 4/9] Fixups --- .../building-applications/3-managing-confidence.md | 10 ---------- src/groundlight/client.py | 5 +++-- 2 files changed, 3 insertions(+), 12 deletions(-) diff --git a/docs/docs/building-applications/3-managing-confidence.md b/docs/docs/building-applications/3-managing-confidence.md index 44c126c9..757ce8bd 100644 --- a/docs/docs/building-applications/3-managing-confidence.md +++ b/docs/docs/building-applications/3-managing-confidence.md @@ -38,16 +38,6 @@ Higher confidence also requires more labels, which increases labor costs. ::: -If you want to submit image queries without waiting for confidence to reach your detector's threshold, use the -`ask_ml` or `ask_async` methods. 
`ask_ml` will wait for the first available prediction from an ML model, but will not wait -for confident results via escalation to a human reviewer. `ask_async` will return immediately with a placeholder response, -and the user can poll `get_image_query` to see when the results are ready. In both cases, image queries which are below the desired confidence level will still be escalated for further analysis, and the results are incorporated as training data to improve your ML model, but your code will not wait for that to happen. - - -```python notest continuation - -``` - Or if you want to execute `submit_image_query` as fast as possible, set `wait=0`. You will either get the ML results or a placeholder response if the ML model hasn't finished executing. Image queries which are below the desired confidence level will still be escalated for further analysis, and the results are incorporated as training data to improve your ML model, but your code will not wait for that to happen. ```python notest continuation diff --git a/src/groundlight/client.py b/src/groundlight/client.py index 59435d55..6a5761d8 100644 --- a/src/groundlight/client.py +++ b/src/groundlight/client.py @@ -428,9 +428,10 @@ def ask_ml( detector: Union[Detector, str], image: Union[str, bytes, Image.Image, BytesIO, BufferedReader, np.ndarray], wait: Optional[float] = None, - # human_review: Optional[str] = None, ) -> ImageQuery: - """Evaluates an image with Groundlight, getting the first answer Groundlight can provide. + """ + Evaluates an image with Groundlight, getting the first answer Groundlight can provide. 
+ :param detector: the Detector object, or string id of a detector like `det_12345` :type detector: Detector or str From 54885dd54168a71380239fc04567e1dba3272fa2 Mon Sep 17 00:00:00 2001 From: Tyler Romero Date: Wed, 18 Oct 2023 14:37:44 -0700 Subject: [PATCH 5/9] Touch ups --- docs/docs/building-applications/1-grabbing-images.md | 6 +++--- src/groundlight/client.py | 7 +++---- 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/docs/docs/building-applications/1-grabbing-images.md b/docs/docs/building-applications/1-grabbing-images.md index 0f06f9af..b8a34f60 100644 --- a/docs/docs/building-applications/1-grabbing-images.md +++ b/docs/docs/building-applications/1-grabbing-images.md @@ -4,12 +4,12 @@ sidebar_position: 1 # Grabbing Images -Groundlight's SDK accepts images in many popular formats, including PIL, OpenCV, and numpy arrays. +Groundlight's SDK accepts images in many popular formats, including PIL, OpenCV, and numpy arrays. ## PIL -The Groundlight SDK can accept PIL images directly in `submit_image_query` (or its derivatives: `ask_ml`, `ask_confident`, `ask_async`). Here's an example: +The Groundlight SDK can accept PIL images directly in `submit_image_query`. Here's an example: ```python from groundlight import Groundlight @@ -60,7 +60,7 @@ gl.submit_image_query(detector, np_img) Groundlight expects images in BGR order, because this is standard for OpenCV, which uses numpy arrays as image storage. (OpenCV uses BGR because it was originally developed decades ago for compatibility with the BGR color format used by many cameras and image processing hardware at the time of its creation.) Most other image libraries use RGB order, so if you are using images as numpy arrays which did not originate from OpenCV you likely need to reverse the channel order before sending the images to Groundlight. -Note this change was made in v0.8 of the Groundlight SDK - in previous versions, RGB order was expected. 
+Note this change was made in v0.8 of the Groundlight SDK - in previous versions, RGB order was expected. If you have an RGB array, you must reverse the channel order before sending it to Groundlight, like: diff --git a/src/groundlight/client.py b/src/groundlight/client.py index 6a5761d8..e2f38b8a 100644 --- a/src/groundlight/client.py +++ b/src/groundlight/client.py @@ -305,7 +305,7 @@ def submit_image_query( # noqa: PLR0913 # pylint: disable=too-many-arguments, t Any pixel format will get converted to JPEG at high quality before sending to service. :type image: str or bytes or Image.Image or BytesIO or BufferedReader or np.ndarray - :param wait: How long to poll (in seconds) for a confident answer in this method. This is a client-side timeout. + :param wait: How long to poll (in seconds) for a confident answer. This is a client-side timeout. :type wait: float :param patience_time: How long to wait (in seconds) for a confident answer for this image query. @@ -578,10 +578,9 @@ def wait_for_confident_result( :return: ImageQuery :rtype: ImageQuery """ - if isinstance(image_query, str): - image_query: ImageQuery = self.get_image_query(image_query) - if confidence_threshold is None: + if isinstance(image_query, str): + image_query: ImageQuery = self.get_image_query(image_query) confidence_threshold = self.get_detector(image_query.detector_id).confidence_threshold confidence_above_thresh = partial(iq_is_confident, confidence_threshold=confidence_threshold) From 945a4bc2d2467a450eb52a64058ce379ef07b32d Mon Sep 17 00:00:00 2001 From: Tyler Romero Date: Wed, 18 Oct 2023 14:43:55 -0700 Subject: [PATCH 6/9] lint fixes --- src/groundlight/client.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/groundlight/client.py b/src/groundlight/client.py index e2f38b8a..016ea92d 100644 --- a/src/groundlight/client.py +++ b/src/groundlight/client.py @@ -461,7 +461,7 @@ def ask_ml( wait = self.DEFAULT_WAIT if wait is None else wait return 
self.wait_for_ml_result(iq, timeout_sec=wait) - def ask_async( + def ask_async( # noqa: PLR0913 # pylint: disable=too-many-arguments self, detector: Union[Detector, str], image: Union[str, bytes, Image.Image, BytesIO, BufferedReader, np.ndarray], @@ -580,7 +580,7 @@ def wait_for_confident_result( """ if confidence_threshold is None: if isinstance(image_query, str): - image_query: ImageQuery = self.get_image_query(image_query) + image_query = self.get_image_query(image_query) confidence_threshold = self.get_detector(image_query.detector_id).confidence_threshold confidence_above_thresh = partial(iq_is_confident, confidence_threshold=confidence_threshold) From 867ffac1bc09682f28cb650ec65b8154f7e7ed3d Mon Sep 17 00:00:00 2001 From: Tyler Romero Date: Wed, 18 Oct 2023 14:57:37 -0700 Subject: [PATCH 7/9] Update src/groundlight/client.py Co-authored-by: Sunil Kumar --- src/groundlight/client.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/groundlight/client.py b/src/groundlight/client.py index 016ea92d..0ed0f9b6 100644 --- a/src/groundlight/client.py +++ b/src/groundlight/client.py @@ -542,7 +542,7 @@ def ask_async( # noqa: PLR0913 # pylint: disable=too-many-arguments # alternatively, you can check if the result is available (without blocking) by calling # gl.get_image_query() with the id of the image_query above. This will return the image_query, - # but the result may still be None. + # but the result may still be None or UNCLEAR. 
image_query = gl.get_image_query(image_query.id) """ return self.submit_image_query( From 76212eb466fb89c8fb75b8ccd988dd958eab0e81 Mon Sep 17 00:00:00 2001 From: Tyler Romero Date: Wed, 18 Oct 2023 15:00:26 -0700 Subject: [PATCH 8/9] address PR --- src/groundlight/client.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/groundlight/client.py b/src/groundlight/client.py index 0ed0f9b6..081b2f71 100644 --- a/src/groundlight/client.py +++ b/src/groundlight/client.py @@ -541,8 +541,7 @@ def ask_async( # noqa: PLR0913 # pylint: disable=too-many-arguments assert image_query.result is not None # alternatively, you can check if the result is available (without blocking) by calling - # gl.get_image_query() with the id of the image_query above. This will return the image_query, - # but the result may still be None or UNCLEAR. + # gl.get_image_query() with the id of the image_query above. image_query = gl.get_image_query(image_query.id) """ return self.submit_image_query( @@ -662,7 +661,7 @@ def add_label(self, image_query: Union[ImageQuery, str], label: Union[Label, str else: image_query_id = str(image_query) # Some old imagequery id's started with "chk_" - # TODO: handle iqe + # TODO: handle iqe_ for image_queries returned from edge endpoints if not image_query_id.startswith(("chk_", "iq_")): raise ValueError(f"Invalid image query id {image_query_id}") api_label = convert_display_label_to_internal(image_query_id, label) From 8de7f3abe6ce0f972096d81943e24ae773ba13d3 Mon Sep 17 00:00:00 2001 From: Tyler Romero Date: Wed, 18 Oct 2023 15:44:43 -0700 Subject: [PATCH 9/9] increment version number --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 5ac44064..ebecead3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -9,7 +9,7 @@ packages = [ {include = "**/*.py", from = "src"}, ] readme = "README.md" -version = "0.12.0" +version = "0.12.1" [tool.poetry.dependencies] # For certifi, 
use ">=" instead of "^" since it upgrades its "major version" every year, not really following semver