-
Notifications
You must be signed in to change notification settings - Fork 3
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Utilize source field for confidence and answered #264
Merged
+142
−98
Merged
Changes from all commits
Commits
Show all changes
8 commits
Select commit
Hold shift + click to select a range
9c38fa4
Utilize source field for confidence and answered
brandon-groundlight 3f83237
Automatically reformatting code
3bae22d
fix typo
brandon-groundlight 202c221
Merge branch 'use_source_for_confidence' of github.com:groundlight/py…
brandon-groundlight 0ad673f
appease the linting gods
brandon-groundlight c516b3b
bump version to push the fix
brandon-groundlight 5f0a7f8
another fix got out first, bumping verison
brandon-groundlight 0d7043d
Merge branch 'main' into use_source_for_confidence
brandon-groundlight File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,135 @@ | ||
""" | ||
We collect various expensive tests here. These tests should not be run regularly. | ||
""" | ||
|
||
# Optional star-imports are weird and not usually recommended ... | ||
# ruff: noqa: F403,F405 | ||
# pylint: disable=wildcard-import,unused-wildcard-import,redefined-outer-name,import-outside-toplevel | ||
import random | ||
import time | ||
from datetime import datetime | ||
|
||
import pytest | ||
from groundlight import Groundlight | ||
from groundlight.internalapi import iq_is_answered, iq_is_confident | ||
from groundlight.optional_imports import * | ||
from model import ( | ||
Detector, | ||
) | ||
|
||
# Confidence threshold of 0.9; the same value test_human_label checks its answer against. | ||
DEFAULT_CONFIDENCE_THRESHOLD = 0.9 | ||
# Lower bound test_ask_method_quality requires of a non-None ask_confident confidence. | ||
IQ_IMPROVEMENT_THRESHOLD = 0.75 | ||
|
||
|
||
@pytest.fixture(name="gl")
def fixture_gl() -> Groundlight:
    """Build the Groundlight client shared by the tests in this module.

    The client's ``DEFAULT_WAIT`` attribute is set to 10 before it is returned.
    """
    client = Groundlight()
    client.DEFAULT_WAIT = 10
    return client
|
||
|
||
@pytest.mark.skip(reason="This test requires a human labeler who does not need to be in the testing loop")
def test_human_label(gl: Groundlight):
    """Submit a query with human review forced on and wait for a non-algorithm answer.

    A fresh detector is created, a dog image is submitted with
    ``human_review="ALWAYS"``, and the image query is polled every
    ``sleep_time`` seconds for at most ``total_time`` seconds. The test fails
    if the result is still sourced from "ALGORITHM" or still labelled
    "STILL_PROCESSING" once that budget is spent; otherwise the final query
    must be answered and confident.
    """
    detector = gl.create_detector(name=f"Test {datetime.utcnow()}", query="Is there a dog?")
    img_query = gl.submit_image_query(
        detector=detector.id, image="test/assets/dog.jpeg", wait=60, human_review="ALWAYS"
    )

    def awaiting_human(iq) -> bool:
        """Return True while the result is still algorithmic or unfinished."""
        return iq.result.source == "ALGORITHM" or iq.result.label == "STILL_PROCESSING"

    sleep_time = 5
    total_time = 60
    max_polls = total_time // sleep_time

    polls = 0
    # The state is re-checked *before* giving up, so an answer that arrives on
    # the very last poll is accepted instead of being reported as a timeout.
    while awaiting_human(img_query):
        if polls >= max_polls:
            # pytest.fail (unlike `assert False`) still fires under `python -O`.
            pytest.fail(f"Human review is taking too long: {img_query}")
        time.sleep(sleep_time)
        img_query = gl.get_image_query(img_query.id)
        polls += 1

    assert iq_is_answered(img_query)
    assert iq_is_confident(img_query, confidence_threshold=DEFAULT_CONFIDENCE_THRESHOLD)
|
||
|
||
@pytest.mark.skip(reason="This test can block development depending on the state of the service")
@pytest.mark.skipif(MISSING_PIL, reason="Needs pillow")  # type: ignore
def test_detector_improvement(gl: Groundlight):
    """Check that a new detector gives usable answers after a few labelled images.

    Two noisy dog images labelled "YES" and two noisy cat images labelled "NO"
    are submitted to a freshly created detector. The test then waits in
    ``wait_period``-second steps, up to ``num_wait_periods`` times, submitting
    one unlabelled dog and one unlabelled cat after each wait. It passes as
    soon as neither query reports a confidence below ``result_confidence`` and
    neither carries the wrong label (cat "YES" / dog "NO"); it fails if that
    never happens.
    """
    # Pillow is optional, so it is imported only once the skipif guard has passed.
    from PIL import Image, ImageEnhance

    random.seed(2741)

    name = f"Test test_detector_improvement {datetime.utcnow()}"  # Need a unique name
    query = "Is there a dog?"
    detector = gl.create_detector(name=name, query=query)

    wait_period = 30  # seconds
    num_wait_periods = 4  # 2 minutes total
    result_confidence = 0.6

    def submit_noisy_image(image, label=None):
        """Submit a randomly perturbed copy of ``image``; attach ``label`` when given."""
        # The four random.uniform draws keep their original order so the seeded
        # sequence of perturbations is unchanged.
        noisy_image = ImageEnhance.Sharpness(image).enhance(random.uniform(0.75, 1.25))
        noisy_image = ImageEnhance.Color(noisy_image).enhance(random.uniform(0.75, 1))
        noisy_image = ImageEnhance.Contrast(noisy_image).enhance(random.uniform(0.75, 1))
        noisy_image = ImageEnhance.Brightness(noisy_image).enhance(random.uniform(0.75, 1))
        img_query = gl.submit_image_query(detector=detector.id, image=noisy_image, wait=0, human_review="NEVER")
        if label is not None:
            gl.add_label(img_query, label)
        return img_query

    def is_acceptable(img_query, wrong_label) -> bool:
        """Return True unless the query is under-confident or carries ``wrong_label``."""
        confidence = img_query.result.confidence
        # Compare against None explicitly: a reported confidence of 0.0 is
        # falsy but is very much "not confident enough".
        if confidence is not None and confidence < result_confidence:
            return False
        return img_query.result.label != wrong_label

    new_dog_query = None
    new_cat_query = None
    # Context managers make sure both image files are closed when the test ends.
    with Image.open("test/assets/dog.jpeg") as dog, Image.open("test/assets/cat.jpeg") as cat:
        # Pass two of each type of image in
        submit_noisy_image(dog, "YES")
        submit_noisy_image(dog, "YES")
        submit_noisy_image(cat, "NO")
        submit_noisy_image(cat, "NO")

        # wait to give enough time to train
        for _ in range(num_wait_periods):
            time.sleep(wait_period)
            new_dog_query = submit_noisy_image(dog)
            new_cat_query = submit_noisy_image(cat)

            if is_acceptable(new_cat_query, "YES") and is_acceptable(new_dog_query, "NO"):
                return
            # Otherwise the new queries are not good enough yet; try again.

    pytest.fail(
        f"The detector {detector} quality has not improved after two minutes q.v. {new_dog_query}, {new_cat_query}"
    )
|
||
|
||
@pytest.mark.skip(
    reason="We don't yet have an SLA level to test ask_confident against, and the test is flakey as a result"
)
def test_ask_method_quality(gl: Groundlight, detector: Detector):
    """Exercise ask_ml and ask_confident and check the quality of what they return.

    ask_ml (``wait=0``) must give an answered query both for the ``detector``
    fixture and for a newly created detector. ask_confident on the new
    detector must then return either no confidence value or one above
    ``IQ_IMPROVEMENT_THRESHOLD``.
    """
    dog_image = "test/assets/dog.jpeg"

    # Fixture detector: a zero-wait ML answer should already count as answered.
    fixture_iq = gl.ask_ml(detector=detector.id, image=dog_image, wait=0)
    assert iq_is_answered(fixture_iq)

    # Brand-new detector; the timestamp keeps the name unique.
    fresh_detector = gl.create_detector(
        name=f"Test {datetime.utcnow()}",
        query="Is there a dog?",
        confidence_threshold=0.8,
    )

    quick_iq = gl.ask_ml(detector=fresh_detector.id, image=dog_image, wait=0)
    assert iq_is_answered(quick_iq)

    confident_iq = gl.ask_confident(detector=fresh_detector.id, image=dog_image, wait=180)
    confidence = confident_iq.result.confidence
    assert confidence is None or confidence > IQ_IMPROVEMENT_THRESHOLD
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
not related to your PR but could you change subtletie to subtlety above?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
A subtle change