From 8e043b667a33d790089279243487cd14825703f4 Mon Sep 17 00:00:00 2001 From: emmaamblard Date: Mon, 16 Jan 2023 15:56:11 +0000 Subject: [PATCH 1/8] test prediction on predicted objects with same class but new score --- QA/py/tests/test_classification.py | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/QA/py/tests/test_classification.py b/QA/py/tests/test_classification.py index 3a518c03..d4fe0817 100644 --- a/QA/py/tests/test_classification.py +++ b/QA/py/tests/test_classification.py @@ -29,6 +29,7 @@ def _prj_query(fastapi, auth, prj_id, **kwargs): OBJECT_SET_DELETE_URL = "/object_set/" OBJECT_SET_SUMMARY_URL = "/object_set/{project_id}/summary?only_total=False" OBJECT_SET_PARENTS_URL = "/object_set/parents" +OBJECT_QUERY_URL = "/object/{object_id}" PROJECT_SET_USER_STATS = "/project_set/user_stats?ids={prj_ids}" @@ -60,10 +61,11 @@ def classify_all(fastapi, obj_ids, classif_id): assert rsp.status_code == status.HTTP_200_OK -def classify_auto_all(fastapi, obj_ids, classif_id): +def classify_auto_all(fastapi, obj_ids, classif_id, scores=None): url = OBJECT_SET_CLASSIFY_AUTO_URL classifications = [classif_id for _obj in obj_ids] - scores = [0.52 for _obj in obj_ids] + if not scores: + scores = [0.52 for _obj in obj_ids] rsp = fastapi.post(url, headers=ADMIN_AUTH, json={"target_ids": obj_ids, "classifications": classifications, "scores": scores, @@ -187,6 +189,13 @@ def get_object_set_stats(): 'nb_validated': 0, 'projid': prj_id, 'used_taxa': [-1, crustacea]} + + # New ML results with a different score for the second object + classify_auto_all(fastapi, [obj_ids[1]], crustacea, [0.8]) + url = OBJECT_QUERY_URL.format(object_id=obj_ids[1]) + rsp = fastapi.get(url, headers=ADMIN_AUTH) + assert rsp.status_code == status.HTTP_200_OK + assert rsp.json()['classif_auto_score'] == 0.8 # Admin (me!) thinks that all is a copepod :) classify_all(fastapi, obj_ids, copepod_id) From b52b8fbd4cc1192c277c012cb17317fe9501d395 Mon Sep 17 00:00:00 2001 From: emmaamblard Date: Tue, 17 Jan 2023 15:17:11 +0000 Subject: [PATCH 2/8] test reset to predicted on validated objects --- QA/py/tests/test_classification.py | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/QA/py/tests/test_classification.py b/QA/py/tests/test_classification.py index d4fe0817..7d07c23c 100644 --- a/QA/py/tests/test_classification.py +++ b/QA/py/tests/test_classification.py @@ -272,6 +272,19 @@ def classify_all_no_change(classif_id): 'used_taxa': [ 25835]}] # <- copepod is gone, unclassified as well, replaced with entomobryomorpha + # Reset to predicted on validated objects + url = OBJECT_SET_RESET_PREDICTED_URL.format(project_id=prj_id) + rsp = fastapi.post(url, headers=ADMIN_AUTH, json={}) + assert rsp.status_code == status.HTTP_200_OK + stats = rsp.json() + + assert get_stats(fastapi, prj_id) == {'nb_dubious': 0, + 'nb_predicted': 8, + 'nb_unclassified': 0, + 'nb_validated': 0, + 'projid': prj_id, + 'used_taxa': [25835]} + # Delete some object via API, why not? rsp = fastapi.delete(OBJECT_SET_DELETE_URL, headers=ADMIN_AUTH, json=obj_ids[:4]) assert rsp.status_code == status.HTTP_200_OK @@ -293,7 +306,7 @@ def classify_all_no_change(classif_id): ref_stats = [{"projid": prj_id, "annotators": [{"id": 1, "name": "Application Administrator"}], - "activities": [{"id": 1, "nb_actions": 8, + "activities": [{"id": 1, "nb_actions": 12, "last_annot": "2022-05-12T14:21:15"}]}] # Fix the date on both sides ref_stats[0]["activities"][0]["last_annot"] = "FIXED DATE" From 5455021c72ac88469d0819ba2eb9f8ff94f623a7 Mon Sep 17 00:00:00 2001 From: emmaamblard Date: Tue, 24 Jan 2023 13:52:35 +0000 Subject: [PATCH 3/8] test reverts --- QA/py/tests/test_classification.py | 43 +++++++++++++++++++++++++++++- 1 file changed, 42 insertions(+), 1 deletion(-) diff --git a/QA/py/tests/test_classification.py b/QA/py/tests/test_classification.py index 7d07c23c..7402f1ec 100644 --- a/QA/py/tests/test_classification.py +++ b/QA/py/tests/test_classification.py @@ -216,6 +216,34 @@ def get_object_set_stats(): assert classif == [ {'objid': obj_ids[0], 'classif_id': 12846, 'classif_date': 'now', 'classif_who': None, 'classif_type': 'A', 'classif_qual': 'P', 'classif_score': 0.52, 'user_name': None, 'taxon_name': 'Crustacea'}] + + # Revert on validated objects + url = OBJECT_SET_REVERT_URL.format(project_id=prj_id, dry_run=False, tgt_usr="") + rsp = fastapi.post(url, headers=ADMIN_AUTH, json={}) + assert rsp.status_code == status.HTTP_200_OK + stats = rsp.json() + + assert get_stats(fastapi, prj_id) == {'nb_dubious': 0, + 'nb_predicted': 4, + 'nb_unclassified': 4, + 'nb_validated': 0, + 'projid': prj_id, + 'used_taxa': [-1, crustacea]} + + # Second revert, should not change since the last record in history is the same + rsp = fastapi.post(url, headers=ADMIN_AUTH, json={}) + assert rsp.status_code == status.HTTP_200_OK + stats = rsp.json() + + assert get_stats(fastapi, prj_id) == {'nb_dubious': 0, + 'nb_predicted': 4, + 'nb_unclassified': 4, + 'nb_validated': 0, + 'projid': prj_id, + 'used_taxa': [-1, crustacea]} + + # Apply validation again after revert + classify_all(fastapi, obj_ids, copepod_id) # Not a copepod :( classify_all(fastapi, obj_ids, entomobryomorpha_id) @@ -271,7 +299,7 @@ def classify_all_no_change(classif_id): 'projid': prj_id, 'used_taxa': [ 25835]}] # <- copepod is gone, unclassified as well, replaced with entomobryomorpha - + # Reset to predicted on validated objects url = OBJECT_SET_RESET_PREDICTED_URL.format(project_id=prj_id) rsp = fastapi.post(url, headers=ADMIN_AUTH, json={}) @@ -284,6 +312,19 @@ def classify_all_no_change(classif_id): 'nb_validated': 0, 'projid': prj_id, 'used_taxa': [25835]} + + # Revert after reset to predicted + url = OBJECT_SET_REVERT_URL.format(project_id=prj_id, dry_run=False, tgt_usr="") + rsp = fastapi.post(url, headers=ADMIN_AUTH, json={}) + assert rsp.status_code == status.HTTP_200_OK + stats = rsp.json() + + assert get_stats(fastapi, prj_id) == {'nb_dubious': 0, + 'nb_predicted': 0, + 'nb_unclassified': 0, + 'nb_validated': 8, + 'projid': prj_id, + 'used_taxa': [25835]} # Delete some object via API, why not? rsp = fastapi.delete(OBJECT_SET_DELETE_URL, headers=ADMIN_AUTH, json=obj_ids[:4]) From 42108538c2c8a5af227ed8f4ab5b7e8bf5421fc2 Mon Sep 17 00:00:00 2001 From: emmaamblard Date: Wed, 25 Jan 2023 09:41:59 +0000 Subject: [PATCH 4/8] test incorrect ML output --- QA/py/tests/test_classification.py | 31 ++++++++++++++++++++++++++++++ py/main.py | 2 ++ 2 files changed, 33 insertions(+) diff --git a/QA/py/tests/test_classification.py b/QA/py/tests/test_classification.py index 7402f1ec..530e1e91 100644 --- a/QA/py/tests/test_classification.py +++ b/QA/py/tests/test_classification.py @@ -3,6 +3,7 @@ # Copyright (C) 2015-2020 Picheral, Colin, Irisson (UPMC-CNRS) # import logging +import pytest from API_models.filters import ProjectFilters, ProjectFiltersDict from starlette import status @@ -71,6 +72,33 @@ def classify_auto_all(fastapi, obj_ids, classif_id, scores=None): "scores": scores, "keep_log": True}) assert rsp.status_code == status.HTTP_200_OK + + +def classify_auto_incorrect(fastapi, obj_ids): + url = OBJECT_SET_CLASSIFY_AUTO_URL + classifications = [-1 for _obj in obj_ids] + + # List of scores of a different length, should raise an error + scores = [0.1 for _obj in obj_ids[:-1]] + with pytest.raises(AssertionError): + rsp = fastapi.post(url, headers=ADMIN_AUTH, json={"target_ids": obj_ids, + "classifications": classifications, + "scores": scores, + "keep_log": True}) + # List of scores outside [0, 1], should raise an error + scores = [2. for _obj in obj_ids] + with pytest.raises(AssertionError): + rsp = fastapi.post(url, headers=ADMIN_AUTH, json={"target_ids": obj_ids, + "classifications": classifications, + "scores": scores, + "keep_log": True}) + # List of scores with wrong type, should fail + scores = [None for _obj in obj_ids] + rsp = fastapi.post(url, headers=ADMIN_AUTH, json={"target_ids": obj_ids, + "classifications": classifications, + "scores": scores, + "keep_log": True}) + assert rsp.status_code == status.HTTP_422_UNPROCESSABLE_ENTITY # Note: to go faster in a local dev environment, use "filled_database" instead of "database" below @@ -180,6 +208,9 @@ def get_object_set_stats(): rsp = fastapi.post(url, headers=ADMIN_AUTH, json={}) assert rsp.status_code == status.HTTP_200_OK + # Incorrect ML results + classify_auto_incorrect(fastapi, obj_ids[:4]) + # Super ML result, 4 first objects are crustacea classify_auto_all(fastapi, obj_ids[:4], crustacea) diff --git a/py/main.py b/py/main.py index 806bd9a9..975b0f2d 100644 --- a/py/main.py +++ b/py/main.py @@ -1570,6 +1570,8 @@ def classify_auto_object_set(req: ClassifyAutoReq = Body(...), """ assert len(req.target_ids) == len(req.classifications) == len(req.scores), \ "Need the same number of objects, classifications and scores" + assert all(isinstance(score, float) and 0 <= score <= 1 for score in req.scores), \ + "Scores should be floats between 0 and 1" with ObjectManager() as sce: with RightsThrower(): ret, prj_id, changes = sce.classify_auto_set(current_user, req.target_ids, req.classifications, req.scores, From 2359e7f012d566e0978ea4743ea24ed8ffb38124 Mon Sep 17 00:00:00 2001 From: emmaamblard Date: Wed, 25 Jan 2023 09:55:40 +0000 Subject: [PATCH 5/8] remove some hardcoded ids --- QA/py/tests/test_classification.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/QA/py/tests/test_classification.py b/QA/py/tests/test_classification.py index 530e1e91..dbbcff0a 100644 --- a/QA/py/tests/test_classification.py +++ b/QA/py/tests/test_classification.py @@ -237,7 +237,7 @@ def get_object_set_stats(): 'nb_unclassified': 0, 'nb_validated': 8, 'projid': prj_id, - 'used_taxa': [25828]} # No more Unclassified and Copepod is in + + 'used_taxa': [copepod_id]} # No more Unclassified and Copepod is in + # No history yet as the object was just created classif = classif_history(fastapi, obj_ids[0]) @@ -245,7 +245,7 @@ def get_object_set_stats(): assert classif[0]['classif_date'] is not None # e.g. 2021-09-12T09:28:03.278626 classif[0]['classif_date'] = "now" assert classif == [ - {'objid': obj_ids[0], 'classif_id': 12846, 'classif_date': 'now', 'classif_who': None, + {'objid': obj_ids[0], 'classif_id': crustacea, 'classif_date': 'now', 'classif_who': None, 'classif_type': 'A', 'classif_qual': 'P', 'classif_score': 0.52, 'user_name': None, 'taxon_name': 'Crustacea'}] # Revert on validated objects @@ -295,7 +295,7 @@ def classify_all_no_change(classif_id): classif2[0]['classif_date'] = 'hopefully just now' classif2[1]['classif_date'] = 'a bit before' assert classif2 == [{'classif_date': 'hopefully just now', - 'classif_id': 25828, + 'classif_id': copepod_id, 'classif_qual': 'V', 'classif_score': None, 'classif_type': 'M', @@ -304,7 +304,7 @@ def classify_all_no_change(classif_id): 'taxon_name': 'Copepoda', 'user_name': 'Application Administrator'}, {'classif_date': 'a bit before', - 'classif_id': 12846, + 'classif_id': crustacea, 'classif_qual': 'P', 'classif_score': 0.52, 'classif_type': 'A', @@ -328,8 +328,8 @@ def classify_all_no_change(classif_id): 'nb_unclassified': 0, 'nb_validated': 8, 'projid': prj_id, - 'used_taxa': [ - 25835]}] # <- copepod is gone, unclassified as well, replaced with entomobryomorpha + 'used_taxa': + [entomobryomorpha_id]}] # <- copepod is gone, unclassified as well, replaced with entomobryomorpha # Reset to predicted on validated objects url = OBJECT_SET_RESET_PREDICTED_URL.format(project_id=prj_id) @@ -342,7 +342,7 @@ def classify_all_no_change(classif_id): 'nb_unclassified': 0, 'nb_validated': 0, 'projid': prj_id, - 'used_taxa': [25835]} + 'used_taxa': [entomobryomorpha_id]} # Revert after reset to predicted url = OBJECT_SET_REVERT_URL.format(project_id=prj_id, dry_run=False, tgt_usr="") @@ -355,7 +355,7 @@ def classify_all_no_change(classif_id): 'nb_unclassified': 0, 'nb_validated': 8, 'projid': prj_id, - 'used_taxa': [25835]} + 'used_taxa': [entomobryomorpha_id]} # Delete some object via API, why not? rsp = fastapi.delete(OBJECT_SET_DELETE_URL, headers=ADMIN_AUTH, json=obj_ids[:4]) From be0cd7f1e72ace9ec6cfc8a9b445719b5ebfc09b Mon Sep 17 00:00:00 2001 From: emmaamblard Date: Fri, 27 Jan 2023 10:28:26 +0000 Subject: [PATCH 6/8] check stats after second ML prediction --- QA/py/tests/test_classification.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/QA/py/tests/test_classification.py b/QA/py/tests/test_classification.py index dbbcff0a..1cb4c18a 100644 --- a/QA/py/tests/test_classification.py +++ b/QA/py/tests/test_classification.py @@ -227,6 +227,13 @@ def get_object_set_stats(): rsp = fastapi.get(url, headers=ADMIN_AUTH) assert rsp.status_code == status.HTTP_200_OK assert rsp.json()['classif_auto_score'] == 0.8 + + assert get_stats(fastapi, prj_id) == {'nb_dubious': 0, + 'nb_predicted': 4, + 'nb_unclassified': 4, + 'nb_validated': 0, + 'projid': prj_id, + 'used_taxa': [-1, crustacea]} # Admin (me!) thinks that all is a copepod :) classify_all(fastapi, obj_ids, copepod_id) From db43636d2fff147041345b30cd10c3cb1326a65f Mon Sep 17 00:00:00 2001 From: emmaamblard Date: Thu, 16 Feb 2023 14:38:13 +0000 Subject: [PATCH 7/8] add tests for BO/Prediction.py --- QA/py/tests/test_prediction.py | 58 +++++++++++++++++++++++++++++++++- 1 file changed, 57 insertions(+), 1 deletion(-) diff --git a/QA/py/tests/test_prediction.py b/QA/py/tests/test_prediction.py index 6cc84725..43d1ac21 100644 --- a/QA/py/tests/test_prediction.py +++ b/QA/py/tests/test_prediction.py @@ -1,9 +1,17 @@ import logging +import pytest +import pandas as pd +import numpy as np from starlette import status -from tests.credentials import ADMIN_AUTH +from tests.credentials import ADMIN_AUTH, CREATOR_AUTH from tests.test_jobs import get_job_and_wait_until_ok +from tests.test_classification import _prj_query + +from BO.Prediction import DeepFeatures + +from API_operations.CRUD.ObjectParents import SamplesService OBJECT_SET_PREDICT_URL = "/object_set/predict" @@ -37,3 +45,51 @@ def no_test_basic_prediction(config, database, fastapi, caplog): assert rsp.status_code == status.HTTP_200_OK job_id = get_job_and_wait_until_ok(fastapi, rsp) + + +def test_prediction_functions(config, database, fastapi, caplog): + caplog.set_level(logging.ERROR) + from tests.test_import import test_import + prj_id = test_import(config, database, caplog, "Test Prediction") + + obj_ids = _prj_query(fastapi, CREATOR_AUTH, prj_id) + assert len(obj_ids) == 8 + + # Prepare fake CNN features to insert + features = list() + for i, oi in enumerate(obj_ids): + features.append([(i+1) * .1] * 50) + features_df = pd.DataFrame(features, index=obj_ids) + + # Test features insertion + with SamplesService() as sce: + n_inserts = DeepFeatures.save(sce.session, features_df) + assert n_inserts == 8 + sce.session.commit() + + # Test features retrieval + with SamplesService() as sce: + ret = DeepFeatures.np_read_for_objects(sce.session, obj_ids) + assert (ret == np.array(features, dtype='float32')).all() + + # Test find_missing without any missing features + with SamplesService() as sce: + ret = DeepFeatures.find_missing(sce.session, prj_id) + assert ret == {} + + # Test deletion + with SamplesService() as sce: + n_deletes = DeepFeatures.delete_all(sce.session, prj_id) + assert n_deletes == 8 + sce.session.commit() + + # Test find_missing after deletion + with SamplesService() as sce: + ret = DeepFeatures.find_missing(sce.session, prj_id) + assert len(ret) == 8 + + # Test features retrieval in empty table, should raise an error + with SamplesService() as sce: + with pytest.raises(AssertionError): + ret = DeepFeatures.np_read_for_objects(sce.session, obj_ids) + From db286288318f77dd9be8f2bcd7f01a6e3fcc8840 Mon Sep 17 00:00:00 2001 From: emmaamblard Date: Thu, 16 Feb 2023 14:41:58 +0000 Subject: [PATCH 8/8] add pandas to requirements for tox --- QA/py/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/QA/py/requirements.txt b/QA/py/requirements.txt index 9d3ace0d..7a2aa6c4 100644 --- a/QA/py/requirements.txt +++ b/QA/py/requirements.txt @@ -56,7 +56,7 @@ scikit-learn==1.0 #tensorflow-hub==0.12.0 #tensorflow_addons==0.14.0 # Used same place as TF, CNN generation -#pandas==1.3.3 +pandas==1.3.3 ##lycon==0.2.0 # Conflicts with the version required by TF #opencv-python-headless==4.5.3.56 #imgaug==0.4.0