Commit

Merge pull request #54 from ecotaxa/tests_classif_update
Tests classif update
grololo06 authored Feb 19, 2023
2 parents a0f96d9 + db28628 commit 5e22ac2
Showing 4 changed files with 171 additions and 12 deletions.
2 changes: 1 addition & 1 deletion QA/py/requirements.txt
@@ -56,7 +56,7 @@ scikit-learn==1.0
#tensorflow-hub==0.12.0
#tensorflow_addons==0.14.0
# Used same place as TF, CNN generation
#pandas==1.3.3
pandas==1.3.3
##lycon==0.2.0 # Conflicts with the version required by TF
#opencv-python-headless==4.5.3.56
#imgaug==0.4.0
121 changes: 111 additions & 10 deletions QA/py/tests/test_classification.py
@@ -3,6 +3,7 @@
# Copyright (C) 2015-2020 Picheral, Colin, Irisson (UPMC-CNRS)
#
import logging
import pytest

from typing import List
from API_models.filters import ProjectFilters, ProjectFiltersDict
@@ -30,6 +31,7 @@ def _prj_query(fastapi, auth, prj_id, **kwargs) -> List[int]:
OBJECT_SET_DELETE_URL = "/object_set/"
OBJECT_SET_SUMMARY_URL = "/object_set/{project_id}/summary?only_total=False"
OBJECT_SET_PARENTS_URL = "/object_set/parents"
OBJECT_QUERY_URL = "/object/{object_id}"

PROJECT_SET_USER_STATS = "/project_set/user_stats?ids={prj_ids}"

@@ -61,15 +63,43 @@ def classify_all(fastapi, obj_ids, classif_id):
    assert rsp.status_code == status.HTTP_200_OK


def classify_auto_all(fastapi, obj_ids, classif_id):
def classify_auto_all(fastapi, obj_ids, classif_id, scores=None):
    url = OBJECT_SET_CLASSIFY_AUTO_URL
    classifications = [classif_id for _obj in obj_ids]
    scores = [0.52 for _obj in obj_ids]
    if not scores:
        scores = [0.52 for _obj in obj_ids]
    rsp = fastapi.post(url, headers=ADMIN_AUTH, json={"target_ids": obj_ids,
                                                      "classifications": classifications,
                                                      "scores": scores,
                                                      "keep_log": True})
    assert rsp.status_code == status.HTTP_200_OK


def classify_auto_incorrect(fastapi, obj_ids):
    url = OBJECT_SET_CLASSIFY_AUTO_URL
    classifications = [-1 for _obj in obj_ids]

    # List of scores of a different length, should raise an error
    scores = [0.1 for _obj in obj_ids[:-1]]
    with pytest.raises(AssertionError):
        rsp = fastapi.post(url, headers=ADMIN_AUTH, json={"target_ids": obj_ids,
                                                          "classifications": classifications,
                                                          "scores": scores,
                                                          "keep_log": True})
    # List of scores outside [0, 1], should raise an error
    scores = [2. for _obj in obj_ids]
    with pytest.raises(AssertionError):
        rsp = fastapi.post(url, headers=ADMIN_AUTH, json={"target_ids": obj_ids,
                                                          "classifications": classifications,
                                                          "scores": scores,
                                                          "keep_log": True})
    # List of scores with wrong type, should fail
    scores = [None for _obj in obj_ids]
    rsp = fastapi.post(url, headers=ADMIN_AUTH, json={"target_ids": obj_ids,
                                                      "classifications": classifications,
                                                      "scores": scores,
                                                      "keep_log": True})
    assert rsp.status_code == status.HTTP_422_UNPROCESSABLE_ENTITY


# Note: to go faster in a local dev environment, use "filled_database" instead of "database" below
@@ -179,9 +209,26 @@ def get_object_set_stats():
    rsp = fastapi.post(url, headers=ADMIN_AUTH, json={})
    assert rsp.status_code == status.HTTP_200_OK

    # Incorrect ML results
    classify_auto_incorrect(fastapi, obj_ids[:4])

    # Super ML result, 4 first objects are crustacea
    classify_auto_all(fastapi, obj_ids[:4], crustacea)

    assert get_stats(fastapi, prj_id) == {'nb_dubious': 0,
                                          'nb_predicted': 4,
                                          'nb_unclassified': 4,
                                          'nb_validated': 0,
                                          'projid': prj_id,
                                          'used_taxa': [-1, crustacea]}

    # New ML results with a different score for the second object
    classify_auto_all(fastapi, [obj_ids[1]], crustacea, [0.8])
    url = OBJECT_QUERY_URL.format(object_id=obj_ids[1])
    rsp = fastapi.get(url, headers=ADMIN_AUTH)
    assert rsp.status_code == status.HTTP_200_OK
    assert rsp.json()['classif_auto_score'] == 0.8

    assert get_stats(fastapi, prj_id) == {'nb_dubious': 0,
                                          'nb_predicted': 4,
                                          'nb_unclassified': 4,
@@ -198,16 +245,44 @@ def get_object_set_stats():
                                          'nb_unclassified': 0,
                                          'nb_validated': 8,
                                          'projid': prj_id,
                                          'used_taxa': [25828]}  # No more Unclassified and Copepod is in +
                                          'used_taxa': [copepod_id]}  # No more Unclassified and Copepod is in +

    # No history yet as the object was just created
    classif = classif_history(fastapi, obj_ids[0])
    assert len(classif) == 1
    assert classif[0]['classif_date'] is not None  # e.g. 2021-09-12T09:28:03.278626
    classif[0]['classif_date'] = "now"
    assert classif == [
        {'objid': obj_ids[0], 'classif_id': 12846, 'classif_date': 'now', 'classif_who': None,
        {'objid': obj_ids[0], 'classif_id': crustacea, 'classif_date': 'now', 'classif_who': None,
         'classif_type': 'A', 'classif_qual': 'P', 'classif_score': 0.52, 'user_name': None, 'taxon_name': 'Crustacea'}]

    # Revert on validated objects
    url = OBJECT_SET_REVERT_URL.format(project_id=prj_id, dry_run=False, tgt_usr="")
    rsp = fastapi.post(url, headers=ADMIN_AUTH, json={})
    assert rsp.status_code == status.HTTP_200_OK
    stats = rsp.json()

    assert get_stats(fastapi, prj_id) == {'nb_dubious': 0,
                                          'nb_predicted': 4,
                                          'nb_unclassified': 4,
                                          'nb_validated': 0,
                                          'projid': prj_id,
                                          'used_taxa': [-1, crustacea]}

    # Second revert, should not change since the last record in history is the same
    rsp = fastapi.post(url, headers=ADMIN_AUTH, json={})
    assert rsp.status_code == status.HTTP_200_OK
    stats = rsp.json()

    assert get_stats(fastapi, prj_id) == {'nb_dubious': 0,
                                          'nb_predicted': 4,
                                          'nb_unclassified': 4,
                                          'nb_validated': 0,
                                          'projid': prj_id,
                                          'used_taxa': [-1, crustacea]}

    # Apply validation again after revert
    classify_all(fastapi, obj_ids, copepod_id)

    # Not a copepod :(
    classify_all(fastapi, obj_ids, entomobryomorpha_id)
@@ -228,7 +303,7 @@ def classify_all_no_change(classif_id):
    classif2[0]['classif_date'] = 'hopefully just now'
    classif2[1]['classif_date'] = 'a bit before'
    assert classif2 == [{'classif_date': 'hopefully just now',
                         'classif_id': 25828,
                         'classif_id': copepod_id,
                         'classif_qual': 'V',
                         'classif_score': None,
                         'classif_type': 'M',
@@ -237,7 +312,7 @@ def classify_all_no_change(classif_id):
                         'taxon_name': 'Copepoda',
                         'user_name': 'Application Administrator'},
                        {'classif_date': 'a bit before',
                         'classif_id': 12846,
                         'classif_id': crustacea,
                         'classif_qual': 'P',
                         'classif_score': 0.52,
                         'classif_type': 'A',
@@ -261,9 +336,35 @@ def classify_all_no_change(classif_id):
                      'nb_unclassified': 0,
                      'nb_validated': 8,
                      'projid': prj_id,
                      'used_taxa': [
                          25835]}]  # <- copepod is gone, unclassified as well, replaced with entomobryomorpha

                      'used_taxa':
                          [entomobryomorpha_id]}]  # <- copepod is gone, unclassified as well, replaced with entomobryomorpha

    # Reset to predicted on validated objects
    url = OBJECT_SET_RESET_PREDICTED_URL.format(project_id=prj_id)
    rsp = fastapi.post(url, headers=ADMIN_AUTH, json={})
    assert rsp.status_code == status.HTTP_200_OK
    stats = rsp.json()

    assert get_stats(fastapi, prj_id) == {'nb_dubious': 0,
                                          'nb_predicted': 8,
                                          'nb_unclassified': 0,
                                          'nb_validated': 0,
                                          'projid': prj_id,
                                          'used_taxa': [entomobryomorpha_id]}

    # Revert after reset to predicted
    url = OBJECT_SET_REVERT_URL.format(project_id=prj_id, dry_run=False, tgt_usr="")
    rsp = fastapi.post(url, headers=ADMIN_AUTH, json={})
    assert rsp.status_code == status.HTTP_200_OK
    stats = rsp.json()

    assert get_stats(fastapi, prj_id) == {'nb_dubious': 0,
                                          'nb_predicted': 0,
                                          'nb_unclassified': 0,
                                          'nb_validated': 8,
                                          'projid': prj_id,
                                          'used_taxa': [entomobryomorpha_id]}

    # Delete some object via API, why not?
    rsp = fastapi.delete(OBJECT_SET_DELETE_URL, headers=ADMIN_AUTH, json=obj_ids[:4])
    assert rsp.status_code == status.HTTP_200_OK
@@ -285,7 +386,7 @@ def classify_all_no_change(classif_id):
    ref_stats = [{"projid": prj_id,
                  "annotators": [{"id": 1,
                                  "name": "Application Administrator"}],
                  "activities": [{"id": 1, "nb_actions": 8,
                  "activities": [{"id": 1, "nb_actions": 12,
                                  "last_annot": "2022-05-12T14:21:15"}]}]
    # Fix the date on both sides
    ref_stats[0]["activities"][0]["last_annot"] = "FIXED DATE"
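
For readers skimming the history assertions above, an informal reading of the single-letter codes, reconstructed only from the states this test exercises (the automatic classification carries classif_type 'A' with classif_qual 'P', the manual validation 'M' with 'V'); anything beyond that is an assumption:

# Informal sketch, not an API reference; reconstructed from the test assertions above.
CLASSIF_TYPE = {"A": "automatic (machine) classification",
                "M": "manual (human) classification"}
CLASSIF_QUAL = {"P": "predicted",
                "V": "validated",
                "D": "dubious"}  # assumption: 'D' inferred from the nb_dubious counter in the stats
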
58 changes: 57 additions & 1 deletion QA/py/tests/test_prediction.py
@@ -1,9 +1,17 @@
import logging
import pytest
import pandas as pd
import numpy as np

from starlette import status

from tests.credentials import ADMIN_AUTH
from tests.credentials import ADMIN_AUTH, CREATOR_AUTH
from tests.test_jobs import get_job_and_wait_until_ok
from tests.test_classification import _prj_query

from BO.Prediction import DeepFeatures

from API_operations.CRUD.ObjectParents import SamplesService

OBJECT_SET_PREDICT_URL = "/object_set/predict"

@@ -37,3 +45,51 @@ def no_test_basic_prediction(config, database, fastapi, caplog):
    assert rsp.status_code == status.HTTP_200_OK

    job_id = get_job_and_wait_until_ok(fastapi, rsp)


def test_prediction_functions(config, database, fastapi, caplog):
    caplog.set_level(logging.ERROR)
    from tests.test_import import test_import
    prj_id = test_import(config, database, caplog, "Test Prediction")

    obj_ids = _prj_query(fastapi, CREATOR_AUTH, prj_id)
    assert len(obj_ids) == 8

    # Prepare fake CNN features to insert
    features = list()
    for i, oi in enumerate(obj_ids):
        features.append([(i+1) * .1] * 50)
    features_df = pd.DataFrame(features, index=obj_ids)

    # Test features insertion
    with SamplesService() as sce:
        n_inserts = DeepFeatures.save(sce.session, features_df)
        assert n_inserts == 8
        sce.session.commit()

    # Test features retrieval
    with SamplesService() as sce:
        ret = DeepFeatures.np_read_for_objects(sce.session, obj_ids)
        assert (ret == np.array(features, dtype='float32')).all()

    # Test find_missing without any missing features
    with SamplesService() as sce:
        ret = DeepFeatures.find_missing(sce.session, prj_id)
        assert ret == {}

    # Test deletion
    with SamplesService() as sce:
        n_deletes = DeepFeatures.delete_all(sce.session, prj_id)
        assert n_deletes == 8
        sce.session.commit()

    # Test find_missing after deletion
    with SamplesService() as sce:
        ret = DeepFeatures.find_missing(sce.session, prj_id)
        assert len(ret) == 8

    # Test features retrieval in empty table, should raise an error
    with SamplesService() as sce:
        with pytest.raises(AssertionError):
            ret = DeepFeatures.np_read_for_objects(sce.session, obj_ids)
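
A side note on the fake features built above: each object gets a 50-wide row in a DataFrame indexed by object id, and retrieval is compared against a float32 array. A minimal standalone sketch of that shape/type contract, with hypothetical object ids and no EcoTaxa code involved:

import numpy as np
import pandas as pd

obj_ids = [101, 102, 103]  # hypothetical ids, stand-ins for the 8 real ones in the test
features = [[(i + 1) * .1] * 50 for i in range(len(obj_ids))]
features_df = pd.DataFrame(features, index=obj_ids)

assert features_df.shape == (len(obj_ids), 50)
# Same comparison as the test: the values survive the float32 conversion unchanged
assert (features_df.to_numpy(dtype='float32') == np.array(features, dtype='float32')).all()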

2 changes: 2 additions & 0 deletions py/main.py
@@ -1571,6 +1571,8 @@ def classify_auto_object_set(req: ClassifyAutoReq = Body(...),
    """
    assert len(req.target_ids) == len(req.classifications) == len(req.scores), \
        "Need the same number of objects, classifications and scores"
    assert all(isinstance(score, float) and 0 <= score <= 1 for score in req.scores), \
        "Scores should be floats between 0 and 1"
    with ObjectManager() as sce:
        with RightsThrower():
            ret, prj_id, changes = sce.classify_auto_set(current_user, req.target_ids, req.classifications, req.scores,
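
The two new assert lines above reject mismatched or out-of-range scores inside the endpoint body, which is why the first two bad payloads in classify_auto_incorrect surface as AssertionError (propagated by the test client) while only the None scores fail request-model validation with a 422. An alternative, not what this commit does, would be to move the same checks into the request model so every bad payload comes back as 422; a pydantic v1 style sketch with a hypothetical model name (the real ClassifyAutoReq is not shown in this diff):

from typing import List
from pydantic import BaseModel, validator

class ClassifyAutoReqSketch(BaseModel):  # hypothetical stand-in for ClassifyAutoReq
    target_ids: List[int]
    classifications: List[int]
    scores: List[float]
    keep_log: bool = False

    @validator("scores")
    def check_scores(cls, scores, values):
        # 'values' holds the previously validated fields (target_ids, classifications)
        if "target_ids" in values and "classifications" in values:
            assert len(values["target_ids"]) == len(values["classifications"]) == len(scores), \
                "Need the same number of objects, classifications and scores"
        assert all(0 <= s <= 1 for s in scores), "Scores should be floats between 0 and 1"
        return scores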
