Skip to content

Commit

Permalink
ecotaxa/ecotaxa_front#764: First implementation _without_ mix of stat…
Browse files Browse the repository at this point in the history
…us inside the same taxon.
  • Loading branch information
grololo06 committed Feb 12, 2023
1 parent 8ef9835 commit 2ff03f0
Show file tree
Hide file tree
Showing 5 changed files with 50 additions and 22 deletions.
6 changes: 6 additions & 0 deletions QA/py/pg_files/data_load.sql
Original file line number Diff line number Diff line change
Expand Up @@ -7712,6 +7712,8 @@ COPY public.taxonomy (id, parent_id, name, id_source, nbrobj, nbrobjcum, creatio
85016 84959 t005 m217 \N \N \N \N t005 \N 2018-01-02 00:00:00 \N \N \N A M
\.

-- e.g. to get some data with lineage:
-- ecotaxa4=# copy (select * from worms where aphia_id in (10194, 152352, 1828, 1821, 146419)) to '/tmp/cp.sql';
COPY public.worms (aphia_id, url, scientificname, authority, status, unacceptreason, taxon_rank_id, rank, valid_aphia_id, valid_name, valid_authority, parent_name_usage_id, kingdom, phylum, class_, "order", family, genus, citation, lsid, is_marine, is_brackish, is_freshwater, is_terrestrial, is_extinct, match_type, modified, all_fetched) FROM stdin;
1 http://www.marinespecies.org/aphia.php?p=taxdetails&id=1 Biota \N accepted \N 0 \N 1 Biota\N 1 \N \N \N \N \N \N WoRMS (2020). Biota. Accessed at: http://www.marinespecies.org/aphia.php?p=taxdetails&id=1 on 2020-09-17 urn:lsid:marinespecies.org:taxname:1 t t t t \N \N exact 2004-12-21 15:54:05.437 t
889851 http://www.marinespecies.org/aphia.php?p=taxdetails&id=889851 Sarcotacidea Yamaguti, 1963 unaccepted \N 100 Order 1381349 Ergasilida Khodami, Mercado-Salas, Tang & Martinez Arbizu, 2019 155879 Animalia Arthropoda Hexanauplia Sarcotacidea \N \N Walter, T.C.; Boxshall, G. (2020). World of Copepods database. Sarcotacidea. Accessed through: World Register of Marine Species at: http://www.marinespecies.org/aphia.php?p=taxdetails&id=889851 on 2020-09-20 urn:lsid:marinespecies.org:taxname:889851 t f f f \N exact 2016-11-28 11:01:17.91 t
Expand All @@ -7726,4 +7728,8 @@ COPY public.worms (aphia_id, url, scientificname, authority, status, unacceptrea
155879 http://www.marinespecies.org/aphia.php?p=taxdetails&id=155879 Podoplea Giesbrecht, 1882 accepted \N 90 Superorder 155879 Podoplea Giesbrecht, 1882 155876 Animalia Arthropoda Hexanauplia \N \N \N WoRMS (2020). Podoplea. Accessed at: http://www.marinespecies.org/aphia.php?p=taxdetails&id=155879 on 2020-09-19 urn:lsid:marinespecies.org:taxname:155879 t \N t \N \N exact 2008-08-27 21:14:09.817 t
155876 http://www.marinespecies.org/aphia.php?p=taxdetails&id=155876 Neocopepoda Huys & Boxshall, 1991 accepted \N 80 Infraclass 155876 Neocopepoda Huys & Boxshall, 1991 1080 Animalia Arthropoda Hexanauplia \N \N \N WoRMS (2020). Neocopepoda. Accessed at: http://www.marinespecies.org/aphia.php?p=taxdetails&id=155876 on 2020-09-19 urn:lsid:marinespecies.org:taxname:155876 t t t \N \N exact 2008-06-23 12:21:53.35 t
1080 http://www.marinespecies.org/aphia.php?p=taxdetails&id=1080 Copepoda Milne Edwards, 1840 accepted \N 70 Subclass 1080 Copepoda Milne Edwards, 1840 889925 Animalia Arthropoda Hexanauplia \N \N \N WoRMS (2020). Copepoda. Accessed at: http://www.marinespecies.org/aphia.php?p=taxdetails&id=1080 on 2020-09-19 urn:lsid:marinespecies.org:taxname:1080 t t t \N \N exact 2016-11-30 12:36:48.403 t
1821 http://www.marinespecies.org/aphia.php?p=taxdetails&id=1821 Chordata Haeckel, 1874 accepted \N 30 Phylum 1821 Chordata Haeckel, 1874 2 Animalia Chordata \N \N \N \N WoRMS (2020). Chordata. Accessed at: http://www.marinespecies.org/aphia.php?p=taxdetails&id=1821 on 2020-09-17 urn:lsid:marinespecies.org:taxname:1821 t \N \N \N \N exact 2004-12-21 15:54:05.437 t
146419 http://www.marinespecies.org/aphia.php?p=taxdetails&id=146419 Vertebrata \N accepted \N 40 Subphylum 146419 Vertebrata \N 1821 Animalia Chordata \N \N \N \N WoRMS (2020). Vertebrata. Accessed at: http://www.marinespecies.org/aphia.php?p=taxdetails&id=146419 on 2020-09-19 urn:lsid:marinespecies.org:taxname:146419 t \N \N \N \N exact 2004-12-21 15:54:05.437 t
1828 http://www.marinespecies.org/aphia.php?p=taxdetails&id=1828 Gnathostomata \N accepted \N 50 Superclass 1828 Gnathostomata \N 146419 Animalia Chordata \N \N \N \N WoRMS (2020). Gnathostomata. Accessed at: http://www.marinespecies.org/aphia.php?p=taxdetails&id=1828 on 2020-09-19 urn:lsid:marinespecies.org:taxname:1828 t \N \N \N \N exact 2004-12-21 15:54:05.437 t
10194 http://www.marinespecies.org/aphia.php?p=taxdetails&id=10194 Actinopterygii \N accepted \N 60 Class 10194 Actinopterygii \N 1828 Animalia Chordata Actinopterygii \N \N \N WoRMS (2020). Actinopterygii. Accessed at: http://www.marinespecies.org/aphia.php?p=taxdetails&id=10194 on 2020-09-19 urn:lsid:marinespecies.org:taxname:10194 t \N \N \N \N exact 2017-02-02 05:40:48.577 t
\.
32 changes: 21 additions & 11 deletions QA/py/tests/emodnet_ref.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,10 @@
</rangeOfDates>
</temporalCoverage>
<taxonomicCoverage>
<taxonomicClassification>
<taxonRankName>Class</taxonRankName>
<taxonRankValue>Actinopterygii</taxonRankValue>
</taxonomicClassification>
<taxonomicClassification>
<taxonRankName>Family</taxonRankName>
<taxonRankValue>Oncaeidae</taxonRankValue>
Expand Down Expand Up @@ -145,10 +149,11 @@
<field index="1" term="http://rs.tdwg.org/dwc/terms/eventID"/>
<field index="2" term="http://rs.tdwg.org/dwc/terms/occurrenceID"/>
<field index="3" term="http://rs.tdwg.org/dwc/terms/basisOfRecord"/>
<field index="4" term="http://rs.tdwg.org/dwc/terms/scientificName"/>
<field index="5" term="http://rs.tdwg.org/dwc/terms/scientificNameID"/>
<field index="6" term="http://rs.tdwg.org/dwc/terms/kingdom"/>
<field index="7" term="http://rs.tdwg.org/dwc/terms/occurrenceStatus"/>
<field index="4" term="https://dwc.tdwg.org/list/#dwc_identificationVerificationStatus"/>
<field index="5" term="http://rs.tdwg.org/dwc/terms/scientificName"/>
<field index="6" term="http://rs.tdwg.org/dwc/terms/scientificNameID"/>
<field index="7" term="http://rs.tdwg.org/dwc/terms/kingdom"/>
<field index="8" term="http://rs.tdwg.org/dwc/terms/occurrenceStatus"/>
</extension>
"""
_meta_emofs_with_computations = r"""
Expand Down Expand Up @@ -194,13 +199,14 @@
m106_mn04_n6_sml m106_mn04_n6_sml sample IMEV EMODNET test collection 2014-04-20T04:20:00Z 18.000000 -24.416667 600.0 1000.0
"""
_occurence = r"""
id eventID occurrenceID basisOfRecord scientificName scientificNameID kingdom occurrenceStatus
m106_mn01_n1_sml m106_mn01_n1_sml m106_mn01_n1_sml_78418 MachineObservation Oncaeidae urn:lsid:marinespecies.org:taxname:128586 Animalia present
m106_mn01_n1_sml m106_mn01_n1_sml m106_mn01_n1_sml_45072 MachineObservation Cyclopoida urn:lsid:marinespecies.org:taxname:1101 Animalia present
m106_mn04_n4_sml m106_mn04_n4_sml m106_mn04_n4_sml_78418 MachineObservation Oncaeidae urn:lsid:marinespecies.org:taxname:128586 Animalia present
m106_mn04_n5_sml m106_mn04_n5_sml m106_mn04_n5_sml_78418 MachineObservation Oncaeidae urn:lsid:marinespecies.org:taxname:128586 Animalia present
m106_mn04_n6_sml m106_mn04_n6_sml m106_mn04_n6_sml_45072 MachineObservation Cyclopoida urn:lsid:marinespecies.org:taxname:1101 Animalia present
m106_mn04_n6_sml m106_mn04_n6_sml m106_mn04_n6_sml_78418 MachineObservation Oncaeidae urn:lsid:marinespecies.org:taxname:128586 Animalia present
id eventID occurrenceID basisOfRecord identificationVerificationStatus scientificName scientificNameID kingdom occurrenceStatus
m106_mn01_n1_sml m106_mn01_n1_sml m106_mn01_n1_sml_78418 MachineObservation ValidatedByHuman Oncaeidae urn:lsid:marinespecies.org:taxname:128586 Animalia present
m106_mn01_n1_sml m106_mn01_n1_sml m106_mn01_n1_sml_45072 MachineObservation ValidatedByHuman Cyclopoida urn:lsid:marinespecies.org:taxname:1101 Animalia present
m106_mn01_n3_sml m106_mn01_n3_sml m106_mn01_n3_sml_56693 MachineObservation PredictedByMachine Actinopterygii urn:lsid:marinespecies.org:taxname:10194 Animalia present
m106_mn04_n4_sml m106_mn04_n4_sml m106_mn04_n4_sml_78418 MachineObservation ValidatedByHuman Oncaeidae urn:lsid:marinespecies.org:taxname:128586 Animalia present
m106_mn04_n5_sml m106_mn04_n5_sml m106_mn04_n5_sml_78418 MachineObservation ValidatedByHuman Oncaeidae urn:lsid:marinespecies.org:taxname:128586 Animalia present
m106_mn04_n6_sml m106_mn04_n6_sml m106_mn04_n6_sml_45072 MachineObservation ValidatedByHuman Cyclopoida urn:lsid:marinespecies.org:taxname:1101 Animalia present
m106_mn04_n6_sml m106_mn04_n6_sml m106_mn04_n6_sml_78418 MachineObservation ValidatedByHuman Oncaeidae urn:lsid:marinespecies.org:taxname:128586 Animalia present
"""
# _occurence_with_absent = r"""m106_mn01_n2_sml m106_mn01_n2_sml m106_mn01_n2_sml_45072 MachineObservation Cyclopoida urn:lsid:marinespecies.org:taxname:1101 0 Animalia absent
# m106_mn01_n2_sml m106_mn01_n2_sml m106_mn01_n2_sml_78418 MachineObservation Oncaeidae urn:lsid:marinespecies.org:taxname:128586 0 Animalia absent
Expand All @@ -218,6 +224,9 @@
m106_mn01_n1_sml m106_mn01_n1_sml_45072 355604.586438 Biovolume of biological entity specified elsewhere per unit volume of the water body Cubic millimetres per cubic metre http://vocab.nerc.ac.uk/collection/P01/current/CVOLUKNB/ http://vocab.nerc.ac.uk/collection/P06/current/CMCM/
m106_mn01_n1_sml http://vocab.nerc.ac.uk/collection/L22/current/TOOL1578/ Imaging instrument name https://vocab.nerc.ac.uk/collection/P06/current/XXXX/
m106_mn01_n2_sml http://vocab.nerc.ac.uk/collection/L22/current/TOOL1578/ Imaging instrument name https://vocab.nerc.ac.uk/collection/P06/current/XXXX/
m106_mn01_n3_sml m106_mn01_n3_sml_56693 1 Count (in assayed sample) of biological entity specified elsewhere http://vocab.nerc.ac.uk/collection/P01/current/OCOUNT01/
m106_mn01_n3_sml m106_mn01_n3_sml_56693 0.02 Abundance of biological entity specified elsewhere per unit volume of the water body Number per cubic metre http://vocab.nerc.ac.uk/collection/P01/current/SDBIOL01/ http://vocab.nerc.ac.uk/collection/P06/current/UPMM/
m106_mn01_n3_sml m106_mn01_n3_sml_56693 194359.383023 Biovolume of biological entity specified elsewhere per unit volume of the water body Cubic millimetres per cubic metre http://vocab.nerc.ac.uk/collection/P01/current/CVOLUKNB/ http://vocab.nerc.ac.uk/collection/P06/current/CMCM/
m106_mn01_n3_sml http://vocab.nerc.ac.uk/collection/L22/current/TOOL1578/ Imaging instrument name https://vocab.nerc.ac.uk/collection/P06/current/XXXX/
m106_mn04_n4_sml m106_mn04_n4_sml_78418 1 Count (in assayed sample) of biological entity specified elsewhere http://vocab.nerc.ac.uk/collection/P01/current/OCOUNT01/
m106_mn04_n4_sml http://vocab.nerc.ac.uk/collection/L22/current/TOOL1578/ Imaging instrument name https://vocab.nerc.ac.uk/collection/P06/current/XXXX/
Expand All @@ -239,6 +248,7 @@
m106_mn01_n1_sml m106_mn01_n1_sml_45072 1 Count (in assayed sample) of biological entity specified elsewhere http://vocab.nerc.ac.uk/collection/P01/current/OCOUNT01/
m106_mn01_n1_sml http://vocab.nerc.ac.uk/collection/L22/current/TOOL1578/ Imaging instrument name https://vocab.nerc.ac.uk/collection/P06/current/XXXX/
m106_mn01_n2_sml http://vocab.nerc.ac.uk/collection/L22/current/TOOL1578/ Imaging instrument name https://vocab.nerc.ac.uk/collection/P06/current/XXXX/
m106_mn01_n3_sml m106_mn01_n3_sml_56693 1 Count (in assayed sample) of biological entity specified elsewhere http://vocab.nerc.ac.uk/collection/P01/current/OCOUNT01/
m106_mn01_n3_sml http://vocab.nerc.ac.uk/collection/L22/current/TOOL1578/ Imaging instrument name https://vocab.nerc.ac.uk/collection/P06/current/XXXX/
m106_mn04_n4_sml m106_mn04_n4_sml_78418 1 Count (in assayed sample) of biological entity specified elsewhere http://vocab.nerc.ac.uk/collection/P01/current/OCOUNT01/
m106_mn04_n4_sml http://vocab.nerc.ac.uk/collection/L22/current/TOOL1578/ Imaging instrument name https://vocab.nerc.ac.uk/collection/P06/current/XXXX/
Expand Down
3 changes: 2 additions & 1 deletion QA/py/tests/test_classification.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
#
import logging

from typing import List
from API_models.filters import ProjectFilters, ProjectFiltersDict
from starlette import status

Expand All @@ -14,7 +15,7 @@
from tests.test_taxa_query import TAXA_SET_QUERY_URL


def _prj_query(fastapi, auth, prj_id, **kwargs):
def _prj_query(fastapi, auth, prj_id, **kwargs) -> List[int]:
""" Query using the filters in kwargs """
url = OBJECT_SET_QUERY_URL.format(project_id=prj_id)
rsp = fastapi.post(url, headers=auth, json=kwargs)
Expand Down
11 changes: 7 additions & 4 deletions QA/py/tests/test_export_emodnet.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ def do_test_emodnet_export(config, database, fastapi, caplog):
from tests.test_import import BAD_FREE_DIR, test_import, do_import, test_import_a_bit_more_skipping
prj_id = test_import(config, database, caplog, "EMODNET project", str(PLAIN_FILE), "UVP6")
# Add a sample spanning 2 days (m106_mn01_n3_sml) for testing date ranges in event.txt
# this sample contains 2 'detritus' at load time
# this sample contains 2 'detritus' at load time and 1 'small<egg' (92731), which resolves to the nearest Phylo taxon, Actinopterygii (56693)
test_import_a_bit_more_skipping(config, database, caplog, "EMODNET project")
# Add a sample with corrupted or absent needed free columns, for provoking calculation warnings
do_import(prj_id, BAD_FREE_DIR, ADMIN_USER_ID)
Expand Down Expand Up @@ -81,9 +81,13 @@ def do_test_emodnet_export(config, database, fastapi, caplog):
# "accepted, not ."]
# assert rsp.json()["warnings"] == []

# Validate everything, otherwise no export.
# Validate nearly everything, otherwise no export.
obj_ids = _prj_query(fastapi, CREATOR_AUTH, prj_id)
assert len(obj_ids) == 19
# The Actinopterygii object in m106_mn01_n3_sml remains Predicted
stay_predicted = _prj_query(fastapi, CREATOR_AUTH, prj_id, taxo="92731")
assert len(stay_predicted) == 1
obj_ids.remove(stay_predicted[0])
url = OBJECT_SET_CLASSIFY_URL
classifications = [-1 for _obj in obj_ids] # Keep current
rsp = fastapi.post(url, headers=ADMIN_AUTH, json={"target_ids": obj_ids,
Expand Down Expand Up @@ -138,7 +142,6 @@ def do_test_emodnet_export(config, database, fastapi, caplog):
"Could not extract sampling net name and features from sample m106_mn01_n2_sml (at least one of ['net_type', 'net_mesh', 'net_surf'] free column is absent).",
"No occurrence added for sample 'm106_mn01_n2_sml' in project #%d" % prj_id,
"Could not extract sampling net name and features from sample m106_mn01_n3_sml (at least one of ['net_type', 'net_mesh', 'net_surf'] free column is absent).",
"No occurrence added for sample 'm106_mn01_n3_sml' in project #%d" % prj_id,
"Could not extract sampling net name and features from sample m106_mn04_n4_sml (at least one of ['net_type', 'net_mesh', 'net_surf'] free column is absent).",
"Sample 'm106_mn04_n4_sml' taxo(s) #[1, 78418]: Computed concentration is NaN, input data is missing or incorrect",
"Sample 'm106_mn04_n4_sml' taxo(s) #[1, 78418]: Computed biovolume is NaN, input data is missing or incorrect",
Expand All @@ -158,7 +161,7 @@ def do_test_emodnet_export(config, database, fastapi, caplog):
"Some values could not be converted to float in {'obj_area': 1583.0, 'sam_tot_vol': '2000', 'ssm_pixel': '10.6', 'ssm_sub_part': 'hi'}",
"Some values could not be converted to float in {'obj_area': 1583.0, 'sam_tot_vol': '2000', 'ssm_pixel': '10.6', 'ssm_sub_part': 'hi'}",
"Sample 'm106_mn04_n6_sml' taxo(s) #[1, 45072, 78418]: Computed biovolume is NaN, input data is missing or incorrect",
'Stats: validated:19 produced to zip:7 not produced (M):12 not produced (P):0']
"Stats: predicted:1 validated:18 produced to zip:8 not produced (M):11 not produced (P):0"]
assert warns == ref_warns
assert rsp.json()["errors"] == []
# job_id = rsp.json()["job_id"]
Expand Down
20 changes: 14 additions & 6 deletions py/API_operations/exports/DarwinCore.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,8 +44,8 @@
AbundancePerUnitVolumeOfTheWaterBody, BiovolumeOfBiologicalEntity, SamplingInstrumentName, CountOfBiologicalEntity, \
ImagingInstrumentName
from formats.DarwinCore.models import DwC_Event, RecordTypeEnum, DwC_Occurrence, OccurrenceStatusEnum, \
BasisOfRecordEnum, EMLGeoCoverage, EMLTemporalCoverage, EMLMeta, EMLTitle, EMLPerson, EMLKeywordSet, \
EMLTaxonomicClassification, EMLAdditionalMeta, EMLIdentifier, EMLAssociatedPerson
BasisOfRecordEnum, IdentificationVerificationEnum, EMLGeoCoverage, EMLTemporalCoverage, EMLMeta, EMLTitle, \
EMLPerson, EMLKeywordSet, EMLTaxonomicClassification, EMLAdditionalMeta, EMLIdentifier, EMLAssociatedPerson
from helpers.DateTime import now_time
from helpers.DynamicLogs import get_logger, LogsSwitcher
# TODO: Move somewhere else
Expand Down Expand Up @@ -101,6 +101,7 @@ def __init__(self, collection_id: CollectionIDT, dry_run: bool, with_zeroes: boo
self.warnings: List[str] = []
# Summary for logging issues
self.validated_count = 0
self.predicted_count = 0
self.produced_count = 0
self.ignored_count: Dict[ClassifIDT, int] = {}
self.ignored_morpho: int = 0
Expand Down Expand Up @@ -762,7 +763,11 @@ def add_occurences(self, sample: Sample, arch: DwC_Archive, event_id: str, predi
for a_lsid, for_lsid in by_lsid_desc.items():
occurrence_id, aggreg_for_lsid, worms = for_lsid
self.produced_count += aggreg_for_lsid.abundance
# TODO: The record depends on the status (validated or just predicted)
# TODO: More fields of the record depend on the status (validated or just predicted),
# not just identificationVerificationStatus
# @see https://github.com/ecotaxa/ecotaxa_front/issues/764#issuecomment-1420324532
verif_status = IdentificationVerificationEnum.predictedByMachine if predicted \
else IdentificationVerificationEnum.validatedByHuman
occ = DwC_Occurrence(eventID=event_id,
occurrenceID=occurrence_id,
# Below is better as an EMOF @see CountOfBiologicalEntity
Expand All @@ -771,7 +776,8 @@ def add_occurences(self, sample: Sample, arch: DwC_Archive, event_id: str, predi
scientificNameID=worms.lsid,
kingdom=worms.kingdom,
occurrenceStatus=OccurrenceStatusEnum.present,
basisOfRecord=BasisOfRecordEnum.machineObservation)
basisOfRecord=BasisOfRecordEnum.machineObservation,
identificationVerificationStatus=verif_status)
arch.occurences.add(occ)
nb_added_occurences += 1
# Add eMoFs if possible and required, but the decision is made inside the def
Expand Down Expand Up @@ -832,8 +838,9 @@ def keep_stats(self, taxon_info: WoRMS, count: int) -> None:

def log_stats(self) -> None:
not_produced = sum(self.ignored_count.values())
self.warnings.append("Stats: validated:%d produced to zip:%d not produced (M):%d not produced (P):%d"
% (self.validated_count, self.produced_count, self.ignored_morpho, not_produced))
self.warnings.append(
"Stats: predicted:%d validated:%d produced to zip:%d not produced (M):%d not produced (P):%d"
% (self.predicted_count, self.validated_count, self.produced_count, self.ignored_morpho, not_produced))
if len(self.ignored_count) > 0:
unmatched = []
ids = list(self.ignored_count.keys())
Expand Down Expand Up @@ -868,3 +875,4 @@ def update_db_stats(self) -> None:
a_stat: ProjectTaxoStats
for a_stat in ProjectBO.read_taxo_stats(self.session, project_ids, []):
self.validated_count += a_stat.nb_validated
self.predicted_count += a_stat.nb_predicted

0 comments on commit 2ff03f0

Please sign in to comment.