Skip to content

Commit

Permalink
Fixing the GbifService to lookup more than the first page from each v…
Browse files Browse the repository at this point in the history
…ocabulary. Adding a first cut at an implementation for tdwg/bdq#277 VALIDATION_PATHWAY_STANDARD, using the GBIF vocabulary API by default, instead of the Darwin Core vocabulary, along with a unit test.
  • Loading branch information
chicoreus committed Jul 26, 2024
1 parent 07f6473 commit f39d3e0
Show file tree
Hide file tree
Showing 7 changed files with 201 additions and 32 deletions.
69 changes: 57 additions & 12 deletions src/main/java/org/filteredpush/qc/metadata/DwCMetadataDQ.java
Original file line number Diff line number Diff line change
Expand Up @@ -83,8 +83,10 @@
* And the following supplementary tests:
*
* #235 VALIDATION_LIFESTAGE_NOTEMPTY 34b9eec9-03d5-4dc9-94b7-5b05ddcaaa87
* #270 VALIDATION_LIFESTAGE_STANDARD be40d19e-1fe7-42ed-b9d0-961f4cf3eb6a
* #225 VALIDATION_DISPOSITION_NOTEMPTY b4c17611-2703-474f-b46a-93b08ecfee16
* #232 VALIDATION_INDIVIDUALCOUNT_NOTEMPTY aff0facd-1d2a-40a5-a55a-61f950cd68a0
* #290 VALIDATION_INDIVIDUALCOUNT_INTEGER 43abded0-c3bf-44e7-8c1f-c4207608b1fa
* #242 VALIDATION_RECORDEDBY_NOTEMPTY 986ad95d-ffa1-4e3b-a6cb-ed943c87be0d
* #243 VALIDATION_RECORDNUMBER_NOTEMPTY 3bd2477c-6497-43b0-94e6-b811eed1b1cb
* #260 VALIDATION_PREPARATIONS_NOTEMPTY 2aa1b7a0-0473-4a90-bf11-a02137c5c65b
Expand Down Expand Up @@ -115,7 +117,7 @@ public class DwCMetadataDQ {
*/
@Issue(label="ISSUE_DATAGENERALIZATIONS_NOTEMPTY", description="Is there a value in dwc:dataGeneralizations?")
@Provides("13d5a10e-188e-40fd-a22c-dbaa87b91df2")
@ProvidesVersion("https://rs.tdwg.org/bdq/terms/13d5a10e-188e-40fd-a22c-dbaa87b91df2/2022-11-08")
@ProvidesVersion("https://rs.tdwg.org/bdq/terms/13d5a10e-188e-40fd-a22c-dbaa87b91df2/2023-09-18")
@Specification("POTENTIAL_ISSUE if dwc:dataGeneralizations is not EMPTY; otherwise NOT_ISSUE ")
public static DQResponse<IssueValue> issueDatageneralizationsNotempty(@ActedUpon("dwc:dataGeneralizations") String dataGeneralizations) {
DQResponse<IssueValue> result = new DQResponse<IssueValue>();
Expand Down Expand Up @@ -1202,7 +1204,7 @@ public DQResponse<AmendmentValue> amendmentDegreeofestablishmentStandardized(
/**
* Does the value of dwc:pathway occur in bdq:sourceAuthority?
*
* Provides: VALIDATION_PATHWAY_STANDARD
* Provides: 277 VALIDATION_PATHWAY_STANDARD
* Version: 2024-02-09
*
* @param pathway the provided dwc:pathway to evaluate as ActedUpon.
Expand All @@ -1212,23 +1214,55 @@ public DQResponse<AmendmentValue> amendmentDegreeofestablishmentStandardized(
@Provides("5424e933-bee7-4125-839e-d8743ea69f93")
@ProvidesVersion("https://rs.tdwg.org/bdq/terms/5424e933-bee7-4125-839e-d8743ea69f93/2024-02-09")
@Specification("EXTERNAL_PREREQUISITES_NOT_MET if the bdq:sourceAuthority is not available; INTERNAL_PREREQUISITES_NOT_MET if dwc:pathway is EMPTY; COMPLIANT if the value of dwc:pathway is in the bdq:sourceAuthority; otherwise NOT_COMPLIANT. bdq:sourceAuthority default = 'Darwin Core pathway' {[https://dwc.tdwg.org/list/#dwc_pathway]} {dwc:pathway vocabulary API [https://api.gbif.org/v1/vocabularies/Pathway/concepts]}")
public DQResponse<ComplianceValue> validationPathwayStandard(
@ActedUpon("dwc:pathway") String pathway
public static DQResponse<ComplianceValue> validationPathwayStandard(
    @ActedUpon("dwc:pathway") String pathway,
    @Parameter(name="bdq:sourceAuthority") String sourceAuthority
) {
    DQResponse<ComplianceValue> result = new DQResponse<ComplianceValue>();

    // Specification
    // EXTERNAL_PREREQUISITES_NOT_MET if the bdq:sourceAuthority
    // is not available; INTERNAL_PREREQUISITES_NOT_MET if dwc:pathway
    // is EMPTY; COMPLIANT if the value of dwc:pathway is in the
    // bdq:sourceAuthority; otherwise NOT_COMPLIANT.
    //
    // Parameters. This test is defined as parameterized.
    // bdq:sourceAuthority default = "Pathway Controlled Vocabulary List of Terms"
    // {[https://dwc.tdwg.org/pw/]}
    // {GBIF vocabulary API [https://api.gbif.org/v1/vocabularies/Pathway/concepts]}

    // Nothing to evaluate without a value for dwc:pathway.
    if (MetadataUtils.isEmpty(pathway)) {
        result.addComment("No Value provided for dwc:pathway");
        result.setResultState(ResultState.INTERNAL_PREREQUISITES_NOT_MET);
        return result;
    }

    if (MetadataUtils.isEmpty(sourceAuthority)) {
        // TODO: Implement tdwg vocabulary lookup
        // Until then, the GBIF vocabulary is used when no source authority is given.
        sourceAuthority = "GBIF Pathway Vocabulary";
    }

    try {
        // Constructed only to check the authority string; the constructor is
        // declared to throw SourceAuthorityException.
        new MetadataSourceAuthority(sourceAuthority);

        MetadataSingleton metadata = MetadataSingleton.getInstance();
        if (!metadata.isLoaded()) {
            result.addComment("Error accessing sourceAuthority: " + metadata.getLoadError() );
            result.setResultState(ResultState.EXTERNAL_PREREQUISITES_NOT_MET);
        } else {
            // Perform the lookup before declaring RUN_HAS_RESULT, so that a failure
            // in the lookup cannot leave a "has result" state without a value.
            boolean found = metadata.getPathwayValues().containsKey(pathway);
            result.setResultState(ResultState.RUN_HAS_RESULT);
            if (found) {
                result.addComment("Provided value of dwc:pathway found in the sourceAuthority");
                result.setValue(ComplianceValue.COMPLIANT);
            } else {
                result.addComment("Provided value of dwc:pathway [" + pathway + "] not found in the sourceAuthority");
                result.setValue(ComplianceValue.NOT_COMPLIANT);
            }
        }
    } catch (SourceAuthorityException e) {
        result.addComment("Error with specified bdq:sourceAuthority ["+ sourceAuthority +"]: " + e.getMessage());
        result.setResultState(ResultState.EXTERNAL_PREREQUISITES_NOT_MET);
    } catch (Exception e) {
        // Any other failure means the source authority could not be consulted;
        // report it instead of returning a response without a result state.
        result.addComment("Error looking up dwc:pathway in bdq:sourceAuthority [" + sourceAuthority + "]: " + e.getMessage());
        result.setResultState(ResultState.EXTERNAL_PREREQUISITES_NOT_MET);
    }

    return result;
}

Expand Down Expand Up @@ -1501,11 +1535,10 @@ public static DQResponse<ComplianceValue> validationPathwayNotempty(
return result;
}

// TODO: Implementation of ISSUE_DATAGENERALIZATIONS_NOTEMPTY is not up to date with current version: https://rs.tdwg.org/bdq/terms/13d5a10e-188e-40fd-a22c-dbaa87b91df2/2023-09-18 see line: 87
/**
* Is dwc:individualCount an Integer ?
*
* Provides: VALIDATION_INDIVIDUALCOUNT_INTEGER
* Provides: 290 VALIDATION_INDIVIDUALCOUNT_INTEGER
* Version: 2024-02-11
*
* @param individualCount the provided dwc:individualCount to evaluate as ActedUpon.
Expand All @@ -1524,14 +1557,26 @@ public DQResponse<ComplianceValue> validationIndividualcountInteger(
// COMPLIANT if the value of dwc:individualCount is interpretable
// an integer; otherwise NOT_COMPLIANT.

result.setResultState(ResultState.RUN_HAS_RESULT);
if (MetadataUtils.isEmpty(individualCount)) {
result.setValue(ComplianceValue.NOT_COMPLIANT);
result.addComment("Provided value for individualCount is empty, not an integer.");
} else if (individualCount.trim().matches("^[0-9]+$")) {
result.setValue(ComplianceValue.COMPLIANT);
result.addComment("Provided value for individualCount is an integer.");
} else {
result.setValue(ComplianceValue.NOT_COMPLIANT);
result.addComment("Provided value for individualCount ["+individualCount+"] is not an integer.");
}

return result;
}


/**
* Propose amendment to the value of dwc:reproductiveCondition using bdq:sourceAuthority.
*
* Provides: AMENDMENT_REPRODUCTIVECONDITION_STANDARDIZED
* Provides: 282 AMENDMENT_REPRODUCTIVECONDITION_STANDARDIZED
* Version: 2024-02-09
*
* @param reproductiveCondition the provided dwc:reproductiveCondition to evaluate as ActedUpon.
Expand Down Expand Up @@ -1565,7 +1610,7 @@ public DQResponse<AmendmentValue> amendmentReproductiveconditionStandardized(
/**
* Propose amendment to the value of dwc:preparations using bdq:sourceAuthority.
*
* Provides: AMENDMENT_PREPARATIONS_STANDARDIZED
* Provides: 280 AMENDMENT_PREPARATIONS_STANDARDIZED
* Version: 2024-02-09
*
* @param preparations the provided dwc:preparations to evaluate as ActedUpon.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -125,4 +125,24 @@ public static DQResponse<ComplianceValue> validationLifestageStandard(
) {
return validationLifestageStandard(lifeStage,null);
}

/**
 * Evaluate VALIDATION_PATHWAY_STANDARD (#277) for a dwc:pathway value without
 * naming a bdq:sourceAuthority.
 *
 * Delegates to the two argument form of this test, passing null for
 * bdq:sourceAuthority so that the choice of source authority is left to
 * that method.
 *
 * Provides: 277 VALIDATION_PATHWAY_STANDARD
 * Version: 2024-02-09
 *
 * @param pathway the provided dwc:pathway to evaluate as ActedUpon.
 * @return DQResponse the response of type ComplianceValue produced by the
 *   two argument form of the test.
 */
@Validation(label="VALIDATION_PATHWAY_STANDARD", description="Does the value of dwc:pathway occur in bdq:sourceAuthority?")
@Provides("5424e933-bee7-4125-839e-d8743ea69f93")
@ProvidesVersion("https://rs.tdwg.org/bdq/terms/5424e933-bee7-4125-839e-d8743ea69f93/2024-02-09")
@Specification("EXTERNAL_PREREQUISITES_NOT_MET if the bdq:sourceAuthority is not available; INTERNAL_PREREQUISITES_NOT_MET if dwc:pathway is EMPTY; COMPLIANT if the value of dwc:pathway is in the bdq:sourceAuthority; otherwise NOT_COMPLIANT. bdq:sourceAuthority default = 'Darwin Core pathway' {[https://dwc.tdwg.org/list/#dwc_pathway]} {dwc:pathway vocabulary API [https://api.gbif.org/v1/vocabularies/Pathway/concepts]}")
public static DQResponse<ComplianceValue> validationPathwayStandard(
    @ActedUpon("dwc:pathway") String pathway
) {
    // No source authority specified by the caller.
    final String unspecifiedSourceAuthority = null;
    DQResponse<ComplianceValue> response = validationPathwayStandard(pathway, unspecifiedSourceAuthority);
    return response;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
public enum EnumMetadataSourceAuthority {

GBIF_LIFESTAGE,
GBIF_PATHWAY,
DWC_BASISOFRECORD,
INVALID;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,8 @@ public MetadataSourceAuthority(String authorityString) throws SourceAuthorityExc
this.authority = EnumMetadataSourceAuthority.GBIF_LIFESTAGE;
} else if (authorityString.equals("https://api.gbif.org/v1/vocabularies/LifeStage")) {
this.authority = EnumMetadataSourceAuthority.GBIF_LIFESTAGE;
} else if (authorityString.toUpperCase().equals("GBIF PATHWAY VOCABULARY")) {
this.authority = EnumMetadataSourceAuthority.GBIF_PATHWAY;
} else if (authorityString.toUpperCase().startsWith("HTTPS://INVALID/")) {
this.authority = EnumMetadataSourceAuthority.INVALID;
} else {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@ public Map<String,List<String>> loadVocabulary(String vocabulary) {
HashMap<String,List<String>> result = new HashMap();

String lookup = gbifApiEndpoint + "vocabularies/" + vocabulary + "/concepts";
logger.debug(lookup);
URI lookupURI = URI.create(lookup);

HttpClient client = HttpClient.newBuilder().followRedirects(HttpClient.Redirect.NORMAL).build();
Expand All @@ -72,32 +73,59 @@ public Map<String,List<String>> loadVocabulary(String vocabulary) {
try {
response = client.send(request,HttpResponse.BodyHandlers.ofString());
JSONObject responseJson = (JSONObject) JSONValue.parse(response.body());
boolean loaded = true;

logger.debug(response);
logger.debug(response.body());

boolean endOfRecords = false;
JSONArray resultList = (JSONArray) responseJson.get("results");
logger.debug(responseJson.get("count"));
if (resultList!=null) {
for (int i=0; i<resultList.size(); i++) {
JSONObject item = (JSONObject) resultList.get(i);
String name = item.get("name").toString();
ArrayList<String> list = new ArrayList<String>();
list.add(name);
// label[0].value
JSONArray labels = (JSONArray) item.get("label");
for (int j=0; j<labels.size(); j++) {
String label = ((JSONObject)labels.get(j)).get("value").toString();
list.add(label);
}
// externalDefinitions[0]
JSONArray terms = (JSONArray) item.get("externalDefinitions");
for (int j=0; j<terms.size(); j++) {
String externalDefinition = terms.get(j).toString();
list.add(externalDefinition);
}
result.put(name,list);
}
int limit = Integer.parseInt(responseJson.get("limit").toString());
int offset = 0;
while (endOfRecords==false) {
if (!loaded) {
lookup = gbifApiEndpoint + "vocabularies/" + vocabulary + "/concepts?offset=" + offset;
logger.debug(lookup);
lookupURI = URI.create(lookup);
client = HttpClient.newBuilder().followRedirects(HttpClient.Redirect.NORMAL).build();
request = HttpRequest.newBuilder().uri(lookupURI).GET().build();
response = client.send(request,HttpResponse.BodyHandlers.ofString());
responseJson = (JSONObject) JSONValue.parse(response.body());
resultList = (JSONArray) responseJson.get("results");
}
for (int i=0; i<resultList.size(); i++) {
JSONObject item = (JSONObject) resultList.get(i);
String name = item.get("name").toString();
ArrayList<String> list = new ArrayList<String>();
list.add(name);
logger.debug(name);
// label[0].value
JSONArray labels = (JSONArray) item.get("label");
for (int j=0; j<labels.size(); j++) {
String label = ((JSONObject)labels.get(j)).get("value").toString();
list.add(label);
}
// externalDefinitions[0]
JSONArray terms = (JSONArray) item.get("externalDefinitions");
for (int j=0; j<terms.size(); j++) {
String externalDefinition = terms.get(j).toString();
list.add(externalDefinition);
}
result.put(name,list);
}
String endOfRecordsString = responseJson.get("endOfRecords").toString();
if (endOfRecordsString.toLowerCase().equals("true")) {
endOfRecords = true;
} else {
endOfRecords = false;
}
offset = offset + limit;
loaded = false;
}
}
logger.debug(result.size());

} catch (IOException e) {
logger.error(e.getMessage(), e);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,9 @@ public class MetadataSingleton {
private Map<String,List<String>> lifeStageTerms = new HashMap<String,List<String>>();
private Map<String,String> lifeStageValues = new HashMap<String,String>();

private Map<String,List<String>> pathwayTerms = new HashMap<String,List<String>>();
private Map<String,String> pathwayValues = new HashMap<String,String>();

private MetadataSingleton() {
init();
}
Expand All @@ -64,8 +67,8 @@ private void init() {

try {
GbifService gbif = new GbifService();

lifeStageTerms = gbif.loadVocabulary("LifeStage");

Iterator<String> keys = lifeStageTerms.keySet().iterator();
while (keys.hasNext()) {
String key = keys.next();
Expand All @@ -75,6 +78,18 @@ private void init() {
lifeStageValues.put(i.next(), key);
}
}

pathwayTerms = gbif.loadVocabulary("Pathway");
keys = pathwayTerms.keySet().iterator();
while (keys.hasNext()) {
String key = keys.next();
List<String> values = pathwayTerms.get(key);
Iterator<String> i = values.iterator();
while (i.hasNext()) {
pathwayValues.put(i.next(), key);
}
}

loaded = true;
loadError = "";
} catch (Exception e) {
Expand All @@ -85,6 +100,9 @@ private void init() {
/**
 * Obtain the life stage lookup held by this instance.
 *
 * @return the lifeStageValues map itself (not a copy).
 */
public Map<String,String> getLifeStageValues() {
    return this.lifeStageValues;
}
/**
 * Obtain the pathway lookup held by this instance.
 *
 * @return the pathwayValues map itself (not a copy).
 */
public Map<String,String> getPathwayValues() {
    return this.pathwayValues;
}

public Boolean isLoaded() {
return loaded;
Expand Down
55 changes: 55 additions & 0 deletions src/test/java/org/filteredpush/qc/metadata/DwCMetadataDQTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -932,4 +932,59 @@ public void testValidationLifestageStandard() {

}

/**
 * Test method for {@link org.filteredpush.qc.metadata.DwCMetadataDQ#validationPathwayStandard(java.lang.String, java.lang.String)}.
 *
 * Checks, in order: a value expected to be NOT_COMPLIANT, an empty value
 * expected to give INTERNAL_PREREQUISITES_NOT_MET with no value, and then a
 * series of values each expected to be COMPLIANT, all evaluated against the
 * "GBIF Pathway Vocabulary" source authority.
 */
@Test
public void validationPathwayStandard() {
    // Value expected not to be found in the source authority.
    DQResponse<ComplianceValue> response = DwCMetadataDQ.validationPathwayStandard("foo","GBIF Pathway Vocabulary");
    logger.debug(response.getComment());
    assertEquals(ResultState.RUN_HAS_RESULT.getLabel(), response.getResultState().getLabel());
    assertEquals(ComplianceValue.NOT_COMPLIANT.getLabel(), response.getValue().getLabel());
    assertNotNull(response.getComment());

    // Empty value: prerequisites not met, and no compliance value set.
    response = DwCMetadataDQ.validationPathwayStandard("","GBIF Pathway Vocabulary");
    logger.debug(response.getComment());
    assertEquals(ResultState.INTERNAL_PREREQUISITES_NOT_MET.getLabel(), response.getResultState().getLabel());
    assertNull(response.getValue());

    // Values each expected to be found in the source authority.
    String[] expectedCompliant = {
        "transportStowaway",
        "contaminateBait",
        "contaminantNursery",
        "otherEscape",
        "corridorAndDispersal"
    };
    for (int k = 0; k < expectedCompliant.length; k++) {
        response = DwCMetadataDQ.validationPathwayStandard(expectedCompliant[k],"GBIF Pathway Vocabulary");
        logger.debug(response.getComment());
        assertEquals(ResultState.RUN_HAS_RESULT.getLabel(), response.getResultState().getLabel());
        assertEquals(ComplianceValue.COMPLIANT.getLabel(), response.getValue().getLabel());
        assertNotNull(response.getComment());
    }

}

}

0 comments on commit f39d3e0

Please sign in to comment.