Skip to content

Commit

Permalink
Updates to tests fixing issues found in running against validation da…
Browse files Browse the repository at this point in the history
…ta, including updates and additions of backing methods, unit tests, and some cleanup. Includes bugfixes and improvements for tdwg/bdq#55 and substantive bugfixes to tdwg/bdq#201.
  • Loading branch information
chicoreus committed Aug 27, 2024
1 parent c1f4c55 commit fc785f7
Show file tree
Hide file tree
Showing 4 changed files with 325 additions and 33 deletions.
93 changes: 63 additions & 30 deletions src/main/java/org/filteredpush/qc/georeference/DwCGeoRefDQ.java
Original file line number Diff line number Diff line change
Expand Up @@ -449,8 +449,8 @@ public static DQResponse<AmendmentValue> amendmentCoordinatesFromVerbatim(
}

// TODO: Evaluate verbatimCoordinateSystem and verbatimSRS.
// if coordinates are lat/long and verbatimSRS is blank or consistent with EPSG:4326, do the transform
// if not, identify an available transform (none implemented yet) or fail.
// if verbatimSRS can be identified to be not consistent with EPSG:4326, fail
// if verbatimCoordinateSystem can be identified to not be geographic, fail.

boolean interpreted = false;

Expand Down Expand Up @@ -885,20 +885,6 @@ public static DQResponse<ComplianceValue> validationCountryNotempty(@ActedUpon("
return result;
}

//TODO: Implement specification
// INTERNAL_PREREQUISITES_NOT_MET if dwc:decimalLatitude is
// EMPTY or does not have a valid value, or dwc:decimalLongitude
// is EMPTY or does not have a valid value, or dwc:geodeticDatum
// is EMPTY or does not contain an interpretable value; AMENDED
// if the values of dwc:decimalLatitude, dwc:decimalLongitude
// and dwc:geodeticDatum are changed based on a conversion
// between the coordinate reference systems as specified by
// dwc:geodeticDatum and bdq:targetCRS, and, if dwc:coordinateUncertaintyInMeters
// was an interpretable value, the uncertainty from the conversion
// is added to it, and the value of dwc:coordinatePrecision
// is provided from the conversion result; otherwise NOT_AMENDED.
// bdq:targetCRS = "EPSG:4326"

/**
* Propose amendment to the value of dwc:geodeticDatum and potentially to dwc:decimalLatitude and/or dwc:decimalLongitude based on a conversion between spatial reference systems.
*
Expand Down Expand Up @@ -1260,6 +1246,29 @@ public static DQResponse<AmendmentValue> amendmentMindepthMaxdepthFromVerbatim(

// TODO: pattern "to {number} {units}"

// Test for case where only a minimum or maximum is specified, this fails.
boolean failureCase = false;
boolean containsMin = false;
boolean containsMax = false;
if (verbatimDepth.matches(".*[mM](in)(imum){0,1}[ ]*[dD]epth.*")) {
containsMin = true;
}
if (verbatimDepth.matches(".*[mM](ax)(imum){0,1}[ ]*[dD]epth.*")) {
containsMax = true;
}
if(containsMin ^ containsMax) {
// xor, exactly one of the two is true.
if (!verbatimDepth.matches(".*[0-9].*[ ,;:-].*[0-9]")) {
// a minimum or a maximum was specified, but only one number was found
failureCase = true;
if (containsMin) {
result.addComment("One number, specifying minimum depth was found.");
} else {
result.addComment("One number, specifying maximum depth was found.");
}
}
}

String simplified = verbatimDepth;
if (verbatimDepth.matches(".*[mM](in|ax)(imum){0,1}[ ]*[dD]epth.*")) {
simplified = verbatimDepth.replaceAll("[mM](in|ax)(imum){0,1}[ ]*[dD]epth", "").trim();
Expand All @@ -1278,7 +1287,11 @@ public static DQResponse<AmendmentValue> amendmentMindepthMaxdepthFromVerbatim(

logger.debug(verbatimDepth);
logger.debug(simplified);
if (simplified.matches("^[0-9]+([.]{0,1}[0-9]*){0,1} *(m|m[.]|[mM](eter(s){0,1}))$")) {
if (failureCase) {
// handle the failure case above.
result.addComment("Unable to Interpret provided dwc:verbatimDepth into a depth range ["+ verbatimDepth +"].");
result.setResultState(ResultState.NOT_AMENDED);
} if (simplified.matches("^[0-9]+([.]{0,1}[0-9]*){0,1} *(m|m[.]|[mM](eter(s){0,1}))$")) {
String cleaned = simplified.replaceAll("[ Mmetrs]+", "").trim();
cleaned = cleaned.replaceAll("[.]$","");
result.addComment("Interpreted equal minimum and maximum depths in meters from dwc:verbatimDepth ["+ verbatimDepth +"] interpreted as a depth range in meters ");
Expand Down Expand Up @@ -2109,7 +2122,7 @@ public static DQResponse<AmendmentValue> amendmentCountrycodeFromCoordinates(
@Consulted("dwc:decimalLatitude") String decimalLatitude,
@Consulted("dwc:decimalLongitude") String decimalLongitude,
@ActedUpon("dwc:countryCode") String countryCode,
@Consulted("dwc:sourceAuthority") String sourceAuthority
@Parameter(name="bdq:sourceAuthority") String sourceAuthority
) {
DQResponse<AmendmentValue> result = new DQResponse<AmendmentValue>();

Expand Down Expand Up @@ -3080,7 +3093,7 @@ public static DQResponse<ComplianceValue> validationCountrystateprovinceConsiste
/**
* Is the combination of the values of the terms dwc:country, dwc:stateProvince unique in the bdq:sourceAuthority?
*
* Provides: VALIDATION_COUNTRYSTATEPROVINCE_UNAMBIGUOUS
* Provides: 201 VALIDATION_COUNTRYSTATEPROVINCE_UNAMBIGUOUS
* Version: 2023-09-18
*
* @param country the provided dwc:country to evaluate
Expand Down Expand Up @@ -3146,10 +3159,10 @@ public static DQResponse<ComplianceValue> validationCountrystateprovinceUnambigu
}
}


if (GEOUtil.isEmpty(country)) {
// If country is empty, does stateProvince match a single entry?
if (lookup.lookupPrimary(stateProvince)) {
logger.debug(stateProvince);
if (lookup.lookupUniquePrimary(stateProvince)) {
result.setResultState(ResultState.RUN_HAS_RESULT);
result.setValue(ComplianceValue.COMPLIANT);
result.addComment("Provided value of dwc:country is empty and dwc:stateProvince ["+stateProvince+"] matches a single primary division level entity known to the Getty TGN");
Expand All @@ -3174,29 +3187,49 @@ public static DQResponse<ComplianceValue> validationCountrystateprovinceUnambigu
logger.debug(countryToLookup);
if (lookup.lookupCountry(countryToLookup)) {
logger.debug(stateProvince);
logger.debug(lookup.lookupPrimary(stateProvince));
if (lookup.lookupPrimary(stateProvince)) {
// logger.debug(lookup.lookupPrimary(stateProvince));
List<GettyTGNObject> primaryMatches = lookup.getPrimaryObjects(stateProvince);
if (primaryMatches!=null && primaryMatches.size()>0) {
if (preferredCountry==null) {
preferredCountry = countryToLookup;
}
GettyTGNObject primaryObject = lookup.getPrimaryObject(stateProvince);
String primaryParentage = primaryObject.getParentageString();
logger.debug(primaryParentage);
if (primaryParentage==null) {
Iterator<GettyTGNObject> ipm = primaryMatches.iterator();
int matchCount = 0;
boolean hasSomeParentage = false;
StringBuffer primaryParentages = new StringBuffer();
String primaryParentage = "";
while (ipm.hasNext()) {
GettyTGNObject primaryObject = ipm.next();
primaryParentage = primaryObject.getParentageString();
logger.debug(primaryParentage);
if (primaryParentage!=null) {
hasSomeParentage = true;
if (primaryParentage.contains(preferredCountry)) {
matchCount++;
primaryParentages.append(primaryParentage);
}
}
}
if (hasSomeParentage==false) {
result.setResultState(ResultState.RUN_HAS_RESULT);
result.setValue(ComplianceValue.NOT_COMPLIANT);
result.addComment("Parentage not found for dwc:stateProvince ["+stateProvince+"] in the Getty TGN");
} else {
if (primaryParentage.contains(preferredCountry)) {
if (matchCount==1) {
result.setResultState(ResultState.RUN_HAS_RESULT);
result.setValue(ComplianceValue.COMPLIANT);
result.addComment("The dwc:country ["+country+"] as ["+preferredCountry+"] was found in the parentage ["+primaryParentage+"] of dwc:stateProvince ["+stateProvince+"] in the Getty TGN");
} else if (matchCount==0) {
result.setResultState(ResultState.RUN_HAS_RESULT);
result.setValue(ComplianceValue.NOT_COMPLIANT);
result.addComment("The combination of dwc:country ["+country+"] as ["+preferredCountry+"] with dwc:stateProvince ["+stateProvince+"] was not found in the Getty TGN");
} else {
result.setResultState(ResultState.RUN_HAS_RESULT);
result.setValue(ComplianceValue.NOT_COMPLIANT);
result.addComment("The dwc:country ["+country+"] as ["+preferredCountry+"] was not found in the parentage ["+primaryParentage+"] of dwc:stateProvince ["+stateProvince+"] in the Getty TGN");
result.addComment("Non-unique match");
result.addComment("The dwc:country ["+country+"] as ["+preferredCountry+"] was found in the "+matchCount+" parentages ["+primaryParentages.toString()+"] of dwc:stateProvince ["+stateProvince+"] in the Getty TGN");
}
}
}
} else {
result.setResultState(ResultState.RUN_HAS_RESULT);
result.setValue(ComplianceValue.NOT_COMPLIANT);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -288,6 +288,28 @@ public static DQResponse<ComplianceValue> validationCoordinatesStateprovinceCons
return validationCoordinatesStateprovinceConsistent(decimalLatitude, decimalLongitude, stateProvince, null, null);
}


/**
 * Is the combination of the values of the terms dwc:country, dwc:stateProvince unique in the bdq:sourceAuthority?
 *
 * Convenience form that takes no bdq:sourceAuthority argument: it delegates to the
 * three argument form of this test, passing null as the source authority (which that
 * form is expected to interpret as a request for the default source authority).
 *
 * Provides: 201 VALIDATION_COUNTRYSTATEPROVINCE_UNAMBIGUOUS
 * Version: 2023-09-18
 *
 * @param country the provided dwc:country to evaluate
 * @param stateProvince the provided dwc:stateProvince to evaluate
 * @return DQResponse the response of type ComplianceValue produced by the three argument form
 */
@Validation(label="VALIDATION_COUNTRYSTATEPROVINCE_UNAMBIGUOUS", description="Is the combination of the values of the terms dwc:country, dwc:stateProvince unique in the bdq:sourceAuthority?")
@Provides("d257eb98-27cb-48e5-8d3c-ab9fca4edd11")
@ProvidesVersion("https://rs.tdwg.org/bdq/terms/d257eb98-27cb-48e5-8d3c-ab9fca4edd11/2023-09-18")
@Specification("EXTERNAL_PREREQUISITES_NOT_MET if the bdq:sourceAuthority is not available; INTERNAL_PREREQUISITES_NOT_MET if the terms dwc:country and dwc:stateProvince are EMPTY; COMPLIANT if the combination of values of dwc:country and dwc:stateProvince are unambiguously resolved to a single result with a child-parent relationship in the bdq:sourceAuthority and the entity matching the value of dwc:country in the bdq:sourceAuthority is an ISO country-like entity in the bdq:sourceAuthority; otherwise NOT_COMPLIANT bdq:sourceAuthority default = 'The Getty Thesaurus of Geographic Names (TGN)' [https://www.getty.edu/research/tools/vocabularies/tgn/index.html]")
public static DQResponse<ComplianceValue> validationCountrystateprovinceUnambiguous(
        @ActedUpon("dwc:country") String country,
        @ActedUpon("dwc:stateProvince") String stateProvince
) {
    // The caller supplied no source authority, so none is passed on.
    final String unspecifiedSourceAuthority = null;
    return validationCountrystateprovinceUnambiguous(country, stateProvince, unspecifiedSourceAuthority);
}
// TODO: Specification needs source authority to be added.
/**
* Propose amendment of the signs of dwc:decimalLatitude and/or dwc:decimalLongitude to
Expand Down
136 changes: 133 additions & 3 deletions src/main/java/org/filteredpush/qc/georeference/util/GettyLookup.java
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.eclipse.emf.common.util.URI;
import org.filteredpush.qc.georeference.SourceAuthorityException;

import edu.getty.tgn.objects.Vocabulary;
import edu.getty.tgn.objects.Vocabulary.Subject;
Expand All @@ -37,8 +38,12 @@ public class GettyLookup {

private static final Log logger = LogFactory.getLog(GettyLookup.class);

// Cache of countries found
private Map<String,GettyTGNObject> countryCache;
private Map<String,GettyTGNObject> primaryCache;
// Cache of first matching primary divisions found
private Map<String,GettyTGNObject> primaryCache;
// Cache of unique primary divisions found
private Map<String,GettyTGNObject> uniquePrimaryCache;

/** Constant <code>GETTY_TGN="The Getty Thesaurus of Geographic Names"{trunked}</code> */
public static final String GETTY_TGN = "The Getty Thesaurus of Geographic Names (TGN)";
Expand All @@ -56,6 +61,7 @@ public GettyLookup() {
private void init() {
    // Start every lookup cache as a new, empty map; each assignment is
    // independent of the others.

    // unique primary divisions found
    uniquePrimaryCache = new HashMap<String,GettyTGNObject>();
    // first matching primary divisions found
    primaryCache = new HashMap<String,GettyTGNObject>();
    // countries found
    countryCache = new HashMap<String,GettyTGNObject>();
}

/**
Expand Down Expand Up @@ -307,8 +313,8 @@ public Boolean lookupPrimary(String primaryDivision) {
JAXBContext jc = JAXBContext.newInstance(Vocabulary.class);
Unmarshaller unmarshaler = jc.createUnmarshaller();
Vocabulary response = (Vocabulary) unmarshaler.unmarshal(is);
System.out.println(response.getCount());
System.out.println(response.getCount());
logger.debug(response.getCount());
logger.debug(response.getCount());
if (response.getCount().compareTo(BigInteger.ONE) >= 0) {
// idiom for line above from BigInteger docs: (x.compareTo(y) <op> 0)
retval = true;
Expand All @@ -328,11 +334,76 @@ public Boolean lookupPrimary(String primaryDivision) {
e.printStackTrace();
}
}
logger.debug(retval);

return retval;
}


/**
 * Match a primary division (state/province) name in the Getty TGN, returning
 * true only if exactly one such entity is found.
 *
 * A unique match is stored in uniquePrimaryCache, and a cached entry is answered
 * without querying TGN again. Results other than a unique match are not cached.
 *
 * @param primaryDivision the state/province to look up.
 * @return true if uniquePrimaryCache holds an entry for primaryDivision or TGN reports
 *   a count of exactly one first level subdivision for the quoted name, false if
 *   primaryDivision is empty or TGN reports a count other than one,
 *   null on an exception querying TGN.
 */
public Boolean lookupUniquePrimary(String primaryDivision) {

    Boolean retval = null;

    if (GEOUtil.isEmpty(primaryDivision)) {
        retval = false;
    } else {
        if (uniquePrimaryCache.containsKey(primaryDivision)) {
            logger.debug(uniquePrimaryCache.get(primaryDivision));
            retval = true;
        } else {
            // See: http://vocabsservices.getty.edu/Schemas/TGN/tgn_place_type.xsd for place types
            String placeTypeID = "81100";  //first level subdivision
            // See documentation in: https://www.getty.edu/research/tools/vocabularies/vocab_web_services.pdf
            String baseURI = "http://vocabsservices.getty.edu/TGNService.asmx/TGNGetTermMatch?";

            StringBuilder request = new StringBuilder();
            request.append(baseURI);
            // enclose in quotes for exact match
            String primaryEncoded = URI.encodeFragment('"'+primaryDivision+'"', false);
            request.append("name=").append(primaryEncoded);
            request.append("&placetypeid=").append(placeTypeID);
            request.append("&nationid=").append("");
            logger.debug(request.toString());
            // Declared outside the try so that the stream can be released in finally.
            InputStream is = null;
            try {
                URL url = new URL(request.toString());
                HttpURLConnection getty = (HttpURLConnection) url.openConnection();
                is = getty.getInputStream();
                JAXBContext jc = JAXBContext.newInstance(Vocabulary.class);
                Unmarshaller unmarshaler = jc.createUnmarshaller();
                Vocabulary response = (Vocabulary) unmarshaler.unmarshal(is);
                logger.debug(response.getCount());
                if (response.getCount().compareTo(BigInteger.ONE) == 0) {
                    // idiom for line above from BigInteger docs: (x.compareTo(y) <op> 0)
                    retval = true;
                    // cache the match
                    uniquePrimaryCache.put(primaryDivision, new GettyTGNObject(response.getSubject().get(0),placeTypeID));
                } else {
                    retval = false;
                }
            } catch (JAXBException e) {
                // retval stays null to signal that the lookup could not be completed
                logger.debug(e.getMessage(),e);
            } catch (MalformedURLException e) {
                logger.debug(e.getMessage(),e);
            } catch (IOException e) {
                logger.debug(e.getMessage(),e);
            } finally {
                // Always release the response stream, whether or not the lookup succeeded.
                if (is != null) {
                    try {
                        is.close();
                    } catch (IOException e) {
                        logger.debug(e.getMessage(),e);
                    }
                }
            }
        }
    }
    logger.debug(retval);

    return retval;
}


/**
* <p>getPreferredCountryName.</p>
Expand Down Expand Up @@ -519,4 +590,63 @@ public GettyTGNObject getPrimaryObject(String primaryDivision) {
return retval;
}

/**
 * Query the Getty TGN for first level subdivisions matching a name and return
 * every match.
 *
 * If uniquePrimaryCache holds an entry for primaryDivision, a list containing only
 * that entry is returned without querying TGN. Otherwise TGN is queried for the
 * quoted name and one GettyTGNObject is returned per Subject in the response.
 *
 * @param primaryDivision the state/province name to look up.
 * @return a list of the matching {@link edu.getty.tgn.service.GettyTGNObject} objects,
 *   empty if the response contained no subjects; never null.
 * @throws SourceAuthorityException if the request URL is malformed, if TGN cannot
 *   be accessed, or if the response cannot be unmarshalled.
 */
public List<GettyTGNObject> getPrimaryObjects(String primaryDivision) throws SourceAuthorityException {

    List<GettyTGNObject> retval = new ArrayList<GettyTGNObject>();

    if (uniquePrimaryCache.containsKey(primaryDivision)) {
        logger.debug(uniquePrimaryCache.get(primaryDivision).getName());
        retval.add(uniquePrimaryCache.get(primaryDivision));
    } else {
        // See: http://vocabsservices.getty.edu/Schemas/TGN/tgn_place_type.xsd for place types
        String placeTypeID = "81100";
        // See documentation in: https://www.getty.edu/research/tools/vocabularies/vocab_web_services.pdf
        // NOTE(review): this base URI has a doubled slash before TGNService.asmx, unlike the
        // URI used by lookupUniquePrimary; left unchanged here - confirm which form is intended.
        String baseURI = "http://vocabsservices.getty.edu//TGNService.asmx/TGNGetTermMatch?";

        StringBuilder request = new StringBuilder();
        request.append(baseURI);
        // enclose in quotes for exact match
        String primaryEncoded = URI.encodeFragment('"'+primaryDivision+'"', false);
        request.append("name=").append(primaryEncoded);
        request.append("&placetypeid=").append(placeTypeID);
        request.append("&nationid=").append("");
        // Declared outside the try so that the stream can be released in finally.
        InputStream is = null;
        try {
            URL url = new URL(request.toString());
            HttpURLConnection getty = (HttpURLConnection) url.openConnection();
            is = getty.getInputStream();
            JAXBContext jc = JAXBContext.newInstance(Vocabulary.class);
            Unmarshaller unmarshaler = jc.createUnmarshaller();
            Vocabulary response = (Vocabulary) unmarshaler.unmarshal(is);
            logger.debug(response.getCount());
            List<Subject> subjects = response.getSubject();
            Iterator<Subject> i = subjects.iterator();
            while (i.hasNext()) {
                Subject subject = i.next();
                logger.debug(subject.getPreferredTerm().getValue());
                logger.debug(subject.getSubjectID());
                logger.debug(subject.getPreferredParent());
                retval.add(new GettyTGNObject(subject,placeTypeID));
            }
        } catch (JAXBException e) {
            logger.error(e.getMessage(), e);
            // The response is unmarshalled with JAXB, so it is XML rather than json.
            throw new SourceAuthorityException("Error interpreting XML response returned from Getty TGN:" + e.getMessage());
        } catch (MalformedURLException e) {
            // Report the failure instead of returning an empty list, which a caller
            // could not distinguish from "no matching primary division".
            logger.error(e.getMessage(), e);
            throw new SourceAuthorityException("Error constructing request to Getty TGN:" + e.getMessage());
        } catch (IOException e) {
            logger.error(e.getMessage(), e);
            throw new SourceAuthorityException("Error accessing Getty TGN:" + e.getMessage());
        } finally {
            // Always release the response stream, whether or not the lookup succeeded.
            if (is != null) {
                try {
                    is.close();
                } catch (IOException e) {
                    logger.debug(e.getMessage(), e);
                }
            }
        }
    }
    return retval;
}

}
Loading

0 comments on commit fc785f7

Please sign in to comment.