diff --git a/SynonymGroup.ts b/SynonymGroup.ts index 2b63e77..3bbb53c 100644 --- a/SynonymGroup.ts +++ b/SynonymGroup.ts @@ -67,10 +67,19 @@ type Treatments = { dpr: Set; cite: Set; }; + +/** + * Describes a taxon-concept and why it is considered a synonym by synolib + */ export type JustifiedSynonym = { taxonConceptUri: string; taxonName: TaxonName; /** Human-readable authority */ taxonConceptAuthority?: string; + /** + * Catalogue of Life Taxon URIs + * @example `["https://www.catalogueoflife.org/data/taxon/TWFG"]` + */ + colID: string[]; justifications: JustificationSet; treatments: Treatments; loading: boolean; @@ -162,6 +171,13 @@ export default class SynonymGroup implements AsyncIterable { private controller = new AbortController(); + /** + * Constructs a SynonymGroup + * + * @param sparqlEndpoint SPARQL-Endpoint to query + * @param taxonName either a string of the form "Genus species infraspecific" (species & infraspecific names optional), or a URI of a http://filteredpush.org/ontologies/oa/dwcFP#TaxonConcept or a CoL taxon URI + * @param [ignoreRank=false] if taxonName is "Genus" or "Genus species", by default it will only search for taxa of rank genus/species. If set to true, sub-taxa are also considered as starting points. 
+ */ constructor( sparqlEndpoint: SparqlEndpoint, taxonName: string, @@ -366,26 +382,41 @@ SELECT DISTINCT ?url ?description WHERE { taxonName: string, ): Promise => { if (fetchInit.signal.aborted) return Promise.resolve([]); - const [genus, species, subspecies] = taxonName.split(" "); - // subspecies could also be variety - // ignoreRank has no effect when there is a 'subspecies', as this is assumed to be the lowest rank & should thus not be able to return results in another rank + let taxonNameQuery = ""; + if (taxonName.startsWith("http")) { + if (taxonName.includes("catalogueoflife.org")) { + taxonNameQuery = + `?tc <${taxonName}> .`; + } else { + taxonNameQuery = `BIND(<${taxonName}> as ?tc)`; + } + } else { + const [genus, species, subspecies] = taxonName.split(" "); + // subspecies could also be variety + // ignoreRank has no effect when there is a 'subspecies', as this is assumed to be the lowest rank & should thus not be able to return results in another rank + taxonNameQuery = `?tc dwc:genus "${genus}" .`; + if (species) taxonNameQuery += ` ?tc dwc:species "${species}" .`; + if (subspecies) { + taxonNameQuery += + ` ?tc (dwc:subspecies|dwc:variety) "${subspecies}" .`; + } + if (!subspecies && !ignoreRank) { + taxonNameQuery += ` ?tc dwc:rank "${ + species ? 
"species" : "genus" + }" .`; + } + } const query = `PREFIX cito: PREFIX dc: PREFIX dwc: PREFIX treat: SELECT DISTINCT - ?tn ?name ?tc (group_concat(DISTINCT ?auth; separator=" / ") as ?authority) (group_concat(DISTINCT ?aug;separator="|") as ?augs) (group_concat(DISTINCT ?def;separator="|") as ?defs) (group_concat(DISTINCT ?dpr;separator="|") as ?dprs) (group_concat(DISTINCT ?cite;separator="|") as ?cites) (group_concat(DISTINCT ?trtn;separator="|") as ?trtns) (group_concat(DISTINCT ?citetn;separator="|") as ?citetns) + ?tn ?name ?tc (group_concat(DISTINCT ?auth; separator=" / ") as ?authority) (group_concat(DISTINCT ?colid; separator="|") as ?colids) (group_concat(DISTINCT ?aug;separator="|") as ?augs) (group_concat(DISTINCT ?def;separator="|") as ?defs) (group_concat(DISTINCT ?dpr;separator="|") as ?dprs) (group_concat(DISTINCT ?cite;separator="|") as ?cites) (group_concat(DISTINCT ?trtn;separator="|") as ?trtns) (group_concat(DISTINCT ?citetn;separator="|") as ?citetns) WHERE { - ?tc dwc:genus "${genus}"; - treat:hasTaxonName ?tn; - ${species ? `dwc:species "${species}";` : ""} - ${subspecies ? `(dwc:subspecies|dwc:variety) "${subspecies}";` : ""} - ${ - ignoreRank || !!subspecies - ? "" - : `dwc:rank "${species ? "species" : "genus"}";` - } + ${taxonNameQuery} + ?tc treat:hasTaxonName ?tn ; a . + OPTIONAL { ?tc ?colid . } ?tn dwc:genus ?genus . OPTIONAL { ?tn dwc:subGenus ?subgenus . 
} OPTIONAL { @@ -413,7 +444,7 @@ GROUP BY ?tn ?name ?tc`; .then( (json: SparqlJson) => json.results.bindings.filter((t) => (t.tc && t.tn)) - .map((t) => { + .map((t): JustifiedSynonym => { return { taxonConceptUri: t.tc.value, taxonName: makeTaxonName( @@ -423,6 +454,11 @@ GROUP BY ?tn ?name ?tc`; t.citetns?.value.split("|"), ), taxonConceptAuthority: t.authority?.value, + colID: t.colids?.value.split("|").filter((s) => + s.startsWith( + "https://www.catalogueoflife.org/data/taxon/", + ) + ), justifications: new JustificationSet([ `${t.tc.value} matches "${taxonName}"`, ]), @@ -450,10 +486,11 @@ PREFIX dc: PREFIX dwc: PREFIX treat: SELECT DISTINCT - ?tc (group_concat(DISTINCT ?auth; separator=" / ") as ?authority) (group_concat(DISTINCT ?aug;separator="|") as ?augs) (group_concat(DISTINCT ?def;separator="|") as ?defs) (group_concat(DISTINCT ?dpr;separator="|") as ?dprs) (group_concat(DISTINCT ?cite;separator="|") as ?cites) + ?tc (group_concat(DISTINCT ?auth; separator=" / ") as ?authority) (group_concat(DISTINCT ?colid; separator="|") as ?colids) (group_concat(DISTINCT ?aug;separator="|") as ?augs) (group_concat(DISTINCT ?def;separator="|") as ?defs) (group_concat(DISTINCT ?dpr;separator="|") as ?dprs) (group_concat(DISTINCT ?cite;separator="|") as ?cites) WHERE { ?tc treat:hasTaxonName <${taxon.taxonName.uri}> . OPTIONAL { ?tc dwc:scientificNameAuthorship ?auth . } + OPTIONAL { ?tc ?colid . } OPTIONAL { ?aug treat:augmentsTaxonConcept ?tc . } OPTIONAL { ?def treat:definesTaxonConcept ?tc . } OPTIONAL { ?dpr treat:deprecates ?tc . 
} @@ -481,6 +518,9 @@ GROUP BY ?tc`; taxonConceptUri: t.tc.value, taxonName: taxon.taxonName, taxonConceptAuthority: t.authority?.value, + colID: t.colids?.value.split("|").filter((s) => + s.startsWith("https://www.catalogueoflife.org/data/taxon/") + ), justifications: new JustificationSet([{ toString: () => `${t.tc.value} has taxon name ${taxon.taxonName.uri}`, @@ -508,7 +548,7 @@ PREFIX dc: PREFIX dwc: PREFIX treat: SELECT DISTINCT - ?tn ?name ?tc (group_concat(DISTINCT ?auth; separator=" / ") as ?authority) (group_concat(DISTINCT ?justification; separator="|") as ?justs) (group_concat(DISTINCT ?aug;separator="|") as ?augs) (group_concat(DISTINCT ?def;separator="|") as ?defs) (group_concat(DISTINCT ?dpr;separator="|") as ?dprs) (group_concat(DISTINCT ?cite;separator="|") as ?cites) (group_concat(DISTINCT ?trtn;separator="|") as ?trtns) (group_concat(DISTINCT ?citetn;separator="|") as ?citetns) + ?tn ?name ?tc (group_concat(DISTINCT ?auth; separator=" / ") as ?authority) (group_concat(DISTINCT ?colid; separator="|") as ?colids) (group_concat(DISTINCT ?justification; separator="|") as ?justs) (group_concat(DISTINCT ?aug;separator="|") as ?augs) (group_concat(DISTINCT ?def;separator="|") as ?defs) (group_concat(DISTINCT ?dpr;separator="|") as ?dprs) (group_concat(DISTINCT ?cite;separator="|") as ?cites) (group_concat(DISTINCT ?trtn;separator="|") as ?trtns) (group_concat(DISTINCT ?citetn;separator="|") as ?citetns) WHERE { ?justification treat:deprecates <${taxon.taxonConceptUri}> ; (treat:augmentsTaxonConcept|treat:definesTaxonConcept) ?tc . @@ -522,6 +562,7 @@ WHERE { } BIND(CONCAT(?genus, COALESCE(CONCAT(" (",?subgenus,")"), ""), COALESCE(CONCAT(" ",?species), ""), COALESCE(CONCAT(" ", ?subspecies), ""), COALESCE(CONCAT(" var. ", ?variety), "")) as ?name) OPTIONAL { ?tc dwc:scientificNameAuthorship ?auth . } + OPTIONAL { ?tc ?colid . } OPTIONAL { ?aug treat:augmentsTaxonConcept ?tc . } OPTIONAL { ?def treat:definesTaxonConcept ?tc . 
} OPTIONAL { ?dpr treat:deprecates ?tc . } @@ -539,41 +580,46 @@ GROUP BY ?tn ?name ?tc`; ).then(( json: SparqlJson, ) => - json.results.bindings.filter((t) => t.tc).map((t) => { - return { - taxonConceptUri: t.tc.value, - taxonName: makeTaxonName( - t.tn.value, - t.name?.value, - t.trtns?.value.split("|"), - t.citetns?.value.split("|"), - ), - taxonConceptAuthority: t.authority?.value, - justifications: new JustificationSet( - t.justs?.value.split("|").map((url) => { - if (!this.treatments.has(url)) { - this.treatments.set(url, { - url, - details: getTreatmentDetails(url), - }); - } - return { - toString: () => - `${t.tc.value} deprecates ${taxon.taxonConceptUri} according to ${url}`, - precedingSynonym: taxon, - treatment: this.treatments.get(url), - }; - }), - ), - treatments: { - def: makeTreatmentSet(t.defs?.value.split("|")), - aug: makeTreatmentSet(t.augs?.value.split("|")), - dpr: makeTreatmentSet(t.dprs?.value.split("|")), - cite: makeTreatmentSet(t.cites?.value.split("|")), - } as Treatments, - loading: true, - }; - }), (error) => { + json.results.bindings.filter((t) => t.tc).map( + (t): JustifiedSynonym => { + return { + taxonConceptUri: t.tc.value, + taxonName: makeTaxonName( + t.tn.value, + t.name?.value, + t.trtns?.value.split("|"), + t.citetns?.value.split("|"), + ), + taxonConceptAuthority: t.authority?.value, + colID: t.colids?.value.split("|").filter((s) => + s.startsWith("https://www.catalogueoflife.org/data/taxon/") + ), + justifications: new JustificationSet( + t.justs?.value.split("|").map((url) => { + if (!this.treatments.has(url)) { + this.treatments.set(url, { + url, + details: getTreatmentDetails(url), + }); + } + return { + toString: () => + `${t.tc.value} deprecates ${taxon.taxonConceptUri} according to ${url}`, + precedingSynonym: taxon, + treatment: this.treatments.get(url), + }; + }), + ), + treatments: { + def: makeTreatmentSet(t.defs?.value.split("|")), + aug: makeTreatmentSet(t.augs?.value.split("|")), + dpr: 
makeTreatmentSet(t.dprs?.value.split("|")), + cite: makeTreatmentSet(t.cites?.value.split("|")), + } as Treatments, + loading: true, + }; + }, + ), (error) => { console.warn("SPARQL Error: " + error); return []; }); @@ -585,7 +631,7 @@ PREFIX dc: PREFIX dwc: PREFIX treat: SELECT DISTINCT - ?tn ?name ?tc (group_concat(DISTINCT ?auth; separator=" / ") as ?authority) (group_concat(DISTINCT ?justification; separator="|") as ?justs) (group_concat(DISTINCT ?aug;separator="|") as ?augs) (group_concat(DISTINCT ?def;separator="|") as ?defs) (group_concat(DISTINCT ?dpr;separator="|") as ?dprs) (group_concat(DISTINCT ?cite;separator="|") as ?cites) (group_concat(DISTINCT ?trtn;separator="|") as ?trtns) (group_concat(DISTINCT ?citetn;separator="|") as ?citetns) + ?tn ?name ?tc (group_concat(DISTINCT ?auth; separator=" / ") as ?authority) (group_concat(DISTINCT ?colid; separator="|") as ?colids) (group_concat(DISTINCT ?justification; separator="|") as ?justs) (group_concat(DISTINCT ?aug;separator="|") as ?augs) (group_concat(DISTINCT ?def;separator="|") as ?defs) (group_concat(DISTINCT ?dpr;separator="|") as ?dprs) (group_concat(DISTINCT ?cite;separator="|") as ?cites) (group_concat(DISTINCT ?trtn;separator="|") as ?trtns) (group_concat(DISTINCT ?citetn;separator="|") as ?citetns) WHERE { ?justification (treat:augmentsTaxonConcept|treat:definesTaxonConcept) <${taxon.taxonConceptUri}> ; treat:deprecates ?tc . @@ -599,6 +645,7 @@ WHERE { } BIND(CONCAT(?genus, COALESCE(CONCAT(" (",?subgenus,")"), ""), COALESCE(CONCAT(" ",?species), ""), COALESCE(CONCAT(" ", ?subspecies), ""), COALESCE(CONCAT(" var. ", ?variety), "")) as ?name) OPTIONAL { ?tc dwc:scientificNameAuthorship ?auth . } + OPTIONAL { ?tc ?colid . } OPTIONAL { ?aug treat:augmentsTaxonConcept ?tc . } OPTIONAL { ?def treat:definesTaxonConcept ?tc . } OPTIONAL { ?dpr treat:deprecates ?tc . 
} @@ -616,41 +663,46 @@ GROUP BY ?tn ?name ?tc`; ).then(( json: SparqlJson, ) => - json.results.bindings.filter((t) => t.tc).map((t) => { - return { - taxonConceptUri: t.tc.value, - taxonName: makeTaxonName( - t.tn.value, - t.name?.value, - t.trtns?.value.split("|"), - t.citetns?.value.split("|"), - ), - taxonConceptAuthority: t.authority?.value, - justifications: new JustificationSet( - t.justs?.value.split("|").map((url) => { - if (!this.treatments.has(url)) { - this.treatments.set(url, { - url, - details: getTreatmentDetails(url), - }); - } - return { - toString: () => - `${t.tc.value} deprecates ${taxon.taxonConceptUri} according to ${url}`, - precedingSynonym: taxon, - treatment: this.treatments.get(url), - }; - }), - ), - treatments: { - def: makeTreatmentSet(t.defs?.value.split("|")), - aug: makeTreatmentSet(t.augs?.value.split("|")), - dpr: makeTreatmentSet(t.dprs?.value.split("|")), - cite: makeTreatmentSet(t.cites?.value.split("|")), - } as Treatments, - loading: true, - }; - }), (error) => { + json.results.bindings.filter((t) => t.tc).map( + (t): JustifiedSynonym => { + return { + taxonConceptUri: t.tc.value, + taxonName: makeTaxonName( + t.tn.value, + t.name?.value, + t.trtns?.value.split("|"), + t.citetns?.value.split("|"), + ), + taxonConceptAuthority: t.authority?.value, + colID: t.colids?.value.split("|").filter((s) => + s.startsWith("https://www.catalogueoflife.org/data/taxon/") + ), + justifications: new JustificationSet( + t.justs?.value.split("|").map((url) => { + if (!this.treatments.has(url)) { + this.treatments.set(url, { + url, + details: getTreatmentDetails(url), + }); + } + return { + toString: () => + `${t.tc.value} deprecates ${taxon.taxonConceptUri} according to ${url}`, + precedingSynonym: taxon, + treatment: this.treatments.get(url), + }; + }), + ), + treatments: { + def: makeTreatmentSet(t.defs?.value.split("|")), + aug: makeTreatmentSet(t.augs?.value.split("|")), + dpr: makeTreatmentSet(t.dprs?.value.split("|")), + cite: 
makeTreatmentSet(t.cites?.value.split("|")), + } as Treatments, + loading: true, + }; + }, + ), (error) => { console.warn("SPARQL Error: " + error); return []; }); diff --git a/main.ts b/main.ts index c41e4df..5747b29 100644 --- a/main.ts +++ b/main.ts @@ -20,7 +20,15 @@ try { for await (const synonym of synoGroup) { console.log( Colors.red( - ` * Found synonym: ${tcName(synonym)} <${synonym.taxonConceptUri}>`, + ` * Found synonym: ${tcName(synonym)} <${synonym.taxonConceptUri}>${ + synonym.colID.length + ? ` [CoL: ${ + synonym.colID.map((id) => + id.replace("https://www.catalogueoflife.org/data/taxon/", "") + ).join(", ") + }]` + : "" + }`, ), ); console.log( @@ -30,9 +38,9 @@ try { } <${synonym.taxonName.uri}>`, ), ); - synonym.taxonName.vernacularNames.then((v) => - console.log(JSON.stringify(v)) - ); + synonym.taxonName.vernacularNames.then((v) => { + if (Object.getOwnPropertyNames(v).length) console.log(JSON.stringify(v)); + }); for (const treatment of synonym.taxonName.treatments.aug) { console.log( Colors.gray( diff --git a/npm-package/package-lock.json b/npm-package/package-lock.json index c7968cf..e4a30d1 100644 --- a/npm-package/package-lock.json +++ b/npm-package/package-lock.json @@ -1,12 +1,12 @@ { "name": "@factsmission/synogroup", - "version": "2.2.0", + "version": "2.3.0-0", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "@factsmission/synogroup", - "version": "2.2.0", + "version": "2.3.0-0", "license": "MIT", "devDependencies": { "typescript": "^4.9.5" diff --git a/npm-package/package.json b/npm-package/package.json index 4cc298a..55ac34c 100644 --- a/npm-package/package.json +++ b/npm-package/package.json @@ -1,6 +1,6 @@ { "name": "@factsmission/synogroup", - "version": "2.2.0", + "version": "2.3.0-0", "description": "", "main": "index.js", "scripts": {