Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Integrate col data #13

Closed
wants to merge 6 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
226 changes: 139 additions & 87 deletions SynonymGroup.ts
Original file line number Diff line number Diff line change
Expand Up @@ -67,10 +67,19 @@ type Treatments = {
dpr: Set<Treatment>;
cite: Set<Treatment>;
};

/**
* Describes a taxon-concept and why it is considered a synonym by synolib
*/
export type JustifiedSynonym = {
taxonConceptUri: string;
taxonName: TaxonName;
/** Human-readable authority */ taxonConceptAuthority?: string;
/**
* Catalogue of Life Taxon URIs
* @example `["https://www.catalogueoflife.org/data/taxon/TWFG"]`
*/
colID: string[];
justifications: JustificationSet;
treatments: Treatments;
loading: boolean;
Expand Down Expand Up @@ -162,6 +171,13 @@ export default class SynonymGroup implements AsyncIterable<JustifiedSynonym> {

private controller = new AbortController();

/**
* Constructs a SynonymGroup
*
* @param sparqlEndpoint SPARQL-Endpoint to query
* @param taxonName either a string of the form "Genus species infraspecific" (species & infraspecific names optional), or a URI of a http://filteredpush.org/ontologies/oa/dwcFP#TaxonConcept, or a CoL taxon URI
* @param [ignoreRank=false] if taxonName is "Genus" or "Genus species", by default it will only search for taxa of rank genus/species. If set to true, sub-taxa are also considered as starting points.
*/
constructor(
sparqlEndpoint: SparqlEndpoint,
taxonName: string,
Expand Down Expand Up @@ -366,26 +382,41 @@ SELECT DISTINCT ?url ?description WHERE {
taxonName: string,
): Promise<JustifiedSynonym[]> => {
if (fetchInit.signal.aborted) return Promise.resolve([]);
const [genus, species, subspecies] = taxonName.split(" ");
// subspecies could also be variety
// ignoreRank has no effect when there is a 'subspecies', as this is assumed to be the lowest rank & should thus not be able to return results in another rank
let taxonNameQuery = "";
if (taxonName.startsWith("http")) {
if (taxonName.includes("catalogueoflife.org")) {
taxonNameQuery =
`?tc <http://www.w3.org/2000/01/rdf-schema#seeAlso> <${taxonName}> .`;
} else {
taxonNameQuery = `BIND(<${taxonName}> as ?tc)`;
}
} else {
const [genus, species, subspecies] = taxonName.split(" ");
// subspecies could also be variety
// ignoreRank has no effect when there is a 'subspecies', as this is assumed to be the lowest rank & should thus not be able to return results in another rank
taxonNameQuery = `?tc dwc:genus "${genus}" .`;
if (species) taxonNameQuery += ` ?tc dwc:species "${species}" .`;
if (subspecies) {
taxonNameQuery +=
` ?tc (dwc:subspecies|dwc:variety) "${subspecies}" .`;
}
if (!subspecies && !ignoreRank) {
taxonNameQuery += ` ?tc dwc:rank "${
species ? "species" : "genus"
}" .`;
}
}
const query = `PREFIX cito: <http://purl.org/spar/cito/>
PREFIX dc: <http://purl.org/dc/elements/1.1/>
PREFIX dwc: <http://rs.tdwg.org/dwc/terms/>
PREFIX treat: <http://plazi.org/vocab/treatment#>
SELECT DISTINCT
?tn ?name ?tc (group_concat(DISTINCT ?auth; separator=" / ") as ?authority) (group_concat(DISTINCT ?aug;separator="|") as ?augs) (group_concat(DISTINCT ?def;separator="|") as ?defs) (group_concat(DISTINCT ?dpr;separator="|") as ?dprs) (group_concat(DISTINCT ?cite;separator="|") as ?cites) (group_concat(DISTINCT ?trtn;separator="|") as ?trtns) (group_concat(DISTINCT ?citetn;separator="|") as ?citetns)
?tn ?name ?tc (group_concat(DISTINCT ?auth; separator=" / ") as ?authority) (group_concat(DISTINCT ?colid; separator="|") as ?colids) (group_concat(DISTINCT ?aug;separator="|") as ?augs) (group_concat(DISTINCT ?def;separator="|") as ?defs) (group_concat(DISTINCT ?dpr;separator="|") as ?dprs) (group_concat(DISTINCT ?cite;separator="|") as ?cites) (group_concat(DISTINCT ?trtn;separator="|") as ?trtns) (group_concat(DISTINCT ?citetn;separator="|") as ?citetns)
WHERE {
?tc dwc:genus "${genus}";
treat:hasTaxonName ?tn;
${species ? `dwc:species "${species}";` : ""}
${subspecies ? `(dwc:subspecies|dwc:variety) "${subspecies}";` : ""}
${
ignoreRank || !!subspecies
? ""
: `dwc:rank "${species ? "species" : "genus"}";`
}
${taxonNameQuery}
?tc treat:hasTaxonName ?tn ;
a <http://filteredpush.org/ontologies/oa/dwcFP#TaxonConcept>.
OPTIONAL { ?tc <http://www.w3.org/2000/01/rdf-schema#seeAlso> ?colid . }
?tn dwc:genus ?genus .
OPTIONAL { ?tn dwc:subGenus ?subgenus . }
OPTIONAL {
Expand Down Expand Up @@ -413,7 +444,7 @@ GROUP BY ?tn ?name ?tc`;
.then(
(json: SparqlJson) =>
json.results.bindings.filter((t) => (t.tc && t.tn))
.map((t) => {
.map((t): JustifiedSynonym => {
return {
taxonConceptUri: t.tc.value,
taxonName: makeTaxonName(
Expand All @@ -423,6 +454,11 @@ GROUP BY ?tn ?name ?tc`;
t.citetns?.value.split("|"),
),
taxonConceptAuthority: t.authority?.value,
colID: t.colids?.value.split("|").filter((s) =>
s.startsWith(
"https://www.catalogueoflife.org/data/taxon/",
)
),
justifications: new JustificationSet([
`${t.tc.value} matches "${taxonName}"`,
]),
Expand Down Expand Up @@ -450,10 +486,11 @@ PREFIX dc: <http://purl.org/dc/elements/1.1/>
PREFIX dwc: <http://rs.tdwg.org/dwc/terms/>
PREFIX treat: <http://plazi.org/vocab/treatment#>
SELECT DISTINCT
?tc (group_concat(DISTINCT ?auth; separator=" / ") as ?authority) (group_concat(DISTINCT ?aug;separator="|") as ?augs) (group_concat(DISTINCT ?def;separator="|") as ?defs) (group_concat(DISTINCT ?dpr;separator="|") as ?dprs) (group_concat(DISTINCT ?cite;separator="|") as ?cites)
?tc (group_concat(DISTINCT ?auth; separator=" / ") as ?authority) (group_concat(DISTINCT ?colid; separator="|") as ?colids) (group_concat(DISTINCT ?aug;separator="|") as ?augs) (group_concat(DISTINCT ?def;separator="|") as ?defs) (group_concat(DISTINCT ?dpr;separator="|") as ?dprs) (group_concat(DISTINCT ?cite;separator="|") as ?cites)
WHERE {
?tc treat:hasTaxonName <${taxon.taxonName.uri}> .
OPTIONAL { ?tc dwc:scientificNameAuthorship ?auth . }
OPTIONAL { ?tc <http://www.w3.org/2000/01/rdf-schema#seeAlso> ?colid . }
OPTIONAL { ?aug treat:augmentsTaxonConcept ?tc . }
OPTIONAL { ?def treat:definesTaxonConcept ?tc . }
OPTIONAL { ?dpr treat:deprecates ?tc . }
Expand Down Expand Up @@ -481,6 +518,9 @@ GROUP BY ?tc`;
taxonConceptUri: t.tc.value,
taxonName: taxon.taxonName,
taxonConceptAuthority: t.authority?.value,
colID: t.colids?.value.split("|").filter((s) =>
s.startsWith("https://www.catalogueoflife.org/data/taxon/")
),
justifications: new JustificationSet([{
toString: () =>
`${t.tc.value} has taxon name ${taxon.taxonName.uri}`,
Expand Down Expand Up @@ -508,7 +548,7 @@ PREFIX dc: <http://purl.org/dc/elements/1.1/>
PREFIX dwc: <http://rs.tdwg.org/dwc/terms/>
PREFIX treat: <http://plazi.org/vocab/treatment#>
SELECT DISTINCT
?tn ?name ?tc (group_concat(DISTINCT ?auth; separator=" / ") as ?authority) (group_concat(DISTINCT ?justification; separator="|") as ?justs) (group_concat(DISTINCT ?aug;separator="|") as ?augs) (group_concat(DISTINCT ?def;separator="|") as ?defs) (group_concat(DISTINCT ?dpr;separator="|") as ?dprs) (group_concat(DISTINCT ?cite;separator="|") as ?cites) (group_concat(DISTINCT ?trtn;separator="|") as ?trtns) (group_concat(DISTINCT ?citetn;separator="|") as ?citetns)
?tn ?name ?tc (group_concat(DISTINCT ?auth; separator=" / ") as ?authority) (group_concat(DISTINCT ?colid; separator="|") as ?colids) (group_concat(DISTINCT ?justification; separator="|") as ?justs) (group_concat(DISTINCT ?aug;separator="|") as ?augs) (group_concat(DISTINCT ?def;separator="|") as ?defs) (group_concat(DISTINCT ?dpr;separator="|") as ?dprs) (group_concat(DISTINCT ?cite;separator="|") as ?cites) (group_concat(DISTINCT ?trtn;separator="|") as ?trtns) (group_concat(DISTINCT ?citetn;separator="|") as ?citetns)
WHERE {
?justification treat:deprecates <${taxon.taxonConceptUri}> ;
(treat:augmentsTaxonConcept|treat:definesTaxonConcept) ?tc .
Expand All @@ -522,6 +562,7 @@ WHERE {
}
BIND(CONCAT(?genus, COALESCE(CONCAT(" (",?subgenus,")"), ""), COALESCE(CONCAT(" ",?species), ""), COALESCE(CONCAT(" ", ?subspecies), ""), COALESCE(CONCAT(" var. ", ?variety), "")) as ?name)
OPTIONAL { ?tc dwc:scientificNameAuthorship ?auth . }
OPTIONAL { ?tc <http://www.w3.org/2000/01/rdf-schema#seeAlso> ?colid . }
OPTIONAL { ?aug treat:augmentsTaxonConcept ?tc . }
OPTIONAL { ?def treat:definesTaxonConcept ?tc . }
OPTIONAL { ?dpr treat:deprecates ?tc . }
Expand All @@ -539,41 +580,46 @@ GROUP BY ?tn ?name ?tc`;
).then((
json: SparqlJson,
) =>
json.results.bindings.filter((t) => t.tc).map((t) => {
return {
taxonConceptUri: t.tc.value,
taxonName: makeTaxonName(
t.tn.value,
t.name?.value,
t.trtns?.value.split("|"),
t.citetns?.value.split("|"),
),
taxonConceptAuthority: t.authority?.value,
justifications: new JustificationSet(
t.justs?.value.split("|").map((url) => {
if (!this.treatments.has(url)) {
this.treatments.set(url, {
url,
details: getTreatmentDetails(url),
});
}
return {
toString: () =>
`${t.tc.value} deprecates ${taxon.taxonConceptUri} according to ${url}`,
precedingSynonym: taxon,
treatment: this.treatments.get(url),
};
}),
),
treatments: {
def: makeTreatmentSet(t.defs?.value.split("|")),
aug: makeTreatmentSet(t.augs?.value.split("|")),
dpr: makeTreatmentSet(t.dprs?.value.split("|")),
cite: makeTreatmentSet(t.cites?.value.split("|")),
} as Treatments,
loading: true,
};
}), (error) => {
json.results.bindings.filter((t) => t.tc).map(
(t): JustifiedSynonym => {
return {
taxonConceptUri: t.tc.value,
taxonName: makeTaxonName(
t.tn.value,
t.name?.value,
t.trtns?.value.split("|"),
t.citetns?.value.split("|"),
),
taxonConceptAuthority: t.authority?.value,
colID: t.colids?.value.split("|").filter((s) =>
s.startsWith("https://www.catalogueoflife.org/data/taxon/")
),
justifications: new JustificationSet(
t.justs?.value.split("|").map((url) => {
if (!this.treatments.has(url)) {
this.treatments.set(url, {
url,
details: getTreatmentDetails(url),
});
}
return {
toString: () =>
`${t.tc.value} deprecates ${taxon.taxonConceptUri} according to ${url}`,
precedingSynonym: taxon,
treatment: this.treatments.get(url),
};
}),
),
treatments: {
def: makeTreatmentSet(t.defs?.value.split("|")),
aug: makeTreatmentSet(t.augs?.value.split("|")),
dpr: makeTreatmentSet(t.dprs?.value.split("|")),
cite: makeTreatmentSet(t.cites?.value.split("|")),
} as Treatments,
loading: true,
};
},
), (error) => {
console.warn("SPARQL Error: " + error);
return [];
});
Expand All @@ -585,7 +631,7 @@ PREFIX dc: <http://purl.org/dc/elements/1.1/>
PREFIX dwc: <http://rs.tdwg.org/dwc/terms/>
PREFIX treat: <http://plazi.org/vocab/treatment#>
SELECT DISTINCT
?tn ?name ?tc (group_concat(DISTINCT ?auth; separator=" / ") as ?authority) (group_concat(DISTINCT ?justification; separator="|") as ?justs) (group_concat(DISTINCT ?aug;separator="|") as ?augs) (group_concat(DISTINCT ?def;separator="|") as ?defs) (group_concat(DISTINCT ?dpr;separator="|") as ?dprs) (group_concat(DISTINCT ?cite;separator="|") as ?cites) (group_concat(DISTINCT ?trtn;separator="|") as ?trtns) (group_concat(DISTINCT ?citetn;separator="|") as ?citetns)
?tn ?name ?tc (group_concat(DISTINCT ?auth; separator=" / ") as ?authority) (group_concat(DISTINCT ?colid; separator="|") as ?colids) (group_concat(DISTINCT ?justification; separator="|") as ?justs) (group_concat(DISTINCT ?aug;separator="|") as ?augs) (group_concat(DISTINCT ?def;separator="|") as ?defs) (group_concat(DISTINCT ?dpr;separator="|") as ?dprs) (group_concat(DISTINCT ?cite;separator="|") as ?cites) (group_concat(DISTINCT ?trtn;separator="|") as ?trtns) (group_concat(DISTINCT ?citetn;separator="|") as ?citetns)
WHERE {
?justification (treat:augmentsTaxonConcept|treat:definesTaxonConcept) <${taxon.taxonConceptUri}> ;
treat:deprecates ?tc .
Expand All @@ -599,6 +645,7 @@ WHERE {
}
BIND(CONCAT(?genus, COALESCE(CONCAT(" (",?subgenus,")"), ""), COALESCE(CONCAT(" ",?species), ""), COALESCE(CONCAT(" ", ?subspecies), ""), COALESCE(CONCAT(" var. ", ?variety), "")) as ?name)
OPTIONAL { ?tc dwc:scientificNameAuthorship ?auth . }
OPTIONAL { ?tc <http://www.w3.org/2000/01/rdf-schema#seeAlso> ?colid . }
OPTIONAL { ?aug treat:augmentsTaxonConcept ?tc . }
OPTIONAL { ?def treat:definesTaxonConcept ?tc . }
OPTIONAL { ?dpr treat:deprecates ?tc . }
Expand All @@ -616,41 +663,46 @@ GROUP BY ?tn ?name ?tc`;
).then((
json: SparqlJson,
) =>
json.results.bindings.filter((t) => t.tc).map((t) => {
return {
taxonConceptUri: t.tc.value,
taxonName: makeTaxonName(
t.tn.value,
t.name?.value,
t.trtns?.value.split("|"),
t.citetns?.value.split("|"),
),
taxonConceptAuthority: t.authority?.value,
justifications: new JustificationSet(
t.justs?.value.split("|").map((url) => {
if (!this.treatments.has(url)) {
this.treatments.set(url, {
url,
details: getTreatmentDetails(url),
});
}
return {
toString: () =>
`${t.tc.value} deprecates ${taxon.taxonConceptUri} according to ${url}`,
precedingSynonym: taxon,
treatment: this.treatments.get(url),
};
}),
),
treatments: {
def: makeTreatmentSet(t.defs?.value.split("|")),
aug: makeTreatmentSet(t.augs?.value.split("|")),
dpr: makeTreatmentSet(t.dprs?.value.split("|")),
cite: makeTreatmentSet(t.cites?.value.split("|")),
} as Treatments,
loading: true,
};
}), (error) => {
json.results.bindings.filter((t) => t.tc).map(
(t): JustifiedSynonym => {
return {
taxonConceptUri: t.tc.value,
taxonName: makeTaxonName(
t.tn.value,
t.name?.value,
t.trtns?.value.split("|"),
t.citetns?.value.split("|"),
),
taxonConceptAuthority: t.authority?.value,
colID: t.colids?.value.split("|").filter((s) =>
s.startsWith("https://www.catalogueoflife.org/data/taxon/")
),
justifications: new JustificationSet(
t.justs?.value.split("|").map((url) => {
if (!this.treatments.has(url)) {
this.treatments.set(url, {
url,
details: getTreatmentDetails(url),
});
}
return {
toString: () =>
`${t.tc.value} deprecates ${taxon.taxonConceptUri} according to ${url}`,
precedingSynonym: taxon,
treatment: this.treatments.get(url),
};
}),
),
treatments: {
def: makeTreatmentSet(t.defs?.value.split("|")),
aug: makeTreatmentSet(t.augs?.value.split("|")),
dpr: makeTreatmentSet(t.dprs?.value.split("|")),
cite: makeTreatmentSet(t.cites?.value.split("|")),
} as Treatments,
loading: true,
};
},
), (error) => {
console.warn("SPARQL Error: " + error);
return [];
});
Expand Down
16 changes: 12 additions & 4 deletions main.ts
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,15 @@ try {
for await (const synonym of synoGroup) {
console.log(
Colors.red(
` * Found synonym: ${tcName(synonym)} <${synonym.taxonConceptUri}>`,
` * Found synonym: ${tcName(synonym)} <${synonym.taxonConceptUri}>${
synonym.colID.length
? ` [CoL: ${
synonym.colID.map((id) =>
id.replace("https://www.catalogueoflife.org/data/taxon/", "")
).join(", ")
}]`
: ""
}`,
),
);
console.log(
Expand All @@ -30,9 +38,9 @@ try {
} <${synonym.taxonName.uri}>`,
),
);
synonym.taxonName.vernacularNames.then((v) =>
console.log(JSON.stringify(v))
);
synonym.taxonName.vernacularNames.then((v) => {
if (Object.getOwnPropertyNames(v).length) console.log(JSON.stringify(v));
});
for (const treatment of synonym.taxonName.treatments.aug) {
console.log(
Colors.gray(
Expand Down
4 changes: 2 additions & 2 deletions npm-package/package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion npm-package/package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "@factsmission/synogroup",
"version": "2.2.0",
"version": "2.3.0-0",
"description": "",
"main": "index.js",
"scripts": {
Expand Down