Skip to content

Commit

Permalink
migrate Calflora tools from ca-plant-tools (#15)
Browse files Browse the repository at this point in the history
  • Loading branch information
johnkenny54 authored Dec 3, 2024
1 parent 048aba6 commit 4aee72e
Show file tree
Hide file tree
Showing 7 changed files with 395 additions and 5 deletions.
5 changes: 4 additions & 1 deletion data/exceptions.json
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,9 @@
"translation": "Castilleja ambigua subsp. ambigua"
}
},
"Delphinium gypsophilum": { "calflora": { "notintaxondata": true } },
"Delphinium gypsophilum": {
"calflora": { "notintaxondata": true }
},
"Downingia ornatissima var. mirabilis": {
"inat": {
"notintaxondata": true
Expand Down Expand Up @@ -83,6 +85,7 @@
"translation": "Campanula sharsmithiae"
}
},
"Sagina procumbens": { "calflora": { "native": false } },
"Salvia apiana": { "calflora": { "notintaxondata": true } },
"Streptanthus albidus subsp. peramoenus": {
"calflora": {
Expand Down
4 changes: 2 additions & 2 deletions lib/exceptions.js
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ class Exceptions {

// Read default configuration.
this.#exceptions = readConfig(
Config.getPackageDir() + "/data/exceptions.json"
Config.getPackageDir() + "/data/exceptions.json",
);

// Add/overwrite with local configuration.
Expand All @@ -38,7 +38,7 @@ class Exceptions {
* @param {string} name
* @param {string} cat
* @param {string} subcat
* @param {string} defaultValue
* @param {string} [defaultValue]
*/
getValue(name, cat, subcat, defaultValue) {
const taxonData = this.#exceptions[name];
Expand Down
220 changes: 220 additions & 0 deletions lib/tools/calflora.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,220 @@
import * as path from "path";
import { CSV } from "../csv.js";
import { Files } from "../files.js";

// Query-string prefix shared by both Calflora species downloads: same account
// parameter, tab-separated output and column selection; only the "psp" filter
// and the trailing flags differ between the two URLs.
const CALFLORA_DOWNLOAD_PREFIX =
    "https://www.calflora.org/app/downtext?xun=117493&table=species&format=Tab&cols=0,1,4,5,8,38,41,43&psp=";

// Species list filtered by lifeform. The URL deliberately ends with "active=";
// the caller appends the value.
const CALFLORA_URL_ALL = `${CALFLORA_DOWNLOAD_PREFIX}lifeform::grass,Tree,Herb,Fern,Shrub,Vine!!&par=f&active=`;

// Species list filtered by county list (ALA, CCA), active records only.
const CALFLORA_URL_COUNTY = `${CALFLORA_DOWNLOAD_PREFIX}countylist::ALA,CCA!!&active=1`;

/**
* @typedef {{
* "Native Status":string,
* TJMTID:string
* "Active in Calflora?":string
* Calrecnum:string
* }} CalfloraData
*/

/**
 * Cross-checks the local taxon list against data downloaded from Calflora and
 * reports discrepancies (missing taxa, nativity, active flag, Jepson ID,
 * Calflora ID) to an error log. Also verifies that configured Calflora
 * exceptions are still needed.
 */
class Calflora {
    /**
     * Calflora rows keyed by the value of their "Taxon" column.
     * @type {Object<string,CalfloraData>}
     */
    static #taxa = {};

    /**
     * Download the Calflora files if they are not already cached, load them,
     * and compare every taxon in `taxa` against the Calflora data.
     *
     * @param {string} toolsDataDir directory under which a "calflora" cache directory is kept
     * @param {Taxa} taxa
     * @param {import("../exceptions.js").Exceptions} exceptions
     * @param {ErrorLog} errorLog
     */
    static async analyze(toolsDataDir, taxa, exceptions, errorLog) {
        const toolsDataPath = toolsDataDir + "/calflora";

        /**
         * Retrieve a Calflora file unless a local copy already exists.
         * @param {string} url
         * @param {string} targetFile file name relative to the Calflora data directory
         */
        async function retrieveCalfloraFile(url, targetFile) {
            const targetPath = toolsDataPath + "/" + targetFile;
            if (Files.exists(targetPath)) {
                return;
            }
            console.log("retrieving " + targetPath);
            await Files.fetch(url, targetPath);
        }

        // Create data directory if it's not there.
        Files.mkdir(toolsDataPath);

        const calfloraDataFileNameActive = "calflora_taxa_active.tsv";
        const calfloraDataFileNameCounties = "calflora_taxa_counties.tsv";

        await retrieveCalfloraFile(
            CALFLORA_URL_ALL + "1",
            calfloraDataFileNameActive,
        );
        // County list and "all" lists are both incomplete; load everything to get as much as possible.
        await retrieveCalfloraFile(
            CALFLORA_URL_COUNTY,
            calfloraDataFileNameCounties,
        );

        const csvActive = CSV.readFile(
            path.join(toolsDataPath, calfloraDataFileNameActive),
        );
        const csvCounties = CSV.readFile(
            path.join(toolsDataPath, calfloraDataFileNameCounties),
        );

        // County rows are loaded last, so they win when a taxon is in both files.
        for (const row of csvActive) {
            Calflora.#taxa[row["Taxon"]] = row;
        }
        for (const row of csvCounties) {
            Calflora.#taxa[row["Taxon"]] = row;
        }

        for (const taxon of taxa.getTaxonList()) {
            const name = taxon.getName();
            if (name.includes(" unknown")) {
                continue;
            }

            // Calflora data is keyed by the Calflora spelling of the name.
            const cfData = Calflora.#taxa[taxon.getCalfloraName()];
            if (!cfData) {
                if (
                    !exceptions.hasException(name, "calflora", "notintaxondata")
                ) {
                    errorLog.log(name, "not found in Calflora files");
                }
                continue;
            }

            // Check native status.
            const cfNative = cfData["Native Status"];
            let cfIsNative = cfNative === "rare" || cfNative === "native";
            // Override if exception is specified.
            const nativeException = exceptions.getValue(
                name,
                "calflora",
                "native",
                undefined,
            );
            if (typeof nativeException === "boolean") {
                if (nativeException === cfIsNative) {
                    errorLog.log(
                        name,
                        "has unnecessary Calflora native override",
                    );
                }
                cfIsNative = nativeException;
            }
            if (cfIsNative !== taxon.isCANative()) {
                errorLog.log(
                    name,
                    "has different nativity status in Calflora",
                    cfIsNative.toString(),
                );
            }

            // Check if it is active in Calflora.
            const isActive = cfData["Active in Calflora?"];
            if (isActive !== "YES") {
                errorLog.log(name, "is not active in Calflora", isActive);
            }

            // Check Jepson IDs.
            const cfJepsonID = cfData.TJMTID;
            const jepsonID = taxon.getJepsonID();
            if (
                cfJepsonID !== jepsonID &&
                !exceptions.hasException(name, "calflora", "badjepsonid") &&
                !exceptions.hasException(name, "calflora", "notintaxondata")
            ) {
                errorLog.log(
                    name,
                    "Jepson ID in Calflora is different than taxa.csv",
                    cfJepsonID,
                    jepsonID,
                );
            }

            // Check Calflora ID.
            const cfID = cfData["Calrecnum"];
            const calfloraID = taxon.getCalfloraID();
            if (cfID !== calfloraID) {
                errorLog.log(
                    name,
                    "Calflora ID in Calflora is different than taxa.csv",
                    cfID,
                    calfloraID,
                );
            }
        }

        Calflora.#checkExceptions(taxa, exceptions, errorLog);
    }

    /**
     * Check the Calflora exceptions and make sure they still apply.
     * Must run after the Calflora data has been loaded by `analyze`.
     *
     * @param {Taxa} taxa
     * @param {import("../exceptions.js").Exceptions} exceptions
     * @param {ErrorLog} errorLog
     */
    static #checkExceptions(taxa, exceptions, errorLog) {
        for (const [name, v] of exceptions.getExceptions()) {
            const calfloraExceptions = v.calflora;
            if (!calfloraExceptions) {
                continue;
            }

            // Make sure the taxon is still in our list.
            const taxon = taxa.getTaxon(name);
            if (!taxon) {
                // Don't process global exceptions if taxon is not in local list.
                if (taxa.isSubset() && !v.local) {
                    continue;
                }
                errorLog.log(
                    name,
                    "has Calflora exceptions but not in Taxa collection",
                );
                continue;
            }

            // Use the same key as analyze(): the Calflora spelling of the name,
            // which may differ from the name used in the exceptions file.
            const cfData = Calflora.#taxa[taxon.getCalfloraName()];

            for (const k of Object.keys(calfloraExceptions)) {
                switch (k) {
                    case "badjepsonid": {
                        // Without a Calflora row there is no ID to compare;
                        // report it instead of failing on a missing record.
                        if (!cfData) {
                            errorLog.log(
                                name,
                                "has Calflora badjepsonid exception but taxon is not in Calflora data",
                            );
                            break;
                        }
                        // Make sure Jepson ID is still wrong.
                        if (cfData.TJMTID === taxon.getJepsonID()) {
                            errorLog.log(
                                name,
                                "has Calflora badjepsonid exception but IDs are the same",
                            );
                        }
                        break;
                    }
                    case "native":
                        // Validated in analyze(), where the override is applied.
                        break;
                    case "notintaxondata":
                        if (cfData) {
                            errorLog.log(
                                name,
                                "found in Calflora data but has notintaxondata exception",
                            );
                        }
                        break;
                    default:
                        errorLog.log(
                            name,
                            "unrecognized Calflora exception",
                            k,
                        );
                }
            }
        }
    }
}

export { Calflora };
3 changes: 2 additions & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,8 @@
"bin": {
"ca-plant-list": "scripts/build-site.js",
"ca-plant-book": "scripts/build-ebook.js",
"cpl-photos": "scripts/cpl-photos.js"
"cpl-photos": "scripts/cpl-photos.js",
"cpl-tools": "scripts/cpl-tools.js"
},
"dependencies": {
"archiver": "^5.3.1",
Expand Down
3 changes: 2 additions & 1 deletion schemas/exceptions.schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
"badjepsonid": {
"const": true
},
"native": { "type": "boolean" },
"notintaxondata": {
"const": true
}
Expand Down Expand Up @@ -54,4 +55,4 @@
},
"additionalProperties": false
}
}
}
Loading

0 comments on commit 4aee72e

Please sign in to comment.