diff --git a/data/exceptions.json b/data/exceptions.json index 2713119..2da36f2 100644 --- a/data/exceptions.json +++ b/data/exceptions.json @@ -19,7 +19,9 @@ "translation": "Castilleja ambigua subsp. ambigua" } }, - "Delphinium gypsophilum": { "calflora": { "notintaxondata": true } }, + "Delphinium gypsophilum": { + "calflora": { "notintaxondata": true } + }, "Downingia ornatissima var. mirabilis": { "inat": { "notintaxondata": true @@ -83,6 +85,7 @@ "translation": "Campanula sharsmithiae" } }, + "Sagina procumbens": { "calflora": { "native": false } }, "Salvia apiana": { "calflora": { "notintaxondata": true } }, "Streptanthus albidus subsp. peramoenus": { "calflora": { diff --git a/lib/exceptions.js b/lib/exceptions.js index 1d95dc7..3b9bd54 100644 --- a/lib/exceptions.js +++ b/lib/exceptions.js @@ -18,7 +18,7 @@ class Exceptions { // Read default configuration. this.#exceptions = readConfig( - Config.getPackageDir() + "/data/exceptions.json" + Config.getPackageDir() + "/data/exceptions.json", ); // Add/overwrite with local configuration. 
@@ -38,7 +38,7 @@ class Exceptions { * @param {string} name * @param {string} cat * @param {string} subcat - * @param {string} defaultValue + * @param {string} [defaultValue] */ getValue(name, cat, subcat, defaultValue) { const taxonData = this.#exceptions[name]; diff --git a/lib/tools/calflora.js b/lib/tools/calflora.js new file mode 100644 index 0000000..7be3611 --- /dev/null +++ b/lib/tools/calflora.js @@ -0,0 +1,220 @@ +import * as path from "path"; +import { CSV } from "../csv.js"; +import { Files } from "../files.js"; + +const CALFLORA_URL_ALL = + "https://www.calflora.org/app/downtext?xun=117493&table=species&format=Tab&cols=0,1,4,5,8,38,41,43&psp=lifeform::grass,Tree,Herb,Fern,Shrub,Vine!!&par=f&active="; +const CALFLORA_URL_COUNTY = + "https://www.calflora.org/app/downtext?xun=117493&table=species&format=Tab&cols=0,1,4,5,8,38,41,43&psp=countylist::ALA,CCA!!&active=1"; + +/** + * @typedef {{ + * "Native Status":string, + * TJMTID:string + * "Active in Calflora?":string + * Calrecnum:string + * }} CalfloraData + */ + +class Calflora { + /** @type {Object<string, CalfloraData>} */ + static #taxa = {}; + + /** + * Retrieve the Calflora export files if they are not already present, then compare each local taxon with them and log any discrepancies. + * @param {string} toolsDataDir + * @param {Taxa} taxa + * @param {import("../exceptions.js").Exceptions} exceptions + * @param {ErrorLog} errorLog + */ + static async analyze(toolsDataDir, taxa, exceptions, errorLog) { + /** + * @param {string} url + * @param {string} targetFile + */ + async function retrieveCalfloraFile(url, targetFile) { + // Retrieve file if it's not there. + targetFile = toolsDataPath + "/" + targetFile; + if (Files.exists(targetFile)) { + return; + } + console.log("retrieving " + targetFile); + await Files.fetch(url, targetFile); + } + + const toolsDataPath = toolsDataDir + "/calflora"; + // Create data directory if it's not there. 
+ Files.mkdir(toolsDataPath); + + const calfloraDataFileNameActive = "calflora_taxa_active.tsv"; + const calfloraDataFileNameCounties = "calflora_taxa_counties.tsv"; + + await retrieveCalfloraFile( + CALFLORA_URL_ALL + "1", + calfloraDataFileNameActive, + ); + // County list and "all" lists are both incomplete; load everything to get as much as possible. + await retrieveCalfloraFile( + CALFLORA_URL_COUNTY, + calfloraDataFileNameCounties, + ); + + const csvActive = CSV.readFile( + path.join(toolsDataPath, calfloraDataFileNameActive), + ); + const csvCounties = CSV.readFile( + path.join(toolsDataPath, calfloraDataFileNameCounties), + ); + + for (const row of csvActive) { + Calflora.#taxa[row["Taxon"]] = row; + } + for (const row of csvCounties) { + Calflora.#taxa[row["Taxon"]] = row; + } + + for (const taxon of taxa.getTaxonList()) { + const name = taxon.getName(); + if (name.includes(" unknown")) { + continue; + } + const cfName = taxon.getCalfloraName(); + const cfData = Calflora.#taxa[cfName]; + if (!cfData) { + if ( + !exceptions.hasException(name, "calflora", "notintaxondata") + ) { + errorLog.log(name, "not found in Calflora files"); + } + continue; + } + + // Check native status. + const cfNative = cfData["Native Status"]; + let cfIsNative = cfNative === "rare" || cfNative === "native"; + // Override if exception is specified. + const nativeException = exceptions.getValue( + name, + "calflora", + "native", + undefined, + ); + if (typeof nativeException === "boolean") { + if (nativeException === cfIsNative) { + errorLog.log( + name, + "has unnecessary Calflora native override", + ); + } + cfIsNative = nativeException; + } + if (cfIsNative !== taxon.isCANative()) { + errorLog.log( + name, + "has different nativity status in Calflora", + cfIsNative.toString(), + ); + } + + // Check if it is active in Calflora. 
+ const isActive = cfData["Active in Calflora?"]; + if (isActive !== "YES") { + errorLog.log(name, "is not active in Calflora", isActive); + } + + // Check Jepson IDs. + const cfJepsonID = cfData.TJMTID; + if (cfJepsonID !== taxon.getJepsonID()) { + if ( + !exceptions.hasException(name, "calflora", "badjepsonid") && + !exceptions.hasException(name, "calflora", "notintaxondata") + ) { + errorLog.log( + name, + "Jepson ID in Calflora is different than taxa.csv", + cfJepsonID, + taxon.getJepsonID(), + ); + } + } + + // Check Calflora ID. + const cfID = cfData["Calrecnum"]; + if (cfID !== taxon.getCalfloraID()) { + errorLog.log( + name, + "Calflora ID in Calflora is different than taxa.csv", + cfID, + taxon.getCalfloraID(), + ); + } + } + + Calflora.#checkExceptions(taxa, exceptions, errorLog); + } + + /** + * @param {Taxa} taxa + * @param {import("../exceptions.js").Exceptions} exceptions + * @param {ErrorLog} errorLog + */ + static #checkExceptions(taxa, exceptions, errorLog) { + // Check the Calflora exceptions and make sure they still apply. + for (const [name, v] of exceptions.getExceptions()) { + const exceptions = v.calflora; + if (!exceptions) { + continue; + } + + // Make sure the taxon is still in our list. + const taxon = taxa.getTaxon(name); + if (!taxon) { + // Don't process global exceptions if taxon is not in local list. + if (taxa.isSubset() && !v.local) { + continue; + } + errorLog.log( + name, + "has Calflora exceptions but not in Taxa collection", + ); + continue; + } + + for (const [k] of Object.entries(exceptions)) { + const cfData = Calflora.#taxa[taxon.getCalfloraName()]; + switch (k) { + case "badjepsonid": { + // Make sure Jepson ID is still wrong; cfData is undefined when the taxon is not in the Calflora files. 
+ const cfID = cfData?.TJMTID; + const jepsID = taxon.getJepsonID(); + if (cfID === jepsID) { + errorLog.log( + name, + "has Calflora badjepsonid exception but IDs are the same", + ); + } + break; + } + case "native": + break; + case "notintaxondata": + if (cfData) { + errorLog.log( + name, + "found in Calflora data but has notintaxondata exception", + ); + } + break; + default: + errorLog.log( + name, + "unrecognized Calflora exception", + k, + ); + } + } + } + } +} + +export { Calflora }; diff --git a/package.json b/package.json index ffedf6e..736379d 100644 --- a/package.json +++ b/package.json @@ -16,7 +16,8 @@ "bin": { "ca-plant-list": "scripts/build-site.js", "ca-plant-book": "scripts/build-ebook.js", - "cpl-photos": "scripts/cpl-photos.js" + "cpl-photos": "scripts/cpl-photos.js", + "cpl-tools": "scripts/cpl-tools.js" }, "dependencies": { "archiver": "^5.3.1", diff --git a/schemas/exceptions.schema.json b/schemas/exceptions.schema.json index 57daf6b..9218c97 100644 --- a/schemas/exceptions.schema.json +++ b/schemas/exceptions.schema.json @@ -9,6 +9,7 @@ "badjepsonid": { "const": true }, + "native": { "type": "boolean" }, "notintaxondata": { "const": true } @@ -54,4 +55,4 @@ }, "additionalProperties": false } -} \ No newline at end of file +} diff --git a/scripts/cpl-tools.js b/scripts/cpl-tools.js new file mode 100644 index 0000000..64133cd --- /dev/null +++ b/scripts/cpl-tools.js @@ -0,0 +1,162 @@ +#!/usr/bin/env node + +import * as path from "node:path"; +import { Option } from "commander"; +import { Taxa } from "../lib/taxa.js"; +import { Program } from "../lib/program.js"; +import { Calflora } from "../lib/tools/calflora.js"; +import { Exceptions } from "../lib/exceptions.js"; +import { ErrorLog } from "../lib/errorlog.js"; + +const TOOLS = { + CALFLORA: "calflora", + INAT: "inat", + JEPSON_EFLORA: "jepson-eflora", + JEPSON_FAM: "jepson-families", + RPI: "rpi", + TEXT: "text", +}; + +const ALL_TOOLS = [ + TOOLS.CALFLORA, + TOOLS.INAT, 
+ TOOLS.JEPSON_EFLORA, + TOOLS.RPI, + TOOLS.TEXT, +]; + +const OPT_LOADER = "loader"; +const OPT_TOOL = "tool"; + +const TOOLS_DATA_DIR = "./external_data"; + +/** + * @param {import("commander").Command} program + * @param {import("commander").OptionValues} options + */ +async function build(program, options) { + let tools = options[OPT_TOOL]; + if (!tools) { + program.help(); + } + if (tools.includes("all")) { + tools = ALL_TOOLS; + } + + const exceptions = new Exceptions(options.datadir); + // const config = new Config(options.datadir); + const taxa = await getTaxa(options); + + const errorLog = new ErrorLog(options.outputdir + "/log.tsv", true); + for (const tool of tools) { + switch (tool) { + case TOOLS.CALFLORA: + await Calflora.analyze( + TOOLS_DATA_DIR, + taxa, + exceptions, + errorLog, + ); + break; + case TOOLS.INAT: + // await INat.analyze( + // TOOLS_DATA_DIR, + // taxa, + // exceptions, + // errorLog, + // options.inTaxafile, + // ); + break; + case TOOLS.JEPSON_EFLORA: { + // const eflora = new JepsonEFlora( + // TOOLS_DATA_DIR, + // taxa, + // errorLog, + // options.efLognotes, + // ); + // await eflora.analyze(exceptions); + break; + } + case TOOLS.JEPSON_FAM: + // await JepsonFamilies.build(TOOLS_DATA_DIR, options.outputdir); + break; + case TOOLS.RPI: + // await RPI.analyze( + // TOOLS_DATA_DIR, + // taxa, + // config, + // exceptions, + // errorLog, + // ); + break; + case TOOLS.TEXT: + // SupplementalText.analyze(taxa, errorLog); + break; + default: + console.log("unrecognized tool: " + tool); + return; + } + } + + errorLog.write(); +} + +/** + * @param {import("commander").OptionValues} options + */ +async function getTaxa(options) { + const errorLog = new ErrorLog(options.outputdir + "/errors.tsv", true); + + const loader = options[OPT_LOADER]; + let taxa; + if (loader) { + const taxaLoaderClass = await import("file:" + path.resolve(loader)); + taxa = await taxaLoaderClass.TaxaLoader.loadTaxa(options, errorLog); + } else { + taxa = new Taxa( 
+ Program.getIncludeList(options.datadir), + errorLog, + options.showFlowerErrors, + ); + } + + errorLog.write(); + return taxa; +} + +const program = Program.getProgram(); +program.addOption( + new Option( + "-t, --tool <tool...>", + "The tools to run. Value may be any subset of the tools below.", + ).choices(["all"].concat(ALL_TOOLS).concat(TOOLS.JEPSON_FAM)), +); +program.option( + "--in-taxafile <file>", + "The name of the file containing the iNaturalist taxa. Can be used for testing on a smaller subset of the iNaturalist data.", + "inat_taxa.csv", +); +program.option( + "--ef-lognotes", + "When running the jepson-eflora tool, include eFlora notes, invalid names, etc. in the log file.", +); +program.option( + "--loader <path>", + "The path (relative to the current directory) of the JavaScript file containing the TaxaLoader class. If not provided, the default TaxaLoader will be used.", +); +program.addHelpText( + "after", + ` +Tools: + 'all' runs the 'calflora', 'inat', 'jepson-eflora', 'rpi', and 'text' tools. + '${TOOLS.CALFLORA}' retrieves data from Calflora and compares with local data. + '${TOOLS.INAT}' retrieves data from iNaturalist and compares with local data. + '${TOOLS.JEPSON_EFLORA}' retrieves data from Jepson eFlora indexes and compares with local data. + '${TOOLS.JEPSON_FAM}' retrieves section, family and genus data from Jepson eFlora and creates data files for use by ca-plant-list. + '${TOOLS.RPI}' retrieves data from the CNPS Rare Plant Inventory and compares with local data. + '${TOOLS.TEXT}' checks supplemental text files to make sure their names are referenced. 
+ `, +); +program.action((options) => build(program, options)); + +await program.parseAsync(); diff --git a/types/classes.d.ts b/types/classes.d.ts index 3e4b1c8..2ccdbe3 100644 --- a/types/classes.d.ts +++ b/types/classes.d.ts @@ -83,6 +83,7 @@ declare class Taxa { getFlowerColors(): FlowerColor[]; getTaxon(name: string): Taxon; getTaxonList(): Taxon[]; + isSubset(): boolean; } declare class TaxaCol { @@ -96,6 +97,7 @@ declare class Taxon { getBaseFileName(): string; getBloomEnd(): number | undefined; getBloomStart(): number | undefined; + getCalfloraID(): string; getCalfloraName(): string; getCalfloraTaxonLink(): string | undefined; getCESA(): string | undefined; @@ -123,6 +125,7 @@ declare class Taxon { getRPITaxonLink(): string; getStatusDescription(config: Config): string; getSynonyms(): string[]; + isCANative(): boolean; isNative(): boolean; }