Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: load iNaturalist observation photos for taxa #21

Merged
merged 5 commits into from
Dec 30, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 24 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -25,3 +25,27 @@ node scripts/build-ebook.js

# ebook is at output/ebplants.epub
```

## Usage

```zsh
npm i --save @ca-plant-list/ca-plant-list
```

### Customizing Photos

ca-plant-list loads photos of plants from iNat and maintains lists of photos of all the plants it knows about from both licensed iNat taxon photos and licensed iNat observation photos. If you want to customize the photos that appear on your site, you can add a file of iNat photo data to your local setup. This package provides some tools for doing so. For example, if you wanted to use photos from observations from Alameda and Contra Costa Counties, you could do this:

```zsh
npx node ./node_modules/@ca-plant-list/ca-plant-list/scripts/inatobsphotos.js -fn inatphotos.csv -q "place_id=845,1527"
```

That will create `data/inatphotos.csv` in your repo, populated with photos from verifiable observations of the taxa in `data/taxa.csv`, restricted to the places identified by the `-q` param. It chooses photos from the most-faved observations that carry the "CC BY" or "CC BY-NC" licenses, or the CC0 declaration.

If you want to load from iNat taxon photos instead, you can do

```zsh
npx node ./node_modules/@ca-plant-list/ca-plant-list/scripts/inattaxonphotos.js -fn inatphotos.csv
```

The only reason to do that would be to get more recently-updated iNat taxon photos than ca-plant-list knows about.
9,286 changes: 9,286 additions & 0 deletions data/inatobsphotos.csv

Large diffs are not rendered by default.

64 changes: 45 additions & 19 deletions lib/taxa.js
Original file line number Diff line number Diff line change
Expand Up @@ -104,30 +104,56 @@ class Taxa {

/**
* @param {string} dataDir
* @param {string} filename
*/
#loadInatPhotos(dataDir) {
const photosFileName = "inattaxonphotos.csv";
if (fs.existsSync(path.join(dataDir, photosFileName))) {
/** @type {InatCsvPhoto[]} */
// @ts-ignore
const csvPhotos = CSV.parseFile(dataDir, photosFileName);
for (const csvPhoto of csvPhotos) {
const taxon = this.getTaxon(csvPhoto.name);
if (!taxon) {
continue;
}
taxon.addPhoto(
new InatPhoto(
csvPhoto.id,
csvPhoto.ext,
csvPhoto.licenseCode,
csvPhoto.attrName,
),
);
#loadPhotosFromFile(dataDir, filename) {
if (!fs.existsSync(path.join(dataDir, filename))) return;
/** @type {InatCsvPhoto[]} */
const csvPhotos = CSV.parseFile(dataDir, filename).map( row => {
/** @type {InatLicenseCode} */
let licenseCode = "cc-by";
if (row.licenseCode === "cc-by-nc-sa") licenseCode = "cc-by-nc-sa";
else if (row.licenseCode === "cc-by-nc") licenseCode = "cc-by-nc";
else if (row.licenseCode === "cc-by-nc-nd") licenseCode = "cc-by-nc-nd";
else if (row.licenseCode === "cc-by") licenseCode = "cc-by";
else if (row.licenseCode === "cc-by-sa") licenseCode = "cc-by-sa";
else if (row.licenseCode === "cc-by-nd") licenseCode = "cc-by-nd";
else if (row.licenseCode === "pd") licenseCode = "pd";
else if (row.licenseCode === "gdfl") licenseCode = "gdfl";
else if (row.licenseCode === "cc0") licenseCode = "cc0";
return {
attrName: row.attrName,
ext: row.ext,
id: Number( row.id ),
licenseCode,
name: row.name
};
} );
for (const csvPhoto of csvPhotos) {
const taxon = this.getTaxon(csvPhoto.name);
if (!taxon) {
continue;
}
taxon.addPhoto(
new InatPhoto(
csvPhoto.id,
csvPhoto.ext,
csvPhoto.licenseCode,
csvPhoto.attrName,
),
);
}
}

/**
 * Load iNaturalist photos for the known taxa from up to three CSV files.
 * Each file is optional: the loader returns without doing anything when a
 * file does not exist.
 *
 * @param {string} dataDir directory holding the packaged photo CSVs
 */
#loadInatPhotos(dataDir) {
// Site-specific photo file, read from "./data" relative to the current
// working directory rather than from dataDir.
// NOTE(review): presumably loaded first so locally chosen photos come before
// the packaged ones — confirm against Taxon.addPhoto / getPhotos ordering.
this.#loadPhotosFromFile( "./data", "inatphotos.csv" );
// Packaged taxon photos, then packaged observation photos.
this.#loadPhotosFromFile( dataDir, "inattaxonphotos.csv" );
this.#loadPhotosFromFile( dataDir, "inatobsphotos.csv" );
}

getFamilies() {
return this.#families;
}
Expand Down
8 changes: 4 additions & 4 deletions lib/utils/inat-tools.js
Original file line number Diff line number Diff line change
Expand Up @@ -55,10 +55,10 @@ export async function getTaxonPhotos(taxaToUpdate) {
/** @type {InatPhotoInfo[]} */
const taxonPhotos = [];
for (const taxonPhoto of iNatTaxonPhotos) {
const ext = taxonPhoto.photo.medium_url.split(".").at(-1);
if (!ext) {
continue;
}
const url = taxonPhoto.photo.medium_url || taxonPhoto.photo.url;
if (!url) continue;
const ext = url.split(".").at(-1);
if (!ext) continue;
/** @type {InatPhotoInfo} */
const obj = {
id: taxonPhoto.photo.id.toString(),
Expand Down
2 changes: 1 addition & 1 deletion lib/web/pagetaxon.js
Original file line number Diff line number Diff line change
Expand Up @@ -189,7 +189,7 @@ class PageTaxon extends GenericPage {

html += HTMLTaxon.getFooterHTML(this.#taxon);

const photos = this.#taxon.getPhotos();
const photos = this.#taxon.getPhotos().slice( 0, 5 );
if (photos.length > 0) {
let photosHtml = "";
for (const photo of photos) {
Expand Down
Empty file modified scripts/build-ebook.js
100644 → 100755
Empty file.
Empty file modified scripts/build-site.js
100644 → 100755
Empty file.
Empty file modified scripts/cpl-photos.js
100644 → 100755
Empty file.
Empty file modified scripts/cpl-tools.js
100644 → 100755
Empty file.
119 changes: 119 additions & 0 deletions scripts/inatobsphotos.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,119 @@
#!/usr/bin/env node

import fs from "fs";
import cliProgress from "cli-progress";
import { stringify } from "csv-stringify";
import path from "path";

import { ErrorLog } from "../lib/errorlog.js";
import { Program } from "../lib/program.js";
import { Taxa } from "../lib/taxa.js";
import { sleep } from "../lib/util.js";

// License codes a photo must carry to be written to the CSV.
// While the products of this data will probably be non-commercial, it's not
// clear how they'll be licensed, so the ShareAlike clause is out, and they'll
// probably be derivative works, so the "No Derivatives" clause should be
// respected. That leaves CC0, CC BY and CC BY-NC.
const ALLOWED_LICENSE_CODES = [
"cc0", "cc-by", "cc-by-nc"
];

// File name used inside the "data" directory when --filename is not given.
const DEFAULT_FILENAME = "inatobsphotos.csv";

/**
 * Fetch up to five of the most-voted observations of a taxon from the
 * iNaturalist v2 API, limited to observations with photos under one of the
 * ALLOWED_LICENSE_CODES. Only the photo url, attribution and license code
 * fields are requested.
 *
 * @param {Taxon} taxon
 * @param {InatObsPhotosCommandLineOptions} options `inatObsQuery`, when set,
 *   is appended verbatim to the query string (e.g. "place_id=845,1527")
 * @return {Promise<InatApiObservation[]>} the observations, or an empty
 *   array when the taxon has no iNat ID or the response has no results
 * @throws {Error} when the API responds with a non-OK status
 */
async function fetchObservationsForTaxon( taxon, options ) {
    const inatTaxonId = taxon.getINatID( );
    // Without an iNat ID there is nothing to query, so skip the request.
    if ( !inatTaxonId ) return [];
    let url = `https://api.inaturalist.org/v2/observations/?taxon_id=${inatTaxonId}`
        + "&photo_license=" + ALLOWED_LICENSE_CODES.join( "," )
        + "&order=desc"
        + "&order_by=votes"
        + "&per_page=5"
        + "&fields=(observation_photos:(photo:(url:!t,attribution:!t,license_code:!t)))";
    // Tolerate a query pasted with its leading "?" or "&", and ignore an
    // empty one, so the URL never ends up with "&&" or a dangling "&".
    const extraQuery = typeof ( options.inatObsQuery ) === "string"
        ? options.inatObsQuery.replace( /^[?&]+/, "" )
        : "";
    if ( extraQuery.length > 0 ) {
        url += `&${extraQuery}`;
    }
    const resp = await fetch( url );
    if ( !resp.ok ) {
        const error = await resp.text();
        throw new Error(
            `Failed to fetch observations from iNat (HTTP ${resp.status}): ${error}`
        );
    }
    const json = await resp.json();
    // Callers map over the result, so never hand back undefined.
    return json.results ?? [];
}

/**
 * Write a CSV of iNaturalist observation photos for every taxon in the
 * include list. For each taxon the most-voted observations are fetched and
 * up to five allowed-license photos are written as rows of
 * name, id, ext, licenseCode, attrName.
 *
 * The file is written to the "data" directory under `options.filename`
 * (DEFAULT_FILENAME when not given). Requests are made one taxon at a time
 * with a pause between them.
 *
 * @param {InatObsPhotosCommandLineOptions} options
 */
async function getObsPhotos( options ) {
    console.log('[inatobsphotos.js] options', options);
    const errorLog = new ErrorLog(options.outputdir + "/errors.tsv");
    const taxa = new Taxa(
        Program.getIncludeList(options.datadir),
        errorLog,
        false
    );
    const targetTaxa = taxa.getTaxonList( );

    const filename = path.join("data", options.filename || DEFAULT_FILENAME);
    const writableStream = fs.createWriteStream( filename );
    const columns = [
        "name",
        "id",
        "ext",
        "licenseCode",
        "attrName",
    ];
    const stringifier = stringify( { header: true, columns: columns } );
    stringifier.pipe(writableStream);
    const prog = new cliProgress.SingleBar({
        format: "Downloading [{bar}] {percentage}% | ETA: {eta}s | {value}/{total}",
        etaBuffer: targetTaxa.length
    });
    prog.setMaxListeners( 100 );
    prog.start( targetTaxa.length, 0 );

    try {
        for ( const taxon of targetTaxa ) {
            prog.increment( );
            const observations = await fetchObservationsForTaxon( taxon, options );
            // Just get the CC-licensed ones, 5 per taxon should be fine (max
            // is 20 on iNat). An observation can carry several photos, so the
            // photos are flattened and re-checked against the allowed
            // licenses before the cap is applied.
            const photos = observations
                .flatMap( obs => obs.observation_photos.map( op => op.photo ) )
                .filter( photo => ALLOWED_LICENSE_CODES.includes( photo.license_code ) )
                .slice( 0, 5 );
            for ( const photo of photos ) {
                const row = [
                    taxon.getName(),
                    photo.id,
                    String( photo.url ).split( "." ).at( -1 ),
                    // Need the license code to do attribution properly
                    photo.license_code,
                    // Photographers retain copyright for most CC licenses,
                    // except CC0, so attribution is a bit different
                    (
                        photo.attribution.match( /\(c\) (.*?),/ )?.[1]
                        || photo.attribution.match( /uploaded by (.*)/ )?.[1]
                    )
                ];
                stringifier.write( row );
            }
            // Pause between requests so the API is not hit in a tight loop.
            await sleep( 1_100 );
        }
    } finally {
        // Always restore the terminal and close the CSV, even when a request
        // fails part-way through: ending the stringifier flushes the rows
        // written so far and, through the pipe, closes the output file.
        prog.stop();
        stringifier.end();
    }
}

// Command-line entry point: register getObsPhotos as the action of the shared
// program object and add this script's own options.
const program = Program.getProgram();
program.action(getObsPhotos).description( "Write a CSV to datadir with iNaturalist observation photos" )
.option(
// Read by fetchObservationsForTaxon as options.inatObsQuery.
"-q, --inat-obs-query <query>",
"Additional iNat observations API query terms to add, e.g. place_id=1234&d1=2020-01-01"
)
.option(
// Read by getObsPhotos as options.filename.
"-fn, --filename <filename>",
"Name of file to write to the data dir"
)

// Parse the command line and wait for the async action to finish.
await program.parseAsync();
14 changes: 10 additions & 4 deletions scripts/inattaxonphotos.js
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@ const ALLOWED_LICENSE_CODES = [
"cc0", "cc-by", "cc-by-nc"
];

const DEFAULT_FILENAME = "inattaxonphotos.csv";

/**
* @param {Taxon[]} taxa
* @return {Promise<InatApiTaxon[]>}
Expand All @@ -35,7 +37,7 @@ async function fetchInatTaxa( taxa ) {
}

/**
* @param {CommandLineOptions} options
* @param {InatTaxonPhotosCommandLineOptions} options
*/
async function getTaxonPhotos( options ) {
const errorLog = new ErrorLog(options.outputdir + "/errors.tsv");
Expand All @@ -46,7 +48,7 @@ async function getTaxonPhotos( options ) {
);
const targetTaxa = taxa.getTaxonList( );

const filename = path.join( "data", "inattaxonphotos.csv" );
const filename = path.join("data", options.filename || DEFAULT_FILENAME);
const writableStream = fs.createWriteStream( filename );
const columns = [
"name",
Expand Down Expand Up @@ -80,7 +82,7 @@ async function getTaxonPhotos( options ) {
const row = [
taxon.getName(),
taxonPhoto.photo.id,
taxonPhoto.photo.medium_url.split( "." ).at( -1 ),
String( taxonPhoto.photo.medium_url ).split( "." ).at( -1 ),
// Need the license code to do attribution properly
taxonPhoto.photo.license_code,
// Photographers retain copyright for most CC licenses,
Expand All @@ -101,6 +103,10 @@ async function getTaxonPhotos( options ) {
}

const program = Program.getProgram();
program.action(getTaxonPhotos).description( "Write a CSV to datadir with iNaturalist taxon photos" );
program.action(getTaxonPhotos).description( "Write a CSV to datadir with iNaturalist taxon photos" )
.option(
"-fn, --filename <filename>",
"Name of file to write to the data dir"
);

await program.parseAsync();
30 changes: 24 additions & 6 deletions types/classes.d.ts
Original file line number Diff line number Diff line change
Expand Up @@ -201,14 +201,32 @@ declare class InatCsvPhoto {
attrName: string;
}

/**
 * A photo record as it appears in iNaturalist API responses, shared by
 * taxon photos and observation photos.
 */
declare class InatApiPhoto {
id: number;
// Attribution text; the photo scripts extract the photographer name from it.
attribution: string;
license_code: InatLicenseCode;
// Either URL may be absent; callers fall back from medium_url to url.
medium_url?: string;
url?: string;
}

declare class InatApiTaxon {
id: number;
taxon_photos: {
photo: {
id: number;
attribution: string;
license_code: InatLicenseCode;
medium_url: string;
};
photo: InatApiPhoto;
}[];
}

/**
 * An observation from the iNaturalist observations API, reduced to the one
 * field the photo scripts read: its list of photos.
 */
declare class InatApiObservation {
observation_photos: {
photo: InatApiPhoto;
}[]
}

/** Command-line options accepted by scripts/inatobsphotos.js. */
declare class InatObsPhotosCommandLineOptions extends CommandLineOptions {
// Output CSV name inside the "data" directory (--filename).
filename?: string;
// Extra observations API query terms appended to each request (--inat-obs-query).
inatObsQuery?: string;
}

/** Command-line options accepted by scripts/inattaxonphotos.js. */
declare class InatTaxonPhotosCommandLineOptions extends CommandLineOptions {
// Output CSV name inside the "data" directory (--filename).
filename?: string;
}
Loading