From aa553807691c528b30c69b2d388fb5b765becce5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philippe=20Rivi=C3=A8re?= Date: Tue, 17 Sep 2024 12:02:29 +0200 Subject: [PATCH 1/5] a $FILE_SERVER that tracks dependencies in the cache (rebased after #1662) --- src/fileWatchers.ts | 74 ++++++++------ src/loader.ts | 97 ++++++++++++++++--- src/preview.ts | 8 +- test/input/build/chain/chain-source.json.sh | 1 + test/input/build/chain/chain.json.ts | 1 + test/input/build/chain/chain.md | 9 ++ test/input/build/chain/chain1.json.ts | 3 + test/input/build/chain/chain2.csv.ts | 3 + .../build/chain/_file/chain1.550fb08c.json | 4 + .../build/chain/_file/chain2.b1220d22.csv | 3 + .../build/chain/_npm/d3-dsv@3.0.1/cd372fb8.js | 0 .../chain/_observablehq/client.00000001.js | 0 .../chain/_observablehq/runtime.00000002.js | 0 .../chain/_observablehq/stdlib.00000003.js | 0 .../theme-air,near-midnight.00000004.css | 0 test/output/build/chain/chain.html | 61 ++++++++++++ 16 files changed, 221 insertions(+), 43 deletions(-) create mode 100644 test/input/build/chain/chain-source.json.sh create mode 100644 test/input/build/chain/chain.json.ts create mode 100644 test/input/build/chain/chain.md create mode 100644 test/input/build/chain/chain1.json.ts create mode 100644 test/input/build/chain/chain2.csv.ts create mode 100644 test/output/build/chain/_file/chain1.550fb08c.json create mode 100644 test/output/build/chain/_file/chain2.b1220d22.csv create mode 100644 test/output/build/chain/_npm/d3-dsv@3.0.1/cd372fb8.js create mode 100644 test/output/build/chain/_observablehq/client.00000001.js create mode 100644 test/output/build/chain/_observablehq/runtime.00000002.js create mode 100644 test/output/build/chain/_observablehq/stdlib.00000003.js create mode 100644 test/output/build/chain/_observablehq/theme-air,near-midnight.00000004.css create mode 100644 test/output/build/chain/chain.html diff --git a/src/fileWatchers.ts b/src/fileWatchers.ts index c1dcdf15e..63b8dc02b 100644 --- a/src/fileWatchers.ts 
+++ b/src/fileWatchers.ts @@ -1,5 +1,6 @@ import type {FSWatcher} from "node:fs"; -import {watch} from "node:fs"; +import {readFileSync, watch} from "node:fs"; +import {join} from "node:path/posix"; import {isEnoent} from "./error.js"; import {maybeStat} from "./files.js"; import type {LoaderResolver} from "./loader.js"; @@ -11,38 +12,55 @@ export class FileWatchers { static async of(loaders: LoaderResolver, path: string, names: Iterable, callback: (name: string) => void) { const that = new FileWatchers(); const {watchers} = that; + const {root} = loaders; for (const name of names) { - const watchPath = loaders.getWatchPath(resolvePath(path, name)); - if (!watchPath) continue; - let currentStat = await maybeStat(watchPath); - let watcher: FSWatcher; - const index = watchers.length; + const path0 = resolvePath(path, name); + const paths = new Set([path0]); try { - watcher = watch(watchPath, async function watched(type) { - // Re-initialize the watcher on the original path on rename. - if (type === "rename") { - watcher.close(); - try { - watcher = watchers[index] = watch(watchPath, watched); - } catch (error) { - if (!isEnoent(error)) throw error; - console.error(`file no longer exists: ${watchPath}`); + for (const path of JSON.parse( + readFileSync(join(root, ".observablehq", "cache", `${path0}__dependencies`), "utf-8") + )) + paths.add(path); + } catch (error) { + if (!isEnoent(error)) { + throw error; + } + } + + for (const path of paths) { + const watchPath = loaders.getWatchPath(path); + if (!watchPath) continue; + console.warn(watchPath, name); + let currentStat = await maybeStat(watchPath); + let watcher: FSWatcher; + const index = watchers.length; + try { + watcher = watch(watchPath, async function watched(type) { + // Re-initialize the watcher on the original path on rename. 
+ if (type === "rename") { + watcher.close(); + try { + watcher = watchers[index] = watch(watchPath, watched); + } catch (error) { + if (!isEnoent(error)) throw error; + console.error(`file no longer exists: ${watchPath}`); + return; + } + setTimeout(() => watched("change"), 100); // delay to avoid a possibly-empty file return; } - setTimeout(() => watched("change"), 100); // delay to avoid a possibly-empty file - return; - } - const newStat = await maybeStat(watchPath); - // Ignore if the file was truncated or not modified. - if (currentStat?.mtimeMs === newStat?.mtimeMs || newStat?.size === 0) return; - currentStat = newStat; - callback(name); - }); - } catch (error) { - if (!isEnoent(error)) throw error; - continue; + const newStat = await maybeStat(watchPath); + // Ignore if the file was truncated or not modified. + if (currentStat?.mtimeMs === newStat?.mtimeMs || newStat?.size === 0) return; + currentStat = newStat; + callback(name); + }); + } catch (error) { + if (!isEnoent(error)) throw error; + continue; + } + watchers[index] = watcher; } - watchers[index] = watcher; } return that; } diff --git a/src/loader.ts b/src/loader.ts index 643c58af0..b8d2d7fad 100644 --- a/src/loader.ts +++ b/src/loader.ts @@ -1,13 +1,13 @@ import {createHash} from "node:crypto"; import type {FSWatcher, WatchListener, WriteStream} from "node:fs"; -import {createReadStream, existsSync, statSync, watch} from "node:fs"; -import {open, readFile, rename, unlink} from "node:fs/promises"; +import {createReadStream, existsSync, readFileSync, statSync, watch} from "node:fs"; +import {open, readFile, rename, rm, unlink, writeFile} from "node:fs/promises"; import {dirname, extname, join} from "node:path/posix"; import {createGunzip} from "node:zlib"; import {spawn} from "cross-spawn"; import JSZip from "jszip"; import {extract} from "tar-stream"; -import {enoent} from "./error.js"; +import {enoent, isEnoent} from "./error.js"; import {maybeStat, prepareOutput, visitFiles} from "./files.js"; 
import {FileWatchers} from "./fileWatchers.js"; import {formatByteSize} from "./format.js"; @@ -16,6 +16,7 @@ import {findModule, getFileInfo, getLocalModuleHash, getModuleHash} from "./java import type {Logger, Writer} from "./logger.js"; import type {MarkdownPage, ParseOptions} from "./markdown.js"; import {parseMarkdown} from "./markdown.js"; +import {preview} from "./preview.js"; import {getModuleResolver, resolveImportPath} from "./resolvers.js"; import type {Params} from "./route.js"; import {isParameterized, requote, route} from "./route.js"; @@ -51,6 +52,9 @@ const defaultEffects: LoadEffects = { export interface LoadOptions { /** Whether to use a stale cache; true when building. */ useStale?: boolean; + + /** An asset server for chained data loaders. */ + FILE_SERVER?: string; } export interface LoaderOptions { @@ -61,7 +65,7 @@ export interface LoaderOptions { } export class LoaderResolver { - private readonly root: string; + readonly root: string; private readonly interpreters: Map; constructor({root, interpreters}: {root: string; interpreters?: Record}) { @@ -304,7 +308,21 @@ export class LoaderResolver { const info = getFileInfo(this.root, path); if (!info) return createHash("sha256").digest("hex"); const {hash} = info; - return path === name ? 
hash : createHash("sha256").update(hash).update(String(info.mtimeMs)).digest("hex"); + if (path === name) return hash; + const hash2 = createHash("sha256").update(hash).update(String(info.mtimeMs)); + try { + for (const path of JSON.parse( + readFileSync(join(this.root, ".observablehq", "cache", `${name}__dependencies`), "utf-8") + )) { + const info = getFileInfo(this.root, this.getSourceFilePath(path)); + if (info) hash2.update(info.hash).update(String(info.mtimeMs)); + } + } catch (error) { + if (!isEnoent(error)) { + throw error; + } + } + return hash2.digest("hex"); } getOutputFileHash(name: string): string { @@ -417,12 +435,37 @@ abstract class AbstractLoader implements Loader { const outputPath = join(".observablehq", "cache", this.targetPath); const cachePath = join(this.root, outputPath); const loaderStat = await maybeStat(loaderPath); - const cacheStat = await maybeStat(cachePath); - if (!cacheStat) effects.output.write(faint("[missing] ")); - else if (cacheStat.mtimeMs < loaderStat!.mtimeMs) { - if (useStale) return effects.output.write(faint("[using stale] ")), outputPath; - else effects.output.write(faint("[stale] ")); - } else return effects.output.write(faint("[fresh] ")), outputPath; + const paths = new Set([cachePath]); + try { + for (const path of JSON.parse(await readFile(`${cachePath}__dependencies`, "utf-8"))) paths.add(path); + } catch (error) { + if (!isEnoent(error)) { + throw error; + } + } + + const FRESH = 0; + const STALE = 1; + const MISSING = 2; + let status = FRESH; + for (const path of paths) { + const cacheStat = await maybeStat(path); + if (!cacheStat) { + status = MISSING; + break; + } else if (cacheStat.mtimeMs < loaderStat!.mtimeMs) status = Math.max(status, STALE); + } + switch (status) { + case FRESH: + return effects.output.write(faint("[fresh] ")), outputPath; + case STALE: + if (useStale) return effects.output.write(faint("[using stale] ")), outputPath; + effects.output.write(faint("[stale] ")); + break; + case MISSING: + 
effects.output.write(faint("[missing] ")); + break; + } const tempPath = join(this.root, ".observablehq", "cache", `${this.targetPath}.${process.pid}`); const errorPath = tempPath + ".err"; const errorStat = await maybeStat(errorPath); @@ -434,8 +477,17 @@ abstract class AbstractLoader implements Loader { await prepareOutput(tempPath); await prepareOutput(cachePath); const tempFd = await open(tempPath, "w"); + + // Launch a server for chained data loaders. TODO configure host? + const dependencies = new Set(); + const {server} = await preview({root: this.root, verbose: false, hostname: "127.0.0.1", dependencies}); + const address = server.address(); + if (!address || typeof address !== "object") + throw new Error("Couldn't launch server for chained data loaders!"); + const FILE_SERVER = `http://${address.address}:${address.port}/_file/`; + try { - await this.exec(tempFd.createWriteStream({highWaterMark: 1024 * 1024}), {useStale}, effects); + await this.exec(tempFd.createWriteStream({highWaterMark: 1024 * 1024}), {useStale, FILE_SERVER}, effects); await rename(tempPath, cachePath); } catch (error) { await rename(tempPath, errorPath); @@ -443,6 +495,19 @@ abstract class AbstractLoader implements Loader { } finally { await tempFd.close(); } + + const cachedeps = `${cachePath}__dependencies`; + if (dependencies.size) await writeFile(cachedeps, JSON.stringify([...dependencies]), "utf-8"); + else + try { + await rm(cachedeps); + } catch (error) { + if (!isEnoent(error)) throw error; + } + + // TODO: server.close() might be enough? 
+ await new Promise((closed) => server.close(closed)); + return outputPath; })(); command.finally(() => runningCommands.delete(key)).catch(() => {}); @@ -495,8 +560,12 @@ class CommandLoader extends AbstractLoader { this.args = args; } - async exec(output: WriteStream): Promise { - const subprocess = spawn(this.command, this.args, {windowsHide: true, stdio: ["ignore", output, "inherit"]}); + async exec(output: WriteStream, {FILE_SERVER}): Promise { + const subprocess = spawn(this.command, this.args, { + windowsHide: true, + stdio: ["ignore", output, "inherit"], + env: {...process.env, FILE_SERVER} + }); const code = await new Promise((resolve, reject) => { subprocess.on("error", reject); subprocess.on("close", resolve); diff --git a/src/preview.ts b/src/preview.ts index 65c860fd5..0084f1ef9 100644 --- a/src/preview.ts +++ b/src/preview.ts @@ -45,6 +45,7 @@ export interface PreviewOptions { port?: number; origins?: string[]; verbose?: boolean; + dependencies?: Set; } export async function preview(options: PreviewOptions): Promise { @@ -58,19 +59,22 @@ export class PreviewServer { private readonly _server: ReturnType; private readonly _socketServer: WebSocketServer; private readonly _verbose: boolean; + private readonly dependencies: Set | undefined; private constructor({ config, root, origins = [], server, - verbose + verbose, + dependencies }: { config?: string; root?: string; origins?: string[]; server: Server; verbose: boolean; + dependencies?: Set; }) { this._config = config; this._root = root; @@ -80,6 +84,7 @@ export class PreviewServer { this._server.on("request", this._handleRequest); this._socketServer = new WebSocketServer({server: this._server}); this._socketServer.on("connection", this._handleConnection); + this.dependencies = dependencies; } static async start({verbose = true, hostname, port, open, ...options}: PreviewOptions) { @@ -172,6 +177,7 @@ export class PreviewServer { } throw enoent(path); } else if (pathname.startsWith("/_file/")) { + if 
(this.dependencies) this.dependencies.add(pathname.slice("/_file".length)); send(req, await loaders.loadFile(pathname.slice("/_file".length)), {root}).pipe(res); } else { if ((pathname = normalize(pathname)).startsWith("..")) throw new Error("Invalid path: " + pathname); diff --git a/test/input/build/chain/chain-source.json.sh b/test/input/build/chain/chain-source.json.sh new file mode 100644 index 000000000..3d99dd10c --- /dev/null +++ b/test/input/build/chain/chain-source.json.sh @@ -0,0 +1 @@ +echo '{"x": 3}' diff --git a/test/input/build/chain/chain.json.ts b/test/input/build/chain/chain.json.ts new file mode 100644 index 000000000..911822172 --- /dev/null +++ b/test/input/build/chain/chain.json.ts @@ -0,0 +1 @@ +console.log(JSON.stringify(process.env.address, null, 2)); \ No newline at end of file diff --git a/test/input/build/chain/chain.md b/test/input/build/chain/chain.md new file mode 100644 index 000000000..2356a792e --- /dev/null +++ b/test/input/build/chain/chain.md @@ -0,0 +1,9 @@ +# Chained data loaders + +```js +FileAttachment("chain1.json").json() +``` + +```js +FileAttachment("chain2.csv").csv({typed: true}) +``` diff --git a/test/input/build/chain/chain1.json.ts b/test/input/build/chain/chain1.json.ts new file mode 100644 index 000000000..37a628bcf --- /dev/null +++ b/test/input/build/chain/chain1.json.ts @@ -0,0 +1,3 @@ +const {FILE_SERVER} = process.env; +const {x} = await fetch(`${FILE_SERVER}chain-source.json`).then((response) => response.json()); +console.log(JSON.stringify({x, "x^2": x * x}, null, 2)); diff --git a/test/input/build/chain/chain2.csv.ts b/test/input/build/chain/chain2.csv.ts new file mode 100644 index 000000000..7c428b0c6 --- /dev/null +++ b/test/input/build/chain/chain2.csv.ts @@ -0,0 +1,3 @@ +const {FILE_SERVER} = process.env; +const {x} = await fetch(`${FILE_SERVER}chain-source.json`).then((response) => response.json()); +console.log(`name,value\nx,${x}\nx^2,${x * x}`); diff --git 
a/test/output/build/chain/_file/chain1.550fb08c.json b/test/output/build/chain/_file/chain1.550fb08c.json new file mode 100644 index 000000000..82cebd520 --- /dev/null +++ b/test/output/build/chain/_file/chain1.550fb08c.json @@ -0,0 +1,4 @@ +{ + "x": 3, + "x^2": 9 +} diff --git a/test/output/build/chain/_file/chain2.b1220d22.csv b/test/output/build/chain/_file/chain2.b1220d22.csv new file mode 100644 index 000000000..8d5b044f1 --- /dev/null +++ b/test/output/build/chain/_file/chain2.b1220d22.csv @@ -0,0 +1,3 @@ +name,value +x,3 +x^2,9 diff --git a/test/output/build/chain/_npm/d3-dsv@3.0.1/cd372fb8.js b/test/output/build/chain/_npm/d3-dsv@3.0.1/cd372fb8.js new file mode 100644 index 000000000..e69de29bb diff --git a/test/output/build/chain/_observablehq/client.00000001.js b/test/output/build/chain/_observablehq/client.00000001.js new file mode 100644 index 000000000..e69de29bb diff --git a/test/output/build/chain/_observablehq/runtime.00000002.js b/test/output/build/chain/_observablehq/runtime.00000002.js new file mode 100644 index 000000000..e69de29bb diff --git a/test/output/build/chain/_observablehq/stdlib.00000003.js b/test/output/build/chain/_observablehq/stdlib.00000003.js new file mode 100644 index 000000000..e69de29bb diff --git a/test/output/build/chain/_observablehq/theme-air,near-midnight.00000004.css b/test/output/build/chain/_observablehq/theme-air,near-midnight.00000004.css new file mode 100644 index 000000000..e69de29bb diff --git a/test/output/build/chain/chain.html b/test/output/build/chain/chain.html new file mode 100644 index 000000000..c276e2158 --- /dev/null +++ b/test/output/build/chain/chain.html @@ -0,0 +1,61 @@ + + + + +Chained data loaders + + + + + + + + + + + + + + + +
+
+

Chained data loaders

+
+
+
+ +
From e9217b8163ed9f8d8d692a0da106b3df0641a953 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philippe=20Rivi=C3=A8re?= Date: Tue, 17 Sep 2024 14:11:53 +0200 Subject: [PATCH 2/5] dependency tree --- src/fileWatchers.ts | 23 +++++----------------- src/loader.ts | 48 ++++++++++++++++++++++----------------------- 2 files changed, 29 insertions(+), 42 deletions(-) diff --git a/src/fileWatchers.ts b/src/fileWatchers.ts index 63b8dc02b..d6ad12f3d 100644 --- a/src/fileWatchers.ts +++ b/src/fileWatchers.ts @@ -1,8 +1,8 @@ import type {FSWatcher} from "node:fs"; -import {readFileSync, watch} from "node:fs"; -import {join} from "node:path/posix"; +import {watch} from "node:fs"; import {isEnoent} from "./error.js"; import {maybeStat} from "./files.js"; +import {chainDependencies} from "./loader.js"; import type {LoaderResolver} from "./loader.js"; import {resolvePath} from "./path.js"; @@ -14,23 +14,10 @@ export class FileWatchers { const {watchers} = that; const {root} = loaders; for (const name of names) { - const path0 = resolvePath(path, name); - const paths = new Set([path0]); - try { - for (const path of JSON.parse( - readFileSync(join(root, ".observablehq", "cache", `${path0}__dependencies`), "utf-8") - )) - paths.add(path); - } catch (error) { - if (!isEnoent(error)) { - throw error; - } - } - - for (const path of paths) { - const watchPath = loaders.getWatchPath(path); + for (const p of chainDependencies(root, resolvePath(path, name))) { + const watchPath = loaders.getWatchPath(p); if (!watchPath) continue; - console.warn(watchPath, name); + console.warn(watchPath, "for", name); let currentStat = await maybeStat(watchPath); let watcher: FSWatcher; const index = watchers.length; diff --git a/src/loader.ts b/src/loader.ts index b8d2d7fad..86a7cba9e 100644 --- a/src/loader.ts +++ b/src/loader.ts @@ -309,18 +309,10 @@ export class LoaderResolver { if (!info) return createHash("sha256").digest("hex"); const {hash} = info; if (path === name) return hash; - const hash2 = 
createHash("sha256").update(hash).update(String(info.mtimeMs)); - try { - for (const path of JSON.parse( - readFileSync(join(this.root, ".observablehq", "cache", `${name}__dependencies`), "utf-8") - )) { - const info = getFileInfo(this.root, this.getSourceFilePath(path)); - if (info) hash2.update(info.hash).update(String(info.mtimeMs)); - } - } catch (error) { - if (!isEnoent(error)) { - throw error; - } + const hash2 = createHash("sha256"); + for (const p of chainDependencies(this.root, name)) { + const info = getFileInfo(this.root, this.getSourceFilePath(p)); + if (info) hash2.update(info.hash).update(String(info.mtimeMs)); } return hash2.digest("hex"); } @@ -432,29 +424,22 @@ abstract class AbstractLoader implements Loader { let command = runningCommands.get(key); if (!command) { command = (async () => { - const outputPath = join(".observablehq", "cache", this.targetPath); - const cachePath = join(this.root, outputPath); const loaderStat = await maybeStat(loaderPath); - const paths = new Set([cachePath]); - try { - for (const path of JSON.parse(await readFile(`${cachePath}__dependencies`, "utf-8"))) paths.add(path); - } catch (error) { - if (!isEnoent(error)) { - throw error; - } - } - + const paths = chainDependencies(this.root, this.targetPath); const FRESH = 0; const STALE = 1; const MISSING = 2; let status = FRESH; for (const path of paths) { - const cacheStat = await maybeStat(path); + const cachePath = join(this.root, ".observablehq", "cache", path); + const cacheStat = await maybeStat(cachePath); if (!cacheStat) { status = MISSING; break; } else if (cacheStat.mtimeMs < loaderStat!.mtimeMs) status = Math.max(status, STALE); } + const outputPath = join(".observablehq", "cache", this.targetPath); + const cachePath = join(this.root, outputPath); switch (status) { case FRESH: return effects.output.write(faint("[fresh] ")), outputPath; @@ -651,3 +636,18 @@ function formatElapsed(start: number): string { const elapsed = performance.now() - start; return 
`${Math.floor(elapsed)}ms`; } + +export function chainDependencies(root: string, path: string): Set { + const paths = new Set([path]); + for (const path of paths) { + try { + for (const f of JSON.parse(readFileSync(join(root, ".observablehq", "cache", `${path}__dependencies`), "utf-8"))) + paths.add(f); + } catch (error) { + if (!isEnoent(error)) { + throw error; + } + } + } + return paths; +} From 880ada206e394cd466c77e05072a7c1169db0fc3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philippe=20Rivi=C3=A8re?= Date: Tue, 17 Sep 2024 14:44:36 +0200 Subject: [PATCH 3/5] remove spurious log --- src/fileWatchers.ts | 1 - 1 file changed, 1 deletion(-) diff --git a/src/fileWatchers.ts b/src/fileWatchers.ts index d6ad12f3d..4f04d4415 100644 --- a/src/fileWatchers.ts +++ b/src/fileWatchers.ts @@ -17,7 +17,6 @@ export class FileWatchers { for (const p of chainDependencies(root, resolvePath(path, name))) { const watchPath = loaders.getWatchPath(p); if (!watchPath) continue; - console.warn(watchPath, "for", name); let currentStat = await maybeStat(watchPath); let watcher: FSWatcher; const index = watchers.length; From 8997f06e63810664cf25560b422ffb5af573f649 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philippe=20Rivi=C3=A8re?= Date: Tue, 17 Sep 2024 14:45:00 +0200 Subject: [PATCH 4/5] fix chain test --- test/build-test.ts | 1 + test/input/build/chain/chain-source.json.sh | 1 - test/input/build/chain/chain-source.json.ts | 10 ++++++++++ test/input/build/chain/chain1.json.ts | 2 +- test/input/build/chain/chain2.csv.ts | 2 +- .../{chain1.550fb08c.json => chain1.e1f60496.json} | 2 +- .../_file/{chain2.b1220d22.csv => chain2.18991dde.csv} | 2 +- test/output/build/chain/chain.html | 4 ++-- 8 files changed, 17 insertions(+), 7 deletions(-) delete mode 100644 test/input/build/chain/chain-source.json.sh create mode 100644 test/input/build/chain/chain-source.json.ts rename test/output/build/chain/_file/{chain1.550fb08c.json => chain1.e1f60496.json} (92%) rename 
test/output/build/chain/_file/{chain2.b1220d22.csv => chain2.18991dde.csv} (71%) diff --git a/test/build-test.ts b/test/build-test.ts index 2614ec21d..dfa1772d9 100644 --- a/test/build-test.ts +++ b/test/build-test.ts @@ -158,6 +158,7 @@ function* findFiles(root: string): Iterable { visited.add(status.ino); for (const entry of readdirSync(path)) { if (entry === ".DS_Store") continue; // macOS + if (entry === ".ignoreme") continue; // see inputs/build/chain/ queue.push(join(path, entry)); } } else { diff --git a/test/input/build/chain/chain-source.json.sh b/test/input/build/chain/chain-source.json.sh deleted file mode 100644 index 3d99dd10c..000000000 --- a/test/input/build/chain/chain-source.json.sh +++ /dev/null @@ -1 +0,0 @@ -echo '{"x": 3}' diff --git a/test/input/build/chain/chain-source.json.ts b/test/input/build/chain/chain-source.json.ts new file mode 100644 index 000000000..65e4dc5fe --- /dev/null +++ b/test/input/build/chain/chain-source.json.ts @@ -0,0 +1,10 @@ +import {existsSync, writeFileSync} from "node:fs"; +const testFile = "./test/output/build/chain-changed/.ignoreme"; + +const x = existsSync(testFile) ? 
0 : 3; + +try { + writeFileSync(testFile, "—"); +} catch (error) { } + +process.stdout.write(JSON.stringify({ x })); diff --git a/test/input/build/chain/chain1.json.ts b/test/input/build/chain/chain1.json.ts index 37a628bcf..cb069d658 100644 --- a/test/input/build/chain/chain1.json.ts +++ b/test/input/build/chain/chain1.json.ts @@ -1,3 +1,3 @@ const {FILE_SERVER} = process.env; const {x} = await fetch(`${FILE_SERVER}chain-source.json`).then((response) => response.json()); -console.log(JSON.stringify({x, "x^2": x * x}, null, 2)); +process.stdout.write(JSON.stringify({x, "x^2": x * x}, null, 2)); diff --git a/test/input/build/chain/chain2.csv.ts b/test/input/build/chain/chain2.csv.ts index 7c428b0c6..f85d7098a 100644 --- a/test/input/build/chain/chain2.csv.ts +++ b/test/input/build/chain/chain2.csv.ts @@ -1,3 +1,3 @@ const {FILE_SERVER} = process.env; const {x} = await fetch(`${FILE_SERVER}chain-source.json`).then((response) => response.json()); -console.log(`name,value\nx,${x}\nx^2,${x * x}`); +process.stdout.write(`name,value\nx,${x}\nx^2,${x * x}`); diff --git a/test/output/build/chain/_file/chain1.550fb08c.json b/test/output/build/chain/_file/chain1.e1f60496.json similarity index 92% rename from test/output/build/chain/_file/chain1.550fb08c.json rename to test/output/build/chain/_file/chain1.e1f60496.json index 82cebd520..ec829e982 100644 --- a/test/output/build/chain/_file/chain1.550fb08c.json +++ b/test/output/build/chain/_file/chain1.e1f60496.json @@ -1,4 +1,4 @@ { "x": 3, "x^2": 9 -} +} \ No newline at end of file diff --git a/test/output/build/chain/_file/chain2.b1220d22.csv b/test/output/build/chain/_file/chain2.18991dde.csv similarity index 71% rename from test/output/build/chain/_file/chain2.b1220d22.csv rename to test/output/build/chain/_file/chain2.18991dde.csv index 8d5b044f1..32c0040a8 100644 --- a/test/output/build/chain/_file/chain2.b1220d22.csv +++ b/test/output/build/chain/_file/chain2.18991dde.csv @@ -1,3 +1,3 @@ name,value x,3 -x^2,9 +x^2,9 \ No 
newline at end of file diff --git a/test/output/build/chain/chain.html b/test/output/build/chain/chain.html index c276e2158..eb3d0333c 100644 --- a/test/output/build/chain/chain.html +++ b/test/output/build/chain/chain.html @@ -17,8 +17,8 @@ import {define} from "./_observablehq/client.00000001.js"; import {registerFile} from "./_observablehq/stdlib.00000003.js"; -registerFile("./chain1.json", {"name":"./chain1.json","mimeType":"application/json","path":"./_file/chain1.550fb08c.json","lastModified":/* ts */1706742000000,"size":25}); -registerFile("./chain2.csv", {"name":"./chain2.csv","mimeType":"text/csv","path":"./_file/chain2.b1220d22.csv","lastModified":/* ts */1706742000000,"size":21}); +registerFile("./chain1.json", {"name":"./chain1.json","mimeType":"application/json","path":"./_file/chain1.e1f60496.json","lastModified":/* ts */1706742000000,"size":24}); +registerFile("./chain2.csv", {"name":"./chain2.csv","mimeType":"text/csv","path":"./_file/chain2.18991dde.csv","lastModified":/* ts */1706742000000,"size":20}); define({id: "7ecb71dd", inputs: ["FileAttachment","display"], body: async (FileAttachment,display) => { display(await( From ae2ac1ceb87e7dd3fbe1f5941ea9c6a46b0acc09 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philippe=20Rivi=C3=A8re?= Date: Tue, 17 Sep 2024 16:02:51 +0200 Subject: [PATCH 5/5] document "file server" (aka chained data loaders) --- docs/data-loaders.md | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/docs/data-loaders.md b/docs/data-loaders.md index dd8800c61..96edb0770 100644 --- a/docs/data-loaders.md +++ b/docs/data-loaders.md @@ -322,3 +322,27 @@ RuntimeError: Unable to load file: quakes.csv ``` When any data loader fails, the entire build fails. + +## File Server + +Data loaders can request a file from Framework, by querying the `FILE_SERVER` HTTP endpoint indicated in their environment variables. 
For example, a bash data loader `mags.txt.sh` can read `quakes.json` and use [`jq`](https://jqlang.github.io/jq/) to extract the magnitude of recent earthquakes by calling: + +```sh +curl ${FILE_SERVER}quakes.json | jq .features[].properties.mag +``` + +Similarly, the following JavaScript data loader `quakecount.txt.js` will return the number of recent earthquakes: + +```js run=false +const {FILE_SERVER} = process.env; +const quakes = await fetch(`${FILE_SERVER}quakes.json`).then((response) => response.json()); +process.stdout.write(String(quakes.features.length)); +``` + +In the preview server, when `quakes.json` is updated, `mags.txt` and `quakecount.txt` get automatically refreshed. If `quakes.json` is in fact generated by a data loader `quakes.json.sh`, modifying that script live-updates both files. (Even though `quakes.json` is requested by two loaders, the file server ensures that its loader runs only once, ensuring consistency and optimal performance.) The choice of interpreter does not matter: this mechanism allows a Python data loader to talk to a TypeScript one, and vice versa. + +
+ +The `FILE_SERVER` endpoint is typically used to chain data loaders, for example with one loader that downloads a large dataset from the Web or from a database. That payload is then cached, and the data loaders that query it can run various kinds of analysis. Make sure you don’t have circular dependencies, as they will cause the preview server and the build process to hang! Like [archives](#archives), files queried through this endpoint are added to the build only if [statically referenced](./files#static-analysis) by `FileAttachment`. + +