
Cache main page database query #50

Merged · 13 commits · Apr 23, 2024
14 changes: 10 additions & 4 deletions README.md
@@ -8,25 +8,31 @@ The primary purpose of this project is intended to be used as an accessibility a

Install dependencies with `pnpm install`

To set up the local database, run `pnpm run init`. It may take a minute to fetch all the data.
To set up the local database, run `pnpm init`. It may take a minute to fetch all the data.

Then, to open your local development environment, run

```bash
pnpm run dev
pnpm dev

# or start the server and open the app in a new browser tab
pnpm run dev -- --open
pnpm dev -- --open
```

## Worker

Every day, the worker runs via a cron trigger and collects data on the previous day's usage of alt text.

To test the scheduled worker, run `pnpm run worker`. You can go to <http://localhost:8787/__scheduled> to trigger the worker task.
To test the scheduled worker, run `pnpm worker`. You can go to <http://localhost:8787/__scheduled> to trigger the worker task.

To test daily Slack notifications on the `alt-text-tracker` channel, create a `.dev.vars` file in the root of the repository and add `SLACK_WEBHOOK=<your-webhook-here>`.
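For example, a minimal `.dev.vars` looks like this (the value is a placeholder for your own webhook URL):

```
SLACK_WEBHOOK=<your-webhook-here>
```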

## Cache

Database reads for the main page are cached on first load, and the cached entry is invalidated when the worker runs or after at most 24 hours.
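In outline, the helpers in `src/lib/storage.ts` are used like this (a hypothetical sketch, not the literal call sites; `caches` is the Workers Cache API available on Cloudflare Pages):

```ts
import { D1CacheName, cacheGet, cachePut, cacheInvalidate } from '$lib/storage';
import type { CacheResponse } from '$lib/types';

// Open the named cache that backs the main page.
const cache = await caches.open(D1CacheName);
const origin = 'https://michigan-daily-alt-text-tracker.pages.dev';

// Read-through: reuse a cached payload when one exists, otherwise cache the fresh query result.
const cached = (await cacheGet(origin, cache)) as CacheResponse | undefined;
if (!cached) {
  // Stand-in for the D1 query result that the real load function stores.
  await cachePut(origin, cache, { entries: [], after: '2024-03-23' });
}

// After a write (or when the worker runs), drop the entry so the next read refetches from D1.
await cacheInvalidate([origin], cache);
```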

Caching is not emulated in the development environment. To test the page with cache emulation, run `pnpm emulate`. Each time you make a change, you may need to wait a few seconds for SvelteKit to rebuild.

## Deployment

To deploy changes to the worker, run `npx wrangler deploy`. To change or add a new webhook or secret environment variable, run `npx wrangler secret put <KEY>`.
1 change: 1 addition & 0 deletions package.json
@@ -4,6 +4,7 @@
"private": true,
"scripts": {
"worker": "npx wrangler dev --env local --test-scheduled",
"emulate": "vite build -w & npx wrangler pages dev .svelte-kit/cloudflare --live-reload --local --binding PRODUCTION=false --d1 DB=DB",
"init": "./bin/init",
"update": "./bin/update",
"clear": "./bin/clear",
1 change: 1 addition & 0 deletions src/app.d.ts
@@ -8,6 +8,7 @@ declare global {
interface Platform {
env: {
DB: D1Database;
PRODUCTION: string;
};
context: {
waitUntil(promise: Promise<unknown>): void;
23 changes: 23 additions & 0 deletions src/lib/storage.ts
@@ -0,0 +1,23 @@
import { secondsBeforeCronTrigger } from './time';
import type { CacheResponse } from './types';

export const D1CacheName = 'd1-michigan-daily-alt-text-tracker';
export const url = 'https://michigan-daily-alt-text-tracker.pages.dev';

export const cachePut = async (url: URL | string, cache: Cache, response: CacheResponse) => {
const entry = new Response(JSON.stringify(response));
// Expire the cached entry shortly after the next scheduled worker run.
entry.headers.append('Cache-Control', `s-maxage=${secondsBeforeCronTrigger()}`);

await cache.put(url, entry);
};

export const cacheGet = async (url: URL | string, cache: Cache) => {
const response = await cache.match(url);
const entry = await response?.json();

return entry;
};

export const cacheInvalidate = async (urls: (URL | string)[], cache: Cache) => {
return Promise.all(urls.map((url) => cache.delete(url)));
};
16 changes: 12 additions & 4 deletions src/lib/time.ts
@@ -1,17 +1,25 @@
import type { DateStringOptions } from "./types";

export const lastWeek = new Date(new Date().getTime() - 7 * 24 * 60 * 60 * 1000);
export const lastMonth = new Date(new Date().getTime() - 31 * 24 * 60 * 60 * 1000);
export const lastSixMonths = new Date(new Date().getTime() - 31 * 24 * 60 * 60 * 1000 * 6);
export const lastYear = new Date(new Date().getTime() - 365 * 24 * 60 * 60 * 1000);
export const all = new Date('2022-12-31');

export function formatISODate(date: string, { weekday, year, month, day }: DateStringOptions) {
export function formatISODate(
date: string,
{ weekday, year, month, day }: Intl.DateTimeFormatOptions
) {
return new Date(date).toLocaleDateString('en-us', {
timeZone: "UTC",
timeZone: 'UTC',
weekday,
year,
month,
day
});
}

export function secondsBeforeCronTrigger() {
// Seconds from now until the next 16:01 UTC, used as the cache TTL so entries
// expire shortly after the daily cron trigger runs.
const date = new Date();
if (date.getUTCHours() >= 16) date.setUTCDate(date.getUTCDate() + 1);
date.setUTCHours(16, 1, 0, 0);
return (date.getTime() - new Date().getTime()) / 1000;
}
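For concreteness, a standalone sketch of the same computation (hypothetical helper name, not part of this PR):

```ts
// Mirrors secondsBeforeCronTrigger: seconds from `now` until the next 16:01 UTC.
function secondsUntilNext1601UTC(now: Date): number {
  const target = new Date(now);
  if (target.getUTCHours() >= 16) target.setUTCDate(target.getUTCDate() + 1);
  target.setUTCHours(16, 1, 0, 0);
  return (target.getTime() - now.getTime()) / 1000;
}

console.log(secondsUntilNext1601UTC(new Date('2024-04-23T15:30:00Z'))); // 1860 (31 minutes)
console.log(secondsUntilNext1601UTC(new Date('2024-04-23T17:00:00Z'))); // 82860 (next day at 16:01 UTC)
```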
9 changes: 4 additions & 5 deletions src/lib/types.d.ts
@@ -42,9 +42,8 @@ export interface Block {
innerBlocks: Array<Block>;
}

interface DateStringOptions {
weekday: 'long' | 'short' | 'narrow' | undefined;
year: 'numeric' | '2-digit' | undefined;
month: 'long' | 'short' | 'narrow' | 'numeric' | '2-digit' | undefined;
day: 'numeric' | '2-digit' | undefined;
export interface CacheResponse {
entries: ArticleEntry[];
after?: string;
before?: string;
}
45 changes: 39 additions & 6 deletions src/routes/+page.server.ts
@@ -1,16 +1,49 @@
import type { PageServerLoad } from './$types';
import type { ArticleEntry } from '$lib/types';

import { D1CacheName, cacheGet, cachePut } from '$lib/storage';
import type { ArticleEntry, CacheResponse } from '$lib/types';
import { lastMonth } from '$lib/time';

import { error } from '@sveltejs/kit';

export const load: PageServerLoad = async ({ platform, url }) => {
const after = url.searchParams.get('after') ?? lastMonth.toISOString().split('T')[0];
if (platform === undefined) {
error(400, { message: 'Platform is undefined' });
}

const after = url.searchParams.get('after') ?? lastMonth.toISOString().split('T')[0];
const cache = await platform.caches.open(D1CacheName);

const cacheEntry = (await cacheGet(url.origin, cache)) as CacheResponse | undefined;

const resp = await platform?.env.DB.prepare(
if (cacheEntry && cacheEntry.after! <= after) {
return {
entries: cacheEntry.entries,
after,
cached: true
};
}

const response = await platform.env.DB.prepare(
'SELECT date, images_published, images_published_with_alt_text, categories FROM articles WHERE date > ?'
).bind(after).all();
)
.bind(after)
.all();

if (response.error) {
error(400, { message: response.error });
}

platform.context.waitUntil(
cachePut(url.origin, cache, {
entries: response.results as ArticleEntry[] | [],
after
})
);

return {
entries: resp?.results as Array<ArticleEntry> | [],
after
entries: response.results as ArticleEntry[] | [],
after,
cached: false
};
};
7 changes: 5 additions & 2 deletions src/routes/+page.svelte
@@ -8,7 +8,9 @@
import { lastWeek, lastMonth, lastSixMonths, lastYear, all } from '$lib/time.js';

export let data;
$: entries = data?.entries;
$: console.log("D1 Cached Status:", data.cached);

$: entries = data.entries;

let timerange = data.after ? new Date(data.after) : lastMonth;
$: category = null;
@@ -20,7 +22,8 @@

$: tidy = entries.filter(
(entry) =>
new Date(entry.date) >= timerange && (category ? JSON.parse(entry.categories).includes(category) : true)
new Date(entry.date) >= timerange &&
(category ? JSON.parse(entry.categories).includes(category) : true)
);

$: index = d3.rollup(
13 changes: 9 additions & 4 deletions src/routes/posts/+page.server.ts
@@ -4,6 +4,7 @@ import type { PageServerLoad } from './$types';

import { parseContent } from '$lib/parse';
import type { Article } from '$lib/types';
import { D1CacheName, cacheInvalidate } from '$lib/storage';

export const load: PageServerLoad = async ({ platform, url }) => {
const page = parseInt(url.searchParams.get('page') ?? '0');
@@ -57,9 +58,9 @@ export const load: PageServerLoad = async ({ platform, url }) => {
};

export const actions: Actions = {
update: async ({ request, platform }) => {
update: async ({ request, platform, url }) => {
if (platform === undefined) {
error(400, { message: "Platform undefined"})
error(400, { message: 'Platform undefined' });
}

const data = await request.formData();
@@ -93,9 +94,13 @@ export const actions: Actions = {
}

if (response.meta.rows_written < 1) {
return fail(400, { message: "Failed to update article"})
return fail(400, { message: 'Failed to update article' });
}


const cache = await platform.caches.open(D1CacheName);
const baseUrl = platform.env.PRODUCTION === 'true' ? url.origin : 'http://localhost:8788';
platform.context.waitUntil(cacheInvalidate([baseUrl], cache));

redirect(304, String(data.get('path')));
}
};
53 changes: 27 additions & 26 deletions worker/index.ts
@@ -50,7 +50,9 @@ async function parsePostQuery(
categories: post.categories
};

parseArticle(post.image, post.content ?? [], (i: Image) => addImageData(post.id, i, image_data));
parseArticle(post.image, post.content ?? [], (i: Image) =>
addImageData(post.id, i, image_data)
);
});
})
.catch((error: Error) => {
@@ -59,7 +61,11 @@
return total_pages;
}

export interface Env {
async function emitLog(promise: Promise<unknown>) {
console.log(await promise);
}

interface Env {
DB: D1Database;
PRODUCTION: boolean;
SLACK_WEBHOOK: string;
@@ -68,54 +74,49 @@ export interface Env {
export default {
// The scheduled handler is invoked at the interval set in our wrangler.toml's
// [[triggers]] configuration.
async scheduled(event: ScheduledEvent, env: Env): Promise<void> {
const image_data: Record<string, ArticleEntry> = {};

async scheduled(event: ScheduledEvent, env: Env, ctx: ExecutionContext): Promise<void> {
// Get the date of the latest entry, and start fetching data at the date
const DB_resp: Record<string, string> | null = await env.DB.prepare(
`SELECT MAX(date) FROM articles`
).first();

const after: string = DB_resp ? DB_resp['MAX(date)'] ?? '2022-12-31' : '2022-12-31';

const image_data: Record<string, ArticleEntry> = {};
const total_pages = await parsePostQuery(0, after, image_data);

for (let i = 1; i < total_pages; ++i) {
await parsePostQuery(i, after, image_data);
}

// Batch insert/update all date entries
const stmt = env.DB.prepare(
`INSERT OR IGNORE INTO articles
(aid, date, images_published, images_published_with_alt_text, categories) VALUES
(?, ?, ?, ?, ?)`
);

const batchUpdate: Array<D1PreparedStatement> = [];

Object.entries(image_data).forEach(([aid, entry]) => {
batchUpdate.push(
stmt.bind(
aid,
entry.date,
entry.images_published,
entry.images_published_with_alt_text,
JSON.stringify(entry.categories)
// Batch insert all date entries
ctx.waitUntil(
emitLog(
env.DB.batch(
Object.entries(image_data).map(([aid, entry]) =>
stmt.bind(
aid,
entry.date,
entry.images_published,
entry.images_published_with_alt_text,
JSON.stringify(entry.categories)
)
)
)
);
});

const info = await env.DB.batch(batchUpdate);
)
);

// Send slack report
if (env.PRODUCTION) {
const [yesterday] = new Date(new Date().getTime() - 24 * 60 * 60 * 1000)
.toISOString()
.split('T');

const resp = await sendReport(env.SLACK_WEBHOOK, { date: yesterday, data: image_data });
console.log(resp);
ctx.waitUntil(emitLog(sendReport(env.SLACK_WEBHOOK, { date: yesterday, data: image_data })));
}

console.log(info);
}
};