Backfill short-hash for past existing articles
for thematters/matters-server#3808

eval run:

```console
fill 1000 rows in 3854ms;
```
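
For reference, the timing above presumably comes from running the new script directly (its shebang wires up `ts-node`), with the batch size raised via the `BATCH_SIZE` environment variable the script reads. A hypothetical invocation, assuming the file is executable and `ts-node` is installed:

```console
$ BATCH_SIZE=1000 ./bin/fill-short-hash.ts
```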
tx0c authored and TomasC committed Mar 1, 2024
1 parent e9c911f commit 2a61f3d
Showing 2 changed files with 73 additions and 0 deletions.
71 changes: 71 additions & 0 deletions bin/fill-short-hash.ts
@@ -0,0 +1,71 @@
#!/usr/bin/env -S node --trace-warnings --loader ts-node/esm

import { sql, ARRAY_TYPE } from "../lib/db.js";

// same as https://github.com/thematters/matters-server/blob/develop/src/common/utils/nanoid.ts
import { customAlphabet } from "nanoid";

const ALPHABET = "0123456789abcdefghijklmnopqrstuvwxyz";

const nanoid = customAlphabet(ALPHABET, 12); // ~35 years or 308M IDs needed, in order to have a 1% probability of one collision // https://zelark.github.io/nano-id-cc/

const BATCH_SIZE = Number.parseInt(process.env.BATCH_SIZE || "", 10) || 100;

async function main() {
  const args = process.argv.slice(2);

  // const [row] = await sql`SELECT VERSION(), CURRENT_TIMESTAMP`;
  // console.log(new Date(), `get version:`, row);

  const updated = await batchUpdateShortHashes(BATCH_SIZE);
  // if (updated === 0) // set non null at the end;
}

async function batchUpdateShortHashes(BATCH_SIZE = 100) {
  const started = Date.now();

  // pick the newest BATCH_SIZE articles still missing a short_hash, pair each
  // row (by seq) with a freshly generated nanoid via UNNEST of two parallel
  // arrays, and update the matching article rows in a single statement
  const rows = await sql`-- batch fill short-hash for article table
WITH last_batch AS (
  SELECT id, title, short_hash,
    ROW_NUMBER() OVER(ORDER BY id DESC) ::int seq
  FROM article
  WHERE short_hash IS NULL
  ORDER BY id DESC LIMIT ${BATCH_SIZE}
), last_batch_with_hash AS (
  SELECT id, title, cte.*
  FROM last_batch JOIN (
    SELECT *
    FROM UNNEST(
      ${sql.array(
        Array.from({ length: BATCH_SIZE }).map((_, seq) => seq + 1),
        ARRAY_TYPE
      )} ::int[],
      ${sql.array(
        Array.from({ length: BATCH_SIZE }).map(() => nanoid()),
        ARRAY_TYPE
      )} ::text[]
    ) AS t(seq, short_hash)
  ) cte USING(seq)
)
UPDATE article
SET short_hash = last_batch_with_hash.short_hash
FROM last_batch_with_hash
WHERE article.id = last_batch_with_hash.id
RETURNING article.* ; `;

  const ended = new Date();
  console.log(
    ended,
    `filled ${rows.length} rows in ${+ended - started}ms:`
    // rows.slice(0, 3)
  );

  return rows.length;
}

async function alterSchemaNonNullable() {
  await sql`ALTER TABLE article ALTER COLUMN short_hash SET NOT NULL ; `;
}

main().catch((err) => console.error(new Date(), "ERROR:", err));
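
The commented-out check in `main()` ("if (updated === 0) … set non null at the end") points at the intended end state: keep running batches until no rows are left to fill, then call `alterSchemaNonNullable()` to make the column NOT NULL. A minimal sketch of that driver loop, not part of this commit and assuming `sql` exported from `lib/db.js` is a postgres.js client:

```ts
// Hypothetical wiring, not included in this commit.
async function backfillAll() {
  for (;;) {
    const updated = await batchUpdateShortHashes(BATCH_SIZE);
    if (updated === 0) break; // no more articles with short_hash IS NULL
  }
  await alterSchemaNonNullable(); // ALTER COLUMN short_hash SET NOT NULL
  await sql.end(); // assumes postgres.js; closes the pool so the process can exit
}
```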
2 changes: 2 additions & 0 deletions lib/db.ts
@@ -33,6 +33,8 @@ export const sqlSIW = getPostgresJsClient(pgSearchDatabaseURL);

// import { Article } from "../lib/pg-zhparser-articles-indexer.js";

export const ARRAY_TYPE = 1009; // OID of the Postgres text[] type; this internal value ideally shouldn't be hardcoded here

export interface Article {
  id: string;
  articleId: string;
