Skip to content

Commit

Permalink
left join to exclude cvr entries as it scales better
Browse files Browse the repository at this point in the history
  • Loading branch information
tantaman committed Dec 5, 2023
1 parent b36542e commit 39702f3
Show file tree
Hide file tree
Showing 4 changed files with 44 additions and 16 deletions.
45 changes: 30 additions & 15 deletions server/src/pull/cvr.ts
Original file line number Diff line number Diff line change
Expand Up @@ -64,21 +64,36 @@ export function findUnsentItems(
order: number,
limit: number,
) {
// The query used below is the fastest.
// Other query forms that were tried:
// 1. 10x slower: SELECT * FROM table WHERE NOT EXISTS (SELECT 1 FROM client_view_entry ...)
// 2. 2x slower: SELECT id, version FROM table EXCEPT SELECT entity_id, entity_version FROM client_view_entry ...
// 3. SELECT * FROM table LEFT JOIN client_view_entry ...
const sql = /*sql*/ `SELECT *
FROM "${table}" t
WHERE (t."id", t."version") NOT IN (
SELECT "entity_id", "entity_version"
FROM "client_view_entry"
WHERE "client_group_id" = $1
AND "client_view_version" <= $2
AND "entity" = $3
)
LIMIT $4;`;
// Left join scales the best as numbers of rows grow into the millions.
// const sql = /*sql*/ `SELECT *
// FROM "${table}" t
// WHERE (t."id", t."version") NOT IN (
// SELECT "entity_id", "entity_version"
// FROM "client_view_entry"
// WHERE "client_group_id" = $1
// AND "client_view_version" <= $2
// AND "entity" = $3
// )
// LIMIT $4;`;
const sql = `SELECT t.*
FROM "${table}" t
LEFT JOIN "client_view_entry" cve
ON t."id" = cve."entity_id"
AND t."version" = cve."entity_version"
AND cve."client_group_id" = $1
AND cve."client_view_version" <= $2
AND cve."entity" = $3
WHERE cve."entity_id" IS NULL
LIMIT $4;`;
// const sql = `SELECT * FROM "${table}" WHERE id IN (SELECT id
// FROM (SELECT "id", "version" from "${table}" t
// EXCEPT
// SELECT cve."entity_id" as "id", cve."entity_version" as "version"
// FROM "client_view_entry" cve
// WHERE cve."client_group_id" = $1
// AND cve."client_view_version" <= $2
// AND cve."entity" = $3
// LIMIT $4));`;

const params = [clientGroupID, order, TableOrdinal[table], limit];
return executor(sql, params);
Expand Down
9 changes: 9 additions & 0 deletions server/src/pull/next-page.ts
Original file line number Diff line number Diff line change
Expand Up @@ -73,13 +73,16 @@ export async function readNextPage(
// TODO: optimize to not require 3 queries in turn.
// Issue all of them at once and throw away results over limit?
// Issue a query just for `ids` and `versions` via a union then fulfill a page of data?
let start = performance.now();
const {rows: issueRows} = await findUnsentItems(
executor,
'issue',
clientGroupID,
order,
remaining,
);
let end = performance.now();
console.log(`findUnsentItems took ${end - start}ms for issues`);
for (const r of issueRows) {
issueMeta.push({
id: r.id,
Expand All @@ -102,13 +105,16 @@ export async function readNextPage(
return buildReturn();
}

start = performance.now();
const {rows: descriptionRows} = await findUnsentItems(
executor,
'description',
clientGroupID,
order,
remaining,
);
end = performance.now();
console.log(`findUnsentItems took ${end - start}ms for descriptions`);
for (const r of descriptionRows) {
descriptionMeta.push({
id: r.id,
Expand All @@ -125,13 +131,16 @@ export async function readNextPage(
return buildReturn();
}

start = performance.now();
const {rows: commentRows} = await findUnsentItems(
executor,
'comment',
clientGroupID,
order,
remaining,
);
end = performance.now();
console.log(`findUnsentItems took ${end - start}ms for comments`);
for (const r of commentRows) {
commentMeta.push({
id: r.id,
Expand Down
4 changes: 4 additions & 0 deletions server/src/schema.ts
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,10 @@ export async function createSchemaVersion1(executor: Executor) {
PRIMARY KEY ("client_group_id", "entity", "entity_id")
)`);

await executor(/*sql*/ `CREATE UNIQUE INDEX "client_view_entry_covering" ON "client_view_entry" (
"client_group_id", "client_view_version", "entity", "entity_id", "entity_version"
)`);

await executor(/*sql*/ `CREATE TABLE "client_view_delete_entry" (
"client_group_id" VARCHAR(36) NOT NULL,
"client_view_version" INTEGER NOT NULL,
Expand Down
2 changes: 1 addition & 1 deletion server/src/seed/sample-issues.ts
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ export async function getReactSampleData(): Promise<SampleData[]> {

// Can use this to generate artifically larger datasets for stress testing or
// smaller for debugging.
const factor = 1;
const factor = 10;
let commentId = 1;
if (factor >= 1) {
const multiplied: SampleData[] = [];
Expand Down

0 comments on commit 39702f3

Please sign in to comment.