feat(db): init with source CSVs
iokkitan committed May 9, 2024
1 parent c45e911 commit ed5b75e
Showing 12 changed files with 703 additions and 160 deletions.
7 changes: 6 additions & 1 deletion .gitignore
@@ -4,7 +4,8 @@
.LSOverride

# Icon must end with two \r
Icon
Icon


# Thumbnails
._*
@@ -155,3 +156,7 @@ dist
.yarn/build-state.yml
.yarn/install-state.gz
.pnp.*

# PROJECT-db
db/init_sources/*
db/init_entries.sql.gz
27 changes: 22 additions & 5 deletions README.md
@@ -12,7 +12,24 @@ This example contains everything needed to get a Next.js development and product

## Prerequisites

Install [Docker Desktop](https://docs.docker.com/get-docker) for Mac, Windows, or Linux. Docker Desktop includes Docker Compose as part of the installation.
**Step 1.** Install [Docker Desktop](https://docs.docker.com/get-docker) for Mac, Windows, or Linux. Docker Desktop includes Docker Compose as part of the installation.

**Step 2.** Open the Google Drive folder https://drive.google.com/drive/folders/12UAoEKduxXXCi0suGezwEIguRLvn2FhZ and download the following files into [db/init_sources](db/init_sources):

* Chu-liāu Nôa-ūi Pió (To Gí-giân) > *Download* > *CSV* > [Chu-liāu Nôa-ūi Pió (To Gí-giân) - DictCode.csv](<db/init_sources/Chu-liāu Nôa-ūi Pió (To Gí-giân) - DictCode.csv>)
* Chu-liāu Nôa-ūi Pió (To Gí-giân) > *Download* > *CSV* > [Chu-liāu Nôa-ūi Pió (To Gí-giân) - JoinedDictColumnName.csv](<db/init_sources/Chu-liāu Nôa-ūi Pió (To Gí-giân) - JoinedDictColumnName.csv>)
* db/init_sources/JOINED_HEADER_ONLY.csv
* db/init_sources/JOINED.csv

Then run the following commands:

```bash
cd db
pnpm i
pnpm build
```
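
Before moving on, you can sanity-check the generated dump from the repository root. A minimal sketch (the paths come from this commit; the exact listing will differ on your machine):

```bash
# Source CSVs from Step 2 (git-ignored via db/init_sources/*)
ls db/init_sources/

# Gzipped seed dump produced by `pnpm build` (db/build.mjs)
ls -lh db/init_entries.sql.gz

# Peek at the first statements without decompressing to disk
gunzip -c db/init_entries.sql.gz | head -n 5
```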

Continue to the next section.

## Development

@@ -24,10 +41,6 @@ docker compose -f docker-compose.dev.yml up -d --build

# See logs
docker logs -f $(docker ps -aq)

# Init db
docker exec bangcham_nextjs pnpm db:setup
docker exec bangcham_nextjs pnpm db:seed
```

Open [http://localhost:3000](http://localhost:3000) with your browser to see the result.
@@ -71,6 +84,10 @@ Open [http://localhost:3000](http://localhost:3000).
## Useful commands

```bash
# When package.json has changed, recreate the Next.js container
docker stop guntau_nextjs && docker rm guntau_nextjs
# Then start it again with the compose command above

# Stop all running containers
docker compose -f docker-compose.dev.yml down --volumes

156 changes: 156 additions & 0 deletions db/build.mjs
@@ -0,0 +1,156 @@
import * as Path from "node:path";
import { readdir, open, mkdir } from "node:fs/promises";
import { createGzip } from "node:zlib";
import { createWriteStream, createReadStream } from "node:fs";
import { Readable, Duplex, compose } from "node:stream";
import { pipeline } from "node:stream/promises";
import sqlLint from "sql-lint";
import csvtojson from "csvtojson";

// Number of CSV rows batched into each generated INSERT statement.
const SEGMENT_LENGTH = Math.pow(10, 3);

main().catch((e) => {
console.error(e);
process.exit(1);
});

// Format one parsed CSV row as the body of a SQL VALUES tuple:
// purely numeric values in columns 0 and 2 stay unquoted, empty cells
// become NULL, and everything else is single-quoted with quotes doubled.
function formatRawCSV(rawCSV) {
return rawCSV
.map((value, index) => {
if ((0 === index || 2 === index) && value.match(/^\d+$/)) {
return value;
}
if ("" === value) {
return "NULL";
}
return `'${
value
.replaceAll(/'/g, it => `'${it}`)
}'`;
})
.join(",");
}

function formatChunk(rawCSVList) {
return rawCSVList.map(formatRawCSV).join("),\n (");
}

// Read one small Nôa-ūi source CSV in full and yield a single INSERT
// statement for the given table.
function processNoauiTable(sourceName, tableName) {
return async function* () {
let headerList;
const csv = csvtojson({
output: "csv",
});
csv.on("header", (header) => {
headerList = header;
});
const chunkRowCSVList = await csv.fromFile(
Path.resolve("init_sources", sourceName),
"utf-8"
);
const sql = `INSERT INTO ${tableName}
("${headerList.join(`","`)}")
VALUES
(${formatChunk(chunkRowCSVList)});\n`;

const errors = await sqlLint.default({
sql,
});
if (errors.length) {
console.error(errors);
setTimeout(() => {
throw new Error(sql);
});
}
yield sql;
};
}

// Stream the large JOINED.csv and yield INSERT statements for
// "public"."ChhoeTaigi" in chunks of SEGMENT_LENGTH rows.
function processChhoeTaigiTable({ suppressError }) {
let headerList;
const csv = csvtojson({
output: "csv",
});
csv.on("header", (header) => {
headerList = header;
});
return compose(
createReadStream(Path.resolve("init_sources", "JOINED.csv"), "utf-8"),
csv,
async function* (source) {
let rowCSVList = [];
async function flush() {
if (!headerList) {
// The CSV header has not been parsed yet, so nothing can be flushed.
console.error("Missing CSV header for JOINED.csv", rowCSVList);
return;
}
const chunkRowCSVList = rowCSVList.splice(0, SEGMENT_LENGTH);
const sql = `INSERT INTO "public"."ChhoeTaigi"
("${headerList.join(`","`)}")
VALUES
(${formatChunk(chunkRowCSVList)});\n`;

if (true !== suppressError) {
const errors = await sqlLint.default({
sql,
});
if (errors.length) {
console.error(errors);
setTimeout(() => {
throw new Error(sql);
});
}
}
return sql;
}
for await (const chunk of source) {
const chunkStr = chunk.toString("utf-8").trim();
rowCSVList.push(
...JSON.parse(`[
${
"," === chunkStr[chunkStr.length - 1]
? chunkStr.substr(0, chunkStr.length - 1)
: chunkStr
}
]`)
);
while (SEGMENT_LENGTH <= rowCSVList.length) {
yield flush();
}
}
if (rowCSVList.length) {
yield flush();
}
}
);
}

// Write every generated INSERT statement into the gzipped dump that
// db/dev.Dockerfile copies into the Postgres image.
async function main() {
const entriesSQLName = `init_entries.sql.gz`;
const writeTo = compose(
createGzip(),
createWriteStream(Path.resolve(entriesSQLName), {
autoClose: false,
flush: true,
})
);
await pipeline(
Readable.from([
processNoauiTable(
"Chu-liāu Nôa-ūi Pió (To Gí-giân) - JoinedDictColumnName.csv",
`"public"."NoauiChhoeTaigi"`
),
processNoauiTable(
"Chu-liāu Nôa-ūi Pió (To Gí-giân) - DictCode.csv",
`"public"."NoauiDict"`
),
processChhoeTaigiTable({ suppressError: true }),
]).flatMap((it) => Duplex.from(it)),
writeTo
);
writeTo.end();
console.log(`Done!`);
}
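
The init scripts below only run when the Postgres data directory is first created, so if the source CSVs change after the database has already been initialized, the regenerated dump can be loaded by hand (after truncating or recreating the affected tables, since the dump is plain INSERT statements). A rough sketch, assuming a running container named `bangcham_db` and the default `postgres` superuser — both names are assumptions, not taken from this commit:

```bash
# Rebuild the gzipped dump after updating db/init_sources/
cd db && pnpm build && cd ..

# Stream it into an already-initialized database
# (container name and user are assumptions; adjust to your compose setup)
gunzip -c db/init_entries.sql.gz | docker exec -i bangcham_db psql -U postgres
```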
3 changes: 3 additions & 0 deletions db/dev.Dockerfile
@@ -0,0 +1,3 @@
FROM library/postgres
COPY init_tables.sql /docker-entrypoint-initdb.d/0001_init_tables.sql
COPY init_entries.sql.gz /docker-entrypoint-initdb.d/0002_init_entries.sql.gz
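
The official `postgres` image runs the files in `/docker-entrypoint-initdb.d` in lexical order the first time it initializes an empty data directory, and it decompresses `.sql.gz` files on the fly — hence the `0001_` prefix for the schema and `0002_` for the seed dump. To try the image on its own, something like the following should work (a sketch; the image tag, password, and port mapping are assumptions, not taken from this commit):

```bash
cd db

# Build context must contain init_tables.sql and init_entries.sql.gz
docker build -f dev.Dockerfile -t bangcham-db-dev .

# First start on an empty data directory runs 0001_init_tables.sql,
# then 0002_init_entries.sql.gz
docker run --rm -e POSTGRES_PASSWORD=postgres -p 5432:5432 bangcham-db-dev
```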
Empty file added db/init_sources/.gitkeep
Empty file.
95 changes: 95 additions & 0 deletions db/init_tables.sql
@@ -0,0 +1,95 @@
CREATE TABLE "public"."NoauiChhoeTaigi" (
"id" SERIAL PRIMARY KEY,
"JoinedDictColumnName" varchar(40),
"漢Lô" varchar(40),
"Pe̍h-ōe-jī" varchar(40),
"English" varchar(40),
"日本語" varchar(40),
"中文" varchar(40),
CONSTRAINT "JoinedDictColumnName_key" UNIQUE ("JoinedDictColumnName")
);
CREATE TABLE "public"."NoauiDict" (
"id" SERIAL PRIMARY KEY,
"DictCode" varchar(40),
"漢Lô" varchar(40),
"Pe̍h-ōe-jī" varchar(40),
"English" varchar(40),
"日本語" varchar(40),
"中文" varchar(40),
"noauiChhoeTaigiIdList" integer [],
CONSTRAINT "DictCode_key" UNIQUE ("DictCode")
);
CREATE INDEX "NoauiDict_DictCode_idx" ON "public"."NoauiDict" USING btree ("DictCode");
CREATE TABLE "public"."ChhoeTaigi" (
"JoinedWordID" integer PRIMARY KEY,
"DictCode" varchar(40) REFERENCES "NoauiDict"("DictCode"),
"DictWordID" integer NOT NULL,
"SuBe" text,
"PojUnicode" text,
"PojUnicodeOthers" text,
"PojInput" text,
"PojInputOthers" text,
"KipUnicode" text,
"KipUnicodeOthers" text,
"KipInput" text,
"KipInputOthers" text,
"LmjUnicode" text,
"LmjUnicodeOthers" text,
"HanLoTaibunPoj" text,
"HanLoTaibunKip" text,
"KaisoehPoj" text,
"KaisoehKip" text,
"KaisoehHanLoPoj" text,
"KaisoehHanLoKip" text,
"KaisoehEngbun" text,
"KaisoehJitbunPoj" text,
"LekuPoj" text,
"LekuKip" text,
"LekuHanLoPoj" text,
"LekuHanLoKip" text,
"LekuEngbun" text,
"LekuHoabun" text,
"LesuPoj" text,
"LekuJitbunPoj" text,
"HoaBun" text,
"EngBun" text,
"JitBun" text,
"KhehBun" text,
"HanbunImPojUnicode" text,
"HanbunImPojInput" text,
"HanbunImKipUnicode" text,
"HanbunImKipInput" text,
"KahToo" text,
"Abbreviation" text,
"NounClassifier" text,
"Reduplication" text,
"Synonym" text,
"Opposite" text,
"Confer" text,
"Hunlui" text,
"HunluiJitbun" text,
"KipDictDialects" text,
"KipDictWordProperty" text,
"KipDictHanjiTaibunOthers" text,
"KipDictKaisoehKhehbun" text,
"SoanntengMuitheSekinPoochhiongChuliau" text,
"SoanntengMuitheSekinHongsangJitki" text,
"SoanntengMuitheSekinChute" text,
"PageNumber" text,
"PageNumberChianoePan" text,
"SuBeSoatbeng" text,
"StoreLink" text,
"LaigoanMia" text,
"LaigoanBangchi" text,
"ChhehMia" text,
"ChhoetaigiPhianChu" text,
CONSTRAINT "ChhoeTaigi_DictCode_DictWordID_key" UNIQUE ("DictCode", "DictWordID")
);
CREATE INDEX "ChhoeTaigi_DictCode_idx" ON "public"."ChhoeTaigi" USING btree ("DictCode");
CREATE INDEX "ChhoeTaigi_EngBun_idx" ON "public"."ChhoeTaigi" USING btree ("EngBun");
CREATE INDEX "ChhoeTaigi_HoaBun_idx" ON "public"."ChhoeTaigi" USING btree ("HoaBun");
CREATE INDEX "ChhoeTaigi_JitBun_idx" ON "public"."ChhoeTaigi" USING btree ("JitBun");
CREATE INDEX "ChhoeTaigi_KipInput_idx" ON "public"."ChhoeTaigi" USING btree ("KipInput");
CREATE INDEX "ChhoeTaigi_KipUnicode_idx" ON "public"."ChhoeTaigi" USING btree ("KipUnicode");
CREATE INDEX "ChhoeTaigi_PojInput_idx" ON "public"."ChhoeTaigi" USING btree ("PojInput");
CREATE INDEX "ChhoeTaigi_PojUnicode_idx" ON "public"."ChhoeTaigi" USING btree ("PojUnicode");
16 changes: 16 additions & 0 deletions db/package.json
@@ -0,0 +1,16 @@
{
"name": "db",
"version": "1.0.0",
"description": "",
"scripts": {
"build": "node build.mjs",
"test": "echo \"Error: no test specified\" && exit 1"
},
"keywords": [],
"author": "",
"dependencies": {
"csvtojson": "^2.0.10",
"sql-lint": "^1.0.0",
"uuid": "^9.0.1"
}
}