From 7b2d5dc397d767e1a8a41675408bd65764f900dc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A1n=20Mertel?= <10438157+jancimertel@users.noreply.github.com> Date: Tue, 12 Mar 2024 01:16:02 +0100 Subject: [PATCH] update export-a-c-r db job, finalize fix-acr job --- .../database/scripts/jobs/export-a-c-r.ts | 123 +++++++++++------- .../database/scripts/jobs/fix-acr-labels.ts | 37 ------ packages/database/scripts/jobs/fix-acr.ts | 105 +++++++++++++++ packages/database/scripts/jobs/index.ts | 4 +- 4 files changed, 183 insertions(+), 86 deletions(-) delete mode 100644 packages/database/scripts/jobs/fix-acr-labels.ts create mode 100644 packages/database/scripts/jobs/fix-acr.ts diff --git a/packages/database/scripts/jobs/export-a-c-r.ts b/packages/database/scripts/jobs/export-a-c-r.ts index 7f00522f3..049e1f7cf 100644 --- a/packages/database/scripts/jobs/export-a-c-r.ts +++ b/packages/database/scripts/jobs/export-a-c-r.ts @@ -1,4 +1,10 @@ -import { IAction, IConcept, IReference, IResource } from "@shared/types"; +import { + IAction, + IConcept, + IReference, + IResource, + IValue, +} from "@shared/types"; import { Connection, r as rethink, RDatum, WriteResult } from "rethinkdb-ts"; import { IJob } from "."; import { DbEnums, EntityEnums, RelationEnums } from "@shared/enums"; @@ -7,11 +13,12 @@ import { Db } from "@service/rethink"; import Relation from "@models/relation/relation"; import { Relation as RelationTypes } from "@shared/types"; import { question } from "scripts/import/prompts"; -import * as fs from "fs" -import * as path from "path" +import * as fs from "fs"; +import * as path from "path"; import Entity from "@models/entity/entity"; import Resource from "@models/resource/resource"; import { v4 as uuidv4 } from "uuid"; +import Value from "@models/value/value"; export async function getEntitiesDataByClass( db: Connection, @@ -24,7 +31,7 @@ export async function getEntitiesDataByClass( .run(connection); } - async function findForEntities ( +async function 
findForEntities( db: Connection, entityIds: string[], relType?: RelationEnums.Type, @@ -40,9 +47,7 @@ export async function getEntitiesDataByClass( .run(db); if (position !== undefined) { - return items.filter( - (d) => entityIds.indexOf(d.entityIds[position]) !== -1 - ); + return items.filter((d) => entityIds.indexOf(d.entityIds[position]) !== -1); } return items; } @@ -51,81 +56,104 @@ export async function getEntitiesDataByClass( const originResource = new Resource({ id: "dissinet-resource", data: { - partValueBaseURL: "" , - partValueLabel: "" , + partValueBaseURL: "", + partValueLabel: "", url: "https://dissinet.cz/", - } , + }, label: "DISSINET Database (DDB1)", language: EntityEnums.Language.English, - notes: [] , - status: EntityEnums.Status.Approved -}) + notes: [], + status: EntityEnums.Status.Approved, +}); class ACRGenerator extends Generator { getPath(filename?: string) { if (!this.datasetName) { - throw new Error("Dataset name not yet set, cannot create the path to directory") + throw new Error( + "Dataset name not yet set, cannot create the path to directory" + ); } - let parts = [__dirname, "..", "..", Generator.DIRECTORY, this.datasetName] + let parts = [__dirname, "..", "..", Generator.DIRECTORY, this.datasetName]; if (filename) { - parts.push(filename) + parts.push(filename); } - return path.join.apply(undefined, parts) + return path.join.apply(undefined, parts); } async getUserInfo() { this.datasetName = await question( - "Name of the dataset?", - (input: string): string => { - return input; - }, - "" - ); - if (!this.datasetName) { - throw new Error("Dataset name should not be empty") - } - const datasetPath = this.getPath() - if(fs.existsSync(datasetPath)) { - throw new Error(`The dataset path (${datasetPath}) already exists`) - } - } + "Name of the dataset?", + (input: string): string => { + return input; + }, + "" + ); + if (!this.datasetName) { + throw new Error("Dataset name should not be empty"); + } + const datasetPath = this.getPath(); 
+ if (fs.existsSync(datasetPath)) { + throw new Error(`The dataset path (${datasetPath}) already exists`); + } + } } const exportACR: IJob = async (db: Connection): Promise => { const generator = new ACRGenerator(); - await generator.getUserInfo() + await generator.getUserInfo(); + + const values: IValue[] = []; // retrieve all actions and push origin resource into list of references // + // replace original label with the id - const actions = (await getEntitiesDataByClass(db, EntityEnums.Class.Action)).map(a => { + const actions = ( + await getEntitiesDataByClass(db, EntityEnums.Class.Action) + ).map((a) => { + const v = new Value({ + id: uuidv4(), + label: a.id, + }); a.references.push({ id: uuidv4(), resource: originResource.id, - value: a.id, + value: v.id, } as IReference); - return a + return a; }); // retrieve all concepts and push origin resource into list of references // + // replace original label with the id - const concepts = (await getEntitiesDataByClass(db, EntityEnums.Class.Concept)).map(a => { + const concepts = ( + await getEntitiesDataByClass(db, EntityEnums.Class.Concept) + ).map((a) => { + const v = new Value({ + id: uuidv4(), + label: a.id, + }); + values.push(v); a.references.push({ id: uuidv4(), resource: originResource.id, - value: a.id, + value: v.id, } as IReference); - return a + return a; }); - const resources = await getEntitiesDataByClass(db, EntityEnums.Class.Resource); + const resources = await getEntitiesDataByClass( + db, + EntityEnums.Class.Resource + ); - const allIds = actions.map(a => a.id).concat(concepts.map(c => c.id)).concat(resources.map(r => r.id)); + const allIds = actions + .map((a) => a.id) + .concat(concepts.map((c) => c.id)) + .concat(resources.map((r) => r.id)); // allow only relations, which have all entities in lists above - const rels = (await findForEntities(db, allIds)).filter(r => { + const rels = (await findForEntities(db, allIds)).filter((r) => { let matches = 0; for (const entityId of r.entityIds) { for 
(const allid of allIds) { @@ -141,14 +169,15 @@ const exportACR: IJob = async (db: Connection): Promise => { } return false; - }) + }); - generator.entities.entities.A = actions - generator.entities.entities.C = concepts - generator.entities.entities.R = [originResource, ...resources] + generator.entities.entities.A = actions; + generator.entities.entities.C = concepts; + generator.entities.entities.V = values; + generator.entities.entities.R = [originResource, ...resources]; generator.relations.relations.A1S = rels; - generator.output() -} + generator.output(); +}; export default exportACR; diff --git a/packages/database/scripts/jobs/fix-acr-labels.ts b/packages/database/scripts/jobs/fix-acr-labels.ts deleted file mode 100644 index 2b8a576d2..000000000 --- a/packages/database/scripts/jobs/fix-acr-labels.ts +++ /dev/null @@ -1,37 +0,0 @@ -import { r, Connection } from "rethinkdb-ts"; -import { IJob } from "."; -import Dataset from "./Dataset"; -import { question } from "../import/prompts"; -import { IEntity } from "@shared/types"; - -const fixACRLabels: IJob = async (db: Connection): Promise => { - const dir = await question("which directory?", (input: string): string => { - return input; - }, ""); - - if (!dir) { - throw new Error("cannot continue without dir"); - } - - const dataset = new Dataset(dir); - const localEntities = await dataset.loadData("entities.json") - - // find entity from json in database and replace its label to id - for (const localEntity of localEntities) { - // this should be intacted - if (localEntity.id === "dissinet-resource") { - continue - } - - const storedEntity = (await r.table("entities").get(localEntity.id).run(db) as IEntity) - if (!storedEntity) { - console.warn(`Entity ${localEntity.id} not found in database`) - continue - } - await r.table("entities").update({ - label: storedEntity.id, - }).run(db); - } -} - -export default fixACRLabels; diff --git a/packages/database/scripts/jobs/fix-acr.ts 
b/packages/database/scripts/jobs/fix-acr.ts new file mode 100644 index 000000000..4e1aa87e4 --- /dev/null +++ b/packages/database/scripts/jobs/fix-acr.ts @@ -0,0 +1,105 @@ +import { r, Connection, RDatum } from "rethinkdb-ts"; +import { IJob } from "."; +import Dataset from "./Dataset"; +import { question } from "../import/prompts"; +import { IEntity } from "@shared/types"; +import { EntityEnums } from "@shared/enums"; +import Value from "@models/value/value"; +import Entity from "@models/entity/entity"; +import { ModelNotValidError } from "@shared/types/errors"; + +const save = async ( + db: Connection | undefined, + entity: IEntity +): Promise => { + entity.createdAt = new Date(); + + const result = await r + .table(Entity.table) + .insert({ ...entity, id: undefined }) + .run(db); + + if (result.generated_keys) { + entity.id = result.generated_keys[0]; + } + + if (result.first_error && result.first_error.indexOf("Duplicate") !== -1) { + throw new ModelNotValidError("id already exists"); + } + + return result.inserted === 1; +}; + +const fixACRLabels: IJob = async (db: Connection): Promise => { + const dir = await question( + "which directory?", + (input: string): string => { + return input; + }, + "" + ); + + if (!dir) { + throw new Error("cannot continue without dir"); + } + + const dataset = new Dataset(dir); + const localEntities = await dataset.loadData("entities.json"); + console.log( + localEntities.filter( + (l) => + l.class === EntityEnums.Class.Action || + l.class === EntityEnums.Class.Concept + ).length + ); + + while (true) { + const todo = (await r + .table("entities") + .filter(function (row: RDatum) { + return row("references").contains(function (ref: RDatum) { + return ref("value").eq(row("id")); + }); + }) + .limit(10) + .run(db)) as IEntity[]; + + if (todo.length === 0) { + break; + } + + for (const item of todo) { + const ref = item.references.find( + (r) => r.value === item.id && r.resource === "dissinet-resource" + ); + if (!ref) { + 
console.warn(`Entity ${item.id} without original reference`); + continue; + } + + const v = new Value({ + label: item.id, + }); + + await save(db, v); + if (!v.id) { + console.warn(`Entity ${item.id} without V entity`); + continue; + } + + // @ts-ignore + ref.value = v.id; + + console.log("Updating", item.id); + await r + .table("entities") + .get(item.id) + .update({ + references: item.references, + }) + .run(db); + } + } +}; + +export default fixACRLabels; diff --git a/packages/database/scripts/jobs/index.ts b/packages/database/scripts/jobs/index.ts index a89078b83..b48fe0d02 100644 --- a/packages/database/scripts/jobs/index.ts +++ b/packages/database/scripts/jobs/index.ts @@ -5,7 +5,7 @@ import fixDuplicatedElementsJob from "./fix-duplicated-array-elements"; import addPosFieldJob from "./add-pos-field"; import generateDatasetJob from "./generate-datasets/generate-dataset"; import exportACR from "./export-a-c-r"; -import fixACRLabels from "./fix-acr-labels"; +import fixACR from "./fix-acr"; export type IJob = (db: Connection) => Promise @@ -16,7 +16,7 @@ const alljobs: Record = { addPosFieldJob, generateDatasetJob, exportACR, - fixACRLabels, + fixACR, } export default alljobs;