Skip to content

Commit

Permalink
update export-a-c-r db job, finalize fix-acr job
Browse files Browse the repository at this point in the history
  • Loading branch information
jancimertel committed Mar 12, 2024
1 parent a54bd7d commit 7b2d5dc
Show file tree
Hide file tree
Showing 4 changed files with 183 additions and 86 deletions.
123 changes: 76 additions & 47 deletions packages/database/scripts/jobs/export-a-c-r.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,10 @@
import { IAction, IConcept, IReference, IResource } from "@shared/types";
import {
IAction,
IConcept,
IReference,
IResource,
IValue,
} from "@shared/types";
import { Connection, r as rethink, RDatum, WriteResult } from "rethinkdb-ts";
import { IJob } from ".";
import { DbEnums, EntityEnums, RelationEnums } from "@shared/enums";
Expand All @@ -7,11 +13,12 @@ import { Db } from "@service/rethink";
import Relation from "@models/relation/relation";
import { Relation as RelationTypes } from "@shared/types";
import { question } from "scripts/import/prompts";
import * as fs from "fs"
import * as path from "path"
import * as fs from "fs";
import * as path from "path";
import Entity from "@models/entity/entity";
import Resource from "@models/resource/resource";
import { v4 as uuidv4 } from "uuid";
import Value from "@models/value/value";

export async function getEntitiesDataByClass<T>(
db: Connection,
Expand All @@ -24,7 +31,7 @@ export async function getEntitiesDataByClass<T>(
.run(connection);
}

async function findForEntities<T extends RelationTypes.IRelation> (
async function findForEntities<T extends RelationTypes.IRelation>(
db: Connection,
entityIds: string[],
relType?: RelationEnums.Type,
Expand All @@ -40,9 +47,7 @@ export async function getEntitiesDataByClass<T>(
.run(db);

if (position !== undefined) {
return items.filter(
(d) => entityIds.indexOf(d.entityIds[position]) !== -1
);
return items.filter((d) => entityIds.indexOf(d.entityIds[position]) !== -1);
}
return items;
}
Expand All @@ -51,81 +56,104 @@ export async function getEntitiesDataByClass<T>(
const originResource = new Resource({
id: "dissinet-resource",
data: {
partValueBaseURL: "" ,
partValueLabel: "" ,
partValueBaseURL: "",
partValueLabel: "",
url: "https://dissinet.cz/",
} ,
},
label: "DISSINET Database (DDB1)",
language: EntityEnums.Language.English,
notes: [] ,
status: EntityEnums.Status.Approved
})
notes: [],
status: EntityEnums.Status.Approved,
});

class ACRGenerator extends Generator {
getPath(filename?: string) {
if (!this.datasetName) {
throw new Error("Dataset name not yet set, cannot create the path to directory")
throw new Error(
"Dataset name not yet set, cannot create the path to directory"
);
}

let parts = [__dirname, "..", "..", Generator.DIRECTORY, this.datasetName]
let parts = [__dirname, "..", "..", Generator.DIRECTORY, this.datasetName];
if (filename) {
parts.push(filename)
parts.push(filename);
}
return path.join.apply(undefined, parts)
return path.join.apply(undefined, parts);
}

async getUserInfo() {
this.datasetName = await question<string>(
"Name of the dataset?",
(input: string): string => {
return input;
},
""
);
if (!this.datasetName) {
throw new Error("Dataset name should not be empty")
}
const datasetPath = this.getPath()
if(fs.existsSync(datasetPath)) {
throw new Error(`The dataset path (${datasetPath}) already exists`)
}
}
"Name of the dataset?",
(input: string): string => {
return input;
},
""
);
if (!this.datasetName) {
throw new Error("Dataset name should not be empty");
}
const datasetPath = this.getPath();
if (fs.existsSync(datasetPath)) {
throw new Error(`The dataset path (${datasetPath}) already exists`);
}
}
}

const exportACR: IJob = async (db: Connection): Promise<void> => {
const generator = new ACRGenerator();
await generator.getUserInfo()
await generator.getUserInfo();

const values: IValue[] = [];

// retrieve all actions and push origin resource into list of references
// +
// replace original label with the id
const actions = (await getEntitiesDataByClass<IAction>(db, EntityEnums.Class.Action)).map(a => {
const actions = (
await getEntitiesDataByClass<IAction>(db, EntityEnums.Class.Action)
).map((a) => {
// Value entity that carries the action's original id as its label; the origin
// reference pushed below points at this value's id.
const v = new Value({
id: uuidv4(),
label: a.id,
});
// Collect the value for export, exactly as the concepts mapper does; without
// this the action's reference would point at a Value missing from the dataset.
values.push(v);
a.references.push({
id: uuidv4(),
resource: originResource.id,
value: a.id,
value: v.id,
} as IReference);

return a
return a;
});

// retrieve all concepts and push origin resource into list of references
// +
// replace original label with the id
const concepts = (await getEntitiesDataByClass<IConcept>(db, EntityEnums.Class.Concept)).map(a => {
const concepts = (
await getEntitiesDataByClass<IConcept>(db, EntityEnums.Class.Concept)
).map((a) => {
const v = new Value({
id: uuidv4(),
label: a.id,
});
values.push(v);
a.references.push({
id: uuidv4(),
resource: originResource.id,
value: a.id,
value: v.id,
} as IReference);
return a
return a;
});
const resources = await getEntitiesDataByClass<IResource>(db, EntityEnums.Class.Resource);
const resources = await getEntitiesDataByClass<IResource>(
db,
EntityEnums.Class.Resource
);

const allIds = actions.map(a => a.id).concat(concepts.map(c => c.id)).concat(resources.map(r => r.id));
const allIds = actions
.map((a) => a.id)
.concat(concepts.map((c) => c.id))
.concat(resources.map((r) => r.id));

// allow only relations, which have all entities in lists above
const rels = (await findForEntities(db, allIds)).filter(r => {
const rels = (await findForEntities(db, allIds)).filter((r) => {
let matches = 0;
for (const entityId of r.entityIds) {
for (const allid of allIds) {
Expand All @@ -141,14 +169,15 @@ const exportACR: IJob = async (db: Connection): Promise<void> => {
}

return false;
})
});

generator.entities.entities.A = actions
generator.entities.entities.C = concepts
generator.entities.entities.R = [originResource, ...resources]
generator.entities.entities.A = actions;
generator.entities.entities.C = concepts;
generator.entities.entities.V = values;
generator.entities.entities.R = [originResource, ...resources];
generator.relations.relations.A1S = rels;

generator.output()
}
generator.output();
};

export default exportACR;
37 changes: 0 additions & 37 deletions packages/database/scripts/jobs/fix-acr-labels.ts

This file was deleted.

105 changes: 105 additions & 0 deletions packages/database/scripts/jobs/fix-acr.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
import { r, Connection, RDatum } from "rethinkdb-ts";
import { IJob } from ".";
import Dataset from "./Dataset";
import { question } from "../import/prompts";
import { IEntity } from "@shared/types";
import { EntityEnums } from "@shared/enums";
import Value from "@models/value/value";
import Entity from "@models/entity/entity";
import { ModelNotValidError } from "@shared/types/errors";

/**
 * Inserts `entity` into the table named by `Entity.table`.
 *
 * The entity is stamped with the current time in `createdAt`, and the inserted
 * document has its `id` overridden with `undefined`. When the write result
 * reports generated keys, the first one is written back to `entity.id`.
 *
 * @param db connection handed to `run` (may be undefined)
 * @param entity entity to persist; mutated in place (`createdAt`, `id`)
 * @returns true when the write result reports exactly one inserted document
 * @throws ModelNotValidError when the first reported error mentions "Duplicate"
 */
const save = async (
  db: Connection | undefined,
  entity: IEntity
): Promise<boolean> => {
  entity.createdAt = new Date();

  const payload = { ...entity, id: undefined };
  const {
    generated_keys: generatedKeys,
    first_error: firstError,
    inserted,
  } = await r.table(Entity.table).insert(payload).run(db);

  // propagate the key reported by the write result back to the caller's object
  if (generatedKeys) {
    entity.id = generatedKeys[0];
  }

  if (firstError && firstError.includes("Duplicate")) {
    throw new ModelNotValidError("id already exists");
  }

  return inserted === 1;
};

/**
 * Job that repairs entities whose reference to "dissinet-resource" still
 * stores the entity's own id in `value`.
 *
 * For every such entity a new Value entity (labelled with the entity id) is
 * inserted and the reference is re-pointed at that value's id. Entities are
 * processed in batches of 10 until the query finds nothing left to do.
 *
 * Entities that match the query but cannot be fixed (no matching
 * "dissinet-resource" reference, or the Value insert yielded no id) are
 * remembered and excluded from later batches. Without this they would be
 * returned by every query and the loop would never terminate.
 *
 * @param db connection used for all queries
 * @throws Error when no directory is entered at the prompt
 */
const fixACRLabels: IJob = async (db: Connection): Promise<void> => {
  const dir = await question(
    "which directory?",
    (input: string): string => {
      return input;
    },
    ""
  );

  if (!dir) {
    throw new Error("cannot continue without dir");
  }

  // Informational only: print how many Action/Concept entities the local
  // dataset file contains.
  const dataset = new Dataset(dir);
  const localEntities = await dataset.loadData("entities.json");
  console.log(
    localEntities.filter(
      (l) =>
        l.class === EntityEnums.Class.Action ||
        l.class === EntityEnums.Class.Concept
    ).length
  );

  // ids of entities that matched the query but could not be repaired
  const skippedIds: string[] = [];

  while (true) {
    const todo = (await r
      .table("entities")
      .filter(function (row: RDatum) {
        return row("references")
          .contains(function (ref: RDatum) {
            return ref("value").eq(row("id"));
          })
          .and(r.expr(skippedIds).contains(row("id")).not());
      })
      .limit(10)
      .run(db)) as IEntity[];

    if (todo.length === 0) {
      break;
    }

    for (const item of todo) {
      const ref = item.references.find(
        (reference) =>
          reference.value === item.id &&
          reference.resource === "dissinet-resource"
      );
      if (!ref) {
        console.warn(`Entity ${item.id} without original reference`);
        skippedIds.push(item.id);
        continue;
      }

      const v = new Value({
        label: item.id,
      });

      // save() writes the generated key back into v.id on success
      await save(db, v);
      if (!v.id) {
        console.warn(`Entity ${item.id} without V entity`);
        skippedIds.push(item.id);
        continue;
      }

      // @ts-ignore
      ref.value = v.id;

      console.log("Updating", item.id);
      await r
        .table("entities")
        .get(item.id)
        .update({
          references: item.references,
        })
        .run(db);
    }
  }
};

export default fixACRLabels;
4 changes: 2 additions & 2 deletions packages/database/scripts/jobs/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ import fixDuplicatedElementsJob from "./fix-duplicated-array-elements";
import addPosFieldJob from "./add-pos-field";
import generateDatasetJob from "./generate-datasets/generate-dataset";
import exportACR from "./export-a-c-r";
import fixACRLabels from "./fix-acr-labels";
import fixACR from "./fix-acr";

export type IJob = (db: Connection) => Promise<void>

Expand All @@ -16,7 +16,7 @@ const alljobs: Record<string, IJob> = {
addPosFieldJob,
generateDatasetJob,
exportACR,
fixACRLabels,
fixACR,
}

export default alljobs;

0 comments on commit 7b2d5dc

Please sign in to comment.