Skip to content

Commit

Permalink
Set up RPPD transformation and Strapi import (RPB-81)
Browse files Browse the repository at this point in the history
  • Loading branch information
fsteeg committed Sep 21, 2023
1 parent a0812e9 commit 2a31e14
Show file tree
Hide file tree
Showing 7 changed files with 191 additions and 0 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -14,3 +14,5 @@ application-log-*.gz
RPB-Export_HBZ_SW.txt
RPB-Export_HBZ_Tit.txt
RPB-Export_HBZ_Tit_hbzIds.txt
RPB-Export_HBZ_Bio.txt
nohup.out*
16 changes: 16 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,22 @@ sbt "runMain rpb.ETL conf/rpb-systematik-import.flux INPUT=rpb.ndjson PATH=rpb-n
sbt "runMain rpb.ETL conf/rpb-systematik-import.flux INPUT=rpb-spatial.ndjson PATH=rpb-spatials"
```

### Run RPPD transformation to strapi data

```bash
sbt "runMain rpb.ETL conf/rppd-to-strapi.flux"
```

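Writes output to `conf/output/test-output-rppd.json` by default (the full-export target `conf/output/output-rppd-strapi.ndjson` is currently commented out in `conf/rppd-to-strapi.flux`).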

### Import RPPD strapi data

```bash
sbt "runMain rpb.ETL conf/rppd-import.flux"
```

This attempts to import RPPD data to strapi, and prints the server responses.

### Run transformation to lobid data

```bash
Expand Down
44 changes: 44 additions & 0 deletions conf/RPB-Export_HBZ_Bio_Test.txt

Large diffs are not rendered by default.

44 changes: 44 additions & 0 deletions conf/output/test-output-rppd.json

Large diffs are not rendered by default.

15 changes: 15 additions & 0 deletions conf/rppd-import.flux
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
// Reads a previously generated RPPD output file line by line, wraps each line
// into a JSON object, POSTs it to the Strapi endpoint in API_URL, and logs both
// the request body and every line of the server response.
//
// API_URL is the POST target. The commented-out assignment below is an
// alternative (remote) endpoint; the active one points at a local instance.
//API_URL = "http://test-metadaten-nrw.hbz-nrw.de:1339/api/rppds";
API_URL = "http://localhost:1339/api/rppds";

// Input file, resolved relative to the directory of this flux file (FLUX_DIR).
// The commented-out path is the alternative (non-test) input.
// FLUX_DIR + "output/output-rppd-strapi.ndjson"
FLUX_DIR + "output/test-output-rppd.json"
| open-file
| as-lines
// The named group "data" captures the complete content of each non-empty line.
| regex-decode("(?<data>.+)")
| stream-to-triples
// NOTE(review): presumably ${p} expands to the group name ("data") and ${o} to
// the captured line, yielding {"data":<line>} — confirm against a logged request.
| template("{\"${p}\":${o}}") // wrap into 'data' object for strapi
| log-object("Will POST: ")
| open-http(url=API_URL, method="POST", contentType="application/json")
// The HTTP response is split into lines so that each one is logged separately.
| as-lines
| log-object("POST Response: ")
;
57 changes: 57 additions & 0 deletions conf/rppd-to-strapi.fix
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
# Macro "to_bool": turns a field into a presence flag.
# Parameter: field - name of the field to convert.
# If the field exists it is renamed to the same name with a trailing "~" and
# its value is overwritten with "true" (the original value is discarded).
# The "~" suffix is the booleanMarker passed to encode-json in
# rppd-to-strapi.flux - presumably so the value is emitted as a JSON boolean
# rather than a string; confirm against the generated output.
do put_macro("to_bool")
if exists("$[field]")
move_field("$[field]","$[field]~")
set_field("$[field]~", "true")
end
end

# Macro "add": appends the content of one field to an array and removes the source.
# Parameters:
#   from - path of the source field
#   to   - name of the target array, given without the "[]" suffix
# Does nothing when the source field does not exist.
do put_macro("add")
if exists("$[from]")
# Scratch array used to stage the value(s) before they reach the target.
set_array("_temp")
copy_field("$[from]", "_temp.$append.$[to]")
# Move everything staged under _temp to the end of the target array.
move_field("_temp.*", "$[to][].$append")
# NOTE(review): the detour through _temp presumably makes it safe to call this
# macro with identical "from" and "to" names (as "to_array" does), since the
# source "$[from]" and the target "$[to][]" are then distinct paths - confirm.
remove_field("$[from]")
end
end

# Macro "to_array": converts an "@"-separated field into an array of the same name.
# Parameter: field - name of the field to convert.
# Steps: split the value on "@", create the array "$[field][]", then delegate
# to the "add" macro with both "from" and "to" set to the field name.
# The array is created even when the field is absent; the final vacuum() call
# of this script is presumably what drops such empty arrays - confirm.
do put_macro("to_array")
split_field("$[field]", "@")
set_array("$[field][]")
call_macro("add", to: "$[field]", from: "$[field]")
end

# Macro "add_all": like "to_array", but splits on ";" and takes separate
# source and target names.
# Parameters:
#   from - name of the ";"-separated source field
#   to   - name of the target array, given without the "[]" suffix
# Steps: split the source on ";", create the array "$[to][]", then delegate
# to the "add" macro to move the values over and remove the source.
do put_macro("add_all")
split_field("$[from]", ";")
set_array("$[to][]")
call_macro("add", to: "$[to]", from: "$[from]")
end

# Main transformation: applies the macros defined in this file to the record's
# fields, guarantees a value for f82b, normalizes f1nv, and cleans up.

# Presence flags: these fields are replaced by "<name>~" fields with value "true".
call_macro("to_bool", field: "f11_")
call_macro("to_bool", field: "f13_")
call_macro("to_bool", field: "f14_")
call_macro("to_bool", field: "f15_")

# "@"-separated fields that are turned into arrays of the same name.
call_macro("to_array", field: "f1nc")
call_macro("to_array", field: "f1nd")
call_macro("to_array", field: "f1nn")
call_macro("to_array", field: "f1no")
call_macro("to_array", field: "f1nt")
call_macro("to_array", field: "f1nu")
call_macro("to_array", field: "f1nw")
call_macro("to_array", field: "f1nx")

# ";"-separated field that is turned into an array of the same name.
call_macro("add_all", from: "f1z1", to: "f1z1")
# TODO: handle 1ü1...1ü9, 1ä1, 1ö1, 1ß1 separately?

# f82b is 'required' and 'unique'
# When f82b only holds the generic placeholder, or is missing entirely, it is
# rebuilt from the literal text (marked by the leading "~") and the value of
# f00_. NOTE(review): presumably f00_ is unique per record, which is what makes
# the generated f82b unique - confirm against the data.
if all_equal("f82b", "Keine GND-Ansetzung")
paste("f82b", "~Keine GND-Ansetzung für", "f00_")
end
unless exists("f82b")
paste("f82b", "~Keine GND-Ansetzung für", "f00_")
end

# Normalize f1nv: every run of one or more "w" (or "m") characters becomes
# exactly three of that character. Note this also expands a single "w"/"m".
replace_all("f1nv", "w+", "www") # e.g. ww -> www
replace_all("f1nv", "m+", "mmm") # e.g. mmmm -> mmm

# Final cleanup; presumably removes empty fields and arrays left over from the
# steps above (e.g. arrays created for absent source fields) - confirm.
vacuum()
13 changes: 13 additions & 0 deletions conf/rppd-to-strapi.flux
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
// Transforms the RPPD (Bio) export into JSON records for the Strapi import:
// reads the export line by line, decodes each line with rpb.Decode, applies
// rppd-to-strapi.fix, and writes the JSON-encoded records to `outfile`.
//
// `outfile` is declared with `default`, so a value passed in from outside
// takes precedence. The path is anchored on FLUX_DIR (the directory of this
// file), like the input and the fix file below and like the path that
// rppd-import.flux reads from, so the result does not depend on the working
// directory the process was started in. When started from the repository
// root this is the same file as the former "conf/output/..." path.
// default outfile = FLUX_DIR + "output/output-rppd-strapi.ndjson";
default outfile = FLUX_DIR + "output/test-output-rppd.json";

// Full export (not checked in), obtainable with:
// wget http://lobid.org/download/rpb-gesamtexport/2023-06-01/RPB-Export_HBZ_Bio.txt
// FLUX_DIR + "RPB-Export_HBZ_Bio.txt"
FLUX_DIR + "RPB-Export_HBZ_Bio_Test.txt"
| open-file(encoding="IBM437")
| as-lines
| rpb.Decode
| fix(FLUX_DIR + "rppd-to-strapi.fix")
// "~" is the suffix that the fix's "to_bool" macro appends to flag fields.
| encode-json(prettyPrinting="false", booleanMarker="~")
| write(outfile)
;

0 comments on commit 2a31e14

Please sign in to comment.