From 1a29c6a1f4f13e8ae7260ed08b4a1dec650d5f63 Mon Sep 17 00:00:00 2001 From: Fabian Steeg Date: Thu, 21 Dec 2023 15:44:54 +0100 Subject: [PATCH] Include lookup table creation in `transformRppd.sh` (RPB-120, #61) --- conf/maps/gndId-to-rppdId.tsv | 4 ++-- conf/output/test-output-rppd-lobid-46.json | 8 ++++---- conf/rppd-rppdId-with-label-map.flux | 2 +- conf/rppd-to-gnd-mapping.flux | 2 +- transformRppd.sh | 2 ++ 5 files changed, 10 insertions(+), 8 deletions(-) diff --git a/conf/maps/gndId-to-rppdId.tsv b/conf/maps/gndId-to-rppdId.tsv index 53e890ac..6c7f8f17 100644 --- a/conf/maps/gndId-to-rppdId.tsv +++ b/conf/maps/gndId-to-rppdId.tsv @@ -5871,7 +5871,7 @@ pk06406 https://d-nb.info/gnd/1145330444 Blaum, Josef pk06407 https://d-nb.info/gnd/114532956X Stiehl, Wilhelm pk06408 https://d-nb.info/gnd/1148572872 Theis, Heinrich pk06409 https://d-nb.info/gnd/1147711046 Blankart, Kuno ¬von¬ -pk06410 https://d-nb.info/gnd/133313646 Hoëcker, Bernhard +pk06410 https://d-nb.info/gnd/133313646 Hoe▐êcker, Bernhard pk06411 https://d-nb.info/gnd/122700392 Bötsch, Wolfgang pk06412 https://d-nb.info/gnd/1068872489 Henn, Armin pk06413 https://d-nb.info/gnd/1148019863 Schommer, Bern @@ -12957,7 +12957,7 @@ pta0922 https://d-nb.info/gnd/1051217199 Neyses, Matthias pta0923 https://d-nb.info/gnd/1051211808 Niederprüm, Johann Adam pta0925 https://d-nb.info/gnd/1051211816 Nießen, Franz Ludwig pta0926 https://d-nb.info/gnd/11699620X Nikolaus, Paul -pta0927 https://d-nb.info/gnd/1051211832 Noël, André Joseph +pta0927 https://d-nb.info/gnd/1051211832 Noe▐êl, André Joseph pta0928 https://d-nb.info/gnd/1051211840 Nonn, Mathias pta0929 https://d-nb.info/gnd/1051211859 Nottar, Nikolaus Vincenz pta0930 https://d-nb.info/gnd/1051211867 Nusbaum, Nikolaus diff --git a/conf/output/test-output-rppd-lobid-46.json b/conf/output/test-output-rppd-lobid-46.json index a9656976..52cf2a8f 100644 --- a/conf/output/test-output-rppd-lobid-46.json +++ b/conf/output/test-output-rppd-lobid-46.json @@ -27,13 +27,13 @@ "label" : "Alexander, Franz Ambros", "id" : "https://d-nb.info/gnd/130566969" }, { - "label" : "Alexander, Philipp / 1787-1864", + "label" : "Alexander, Philipp", "id" : "https://rppd.lobid.org/pk06675" }, { "label" : "Alexander, Kaspar Anton", "id" : "https://d-nb.info/gnd/130566993" }, { - "label" : "Alexander, Franz Anton / 1838-1926", + "label" : "Alexander, Franz Anton", "id" : "https://rppd.lobid.org/pk06682" }, { "label" : "Alexander, Georg Philipp", @@ -42,7 +42,7 @@ "label" : "Alexander, Anton", "id" : "https://d-nb.info/gnd/131993518" }, { - "label" : "Alexander, Georg Philipp / 1879-1916", + "label" : "Alexander, Georg Philipp", "id" : "https://rppd.lobid.org/pk06677" }, { "label" : "Alexander, Philipp", @@ -54,7 +54,7 @@ "label" : "Alexander, Hans Peter", "id" : "https://d-nb.info/gnd/133467317" }, { - "label" : "Alexander, Georg Philipp / 1969-", + "label" : "Alexander, Georg Philipp", "id" : "https://rppd.lobid.org/pk06681" }, { "label" : "Wagner, Richard / 1813-1883", diff --git a/conf/rppd-rppdId-with-label-map.flux b/conf/rppd-rppdId-with-label-map.flux index d8768510..29bbb39d 100644 --- a/conf/rppd-rppdId-with-label-map.flux +++ b/conf/rppd-rppdId-with-label-map.flux @@ -1,4 +1,4 @@ -FLUX_DIR + "output/output-rppd.json" +FLUX_DIR + "output/output-rppd-strapi.ndjson" | open-file | as-lines | decode-json diff --git a/conf/rppd-to-gnd-mapping.flux b/conf/rppd-to-gnd-mapping.flux index ccbbfe29..5bef284f 100644 --- a/conf/rppd-to-gnd-mapping.flux +++ b/conf/rppd-to-gnd-mapping.flux @@ -1,4 +1,4 @@ -FLUX_DIR + "output/output-rppd.json" +FLUX_DIR + "output/output-rppd-strapi.ndjson" | open-file | as-lines | decode-json diff --git a/transformRppd.sh b/transformRppd.sh index f409cac6..44fa3a76 100644 --- a/transformRppd.sh +++ b/transformRppd.sh @@ -4,6 +4,8 @@ set -eu bash transformBeacons.sh rm conf/output/bulk/rppd/* sbt "runMain rpb.ETL conf/rppd-to-strapi.flux IN_FILE=RPB-Export_HBZ_Bio.txt OUT_FILE=output-rppd-strapi.ndjson" +sbt "runMain rpb.ETL conf/rppd-to-gnd-mapping.flux" +sbt "runMain rpb.ETL conf/rppd-rppdId-with-label-map.flux" sbt "runMain rpb.ETL conf/rppd-to-lobid.flux" # Indexing happens in rppd/transformAndIndexRppd.sh (lobid-gnd repo, branch 'rppd'), which calls this script