Fetch the daily Allegro dump before transforming, ignore the generated
exports and the archived dumps, and index into "resources-rpb-test".

Review note: wget is given -O so every run overwrites conf/RPBEXP.ZIP. The
post-image blob id on the transformAndIndex.sh index line below was not
recomputed for that one-line change.

diff --git a/.gitignore b/.gitignore
index 8ba9df23..70d9b4c6 100644
--- a/.gitignore
+++ b/.gitignore
@@ -15,4 +15,10 @@ RPB-Export_HBZ_SW.txt
 RPB-Export_HBZ_Tit.txt
 RPB-Export_HBZ_Tit_hbzIds.txt
 RPB-Export_HBZ_Bio.txt
+RPB-Export_HBZ_Ort.txt
+RPB-Export_HBZ_Raum.txt
+RPB-Export_HBZ_SWN.txt
+RPB-Export_HBZ_Syst.txt
+RPB-Export_HBZ_ZSS.txt
+conf/RPBEXP/*.ZIP
 nohup.out*
diff --git a/conf/RPBEXP/.empty b/conf/RPBEXP/.empty
new file mode 100644
index 00000000..e69de29b
diff --git a/conf/rpb-titel-to-lobid.flux b/conf/rpb-titel-to-lobid.flux
index 87ebf44b..c0ca0400 100644
--- a/conf/rpb-titel-to-lobid.flux
+++ b/conf/rpb-titel-to-lobid.flux
@@ -6,6 +6,6 @@ default outfile = "conf/output/bulk/bulk-${i}.ndjson";
 | fix(FLUX_DIR + "rpb-titel-to-lobid.fix")
 | batch-reset(batchsize="1000")
 | encode-json(prettyPrinting="false")
-| json-to-elasticsearch-bulk(idkey="id", type="resource", index="resources-alma-fix-staging")
+| json-to-elasticsearch-bulk(idkey="id", type="resource", index="resources-rpb-test")
 | write(outfile)
 ;
diff --git a/transformAndIndex.sh b/transformAndIndex.sh
index c49504c6..2cba2b7e 100644
--- a/transformAndIndex.sh
+++ b/transformAndIndex.sh
@@ -2,10 +2,19 @@
 set -eu
 IFS=$'\n\t'
 
+# Get the daily Allegro dump (-O overwrites an archive left by an aborted run):
+cd conf
+wget -O RPBEXP.ZIP https://rpb.lbz-rlp.de/rpb04/intern/RPBEXP.ZIP
+unzip -o RPBEXP.ZIP
+mv RPBEXP.ZIP RPBEXP/RPBEXP-$(date "+%Y%m%d-%H%M").ZIP
+cd ..
+
+# Transform the data:
 sbt "runMain rpb.ETL conf/rpb-sw.flux"
 sbt "runMain rpb.ETL conf/rpb-titel-to-strapi.flux"
 sbt "runMain rpb.ETL conf/rpb-titel-to-lobid.flux"
 
+# Index to Elasticsearch:
 unset http_proxy # for posting to weywot3
 for filename in conf/output/bulk/bulk-*.ndjson
 do