Skip to content

Commit

Permalink
Use daily Allegro dump in transformAndIndex.sh script (RPB-101)
Browse files Browse the repository at this point in the history
Add extracted files to .gitignore, backup ZIP with timestamp
  • Loading branch information
fsteeg committed Oct 26, 2023
1 parent e1a739b commit 8b525f8
Show file tree
Hide file tree
Showing 4 changed files with 16 additions and 1 deletion.
6 changes: 6 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -15,4 +15,10 @@ RPB-Export_HBZ_SW.txt
RPB-Export_HBZ_Tit.txt
RPB-Export_HBZ_Tit_hbzIds.txt
RPB-Export_HBZ_Bio.txt
RPB-Export_HBZ_Ort.txt
RPB-Export_HBZ_Raum.txt
RPB-Export_HBZ_SWN.txt
RPB-Export_HBZ_Syst.txt
RPB-Export_HBZ_ZSS.txt
conf/RPBEXP/*.ZIP
nohup.out*
Empty file added conf/RPBEXP/.empty
Empty file.
2 changes: 1 addition & 1 deletion conf/rpb-titel-to-lobid.flux
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,6 @@ default outfile = "conf/output/bulk/bulk-${i}.ndjson";
| fix(FLUX_DIR + "rpb-titel-to-lobid.fix")
| batch-reset(batchsize="1000")
| encode-json(prettyPrinting="false")
| json-to-elasticsearch-bulk(idkey="id", type="resource", index="resources-alma-fix-staging")
| json-to-elasticsearch-bulk(idkey="id", type="resource", index="resources-rpb-test")
| write(outfile)
;
9 changes: 9 additions & 0 deletions transformAndIndex.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,19 @@
set -eu
IFS=$'\n\t'

# Get the daily Allegro dump:
# Download the ZIP into conf/, extract it there (overwriting files from
# earlier runs), then keep the ZIP as a timestamped backup in conf/RPBEXP/.
cd conf
# -O pins the output file name: without it, wget saves to RPBEXP.ZIP.1 when a
# leftover RPBEXP.ZIP from an aborted run exists, and the stale archive would
# be extracted and backed up below instead of the fresh download.
wget -O RPBEXP.ZIP https://rpb.lbz-rlp.de/rpb04/intern/RPBEXP.ZIP
unzip -o RPBEXP.ZIP
mv RPBEXP.ZIP "RPBEXP/RPBEXP-$(date "+%Y%m%d-%H%M").ZIP"
cd ..

# Transform the data:
# Run each Flux workflow through the rpb.ETL main class, one after another,
# in this fixed order.
for workflow in rpb-sw rpb-titel-to-strapi rpb-titel-to-lobid
do
  sbt "runMain rpb.ETL conf/${workflow}.flux"
done

# Index to Elasticsearch:
unset http_proxy # for posting to weywot3
for filename in conf/output/bulk/bulk-*.ndjson
do
Expand Down

0 comments on commit 8b525f8

Please sign in to comment.