Skip to content

Commit

Permalink
Tweak rppd-to-lobid transformation for lobid-gnd usage (RPB-102)
Browse files Browse the repository at this point in the history
- Prefer GND IDs and namespace for `id` field
- Add `type` field with hard-coded values
- Set filenames and indexing settings
  • Loading branch information
fsteeg committed Oct 30, 2023
1 parent 50a2878 commit 74c69d3
Show file tree
Hide file tree
Showing 2 changed files with 18 additions and 12 deletions.
24 changes: 15 additions & 9 deletions conf/rppd-to-lobid.fix
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,21 @@ nothing()
#00 RPPD-ID
# Kommentar Doku: (Achtung: 00 BLANK)

copy_field("f00_","rppdId")
copy_field("f00_","id")
prepend("id","https://lbz.rlp.de/rppd/")
copy_field("f00_","rppdId")
prepend("id","https://rppd.lobid.org/")

# -------
#82b (GND-ID (R)) -> gndIdentifier
# Kommentar Doku: ohne das vorangestellte Präfix (DE-588)

copy_field("f82b", "gndIdentifier")
unless all_contain("f82b", "Keine GND-Ansetzung")
copy_field("f82b","id")
prepend("id","https://d-nb.info/gnd/")
end

set_array("type[]", "AuthorityResource", "Person", "DifferentiatedPerson")

# #1na (Name, bevorzugte Form) -> preferredName
# Name ist aber Kombination aus Geburtsdaten und Name e.g. "f1na": "Marquard, Udo / 1959-"
Expand Down Expand Up @@ -117,20 +128,15 @@ end
replace_all("f1ny", "(\\d{4})(\\d{2})(\\d{2})", "$1-$2-$3")
copy_field("f1ny", "describedBy.dateModified")

# -------
#82b (GND-ID (R)) -> gndIdentifier
# Kommentar Doku: ohne das vorangestellte Präfix (DE-588)

copy_field("f82b", "gndIdentifier")

# -------
#1z1 (1. biogr. Anmerkung) -> biographicalOrHistoricalInformation
# Kommentar Doku: getrennt durch "; ". Keine Abkürzungen benutzen, da die Stichworte in Register 9 indexiert werden. Zitate aus Quellen in Anführungszeichen; bei sehr langen, biogr. Anmerkungen wird der Text auf mehrere Kategorien aufgeteilt: #1z2, #1z3, #1z4 ... #1z9. Bei eingespielten Biographien werden die Angaben zum Originalwerk am Ende angegeben: --- [Daten übernommen aus: ....]

vacuum()
retain(
"rppdId",
"id",
"id",
"type[]",
"preferredName",
"variantName[]",
"dateOfBirth[]",
Expand Down
6 changes: 3 additions & 3 deletions conf/rppd-to-lobid.flux
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
default outfile = "conf/output/bulk/bulk-rppd-${i}.ndjson";
"conf/output/test-output-rppd.json"
default outfile = "conf/output/bulk/rppd/bulk-rppd-${i}.jsonl"; // lobid-gnd expects *.jsonl suffix
"conf/output/output-rppd-strapi.ndjson"
| open-file
| as-lines
| decode-json
| fix(FLUX_DIR + "rppd-to-lobid.fix")
| batch-reset(batchsize="1000")
| encode-json(prettyPrinting="false")
| json-to-elasticsearch-bulk(idkey="id", type="resource", index="resources-alma-fix-staging")
| json-to-elasticsearch-bulk(idkey="id", type="authority", index="gnd-rppd-test")
| write(outfile)
;

0 comments on commit 74c69d3

Please sign in to comment.