From 92e0896a98c071613a149f1f8113a2d2019ee68e Mon Sep 17 00:00:00 2001 From: Fabian Steeg Date: Wed, 24 Apr 2024 13:20:09 +0200 Subject: [PATCH 1/2] Improve beacon processing to get unique link labels (RPB-156) Try in that order: name, message, description, institution, domain --- conf/output/test-output-rppd-lobid-1.json | 4 ++-- conf/output/test-output-rppd-lobid-10.json | 2 +- conf/output/test-output-rppd-lobid-11.json | 2 +- conf/output/test-output-rppd-lobid-12.json | 2 +- conf/output/test-output-rppd-lobid-14.json | 2 +- conf/output/test-output-rppd-lobid-17.json | 4 ++-- conf/output/test-output-rppd-lobid-18.json | 2 +- conf/output/test-output-rppd-lobid-20.json | 4 ++-- conf/output/test-output-rppd-lobid-21.json | 2 +- conf/output/test-output-rppd-lobid-22.json | 2 +- conf/output/test-output-rppd-lobid-23.json | 2 +- conf/output/test-output-rppd-lobid-24.json | 2 +- conf/output/test-output-rppd-lobid-26.json | 2 +- conf/output/test-output-rppd-lobid-27.json | 2 +- conf/output/test-output-rppd-lobid-32.json | 2 +- conf/output/test-output-rppd-lobid-33.json | 2 +- conf/output/test-output-rppd-lobid-36.json | 2 +- conf/output/test-output-rppd-lobid-37.json | 2 +- conf/output/test-output-rppd-lobid-39.json | 2 +- conf/output/test-output-rppd-lobid-41.json | 2 +- conf/output/test-output-rppd-lobid-48.json | 2 +- conf/output/test-output-rppd-lobid-5.json | 2 +- conf/output/test-output-rppd-lobid-7.json | 2 +- conf/rppd-beacon-to-tsv.flux | 15 ++++++++++++++- 24 files changed, 40 insertions(+), 27 deletions(-) diff --git a/conf/output/test-output-rppd-lobid-1.json b/conf/output/test-output-rppd-lobid-1.json index 1271d81f..33b5bc55 100644 --- a/conf/output/test-output-rppd-lobid-1.json +++ b/conf/output/test-output-rppd-lobid-1.json @@ -11,7 +11,7 @@ }, { "id" : "http://persondata.toolforge.org/redirect/gnd/de/11855476X", "collection" : { - "name" : "Deutschsprachige Wikipedia" + "name" : "Wikipedia-Personenartikel" } }, { "id" : "https://www.lagis-hessen.de/pnd/11855476X", @@ -21,7 +21,7 @@ }, { "id" : "https://www.deutsche-biographie.de/pnd11855476X.html#ndbcontent", "collection" : { - "name" : "Historische Kommission bei der Bayerischen Akademie der Wissenschaften und Bayerische Staatsbibliothek" + "name" : "Biographien der NDB" } } ], "type" : [ "AuthorityResource", "Person", "DifferentiatedPerson" ], diff --git a/conf/output/test-output-rppd-lobid-10.json b/conf/output/test-output-rppd-lobid-10.json index e80f0d9e..d46c534b 100644 --- a/conf/output/test-output-rppd-lobid-10.json +++ b/conf/output/test-output-rppd-lobid-10.json @@ -6,7 +6,7 @@ "sameAs" : [ { "id" : "http://persondata.toolforge.org/redirect/gnd/de/122507487", "collection" : { - "name" : "Deutschsprachige Wikipedia" + "name" : "Wikipedia-Personenartikel" } } ], "type" : [ "AuthorityResource", "Person", "DifferentiatedPerson" ], diff --git a/conf/output/test-output-rppd-lobid-11.json b/conf/output/test-output-rppd-lobid-11.json index dabbd8b2..d28dff27 100644 --- a/conf/output/test-output-rppd-lobid-11.json +++ b/conf/output/test-output-rppd-lobid-11.json @@ -6,7 +6,7 @@ "sameAs" : [ { "id" : "http://persondata.toolforge.org/redirect/gnd/de/1051147387", "collection" : { - "name" : "Deutschsprachige Wikipedia" + "name" : "Wikipedia-Personenartikel" } } ], "type" : [ "AuthorityResource", "Person", "DifferentiatedPerson" ], diff --git a/conf/output/test-output-rppd-lobid-12.json b/conf/output/test-output-rppd-lobid-12.json index 688be353..bd850f2a 100644 --- a/conf/output/test-output-rppd-lobid-12.json +++ b/conf/output/test-output-rppd-lobid-12.json @@ -11,7 +11,7 @@ }, { "id" : "http://persondata.toolforge.org/redirect/gnd/de/123205670", "collection" : { - "name" : "Deutschsprachige Wikipedia" + "name" : "Wikipedia-Personenartikel" } } ], "type" : [ "AuthorityResource", "Person", "DifferentiatedPerson" ], diff --git a/conf/output/test-output-rppd-lobid-14.json b/conf/output/test-output-rppd-lobid-14.json index f0c97114..d5847ba6 100644 --- a/conf/output/test-output-rppd-lobid-14.json +++ b/conf/output/test-output-rppd-lobid-14.json @@ -6,7 +6,7 @@ "sameAs" : [ { "id" : "https://www.deutsche-biographie.de/pnd118649558.html#adbcontent", "collection" : { - "name" : "Historische Kommission bei der Bayerischen Akademie der Wissenschaften und Bayerische Staatsbibliothek" + "name" : "Biographien der ADB" } } ], "type" : [ "AuthorityResource", "Person", "DifferentiatedPerson" ], diff --git a/conf/output/test-output-rppd-lobid-17.json b/conf/output/test-output-rppd-lobid-17.json index 4d073b31..99533f6d 100644 --- a/conf/output/test-output-rppd-lobid-17.json +++ b/conf/output/test-output-rppd-lobid-17.json @@ -6,7 +6,7 @@ "sameAs" : [ { "id" : "https://www.deutsche-biographie.de/pnd119280957.html#adbcontent", "collection" : { - "name" : "Historische Kommission bei der Bayerischen Akademie der Wissenschaften und Bayerische Staatsbibliothek" + "name" : "Biographien der ADB" } }, { "id" : "https://persondata.toolforge.org/redirect/gnd/commons/119280957", @@ -16,7 +16,7 @@ }, { "id" : "http://persondata.toolforge.org/redirect/gnd/de/119280957", "collection" : { - "name" : "Deutschsprachige Wikipedia" + "name" : "Wikipedia-Personenartikel" } }, { "id" : "http://www.tripota.uni-trier.de/beacon.php?ID=119280957", diff --git a/conf/output/test-output-rppd-lobid-18.json b/conf/output/test-output-rppd-lobid-18.json index 2b5c07b3..b8836201 100644 --- a/conf/output/test-output-rppd-lobid-18.json +++ b/conf/output/test-output-rppd-lobid-18.json @@ -6,7 +6,7 @@ "sameAs" : [ { "id" : "http://persondata.toolforge.org/redirect/gnd/de/120260948", "collection" : { - "name" : "Deutschsprachige Wikipedia" + "name" : "Wikipedia-Personenartikel" } } ], "type" : [ "AuthorityResource", "Person", "DifferentiatedPerson" ], diff --git a/conf/output/test-output-rppd-lobid-20.json b/conf/output/test-output-rppd-lobid-20.json index fec52f56..e98c8af0 100644 --- a/conf/output/test-output-rppd-lobid-20.json +++ b/conf/output/test-output-rppd-lobid-20.json @@ -6,12 +6,12 @@ "sameAs" : [ { "id" : "http://persondata.toolforge.org/redirect/gnd/de/11698211X", "collection" : { - "name" : "Deutschsprachige Wikipedia" + "name" : "Wikipedia-Personenartikel" } }, { "id" : "http://opac.regesta-imperii.de/lang_de/suche.php?tags=11698211X", "collection" : { - "name" : "Akademieprojekt Regesta Imperii (Quellen zur Reichsgeschichte) - Akademie der Wissenschaften und der Literatur Mainz" + "name" : "REGESTA IMPERII RI OPAC GND" } }, { "id" : "http://www.tripota.uni-trier.de/beacon.php?ID=11698211X", diff --git a/conf/output/test-output-rppd-lobid-21.json b/conf/output/test-output-rppd-lobid-21.json index 8c26b08e..214a966d 100644 --- a/conf/output/test-output-rppd-lobid-21.json +++ b/conf/output/test-output-rppd-lobid-21.json @@ -6,7 +6,7 @@ "sameAs" : [ { "id" : "http://persondata.toolforge.org/redirect/gnd/de/105121548X", "collection" : { - "name" : "Deutschsprachige Wikipedia" + "name" : "Wikipedia-Personenartikel" } } ], "type" : [ "AuthorityResource", "Person", "DifferentiatedPerson" ], diff --git a/conf/output/test-output-rppd-lobid-22.json b/conf/output/test-output-rppd-lobid-22.json index 1bc89638..8b151f2a 100644 --- a/conf/output/test-output-rppd-lobid-22.json +++ b/conf/output/test-output-rppd-lobid-22.json @@ -6,7 +6,7 @@ "sameAs" : [ { "id" : "http://persondata.toolforge.org/redirect/gnd/de/1051215498", "collection" : { - "name" : "Deutschsprachige Wikipedia" + "name" : "Wikipedia-Personenartikel" } } ], "type" : [ "AuthorityResource", "Person", "DifferentiatedPerson" ], diff --git a/conf/output/test-output-rppd-lobid-23.json b/conf/output/test-output-rppd-lobid-23.json index 7482c9af..fe3271db 100644 --- a/conf/output/test-output-rppd-lobid-23.json +++ b/conf/output/test-output-rppd-lobid-23.json @@ -6,7 +6,7 @@ "sameAs" : [ { "id" : "http://persondata.toolforge.org/redirect/gnd/de/105121551X", "collection" : { - "name" : "Deutschsprachige Wikipedia" + "name" : "Wikipedia-Personenartikel" } } ], "type" : [ "AuthorityResource", "Person", "DifferentiatedPerson" ], diff --git a/conf/output/test-output-rppd-lobid-24.json b/conf/output/test-output-rppd-lobid-24.json index 4ca9e688..7fef210b 100644 --- a/conf/output/test-output-rppd-lobid-24.json +++ b/conf/output/test-output-rppd-lobid-24.json @@ -6,7 +6,7 @@ "sameAs" : [ { "id" : "http://persondata.toolforge.org/redirect/gnd/de/126790086", "collection" : { - "name" : "Deutschsprachige Wikipedia" + "name" : "Wikipedia-Personenartikel" } } ], "type" : [ "AuthorityResource", "Person", "DifferentiatedPerson" ], diff --git a/conf/output/test-output-rppd-lobid-26.json b/conf/output/test-output-rppd-lobid-26.json index ea707e0f..ef145a1a 100644 --- a/conf/output/test-output-rppd-lobid-26.json +++ b/conf/output/test-output-rppd-lobid-26.json @@ -6,7 +6,7 @@ "sameAs" : [ { "id" : "http://persondata.toolforge.org/redirect/gnd/de/137243324", "collection" : { - "name" : "Deutschsprachige Wikipedia" + "name" : "Wikipedia-Personenartikel" } } ], "type" : [ "AuthorityResource", "Person", "DifferentiatedPerson" ], diff --git a/conf/output/test-output-rppd-lobid-27.json b/conf/output/test-output-rppd-lobid-27.json index 54b94f90..00948f20 100644 --- a/conf/output/test-output-rppd-lobid-27.json +++ b/conf/output/test-output-rppd-lobid-27.json @@ -11,7 +11,7 @@ }, { "id" : "http://persondata.toolforge.org/redirect/gnd/de/1051215536", "collection" : { - "name" : "Deutschsprachige Wikipedia" + "name" : "Wikipedia-Personenartikel" } }, { "id" : "http://swb.bsz-bw.de/DB=2.114/CMD?ACT=SRCHA&IKT=2011&TRM=gnd:1051215536&REC=2", diff --git a/conf/output/test-output-rppd-lobid-32.json b/conf/output/test-output-rppd-lobid-32.json index c95a0d9b..43620dd5 100644 --- a/conf/output/test-output-rppd-lobid-32.json +++ b/conf/output/test-output-rppd-lobid-32.json @@ -6,7 +6,7 @@ "sameAs" : [ { "id" : "http://persondata.toolforge.org/redirect/gnd/de/117006084", "collection" : { - "name" : "Deutschsprachige Wikipedia" + "name" : "Wikipedia-Personenartikel" } }, { "id" : "http://www.tripota.uni-trier.de/beacon.php?ID=117006084", diff --git a/conf/output/test-output-rppd-lobid-33.json b/conf/output/test-output-rppd-lobid-33.json index e68fdc88..2138c280 100644 --- a/conf/output/test-output-rppd-lobid-33.json +++ b/conf/output/test-output-rppd-lobid-33.json @@ -11,7 +11,7 @@ }, { "id" : "http://persondata.toolforge.org/redirect/gnd/de/117021652", "collection" : { - "name" : "Deutschsprachige Wikipedia" + "name" : "Wikipedia-Personenartikel" } } ], "type" : [ "AuthorityResource", "Person", "DifferentiatedPerson" ], diff --git a/conf/output/test-output-rppd-lobid-36.json b/conf/output/test-output-rppd-lobid-36.json index 0dde67f7..ed1f88a6 100644 --- a/conf/output/test-output-rppd-lobid-36.json +++ b/conf/output/test-output-rppd-lobid-36.json @@ -6,7 +6,7 @@ "sameAs" : [ { "id" : "http://persondata.toolforge.org/redirect/gnd/de/1051215609", "collection" : { - "name" : "Deutschsprachige Wikipedia" + "name" : "Wikipedia-Personenartikel" } } ], "type" : [ "AuthorityResource", "Person", "DifferentiatedPerson" ], diff --git a/conf/output/test-output-rppd-lobid-37.json b/conf/output/test-output-rppd-lobid-37.json index 79ce0c1f..9b74f39a 100644 --- a/conf/output/test-output-rppd-lobid-37.json +++ b/conf/output/test-output-rppd-lobid-37.json @@ -6,7 +6,7 @@ "sameAs" : [ { "id" : "http://persondata.toolforge.org/redirect/gnd/de/1028922108", "collection" : { - "name" : "Deutschsprachige Wikipedia" + "name" : "Wikipedia-Personenartikel" } } ], "type" : [ "AuthorityResource", "Person", "DifferentiatedPerson" ], diff --git a/conf/output/test-output-rppd-lobid-39.json b/conf/output/test-output-rppd-lobid-39.json index 4e4b16b6..eeac9bce 100644 --- a/conf/output/test-output-rppd-lobid-39.json +++ b/conf/output/test-output-rppd-lobid-39.json @@ -6,7 +6,7 @@ "sameAs" : [ { "id" : "http://persondata.toolforge.org/redirect/gnd/de/117269476", "collection" : { - "name" : "Deutschsprachige Wikipedia" + "name" : "Wikipedia-Personenartikel" } } ], "type" : [ "AuthorityResource", "Person", "DifferentiatedPerson" ], diff --git a/conf/output/test-output-rppd-lobid-41.json b/conf/output/test-output-rppd-lobid-41.json index d2fadbd8..8f62a90c 100644 --- a/conf/output/test-output-rppd-lobid-41.json +++ b/conf/output/test-output-rppd-lobid-41.json @@ -6,7 +6,7 @@ "sameAs" : [ { "id" : "http://persondata.toolforge.org/redirect/gnd/de/1120816653", "collection" : { - "name" : "Deutschsprachige Wikipedia" + "name" : "Wikipedia-Personenartikel" } } ], "type" : [ "AuthorityResource", "Person", "DifferentiatedPerson" ], diff --git a/conf/output/test-output-rppd-lobid-48.json b/conf/output/test-output-rppd-lobid-48.json index 1db41444..90686411 100644 --- a/conf/output/test-output-rppd-lobid-48.json +++ b/conf/output/test-output-rppd-lobid-48.json @@ -6,7 +6,7 @@ "sameAs" : [ { "id" : "http://persondata.toolforge.org/redirect/gnd/de/121384462", "collection" : { - "name" : "Deutschsprachige Wikipedia" + "name" : "Wikipedia-Personenartikel" } } ], "type" : [ "AuthorityResource", "Person", "DifferentiatedPerson" ], diff --git a/conf/output/test-output-rppd-lobid-5.json b/conf/output/test-output-rppd-lobid-5.json index 2888d742..f228a554 100644 --- a/conf/output/test-output-rppd-lobid-5.json +++ b/conf/output/test-output-rppd-lobid-5.json @@ -6,7 +6,7 @@ "sameAs" : [ { "id" : "http://persondata.toolforge.org/redirect/gnd/de/120526433", "collection" : { - "name" : "Deutschsprachige Wikipedia" + "name" : "Wikipedia-Personenartikel" } } ], "type" : [ "AuthorityResource", "Person", "DifferentiatedPerson" ], diff --git a/conf/output/test-output-rppd-lobid-7.json b/conf/output/test-output-rppd-lobid-7.json index 12c0eccb..d7626fa6 100644 --- a/conf/output/test-output-rppd-lobid-7.json +++ b/conf/output/test-output-rppd-lobid-7.json @@ -6,7 +6,7 @@ "sameAs" : [ { "id" : "http://persondata.toolforge.org/redirect/gnd/de/116324899", "collection" : { - "name" : "Deutschsprachige Wikipedia" + "name" : "Wikipedia-Personenartikel" } }, { "id" : "http://www.leo-bw.de/web/guest/detail/-/Detail/details/PERSON/wlbblb_personen/116324899/person", diff --git a/conf/rppd-beacon-to-tsv.flux b/conf/rppd-beacon-to-tsv.flux index 0cd37eb8..d01b3fb0 100644 --- a/conf/rppd-beacon-to-tsv.flux +++ b/conf/rppd-beacon-to-tsv.flux @@ -3,10 +3,20 @@ default ENCODING = "UTF-8"; IN | open-http(encoding=ENCODING) -| read-beacon(metadataFilter="name|institution") +| read-beacon(metadataFilter=".*") | fix(" # temporary workaround until https://www.historische-kommission-muenchen-editionen.de/beacond/bsb_personen.php?beacon is fixed: replace_all('seeAlso.url', 'https://personenlexika.digitale-sammlungen.dehttps://personenlexika.digitale-sammlungen.de', 'https://personenlexika.digitale-sammlungen.de') + +vacuum() # remove empty fields + +# for the name label we try, in that order: name, message, description, institution, domain: +unless exists('seeAlso.name') + copy_field('seeAlso.message', 'seeAlso.name') +end +unless exists('seeAlso.name') + copy_field('seeAlso.description', 'seeAlso.name') +end unless exists('seeAlso.name') copy_field('seeAlso.institution', 'seeAlso.name') end @@ -14,6 +24,9 @@ unless exists('seeAlso.name') copy_field(seeAlso.url, seeAlso.name) replace_all(seeAlso.name, 'https?://(?:www\\\\.)?([^/]+).*', '$1') end + +replace_all(seeAlso.name, ' +', ' ') + retain('seeAlso.url', 'seeAlso.name') ") | encode-csv(includeRecordId="true", includeHeader="true", noQuotes="true", separator="\t") From 6a3f7a454a36e96e4626cc145f4e862154daf3a3 Mon Sep 17 00:00:00 2001 From: Fabian Steeg Date: Wed, 24 Apr 2024 13:58:47 +0200 Subject: [PATCH 2/2] Remove `""` introduced by CSV encoder after beacon lookup (RPB-156) --- conf/rppd-to-lobid.fix | 1 + 1 file changed, 1 insertion(+) diff --git a/conf/rppd-to-lobid.fix b/conf/rppd-to-lobid.fix index 62640e6b..78af4b3c 100644 --- a/conf/rppd-to-lobid.fix +++ b/conf/rppd-to-lobid.fix @@ -10,6 +10,7 @@ do put_macro("beacon_lookup") move_field("_temp", "sameAs[].$append.id") copy_field("gndIdentifier", "_temp") lookup("_temp", "beacon_$[id]_name", delete: "true") + replace_all('_temp', '\\"+', '\\"') move_field("_temp", "sameAs[].$last.collection.name") end end