From 6dc147c51f22ac5cbeccb5ddde44ae13915ca22b Mon Sep 17 00:00:00 2001 From: Fabian Steeg Date: Tue, 5 Nov 2024 19:13:55 +0100 Subject: [PATCH 1/9] Set up PUT route for indexing single records (RPB-223) --- app/controllers/nwbib/Application.java | 8 ++++++++ conf/nwbib.conf | 1 + conf/nwbib.routes | 3 ++- 3 files changed, 11 insertions(+), 1 deletion(-) diff --git a/app/controllers/nwbib/Application.java b/app/controllers/nwbib/Application.java index c56dd0d..babd0c6 100644 --- a/app/controllers/nwbib/Application.java +++ b/app/controllers/nwbib/Application.java @@ -943,4 +943,12 @@ private static List starredIds() { return new ArrayList<>(Arrays.asList(currentlyStarred().split(" ")).stream() .filter(s -> !s.trim().isEmpty()).collect(Collectors.toList())); } + + public static Result put(String id, String secret) { + JsonNode inputData = request().body().asJson(); + System.out.println(id + ", will transform: " + inputData); // TODO run flux transformation + boolean willIndex = !secret.trim().isEmpty() && secret.equals(CONFIG.getString("secret")); + System.out.printf("Secret '%s', will index: %b\n", secret, willIndex); + return ok(Json.toJson("TODO: not implemented")); + } } diff --git a/conf/nwbib.conf b/conf/nwbib.conf index 114cd0c..23343b8 100644 --- a/conf/nwbib.conf +++ b/conf/nwbib.conf @@ -4,6 +4,7 @@ item.api="http://lobid.org/items" hbz01.api="http://lobid.org/hbz01" orgs.api="http://lobid.org/organisations" nwbib.filter="inCollection.id:\"http://lobid.org/resources/HT013494180#!\"" +secret="" # Embedded Elasticsearch index for classification data index { diff --git a/conf/nwbib.routes b/conf/nwbib.routes index ee232ad..b445db6 100644 --- a/conf/nwbib.routes +++ b/conf/nwbib.routes @@ -33,4 +33,5 @@ GET /robots.txt controllers.Assets.at(path="/public",file="robots. GET /cgi-bin/wwwalleg/:name.pl controllers.nwbib.Application.showPl(name, db ?= "rnam", index: Int ?= 1, zeilen: Int ?= 1, s1) GET /sw/:rpbId controllers.nwbib.Application.showSw(rpbId) GET /o:id controllers.nwbib.Application.searchSpatial(id, from:Int?=0, size:Int?=25, format?="html") -GET /:id controllers.nwbib.Application.show(id, format ?= "") \ No newline at end of file +GET /:id controllers.nwbib.Application.show(id, format ?= "") +PUT /:id controllers.nwbib.Application.put(id, secret ?= "") \ No newline at end of file From ebb6ae6f418f0b7ff1809d327403d755b4793c4e Mon Sep 17 00:00:00 2001 From: Fabian Steeg Date: Thu, 7 Nov 2024 15:48:24 +0100 Subject: [PATCH 2/9] Transform and index in PUT route (RPB-223) --- app/controllers/nwbib/Application.java | 25 +++++++++++++++++++------ 1 file changed, 19 insertions(+), 6 deletions(-) diff --git a/app/controllers/nwbib/Application.java b/app/controllers/nwbib/Application.java index babd0c6..24deca7 100644 --- a/app/controllers/nwbib/Application.java +++ b/app/controllers/nwbib/Application.java @@ -3,6 +3,7 @@ package controllers.nwbib; import java.io.File; +import java.io.FileNotFoundException; import java.io.IOException; import java.io.InputStream; import java.io.UnsupportedEncodingException; @@ -28,6 +29,8 @@ import java.util.stream.Stream; import java.util.stream.StreamSupport; +import org.antlr.runtime.RecognitionException; +import org.apache.commons.io.FileUtils; import org.apache.commons.lang3.tuple.Pair; import org.elasticsearch.common.base.Charsets; import org.elasticsearch.common.geo.GeoPoint; @@ -54,6 +57,7 @@ import play.mvc.Result; import play.mvc.Results; import play.twirl.api.HtmlFormat; +import rpb.ETL; import views.html.browse_classification; import views.html.browse_register; import views.html.classification; @@ -944,11 +948,20 @@ private static List starredIds() { .filter(s -> !s.trim().isEmpty()).collect(Collectors.toList())); } - public static Result put(String id, String secret) { - JsonNode inputData = request().body().asJson(); - System.out.println(id + ", will transform: " + inputData); // TODO run flux transformation - boolean willIndex = !secret.trim().isEmpty() && secret.equals(CONFIG.getString("secret")); - System.out.printf("Secret '%s', will index: %b\n", secret, willIndex); - return ok(Json.toJson("TODO: not implemented")); + public static Promise put(String id, String secret) throws FileNotFoundException, RecognitionException, IOException { + File input = new File("conf/output/test-output-strapi.json"); + File output = new File("conf/output/test-output-0.json"); + FileUtils.writeStringToFile(input, request().body().asJson().toString(), Charsets.UTF_8); + ETL.main(new String[] {"conf/rpb-test-titel-to-lobid.flux"}); + String result = FileUtils.readFileToString(output, Charsets.UTF_8); + boolean authorized = !secret.trim().isEmpty() && secret.equals(CONFIG.getString("secret")); + if (authorized) { + String url = "http://weywot3:9200/resources-rpb-test/resource/" + + URLEncoder.encode("https://lobid.org/resources/" + id, "UTF-8"); + WSRequest request = WS.url(url).setHeader("Content-Type", "application/json"); + return request.put(result).map(response -> status(response.getStatus(), response.getBody())); + } else { + return Promise.pure(unauthorized()); + } } } From b409bb5763d9f7422392f3f5b38bcdb8793b664b Mon Sep 17 00:00:00 2001 From: Fabian Steeg Date: Fri, 8 Nov 2024 17:12:14 +0100 Subject: [PATCH 3/9] Disable cache for instant update after edit (RPB-223) --- build.sbt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/build.sbt b/build.sbt index 1178a7b..538a59f 100644 --- a/build.sbt +++ b/build.sbt @@ -45,6 +45,8 @@ lazy val root = (project in file(".")).enablePlugins(PlayJava) javacOptions ++= Seq("-source", "1.8", "-target", "1.8") +javaOptions += "-Dnet.sf.ehcache.disabled=true" + import com.typesafe.sbteclipse.core.EclipsePlugin.EclipseKeys EclipseKeys.projectFlavor := EclipseProjectFlavor.Java // Java project. Don't expect Scala IDE From da7b017e98eab356823e81d76878b8c51007ed60 Mon Sep 17 00:00:00 2001 From: Fabian Steeg Date: Fri, 8 Nov 2024 18:33:06 +0100 Subject: [PATCH 4/9] Tweak file IO to fix transformation in Play prod mode (RPB-223) --- app/controllers/nwbib/Application.java | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/app/controllers/nwbib/Application.java b/app/controllers/nwbib/Application.java index 24deca7..089db9a 100644 --- a/app/controllers/nwbib/Application.java +++ b/app/controllers/nwbib/Application.java @@ -9,6 +9,8 @@ import java.io.UnsupportedEncodingException; import java.net.URL; import java.net.URLEncoder; +import java.nio.file.Files; +import java.nio.file.Paths; import java.text.Collator; import java.util.ArrayList; import java.util.Arrays; @@ -30,7 +32,6 @@ import java.util.stream.StreamSupport; import org.antlr.runtime.RecognitionException; -import org.apache.commons.io.FileUtils; import org.apache.commons.lang3.tuple.Pair; import org.elasticsearch.common.base.Charsets; import org.elasticsearch.common.geo.GeoPoint; @@ -951,9 +952,9 @@ private static List starredIds() { public static Promise put(String id, String secret) throws FileNotFoundException, RecognitionException, IOException { File input = new File("conf/output/test-output-strapi.json"); File output = new File("conf/output/test-output-0.json"); - FileUtils.writeStringToFile(input, request().body().asJson().toString(), Charsets.UTF_8); + Files.write(Paths.get(input.getAbsolutePath()), request().body().asJson().toString().getBytes(Charsets.UTF_8)); ETL.main(new String[] {"conf/rpb-test-titel-to-lobid.flux"}); - String result = FileUtils.readFileToString(output, Charsets.UTF_8); + String result = Files.readAllLines(Paths.get(output.getAbsolutePath())).stream().collect(Collectors.joining("\n")); boolean authorized = !secret.trim().isEmpty() && secret.equals(CONFIG.getString("secret")); if (authorized) { String url = "http://weywot3:9200/resources-rpb-test/resource/" From 9ea23ad8cdbcd199754c579c6a3b87abb0a34fa6 Mon Sep 17 00:00:00 2001 From: Fabian Steeg Date: Fri, 18 Oct 2024 16:41:56 +0200 Subject: [PATCH 5/9] Add type `Article` if no type is set in Strapi data (RPB-202) Article is a separate content type in Strapi, has no own type field --- conf/rpb-titel-to-lobid.fix | 3 +++ 1 file changed, 3 insertions(+) diff --git a/conf/rpb-titel-to-lobid.fix b/conf/rpb-titel-to-lobid.fix index 9581654..020e562 100644 --- a/conf/rpb-titel-to-lobid.fix +++ b/conf/rpb-titel-to-lobid.fix @@ -28,6 +28,9 @@ copy_field("rpbId", "_id") # ------- Set "type" ------- set_array("type[]", "BibliographicResource") +unless exists("type") + set_field("type[].$append", "Article") +end if all_equal("type", "u") set_field("type[].$append", "Article") end From 87ce58dd1ea23626abf7458006ca778d71b330fd Mon Sep 17 00:00:00 2001 From: Fabian Steeg Date: Tue, 12 Nov 2024 08:54:42 +0100 Subject: [PATCH 6/9] Keep Strapi labels in to-lobid transformation (RPB-223) --- conf/rpb-titel-to-lobid.fix | 37 +++++++++++++++++++++++++------------ 1 file changed, 25 insertions(+), 12 deletions(-) diff --git a/conf/rpb-titel-to-lobid.fix b/conf/rpb-titel-to-lobid.fix index 020e562..6302ac7 100644 --- a/conf/rpb-titel-to-lobid.fix +++ b/conf/rpb-titel-to-lobid.fix @@ -43,7 +43,10 @@ end if any_equal("type", "Band") set_field("type[].$append", "Book") call_macro("move_here", field: "isPartOf[]") - copy_field("isPartOf[].value", "superordinateLabel") + copy_field("isPartOf[].label", "superordinateLabel") + unless exists("superordinateLabel") + copy_field("isPartOf[].value", "superordinateLabel") + end set_array("isPartOf[]") set_array("isPartOf[].$append.type[]", "IsPartOfRelation") set_array("isPartOf[].$last.hasSuperordinate[]") @@ -124,9 +127,14 @@ if exists("isPartOf[]") set_array("temp") set_array("temp.$append.type[]", "IsPartOfRelation") set_array("temp.$last.hasSuperordinate[]") - split_field("isPartOf[].1.value", " ; ") - copy_field("isPartOf[].1.value.1", "temp.$last.hasSuperordinate[].$append.label") - copy_field("isPartOf[].1.value.2", "temp.$last.numbering") + if exists("isPartOf[].1.label") + copy_field("isPartOf[].1.value", "temp.$last.hasSuperordinate[].$append.id") + copy_field("isPartOf[].1.label", "temp.$last.hasSuperordinate[].$last.label") + else + split_field("isPartOf[].1.value", " ; ") + copy_field("isPartOf[].1.value.1", "temp.$last.hasSuperordinate[].$append.label") + copy_field("isPartOf[].1.value.2", "temp.$last.numbering") + end move_field("temp", "isPartOf[]") end end @@ -208,10 +216,12 @@ end do list (path:"temp_subject.*.componentList[]", "var": "$i") move_field("$i.value", "$i.id") - copy_field("$i.id", "$i.label") - replace_all("$i.label","^http.+[/#](.+)$","$1") - replace_all("$i.label","^(\\d+)-(\\d+)$","$1n$2") - lookup("$i.label", "./RPB-Export_HBZ_SW.tsv","sep_char":"\t") + unless exists("$i.label") + copy_field("$i.id", "$i.label") + replace_all("$i.label","^http.+[/#](.+)$","$1") + replace_all("$i.label","^(\\d+)-(\\d+)$","$1n$2") + lookup("$i.label", "./RPB-Export_HBZ_SW.tsv","sep_char":"\t") + end if all_match("$i.id","^http://rpb.lobid.org/sw/.*$") add_field("$i.source.id", "http://rpb.lobid.org/sw") add_field("$i.source.label", "RPB-Sachsystematik") @@ -287,11 +297,14 @@ do put_macro("contributions") set_array("_temp") copy_field("$[from][].*.value", "_temp.$append.agent.id") do list_as(f: "$[from][]", c: "_temp") - copy_field("c.agent.id", "c.agent.label") + copy_field("f.label", "c.agent.label") + unless exists("c.agent.label") + copy_field("c.agent.id", "c.agent.label") + replace_all("c.agent.label","^http.+[/#](.+)$","$1") + replace_all("c.agent.label","^(\\d+)-(\\d+)$","$1n$2") + lookup("c.agent.label", "./RPB-Export_HBZ_SW.tsv","sep_char":"\t") + end set_array("c.agent.type[]", "Person") - replace_all("c.agent.label","^http.+[/#](.+)$","$1") - replace_all("c.agent.label","^(\\d+)-(\\d+)$","$1n$2") - lookup("c.agent.label", "./RPB-Export_HBZ_SW.tsv","sep_char":"\t") copy_field("f.type", "c.role.id") copy_field("f.type", "c.role.label") lookup("c.role.id", "role-to-uri") From cb21eff899f54d7aa32f9917e01fc0adc4ec6baf Mon Sep 17 00:00:00 2001 From: Fabian Steeg Date: Tue, 12 Nov 2024 10:16:09 +0100 Subject: [PATCH 7/9] Handle `; `-delimited values in `bibliographicCitation` (RPB-223) --- app/views/TableRow.java | 1 + 1 file changed, 1 insertion(+) diff --git a/app/views/TableRow.java b/app/views/TableRow.java index b9b3750..bd74f2d 100644 --- a/app/views/TableRow.java +++ b/app/views/TableRow.java @@ -38,6 +38,7 @@ public String process(JsonNode doc, String property, String param, return filtered.isEmpty() ? "" : String.format("%s%s", label, filtered.stream() + .flatMap(s -> Arrays.asList(s.split("; ")).stream()) .map(val -> label(doc, property, param, val, keys)) .collect(Collectors.joining( property.equals("subjectChain") ? "
" : " | "))); From 575cbbdfa1752ddd9fdf7d801d81ed01d6d308ff Mon Sep 17 00:00:00 2001 From: Fabian Steeg Date: Tue, 12 Nov 2024 10:19:10 +0100 Subject: [PATCH 8/9] Exclude subjects from details view if empty --- app/views/tags/result_doc.scala.html | 2 ++ 1 file changed, 2 insertions(+) diff --git a/app/views/tags/result_doc.scala.html b/app/views/tags/result_doc.scala.html index c869143..86a069f 100644 --- a/app/views/tags/result_doc.scala.html +++ b/app/views/tags/result_doc.scala.html @@ -137,6 +137,7 @@ } @subjects(subjects: Seq[JsValue]) = { + @if(!subjects.isEmpty) { Schlagwörter @@ -157,6 +158,7 @@ } else { | } } + } } @sortedPublications(seq: Seq[JsValue]) = @{ From 8ac8111dfdecc58f5de1db735a9997d047b6af6f Mon Sep 17 00:00:00 2001 From: Fabian Steeg Date: Tue, 12 Nov 2024 14:49:56 +0100 Subject: [PATCH 9/9] Tweak display of URIs in details view (RPB-223) --- app/controllers/nwbib/Lobid.java | 2 +- app/views/TableRow.java | 12 ++++++------ 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/app/controllers/nwbib/Lobid.java b/app/controllers/nwbib/Lobid.java index 834908f..98bed8d 100644 --- a/app/controllers/nwbib/Lobid.java +++ b/app/controllers/nwbib/Lobid.java @@ -324,7 +324,7 @@ public static String resourceLabel(String id) { Callable getLabel = () -> { // e.g. take TT000086525 from http://lobid.org/resources/TT000086525#! String simpleId = - id.replaceAll("https?://[^/]+/resources?/(?:search\\?q=rpbId:)?(.+?)[^A-Z0-9]*$", "$1"); + id.replaceAll("https?://[^/]+/(?:resources?/)?(?:search\\?q=rpbId:)?(.+?)[^A-Z0-9]*$", "$1"); JsonNode json = getResource(simpleId).findValue("title"); String label = json == null ? "" : HtmlEscapers.htmlEscaper().escape(json.asText()); diff --git a/app/views/TableRow.java b/app/views/TableRow.java index bd74f2d..cd3fbc3 100644 --- a/app/views/TableRow.java +++ b/app/views/TableRow.java @@ -49,7 +49,9 @@ private String label(JsonNode doc, String property, String param, String value = property.equals("subjectChain") ? val.replaceAll("\\([\\d,]+\\)$", "").trim() : val; if (!labels.isPresent()) { - return refAndLabel(property, value, labels)[0]; + String[] refAndLabel = refAndLabel(property, value, labels); + return value.startsWith("http") ? String.format("%s", + refAndLabel[0], refAndLabel[0], refAndLabel[1]) : refAndLabel[0]; } String term = value; if (param.equals("q")) { @@ -211,11 +213,9 @@ private static String lifeDates(JsonNode node) { String[] refAndLabel(String property, String value, Optional> labels) { if ((property.equals("containedIn") || property.equals("hasPart") - || property.equals("isPartOf") || property.equals("hasSuperordinate")) - && value.contains("lobid.org")) { - return new String[] { - value.replaceAll("https://lobid.org/resources?/", "http://rpb.lobid.org/"), - Lobid.resourceLabel(value) }; + || property.equals("isPartOf") || property.equals("hasSuperordinate") + || property.equals("bibliographicCitation")) && value.contains("lobid.org")) { + return new String[] { value, Lobid.resourceLabel(value) }; } String label = labels.isPresent() && labels.get().size() > 0 ? labels.get().get(0)