Skip to content

Commit

Permalink
Thesaurus / Add support for thesaurus described using OWL format
Browse files Browse the repository at this point in the history
A number of DCAT-related vocabularies are published in OWL format, describing a concept scheme and its concepts.
e.g. https://mobilitydcat-ap.github.io/controlled-vocabularies/network-coverage/latest/index.html

```xml
<owl:NamedIndividual rdf:about="https://w3id.org/mobilitydcat-ap/mobility-theme/accesibility-information-for-vehicles">
    <rdf:type rdf:resource="http://www.w3.org/2004/02/skos/core#Concept"/>
    <skos:broader rdf:resource="https://w3id.org/mobilitydcat-ap/mobility-theme/data-content-sub-category"/>
    <skos:broader rdf:resource="https://w3id.org/mobilitydcat-ap/mobility-theme/public-transport-non-scheduled-transport"/>
    <skos:inScheme rdf:resource="https://w3id.org/mobilitydcat-ap/mobility-theme"/>
    <skos:prefLabel xml:lang="en">Accesibility information for vehicles</skos:prefLabel>
</owl:NamedIndividual>

```

Add the possibility to import those files directly by converting them from OWL to the SKOS format supported by GeoNetwork.

API changes:
* when using the `stylesheet` parameter of the upload thesaurus operation, restrict its use to known files in the
`xslt/services/thesaurus/` folder.

Also fix some SonarLint items.

Funded by Wallonia region (SPW)
  • Loading branch information
fxprunayre committed Feb 12, 2024
1 parent c98ce9d commit a09a000
Show file tree
Hide file tree
Showing 4 changed files with 2,127 additions and 28 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@
import org.fao.geonet.api.tools.i18n.LanguageUtils;
import org.fao.geonet.constants.Geonet;
import org.fao.geonet.domain.ISODate;
import org.fao.geonet.exceptions.BadParameterEx;
import org.fao.geonet.kernel.*;
import org.fao.geonet.kernel.search.KeywordsSearcher;
import org.fao.geonet.kernel.search.keyword.*;
Expand Down Expand Up @@ -139,6 +140,8 @@ public class KeywordsApi {
@Autowired
ThesaurusManager thesaurusManager;

List<String> allowedExtensions = Arrays.asList("rdf", "owl", "xml");

/**
* Search keywords.
*
Expand Down Expand Up @@ -570,8 +573,8 @@ private Object getKeyword(
String key = ((Map.Entry) entry).getKey().toString();
String value = ((Map.Entry) entry).getValue().toString();
Element conv = new Element("conversion");
conv.setAttribute("from",key.toString());
conv.setAttribute("to",value.toString().replace("#",""));
conv.setAttribute("from", key);
conv.setAttribute("to", value.replace("#",""));
langConversion.addContent(conv);
}

Expand Down Expand Up @@ -610,9 +613,7 @@ private Object getKeyword(
root.addContent(gui);
root.addContent(nodeUrl);
root.addContent(nodeId);
final Element transform = Xml.transform(root, convertXsl);

return transform;
return Xml.transform(root, convertXsl);
}
}

Expand All @@ -632,7 +633,8 @@ private Object getKeyword(
value = "/{thesaurus:.+}",
method = RequestMethod.GET,
produces = {
MediaType.TEXT_XML_VALUE
MediaType.TEXT_XML_VALUE,
MediaType.APPLICATION_XML_VALUE
})
@ApiResponses(value = {
@ApiResponse(responseCode = "200", description = "Thesaurus in SKOS format.",
Expand Down Expand Up @@ -808,13 +810,14 @@ public String uploadThesaurus(

String extension = FilenameUtils.getExtension(fname);

if (extension.equalsIgnoreCase("rdf") ||
extension.equalsIgnoreCase("xml")) {
if (allowedExtensions.contains(extension.toLowerCase())) {
Log.debug(Geonet.THESAURUS, "Uploading thesaurus: " + fname);

// Rename .xml to .rdf for all thesaurus
fname = fname.replace(extension, "rdf");
uploadThesaurus(rdfFile, stylesheet, context, fname, type.toString(), dir);
uploadThesaurus(rdfFile,
getStylesheetForExtension(stylesheet, extension),
context, fname, type.toString(), dir);
} else {
Log.debug(Geonet.THESAURUS, "Incorrect extension for thesaurus named: " + fname);
throw new Exception("Incorrect extension for thesaurus named: "
Expand All @@ -827,13 +830,13 @@ public String uploadThesaurus(
return String.format("Thesaurus '%s' loaded in %d sec.",
fname, duration);
} finally {
if (tempDir != null) {
FileUtils.deleteQuietly(tempDir);
}
FileUtils.deleteQuietly(tempDir);
}
}


/**
 * Select the XSL conversion to apply to an uploaded thesaurus file.
 *
 * <p>Files with the {@code owl} extension (in any letter case) are always
 * converted with the {@code owl-to-skos} stylesheet. Any other extension,
 * including {@code null}, keeps the stylesheet requested by the caller.</p>
 *
 * @param stylesheet the stylesheet name requested by the caller
 * @param extension  the extension of the uploaded file, without the leading dot
 * @return {@code "owl-to-skos"} for OWL files, otherwise {@code stylesheet} unchanged
 */
private static String getStylesheetForExtension(String stylesheet, String extension) {
    // The upload operations validate the extension after lower-casing it, so
    // "FILE.OWL" is accepted there; match case-insensitively here as well so
    // such a file is still converted instead of being loaded as raw OWL.
    return "owl".equalsIgnoreCase(extension) ? "owl-to-skos" : stylesheet;
}


/**
Expand Down Expand Up @@ -935,7 +938,7 @@ public void importCsvAsThesaurus(
}

long fsize;
if (csvFile != null && Files.exists(csvFile)) {
if (Files.exists(csvFile)) {
fsize = Files.size(csvFile);
} else {
throw new MissingServletRequestParameterException("CSV file doesn't exist", "file");
Expand Down Expand Up @@ -978,9 +981,7 @@ public void importCsvAsThesaurus(
response.getOutputStream().write(Xml.getString(element).getBytes());
}
} finally {
if (tempDir != null) {
FileUtils.deleteQuietly(tempDir);
}
FileUtils.deleteQuietly(tempDir);
}
}

Expand Down Expand Up @@ -1049,7 +1050,7 @@ public Element convertCsvToSkos(Path csvFile,
extractRelated(key, thesaurusNamespaceUrl, csvParser, csvRecord,
conceptLinkSeparator, conceptBroaderIdColumn,
broaderLinks);
if (broaderLinks.get(key) == null || broaderLinks.get(key).size() == 0) {
if (broaderLinks.get(key) == null || broaderLinks.get(key).isEmpty()) {
topConcepts.add(key);
}
extractRelated(key, thesaurusNamespaceUrl, csvParser, csvRecord,
Expand All @@ -1070,7 +1071,7 @@ public Element convertCsvToSkos(Path csvFile,
}

Element scheme = buildConceptScheme(csvFile, thesaurusTitle, thesaurusNamespaceUrl);
if(broaderLinks.size() > 0 && topConcepts.size() > 0) {
if(broaderLinks.size() > 0 && !topConcepts.isEmpty()) {
topConcepts.forEach(t -> {
Element topConcept = new Element("hasTopConcept", SKOS_NAMESPACE);
topConcept.setAttribute("resource", t, RDF_NAMESPACE);
Expand Down Expand Up @@ -1281,13 +1282,14 @@ public String uploadThesaurusFromUrl(

String extension = FilenameUtils.getExtension(fname);

if (extension.equalsIgnoreCase("rdf") ||
extension.equalsIgnoreCase("xml")) {
if (allowedExtensions.contains(extension.toLowerCase())) {
Log.debug(Geonet.THESAURUS, "Uploading thesaurus: " + fname);

// Rename .xml to .rdf for all thesaurus
fname = fname.replace(extension, "rdf");
uploadThesaurus(rdfFile, stylesheet, context, fname, type.toString(), dir);
uploadThesaurus(rdfFile,
getStylesheetForExtension(stylesheet, extension),
context, fname, type.toString(), dir);
} else {
Log.debug(Geonet.THESAURUS, "Incorrect extension for thesaurus named: " + fname);
throw new MissingServletRequestParameterException("Incorrect extension for thesaurus", fname);
Expand Down Expand Up @@ -1423,7 +1425,7 @@ private Path extractSKOSFromRegistry(String registryUrl, REGISTRY_TYPE registryT
* @throws IOException Signals that an I/O exception has occurred.
* @throws MalformedURLException the malformed URL exception
*/
private Path getXMLContentFromUrl(String url, ServiceContext context) throws URISyntaxException, IOException, MalformedURLException {
private Path getXMLContentFromUrl(String url, ServiceContext context) throws URISyntaxException, IOException {
Path rdfFile;
URI uri = new URI(url);
rdfFile = Files.createTempFile("thesaurus", ".rdf");
Expand Down Expand Up @@ -1451,17 +1453,21 @@ private void uploadThesaurus(Path rdfFile, String style,
ServiceContext context, String fname, String type, String dir)
throws Exception {

Path stylePath = context.getAppPath().resolve(Geonet.Path.STYLESHEETS);

Element tsXml;
Element xml = Xml.loadFile(rdfFile);
xml.detach();

if (!"_none_".equals(style)) {
FilePathChecker.verify(style);

tsXml = Xml.transform(xml, stylePath.resolve(style));
tsXml.detach();
Path xsltPath = dataDirectory.getWebappDir().resolve(String.format(
"xslt/services/thesaurus/%s.xsl", style));
if (Files.exists(xsltPath)) {
tsXml = Xml.transform(xml, xsltPath);
tsXml.detach();
} else {
throw new BadParameterEx(String.format(
"XSL transformation '%s' not found. Only conversion provided in xslt/services/thesaurus can be used.", style));
}
} else {
tsXml = xml;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,16 +32,24 @@
import org.junit.Test;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.mock.web.MockHttpServletResponse;
import org.springframework.mock.web.MockHttpSession;
import org.springframework.mock.web.MockMultipartFile;
import org.springframework.mock.web.MockMultipartHttpServletRequest;
import org.springframework.test.web.servlet.MockMvc;
import org.springframework.test.web.servlet.MvcResult;
import org.springframework.test.web.servlet.setup.MockMvcBuilders;
import org.springframework.web.context.WebApplicationContext;

import javax.servlet.http.HttpSession;
import java.util.List;

import static org.fao.geonet.csw.common.Csw.NAMESPACE_DC;
import static org.fao.geonet.csw.common.Csw.NAMESPACE_DCT;
import static org.fao.geonet.kernel.rdf.Selectors.RDF_NAMESPACE;
import static org.fao.geonet.kernel.rdf.Selectors.SKOS_NAMESPACE;
import static org.junit.Assert.assertEquals;
import static org.springframework.test.web.servlet.request.MockMvcRequestBuilders.get;
import static org.springframework.test.web.servlet.result.MockMvcResultMatchers.status;

/**
* export CATALOG=http://localhost:8080/geonetwork
Expand All @@ -61,6 +69,8 @@ public class KeywordsApiTest extends AbstractServiceIntegrationTest {
@Autowired
private SpringLocalServiceInvoker invoker;

@Autowired
private WebApplicationContext wac;

@Test
public void testConvertCsvToSkos() throws Exception {
Expand Down Expand Up @@ -199,4 +209,46 @@ public void testConvertCsvToSkosDefaultTitleAndNamespace() throws Exception {
assertEquals(
"taxref.csv", scheme.getChildText("title", NAMESPACE_DC));
}


/**
 * Upload an OWL vocabulary and check that it can be read back as SKOS.
 *
 * <p>The {@code mobility-theme.owl} classpath resource is posted to
 * {@code /srv/api/registries/vocabularies} as an {@code external} thesaurus
 * in the {@code theme} directory. The resulting thesaurus is then fetched
 * as XML and its concept scheme and number of concepts are verified.</p>
 *
 * @throws Exception if the upload, the retrieval or the XML parsing fails
 */
@Test
public void testImportOntologyToSkos() throws Exception {
    createServiceContext();
    User user = new User().setId(USER_ID);
    HttpSession session = loginAs(user);
    MockHttpSession mockHttpSession = loginAsAdmin();

    // Step 1: upload the OWL file through the multipart upload operation.
    MockMultipartHttpServletRequest request = new MockMultipartHttpServletRequest(session.getServletContext());
    request.setRequestURI("/srv/api/registries/vocabularies");
    MockMultipartFile file = new MockMultipartFile(
        "file",
        "mobility-theme.owl",
        null,
        getClass().getClassLoader().getResourceAsStream("mobility-theme.owl"));
    request.addFile(file);
    request.setSession(session);
    request.setParameter("type", "external");
    request.setParameter("dir", "theme");
    MockHttpServletResponse response = new MockHttpServletResponse();
    invoker.invoke(request, response);
    assertEquals(200, response.getStatus());

    // Step 2: read the thesaurus back using the identifier built from
    // the type, directory and file name used for the upload.
    MockMvc mockMvc = MockMvcBuilders.webAppContextSetup(this.wac).build();
    MvcResult result = mockMvc.perform(get("/srv/api/registries/vocabularies/external.theme.mobility-theme")
            .accept("application/xml")
            .session(mockHttpSession))
        .andExpect(status().isOk())
        .andReturn();

    // Step 3: the response must be SKOS, with the scheme and concepts from the OWL file.
    Element thesaurus = Xml.loadString(result.getResponse().getContentAsString(), false);
    Element scheme = (Element) thesaurus.getChildren("ConceptScheme", SKOS_NAMESPACE).get(0);
    assertEquals(
        "https://w3id.org/mobilitydcat-ap/mobility-theme", scheme.getAttributeValue("about", RDF_NAMESPACE));
    assertEquals(
        "Mobility Theme", scheme.getChildText("title", NAMESPACE_DCT));

    // Only the size is needed, so an unbounded wildcard avoids the raw List type.
    List<?> concepts = thesaurus.getChildren("Concept", SKOS_NAMESPACE);
    assertEquals(123, concepts.size());
}
}
Loading

0 comments on commit a09a000

Please sign in to comment.