diff --git a/core/src/main/scala/eu/ostrzyciel/jelly/core/ConverterFactory.scala b/core/src/main/scala/eu/ostrzyciel/jelly/core/ConverterFactory.scala index 127e8c73..aa3f904f 100644 --- a/core/src/main/scala/eu/ostrzyciel/jelly/core/ConverterFactory.scala +++ b/core/src/main/scala/eu/ostrzyciel/jelly/core/ConverterFactory.scala @@ -68,7 +68,7 @@ trait ConverterFactory[ namespaceHandler: (String, TNode) => Unit = (_, _) => () ): QuadsDecoder[TNode, TDatatype, TTriple, TQuad] = - new QuadsDecoder(decoderConverter, supportedOptions.getOrElse(defaultSupportedOptions)) + new QuadsDecoder(decoderConverter(namespaceHandler), supportedOptions.getOrElse(defaultSupportedOptions)) /** * Create a new [[GraphsAsQuadsDecoder]]. @@ -84,7 +84,7 @@ trait ConverterFactory[ namespaceHandler: (String, TNode) => Unit = (_, _) => () ): GraphsAsQuadsDecoder[TNode, TDatatype, TTriple, TQuad] = - new GraphsAsQuadsDecoder(decoderConverter, supportedOptions.getOrElse(defaultSupportedOptions)) + new GraphsAsQuadsDecoder(decoderConverter(namespaceHandler), supportedOptions.getOrElse(defaultSupportedOptions)) /** * Create a new [[GraphsDecoder]]. @@ -100,7 +100,7 @@ trait ConverterFactory[ namespaceHandler: (String, TNode) => Unit = (_, _) => () ): GraphsDecoder[TNode, TDatatype, TTriple, TQuad] = - new GraphsDecoder(decoderConverter, supportedOptions.getOrElse(defaultSupportedOptions)) + new GraphsDecoder(decoderConverter(namespaceHandler), supportedOptions.getOrElse(defaultSupportedOptions)) /** * Create a new [[AnyStatementDecoder]]. @@ -116,7 +116,7 @@ trait ConverterFactory[ namespaceHandler: (String, TNode) => Unit = (_, _) => () ): AnyStatementDecoder[TNode, TDatatype, TTriple, TQuad] = - new AnyStatementDecoder(decoderConverter, supportedOptions.getOrElse(defaultSupportedOptions)) + new AnyStatementDecoder(decoderConverter(namespaceHandler), supportedOptions.getOrElse(defaultSupportedOptions)) /** * Create a new [[ProtoEncoder]]. Namespace declarations are disabled by default. diff --git a/jena/src/main/scala/eu/ostrzyciel/jelly/convert/jena/riot/JellyWriter.scala b/jena/src/main/scala/eu/ostrzyciel/jelly/convert/jena/riot/JellyWriter.scala index d66631ab..c27f1a08 100644 --- a/jena/src/main/scala/eu/ostrzyciel/jelly/convert/jena/riot/JellyWriter.scala +++ b/jena/src/main/scala/eu/ostrzyciel/jelly/convert/jena/riot/JellyWriter.scala @@ -10,7 +10,7 @@ import org.apache.jena.sparql.core.{DatasetGraph, Quad} import org.apache.jena.sparql.util.Context import java.io.{OutputStream, Writer} -import scala.collection.mutable.ArrayBuffer +import scala.collection.mutable.{ArrayBuffer, ListBuffer} import scala.jdk.CollectionConverters.* @@ -77,7 +77,7 @@ final class JellyGraphWriter(opt: JellyFormatVariant) extends WriterGraphRIOTBas .withLogicalType(LogicalStreamType.FLAT_TRIPLES) ) val inner = JellyStreamWriter(variant, out) - if opt.enableNamespaceDeclarations then + if variant.enableNamespaceDeclarations then for (prefix, iri) <- prefixMap.getMapping.asScala do inner.prefix(prefix, iri) for triple <- graph.find().asScala do @@ -106,7 +106,7 @@ final class JellyDatasetWriter(opt: JellyFormatVariant) extends WriterDatasetRIO .withLogicalType(LogicalStreamType.FLAT_QUADS) ) val inner = JellyStreamWriter(variant, out) - if opt.enableNamespaceDeclarations then + if variant.enableNamespaceDeclarations then for (prefix, iri) <- prefixMap.getMapping.asScala do inner.prefix(prefix, iri) for quad <- dataset.find().asScala do @@ -135,6 +135,13 @@ object JellyStreamWriterFactory extends StreamRDFWriterFactory: */ final class JellyStreamWriterAutodetectType(opt: JellyFormatVariant, out: OutputStream) extends StreamRDF: private var inner: JellyStreamWriter = null + // If we start receiving prefix() calls before the first triple/quad, we need to store them + private val prefixBacklog: ListBuffer[(String, String)] = new ListBuffer[(String, String)]() + + private def clearPrefixBacklog(): Unit = + for (prefix, iri) <- prefixBacklog do + inner.prefix(prefix, iri) + prefixBacklog.clear() override def start(): Unit = () @@ -144,6 +151,7 @@ final class JellyStreamWriterAutodetectType(opt: JellyFormatVariant, out: Output opt = opt.opt.withPhysicalType(PhysicalStreamType.TRIPLES) .withLogicalType(LogicalStreamType.FLAT_TRIPLES), ), out) + clearPrefixBacklog() inner.triple(triple) override def quad(quad: Quad): Unit = @@ -152,6 +160,7 @@ final class JellyStreamWriterAutodetectType(opt: JellyFormatVariant, out: Output opt = opt.opt.withPhysicalType(PhysicalStreamType.QUADS) .withLogicalType(LogicalStreamType.FLAT_QUADS), ), out) + clearPrefixBacklog() inner.quad(quad) // Not supported @@ -160,6 +169,8 @@ final class JellyStreamWriterAutodetectType(opt: JellyFormatVariant, out: Output override def prefix(prefix: String, iri: String): Unit = if inner != null then inner.prefix(prefix, iri) + else + prefixBacklog += ((prefix, iri)) override def finish(): Unit = if inner != null then diff --git a/jena/src/test/scala/eu/ostrzyciel/jelly/convert/jena/riot/JenaNamespaceDeclarationSpec.scala b/jena/src/test/scala/eu/ostrzyciel/jelly/convert/jena/riot/JenaNamespaceDeclarationSpec.scala new file mode 100644 index 00000000..2fda2005 --- /dev/null +++ b/jena/src/test/scala/eu/ostrzyciel/jelly/convert/jena/riot/JenaNamespaceDeclarationSpec.scala @@ -0,0 +1,156 @@ +package eu.ostrzyciel.jelly.convert.jena.riot + +import eu.ostrzyciel.jelly.convert.jena.traits.JenaTest +import eu.ostrzyciel.jelly.core.proto.v1.{RdfStreamFrame, RdfStreamRow} +import org.apache.jena.graph.NodeFactory +import org.apache.jena.rdf.model.ModelFactory +import org.apache.jena.riot.system.StreamRDFWriter +import org.apache.jena.riot.{RDFDataMgr, RDFWriter, RIOT} +import org.apache.jena.sparql.core.DatasetGraphFactory +import org.scalatest.matchers.should.Matchers +import org.scalatest.wordspec.AnyWordSpec + +import java.io.{ByteArrayInputStream, ByteArrayOutputStream} + +/** + * Round-trip tests for namespace declarations. + */ +class JenaNamespaceDeclarationSpec extends AnyWordSpec, Matchers, JenaTest: + // Prepare data + val m = ModelFactory.createDefaultModel() + m.add( + m.createResource("http://example.com/s"), + m.createProperty("http://example.com/p"), + m.createResource("http://example.com/o") + ) + m.setNsPrefix("ex", "http://example.com/") + m.setNsPrefix("ex2", "http://example2.com/") + + val ds = DatasetGraphFactory.create() + ds.addGraph( + NodeFactory.createURI("http://example2.com/g"), + m.getGraph + ) + ds.prefixes().putAll(m.getNsPrefixMap) + + private def checkDeclarations(out: ByteArrayOutputStream, shouldBeThere: Boolean) = + val rows: Seq[RdfStreamRow] = RdfStreamFrame.parseDelimitedFrom(ByteArrayInputStream(out.toByteArray)).get.rows + val nsDecls = rows.filter(_.row.isNamespace).map(_.row.namespace) + if shouldBeThere then + nsDecls.size should be (2) + nsDecls.map(_.nsName) should contain allOf ("ex", "ex2") + else + nsDecls.size should be (0) + + "JellyGraphWriter" should { + "preserve namespace declarations" in { + val out = new ByteArrayOutputStream() + RDFWriter + .source(m) + .lang(JellyLanguage.JELLY) + .set(JellyLanguage.SYMBOL_ENABLE_NAMESPACE_DECLARATIONS, true) + .output(out) + + checkDeclarations(out, true) + val m2 = ModelFactory.createDefaultModel() + RDFDataMgr.read(m2, ByteArrayInputStream(out.toByteArray), JellyLanguage.JELLY) + m2.getNsPrefixMap should be (m.getNsPrefixMap) + } + + "not preserve namespace declarations if disabled" in { + val out = new ByteArrayOutputStream() + RDFWriter + .source(m) + .lang(JellyLanguage.JELLY) + // Default is false + // .set(JellyLanguage.SYMBOL_ENABLE_NAMESPACE_DECLARATIONS, false) + .output(out) + + checkDeclarations(out, false) + val m2 = ModelFactory.createDefaultModel() + RDFDataMgr.read(m2, ByteArrayInputStream(out.toByteArray), JellyLanguage.JELLY) + m2.getNsPrefixMap should be (java.util.Map.of()) + } + } + + "JellyDatasetWriter" should { + "preserve namespace declarations" in { + val out = new ByteArrayOutputStream() + RDFWriter + .source(ds) + .lang(JellyLanguage.JELLY) + .set(JellyLanguage.SYMBOL_ENABLE_NAMESPACE_DECLARATIONS, true) + .output(out) + + checkDeclarations(out, true) + val ds2 = DatasetGraphFactory.create() + RDFDataMgr.read(ds2, ByteArrayInputStream(out.toByteArray), JellyLanguage.JELLY) + ds2.prefixes().getMapping should be (ds.prefixes().getMapping) + } + + "not preserve namespace declarations if disabled" in { + val out = new ByteArrayOutputStream() + RDFWriter + .source(ds) + .lang(JellyLanguage.JELLY) + // Default is false + // .set(JellyLanguage.SYMBOL_ENABLE_NAMESPACE_DECLARATIONS, false) + .output(out) + + checkDeclarations(out, false) + val ds2 = DatasetGraphFactory.create() + RDFDataMgr.read(ds2, ByteArrayInputStream(out.toByteArray), JellyLanguage.JELLY) + ds2.prefixes().getMapping should be (java.util.Map.of()) + } + } + + "JellyStreamWriterAutodetectType" should { + "preserve namespace declarations (prefixes before triples)" in { + val out = new ByteArrayOutputStream() + val writer = StreamRDFWriter.getWriterStream( + out, + JellyLanguage.JELLY, + RIOT.getContext.copy().set(JellyLanguage.SYMBOL_ENABLE_NAMESPACE_DECLARATIONS, true) + ) + writer.start() + writer.prefix("ex", "http://example.com") + writer.prefix("ex2", "http://example2.com") + writer.triple(m.getGraph.find().next()) + writer.finish() + + checkDeclarations(out, true) + } + + "preserve namespace declarations (triples before prefixes)" in { + val out = new ByteArrayOutputStream() + val writer = StreamRDFWriter.getWriterStream( + out, + JellyLanguage.JELLY, + RIOT.getContext.copy().set(JellyLanguage.SYMBOL_ENABLE_NAMESPACE_DECLARATIONS, true) + ) + writer.start() + writer.triple(m.getGraph.find().next()) + writer.prefix("ex", "http://example.com") + writer.prefix("ex2", "http://example2.com") + writer.finish() + + checkDeclarations(out, true) + } + + "not preserve namespace declarations if disabled" in { + val out = new ByteArrayOutputStream() + val writer = StreamRDFWriter.getWriterStream( + out, + JellyLanguage.JELLY, + // default is false + RIOT.getContext.copy() // .set(JellyLanguage.SYMBOL_ENABLE_NAMESPACE_DECLARATIONS, false) + ) + writer.start() + writer.prefix("ex", "http://example.com") + writer.prefix("ex2", "http://example2.com") + writer.triple(m.getGraph.find().next()) + writer.finish() + + checkDeclarations(out, false) + } + } diff --git a/rdf4j/src/test/scala/eu/ostrzyciel/jelly/convert/rdf4j/rio/Rdf4jNamespaceDeclarationSpec.scala b/rdf4j/src/test/scala/eu/ostrzyciel/jelly/convert/rdf4j/rio/Rdf4jNamespaceDeclarationSpec.scala new file mode 100644 index 00000000..8d997680 --- /dev/null +++ b/rdf4j/src/test/scala/eu/ostrzyciel/jelly/convert/rdf4j/rio/Rdf4jNamespaceDeclarationSpec.scala @@ -0,0 +1,86 @@ +package eu.ostrzyciel.jelly.convert.rdf4j.rio + +import eu.ostrzyciel.jelly.core.proto.v1.* +import org.eclipse.rdf4j.model.impl.SimpleValueFactory +import org.eclipse.rdf4j.rio.helpers.AbstractRDFHandler +import org.scalatest.matchers.should.Matchers +import org.scalatest.wordspec.AnyWordSpec + +import java.io.{ByteArrayInputStream, ByteArrayOutputStream} + +/** + * Round-trip tests for namespace declarations. + */ +class Rdf4jNamespaceDeclarationSpec extends AnyWordSpec, Matchers: + private def checkDeclarations(out: ByteArrayOutputStream, shouldBeThere: Boolean) = + val rows: Seq[RdfStreamRow] = RdfStreamFrame.parseDelimitedFrom(ByteArrayInputStream(out.toByteArray)).get.rows + val nsDecls = rows.filter(_.row.isNamespace).map(_.row.namespace) + + val parser = JellyParserFactory().getParser() + val namespaces = new collection.mutable.HashMap[String, String]() + parser.setRDFHandler(new AbstractRDFHandler() { + override def handleNamespace(prefix: String, uri: String): Unit = { + namespaces.put(prefix, uri) + } + }) + parser.parse(new ByteArrayInputStream(out.toByteArray), "") + + if shouldBeThere then + nsDecls.size should be(2) + nsDecls.map(_.nsName) should contain allOf("ex", "ex2") + namespaces should be (Map("ex" -> "http://example.com/", "ex2" -> "http://example2.com/")) + else + nsDecls.size should be(0) + namespaces should be (Map.empty) + + val vf = SimpleValueFactory.getInstance() + val triple = vf.createStatement( + vf.createIRI("http://example2.com/s"), + vf.createIRI("http://example.com/p"), + vf.createIRI("http://example.com/o") + ) + + "JellyWriter and JellyReader" should { + "preserve namespace declarations (prefixes before triples)" in { + val out = new ByteArrayOutputStream() + val writer = JellyWriterFactory().getWriter(out) + writer.set(JellyWriterSettings.ENABLE_NAMESPACE_DECLARATIONS, true) + + writer.startRDF() + writer.handleNamespace("ex", "http://example.com/") + writer.handleNamespace("ex2", "http://example2.com/") + writer.handleStatement(triple) + writer.endRDF() + + checkDeclarations(out, shouldBeThere = true) + } + + "preserve namespace declarations (triples before prefixes)" in { + val out = new ByteArrayOutputStream() + val writer = JellyWriterFactory().getWriter(out) + writer.set(JellyWriterSettings.ENABLE_NAMESPACE_DECLARATIONS, true) + + writer.startRDF() + writer.handleStatement(triple) + writer.handleNamespace("ex", "http://example.com/") + writer.handleNamespace("ex2", "http://example2.com/") + writer.endRDF() + + checkDeclarations(out, shouldBeThere = true) + } + + "not preserve namespace declarations if disabled" in { + val out = new ByteArrayOutputStream() + val writer = JellyWriterFactory().getWriter(out) + // Default is false + // writer.set(JellyWriterSettings.ENABLE_NAMESPACE_DECLARATIONS, false) + + writer.startRDF() + writer.handleNamespace("ex", "http://example.com/") + writer.handleNamespace("ex2", "http://example2.com/") + writer.handleStatement(triple) + writer.endRDF() + + checkDeclarations(out, shouldBeThere = false) + } + }