diff --git a/core/src/main/scala/eu/ostrzyciel/jelly/core/ProtoEncoder.scala b/core/src/main/scala/eu/ostrzyciel/jelly/core/ProtoEncoder.scala index 4f27fef..a371710 100644 --- a/core/src/main/scala/eu/ostrzyciel/jelly/core/ProtoEncoder.scala +++ b/core/src/main/scala/eu/ostrzyciel/jelly/core/ProtoEncoder.scala @@ -19,11 +19,14 @@ object ProtoEncoder: * @param enableNamespaceDeclarations whether to allow namespace declarations in the stream. * If true, this will raise the stream version to 2 (Jelly 1.1.0). Otherwise, * the stream version will be 1 (Jelly 1.0.0). + * @param maybeRowBuffer optional buffer for storing stream rows that should go into a stream frame. + * If provided, the encoder will append the rows to this buffer instead of + * returning them, so methods like `addTripleStatement` will return Seq(). */ abstract class ProtoEncoder[TNode, -TTriple, -TQuad, -TQuoted]( final val options: RdfStreamOptions, final val enableNamespaceDeclarations: Boolean, - final val maybeRowBuffer: Option[mutable.Buffer[RdfStreamRow]] = None, + final val maybeRowBuffer: Option[mutable.Buffer[RdfStreamRow]], ): import ProtoEncoder.* @@ -219,14 +222,15 @@ abstract class ProtoEncoder[TNode, -TTriple, -TQuad, -TQuoted]( ) private inline def handleHeader(): Unit = - if iResponsibleForBufferClear then - rowBuffer.clear() if !emittedOptions then emitOptions() private def appendAndReturn(row: RdfStreamRow): Iterable[RdfStreamRow] = rowBuffer.append(row) // This branch will always be correctly predicted - if iResponsibleForBufferClear then rowBuffer.toList + if iResponsibleForBufferClear then + val list = rowBuffer.toList + rowBuffer.clear() + list else emptyRowBuffer private def emitOptions(): Unit = diff --git a/core/src/main/scala/eu/ostrzyciel/jelly/core/ProtoTranscoderImpl.scala b/core/src/main/scala/eu/ostrzyciel/jelly/core/ProtoTranscoderImpl.scala index 2e04021..4183637 100644 --- a/core/src/main/scala/eu/ostrzyciel/jelly/core/ProtoTranscoderImpl.scala +++ b/core/src/main/scala/eu/ostrzyciel/jelly/core/ProtoTranscoderImpl.scala @@ -3,7 +3,7 @@ package eu.ostrzyciel.jelly.core import eu.ostrzyciel.jelly.core.proto.v1.* import scala.annotation.switch -import scala.collection.mutable.ArrayBuffer +import scala.collection.mutable.ListBuffer /** * Fast implementation of the ProtoTranscoder interface. @@ -48,7 +48,7 @@ private final class ProtoTranscoderImpl( private var inputUsesPrefixes = false // Current output stream state - private val rowBuffer = new ArrayBuffer[RdfStreamRow](128) + private val rowBuffer = new ListBuffer[RdfStreamRow]() private var changeInTerms = false private var emittedOptions = false diff --git a/core/src/test/scala/eu/ostrzyciel/jelly/core/NodeEncoderSpec.scala b/core/src/test/scala/eu/ostrzyciel/jelly/core/NodeEncoderSpec.scala index ab4d033..a06d103 100644 --- a/core/src/test/scala/eu/ostrzyciel/jelly/core/NodeEncoderSpec.scala +++ b/core/src/test/scala/eu/ostrzyciel/jelly/core/NodeEncoderSpec.scala @@ -6,7 +6,7 @@ import org.scalatest.Inspectors import org.scalatest.matchers.should.Matchers import org.scalatest.wordspec.AnyWordSpec -import scala.collection.mutable.ArrayBuffer +import scala.collection.mutable.ListBuffer import scala.util.Random class NodeEncoderSpec extends AnyWordSpec, Inspectors, Matchers: @@ -16,8 +16,8 @@ class NodeEncoderSpec extends AnyWordSpec, Inspectors, Matchers: maxDatatypeTableSize = 8, ) - private def getEncoder(prefixTableSize: Int = 8): (NodeEncoder[Mrl.Node], ArrayBuffer[RdfStreamRow]) = - val buffer = new ArrayBuffer[RdfStreamRow]() + private def getEncoder(prefixTableSize: Int = 8): (NodeEncoder[Mrl.Node], ListBuffer[RdfStreamRow]) = + val buffer = new ListBuffer[RdfStreamRow]() (NodeEncoder[Mrl.Node](smallOptions(prefixTableSize), 16, 16, 16), buffer) "A NodeEncoder" when { diff --git a/core/src/test/scala/eu/ostrzyciel/jelly/core/ProtoEncoderSpec.scala b/core/src/test/scala/eu/ostrzyciel/jelly/core/ProtoEncoderSpec.scala index f5ae156..a3eceb7 100644 --- a/core/src/test/scala/eu/ostrzyciel/jelly/core/ProtoEncoderSpec.scala +++ b/core/src/test/scala/eu/ostrzyciel/jelly/core/ProtoEncoderSpec.scala @@ -7,6 +7,8 @@ import eu.ostrzyciel.jelly.core.proto.v1.* import org.scalatest.matchers.should.Matchers import org.scalatest.wordspec.AnyWordSpec +import scala.collection.mutable.ListBuffer + class ProtoEncoderSpec extends AnyWordSpec, Matchers: import ProtoTestCases.* @@ -32,6 +34,22 @@ class ProtoEncoderSpec extends AnyWordSpec, Matchers: assertEncoded(encoded, Triples2NsDecl.encoded(encoder.options)) } + "encode triple statements with ns decls and an external buffer" in { + val buffer = ListBuffer[RdfStreamRow]() + val encoder = MockProtoEncoder( + JellyOptions.smallGeneralized.withPhysicalType(PhysicalStreamType.TRIPLES), + enableNamespaceDeclarations = true, Some(buffer) + ) + for triple <- Triples2NsDecl.mrl do + val result = triple match + case t: Triple => encoder.addTripleStatement(t) + case ns: NamespaceDecl => encoder.declareNamespace(ns.name, ns.iri) + // external buffer – nothing should be returned directly + result.size should be (0) + + assertEncoded(buffer.toSeq, Triples2NsDecl.encoded(encoder.options)) + } + "encode quad statements" in { val encoder = MockProtoEncoder( JellyOptions.smallGeneralized.withPhysicalType(PhysicalStreamType.QUADS) @@ -40,6 +58,20 @@ class ProtoEncoderSpec extends AnyWordSpec, Matchers: assertEncoded(encoded, Quads1.encoded(encoder.options.withVersion(Constants.protoVersionNoNsDecl))) } + "encode quad statements with an external buffer" in { + val buffer = ListBuffer[RdfStreamRow]() + val encoder = MockProtoEncoder( + JellyOptions.smallGeneralized.withPhysicalType(PhysicalStreamType.QUADS), + false, Some(buffer) + ) + for quad <- Quads1.mrl do + val result = encoder.addQuadStatement(quad) + // external buffer – nothing should be returned directly + result.size should be (0) + + assertEncoded(buffer.toSeq, Quads1.encoded(encoder.options.withVersion(Constants.protoVersionNoNsDecl))) + } + "encode quad statements (repeated default graph)" in { val encoder = MockProtoEncoder( JellyOptions.smallGeneralized.withPhysicalType(PhysicalStreamType.QUADS) @@ -60,6 +92,24 @@ class ProtoEncoderSpec extends AnyWordSpec, Matchers: assertEncoded(encoded, Graphs1.encoded(encoder.options.withVersion(Constants.protoVersionNoNsDecl))) } + "encode graphs with an external buffer" in { + val buffer = ListBuffer[RdfStreamRow]() + val encoder = MockProtoEncoder( + JellyOptions.smallGeneralized.withPhysicalType(PhysicalStreamType.GRAPHS), + false, Some(buffer) + ) + for (graphName, triples) <- Graphs1.mrl do + val start = encoder.startGraph(graphName) + start.size should be (0) + for triple <- triples do + val result = encoder.addTripleStatement(triple) + result.size should be (0) + val end = encoder.endGraph() + end.size should be (0) + + assertEncoded(buffer.toSeq, Graphs1.encoded(encoder.options.withVersion(Constants.protoVersionNoNsDecl))) + } + "not allow to end a graph before starting one" in { val encoder = MockProtoEncoder( JellyOptions.smallGeneralized.withPhysicalType(PhysicalStreamType.QUADS) diff --git a/jena/src/main/scala/eu/ostrzyciel/jelly/convert/jena/riot/JellyWriter.scala b/jena/src/main/scala/eu/ostrzyciel/jelly/convert/jena/riot/JellyWriter.scala index cc84af1..58c55bf 100644 --- a/jena/src/main/scala/eu/ostrzyciel/jelly/convert/jena/riot/JellyWriter.scala +++ b/jena/src/main/scala/eu/ostrzyciel/jelly/convert/jena/riot/JellyWriter.scala @@ -10,7 +10,7 @@ import org.apache.jena.sparql.core.{DatasetGraph, Quad} import org.apache.jena.sparql.util.Context import java.io.{OutputStream, Writer} -import scala.collection.mutable.{ArrayBuffer, ListBuffer} +import scala.collection.mutable.ListBuffer import scala.jdk.CollectionConverters.* diff --git a/rdf4j/src/main/scala/eu/ostrzyciel/jelly/convert/rdf4j/rio/JellyWriter.scala b/rdf4j/src/main/scala/eu/ostrzyciel/jelly/convert/rdf4j/rio/JellyWriter.scala index 914c8f7..62ae853 100644 --- a/rdf4j/src/main/scala/eu/ostrzyciel/jelly/convert/rdf4j/rio/JellyWriter.scala +++ b/rdf4j/src/main/scala/eu/ostrzyciel/jelly/convert/rdf4j/rio/JellyWriter.scala @@ -8,7 +8,7 @@ import org.eclipse.rdf4j.rio.helpers.AbstractRDFWriter import java.io.OutputStream import java.util -import scala.collection.mutable.{ArrayBuffer, ListBuffer} +import scala.collection.mutable.ListBuffer /** * RDF4J Rio writer for Jelly RDF format.