Commit

Add unit tests

Friendseeker committed Oct 14, 2024
1 parent 90bbcba commit d7a4dfe

Showing 3 changed files with 153 additions and 22 deletions.
ConsistentAnalysisFormatIntegrationSuite.scala

@@ -1,11 +1,11 @@
 package sbt.inc.consistent

-import java.io.File
+import java.io.{ File, FileInputStream }
 import java.util.Arrays
 import org.scalatest.funsuite.AnyFunSuite
 import sbt.internal.inc.consistent.ConsistentFileAnalysisStore
 import sbt.internal.inc.{ Analysis, FileAnalysisStore }
-import sbt.io.IO
+import sbt.io.{ IO, Using }
 import xsbti.compile.{ AnalysisContents, AnalysisStore }
 import xsbti.compile.analysis.ReadWriteMappers

@@ -50,6 +50,22 @@ class ConsistentAnalysisFormatIntegrationSuite extends AnyFunSuite {
     }
   }

+  test("compression ratio") {
+    for (d <- data) {
+      assert(d.exists())
+      val api = read(FileAnalysisStore.text(d))
+      val file = write("cbin1.zip", api)
+      val uncompressedSize = Using.gzipInputStream(new FileInputStream(file)) { in =>
+        val content = IO.readBytes(in)
+        content.length
+      }
+      val compressedSize = file.length()
+      val compressionRatio = compressedSize.toDouble / uncompressedSize.toDouble
+      assert(compressionRatio < 0.85)
+      // compression ratio for each data: 0.8185090254676337, 0.7247774786370688, 0.8346021341469837
+    }
+  }
+
   def read(store: AnalysisStore): AnalysisContents = {
     val api = store.unsafeGet()
     // Force loading of companion file and check that the companion data is present:
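
Note: the new assertion boils down to comparing a gzip file's size on disk against its inflated size. A minimal standalone sketch of the same measurement, reusing the sbt.io helpers the test already imports (the object and method names here are illustrative, not part of the commit):

import java.io.{ File, FileInputStream }
import sbt.io.{ IO, Using }

object GzipMetrics {
  // Illustrative helper (not in the commit): size on disk / size after gunzip.
  // A value below 1.0 means gzip shrank the payload; the test asserts < 0.85.
  def gzipCompressionRatio(file: File): Double = {
    val uncompressedSize = Using.gzipInputStream(new FileInputStream(file)) { in =>
      IO.readBytes(in).length
    }
    file.length().toDouble / uncompressedSize.toDouble
  }
}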
ConsistentAnalysisFormatSuite.scala

@@ -81,24 +81,4 @@ class ConsistentAnalysisFormatSuite extends AnyFunSuite {
     writeTo(SerializerFactory.binary.serializerFor(out))
     readFrom(SerializerFactory.binary.deserializerFor(new ByteArrayInputStream(out.toByteArray)))
   }
-
-  test("ParallelGzip") {
-    val bs = 64 * 1024
-    val rnd = new Random(0L)
-    for {
-      threads <- Seq(1, 8)
-      size <- Seq(0, bs - 1, bs, bs + 1, bs * 8 - 1, bs * 8, bs * 8 + 1)
-    } {
-      val a = new Array[Byte](size)
-      rnd.nextBytes(a)
-      val bout = new ByteArrayOutputStream()
-      val gout = new ParallelGzipOutputStream(bout, parallelism = threads)
-      gout.write(a)
-      gout.close()
-      val gin =
-        new BufferedInputStream(new GZIPInputStream(new ByteArrayInputStream(bout.toByteArray)))
-      val a2 = IO.readBytes(gin)
-      assert(Arrays.equals(a, a2), s"threads = $threads, size = $size")
-    }
-  }
 }
ParallelGzipOutputStreamSpecification.scala (new file)

@@ -0,0 +1,135 @@
+package sbt.inc.consistent
+
+import org.scalatest.flatspec.AnyFlatSpec
+import org.scalatest.matchers.should.Matchers
+
+import java.io.{ BufferedInputStream, ByteArrayInputStream, ByteArrayOutputStream }
+import java.util.zip.GZIPInputStream
+import sbt.internal.inc.consistent.ParallelGzipOutputStream
+import sbt.io.IO
+import sbt.io.Using
+
+import java.util.Arrays
+import collection.parallel.CollectionConverters.*
+import scala.util.Random
+import scala.concurrent.{ Future, Await }
+import scala.concurrent.ExecutionContext.Implicits.global
+import scala.concurrent.duration._
+
+class ParallelGzipOutputStreamSpecification extends AnyFlatSpec with Matchers {
+  val defaultSize: Int = 64 * 1024
+  val sizes: Seq[Int] = Seq(
+    0,
+    1,
+    3,
+    32,
+    127,
+    1025,
+    defaultSize - 1,
+    defaultSize,
+    defaultSize + 1,
+    defaultSize * 8 - 1,
+    defaultSize * 8,
+    defaultSize * 8 + 1
+  )
+  val numberOfGzipStreams: Seq[Int] = Seq(1, 2, 4, 8, 15)
+  val parallelisms: Seq[Int] = 1 to 17
+
+  def decompress(data: Array[Byte]): Array[Byte] = {
+    Using.gzipInputStream(new ByteArrayInputStream(data))(IO.readBytes)
+  }
+
+  def compress(data: Array[Byte], parallelism: Int): Array[Byte] = {
+    val bout = new ByteArrayOutputStream()
+    val gout = new ParallelGzipOutputStream(bout, parallelism)
+    try {
+      gout.write(data)
+    } finally {
+      gout.close()
+    }
+    bout.toByteArray
+  }
+
+  def verifyRoundTrip(data: Array[Byte], parallelism: Int, errorMessage: String): Unit = {
+    val compressed = compress(data, parallelism)
+    val decompressed = decompress(compressed)
+    assert(Arrays.equals(data, decompressed), errorMessage)
+  }
+
+  def randomArray(size: Int): Array[Byte] = {
+    val rnd = new Random(0L)
+    val data = new Array[Byte](size)
+    rnd.nextBytes(data)
+    data
+  }
+
+  it should "compress and decompress data correctly" in {
+    for {
+      parallelism <- parallelisms
+      size <- sizes
+    } {
+      val data = randomArray(size)
+      verifyRoundTrip(data, parallelism, s"parallelism = $parallelism, size = $size")
+    }
+  }
+
+  it should "handle highly redundant data correctly" in {
+    for {
+      parallelism <- parallelisms
+      size <- sizes
+    } {
+      val data = Array.fill(size)(0.toByte)
+      verifyRoundTrip(data, parallelism, s"parallelism = $parallelism, size = $size")
+    }
+  }
+
+  it should "handle large data sizes" in {
+    val largeData = randomArray(64 * 1024 * 1024) // 64 MB
+    for (parallelism <- parallelisms) {
+      verifyRoundTrip(largeData, parallelism, s"parallelism = $parallelism, large data size")
+    }
+  }
+
+  it should "handle very large parallelism" in {
+    val data = randomArray(defaultSize * 16)
+    val maxNumberOfThreads = 200
+    verifyRoundTrip(data, maxNumberOfThreads, s"parallelism = $maxNumberOfThreads, large data")
+  }
+
+  it should "handle multiple ParallelGzipOutputStream concurrently" in {
+    for {
+      numberOfGzipStream <- numberOfGzipStreams
+      parallelism <- parallelisms
+      size <- sizes
+    } {
+      val verifications = Future.traverse(1 to numberOfGzipStream)(streamIndex =>
+        Future {
+          val data = randomArray(size)
+          verifyRoundTrip(
+            data,
+            parallelism,
+            s"stream $streamIndex of $numberOfGzipStream, parallelism = $parallelism, size = $size"
+          )
+        }
+      )
+      Await.result(verifications, 60.seconds)
+    }
+  }
+
+  it should "handle multiple ParallelGzipOutputStream with varying config concurrently" in {
+    val verifications = Future.traverse(for {
+      parallelism <- parallelisms.take(10)
+      size <- sizes
+    } yield (parallelism, size)) { case (parallelism, size) =>
+      Future {
+        val data = randomArray(size)
+        verifyRoundTrip(
+          data,
+          parallelism,
+          s"parallelism = $parallelism, size = $size"
+        )
+      }
+    }
+    Await.result(verifications, 60.seconds)
+  }
+}
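
Note: outside ScalaTest, the round trip this spec exercises reduces to a few lines. A minimal sketch, assuming only the API surface visible in this diff (a constructor taking an output stream and a parallelism level, plus write and close); the object name RoundTripDemo is illustrative:

import java.io.{ ByteArrayInputStream, ByteArrayOutputStream }
import java.util.Arrays
import sbt.internal.inc.consistent.ParallelGzipOutputStream
import sbt.io.{ IO, Using }
import scala.util.Random

object RoundTripDemo {
  def main(args: Array[String]): Unit = {
    val data = new Array[Byte](1024 * 1024)
    new Random(0L).nextBytes(data)

    // Compress using 8 worker threads; the framing is ordinary gzip.
    val bout = new ByteArrayOutputStream()
    val gout = new ParallelGzipOutputStream(bout, parallelism = 8)
    try gout.write(data)
    finally gout.close()

    // Any standard gzip reader can inflate the result.
    val decompressed =
      Using.gzipInputStream(new ByteArrayInputStream(bout.toByteArray))(IO.readBytes)
    assert(Arrays.equals(data, decompressed))
  }
}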
