Skip to content

Commit

Permalink
New analysis format
Browse files Browse the repository at this point in the history
A new implementation of Zinc's incremental state serialization.

- Full structural serialization (like the existing protobuf format), no shortcuts with sbinary or Java serialization (like the existing text format).
- A single implementation that supports an efficient binary format for production use and a text format for development and debugging.
- Consistent output files: If two compiler runs result in the same internal representation of incremental state (after applying WriteMappers), they produce identical zinc files. This is important for build tools like Bazel where skipping a build entirely when the outputs are identical is much cheaper than having to run Zinc to perform the incremental state analysis.
- Smaller output files than the existing binary format.
- Faster serialization and deserialization than the existing binary format.
- Smaller implementation than either of the existing formats.
- Optional unsorted output that trades consistency and small file sizes for much faster writing.

Benchmark data based on scala-library + reflect + compiler:

|                             | Write time | Read time | File size |
|-----------------------------|------------|-----------|-----------|
| sbt Text                    |    1002 ms |    791 ms |   7102 kB |
| sbt Binary                  |     654 ms |    277 ms |   6182 kB |
| ConsistentBinary            |     157 ms |    100 ms |   3097 kB |
| ConsistentBinary (unsorted) |      79 ms |           |   3796 kB |

This PR makes the new format available via the new ConsistentFileAnalysisStore. It does not replace the existing formats (but it should; it's a better choice for almost every use case).

We have been using iterations of this format internally over the last few months for the Bazel build (with our own Zinc-based tooling) of our two main monorepos totaling about 27000 Scala (+ Java/mixed) targets ranging in size from a few LOC to almost 1 million LOC.
  • Loading branch information
szeiger committed Jan 5, 2024
1 parent 604f73a commit d08b03c
Show file tree
Hide file tree
Showing 12 changed files with 1,914 additions and 0 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,176 @@
package xsbt

import java.io.File
import java.util.concurrent.TimeUnit
import scala.collection.mutable

import org.openjdk.jmh.annotations._
import org.openjdk.jmh.infra.Blackhole
import sbt.internal.inc.consistent._
import sbt.internal.inc.{ Analysis, FileAnalysisStore }
import sbt.io.IO
import xsbti.compile.analysis.ReadWriteMappers
import xsbti.compile.{ AnalysisContents, AnalysisStore }

/**
 * JMH benchmark comparing read/write performance and output size of the sbt
 * text, sbt binary, and consistent-binary analysis store formats. Inputs are
 * pre-built analysis zips ("compiler", "reflect", "library") copied from
 * ../../../test-data into a scratch directory.
 */
@BenchmarkMode(Array(Mode.AverageTime))
@Fork(1)
@Threads(1)
@Warmup(iterations = 5)
@Measurement(iterations = 5)
@OutputTimeUnit(TimeUnit.MILLISECONDS)
@State(Scope.Benchmark)
class AnalysisFormatBenchmark {

  // Scratch directory holding the copied inputs plus all reference outputs.
  var temp: File = _
  val sets = IndexedSeq("compiler", "reflect", "library")
  // Analysis contents deserialized once in setup and reused by all write benchmarks.
  var cached: Map[String, AnalysisContents] = _

  /** Copies the test inputs, pre-parses them, and writes one reference file
   * per format so the read benchmarks have something to read. */
  @Setup
  def setup(): Unit = {
    this.temp = IO.createTemporaryDirectory
    sets.foreach { s =>
      val f = new File("../../../test-data", s"${s}.zip")
      assert(f.exists())
      val f2 = new File(temp, f.getName)
      IO.copyFile(f, f2)
      assert(f2.exists())
    }
    this.cached = readAll("", FileAnalysisStore.binary(_))
    writeAll("-ref-text", FileAnalysisStore.text(_), cached)
    // writeAll("-ref-ctext", ConsistentFileAnalysisStore.text(_, ReadWriteMappers.getEmptyMappers), cached)
    writeAll(
      "-ref-cbin",
      ConsistentFileAnalysisStore.binary(_, ReadWriteMappers.getEmptyMappers),
      cached
    )
    writeAll(
      "-ref-cbin-nosort",
      ConsistentFileAnalysisStore.binary(_, ReadWriteMappers.getEmptyMappers, sort = false),
      cached
    )
    // List the directory once instead of re-scanning it for every total.
    val files = temp.listFiles()
    println("Sizes:")
    files.foreach { p => println(s"$p: ${p.length()}") }
    val cbinTotal = files.filter(_.getName.endsWith("-cbin.zip")).map(_.length()).sum
    println(s"cbin total = $cbinTotal, ${cbinTotal / 1024}k")
    val cbinNoSortTotal =
      files.filter(_.getName.endsWith("-cbin-nosort.zip")).map(_.length()).sum
    println(s"cbin-nosort total = $cbinNoSortTotal, ${cbinNoSortTotal / 1024}k")
  }

  @TearDown
  def tearDown(): Unit = {
    if (temp != null) IO.delete(temp)
  }

  @Benchmark
  def readBinary(bh: Blackhole): Unit = bh.consume(readAll("", FileAnalysisStore.binary(_)))

  @Benchmark
  def readText(bh: Blackhole): Unit = bh.consume(readAll("-ref-text", FileAnalysisStore.text(_)))

  @Benchmark
  def readConsistentBinary(bh: Blackhole): Unit =
    bh.consume(
      readAll("-ref-cbin", ConsistentFileAnalysisStore.binary(_, ReadWriteMappers.getEmptyMappers))
    )

  @Benchmark
  def writeBinary(bh: Blackhole): Unit =
    bh.consume(writeAll("-test-bin", FileAnalysisStore.binary(_), cached))

  @Benchmark
  def writeText(bh: Blackhole): Unit =
    bh.consume(writeAll("-test-text", FileAnalysisStore.text(_), cached))

  @Benchmark
  def writeConsistentBinary(bh: Blackhole): Unit =
    bh.consume(
      writeAll(
        "-test-cbin",
        ConsistentFileAnalysisStore.binary(_, ReadWriteMappers.getEmptyMappers),
        cached
      )
    )

  @Benchmark
  def writeConsistentBinaryNoSort(bh: Blackhole): Unit =
    bh.consume(
      writeAll(
        "-test-cbin-nosort",
        ConsistentFileAnalysisStore.binary(_, ReadWriteMappers.getEmptyMappers, sort = false),
        cached
      )
    )

  /** Serializes into a NullSerializer, measuring pure traversal cost without I/O. */
  @Benchmark
  def writeNull(bh: Blackhole): Unit = {
    cached.foreach {
      case (_, a) =>
        val ser = new NullSerializer
        val af = new ConsistentAnalysisFormat(ReadWriteMappers.getEmptyMappers, sort = true)
        af.write(ser, a.getAnalysis, a.getMiniSetup)
        bh.consume(ser.count)
    }
  }

  /** Like writeNull, but without the sorting pass. */
  @Benchmark
  def writeNullNoSort(bh: Blackhole): Unit = {
    cached.foreach {
      case (_, a) =>
        val ser = new NullSerializer
        val af = new ConsistentAnalysisFormat(ReadWriteMappers.getEmptyMappers, sort = false)
        af.write(ser, a.getAnalysis, a.getMiniSetup)
        bh.consume(ser.count)
    }
  }

  /** Reads all three analysis sets with the given store factory. */
  def readAll(suffix: String, store: File => AnalysisStore): Map[String, AnalysisContents] =
    sets.iterator.map(s => (s, read(s, suffix, store))).toMap

  /** Writes all entries of `map` with the given store factory. */
  def writeAll(
      suffix: String,
      store: File => AnalysisStore,
      map: Map[String, AnalysisContents]
  ): Unit =
    map.foreach { case (s, a) => write(s, suffix, store, a) }

  def read(set: String, suffix: String, store: File => AnalysisStore): AnalysisContents = {
    val api = store(new File(temp, s"${set}${suffix}.zip")).unsafeGet()
    // Touch the API data so lazily-loaded formats pay their full read cost here.
    assert(api.getAnalysis.asInstanceOf[Analysis].apis.internal.head._2.api() != null)
    api
  }

  def write(
      set: String,
      suffix: String,
      store: File => AnalysisStore,
      analysis: AnalysisContents
  ): Unit = {
    // The benchmark is only meaningful if the API data is actually stored.
    assert(analysis.getMiniSetup.storeApis())
    val f = new File(temp, s"${set}${suffix}.zip")
    IO.delete(f)
    store(f).set(analysis)
    assert(f.exists())
  }
}

/**
 * A Serializer that writes nothing and merely counts the calls it receives,
 * so the pure traversal/serialization overhead can be benchmarked without I/O.
 * A repeated string is counted only the first time it is seen.
 */
class NullSerializer extends Serializer {
  private[this] val seen = new mutable.HashSet[String]
  private[this] var calls = 0
  def count: Int = calls
  def startBlock(name: String): Unit = calls += 1
  def startArray(name: String, length: Int): Unit = calls += 1
  def endBlock(): Unit = calls += 1
  def endArray(): Unit = calls += 1
  // HashSet.add returns true only on first insertion, so duplicates don't count.
  def string(s: String): Unit = if (seen.add(s)) calls += 1
  def bool(b: Boolean): Unit = calls += 1
  def int(i: Int): Unit = calls += 1
  def byte(b: Byte): Unit = calls += 1
  def long(l: Long): Unit = calls += 1
  def end(): Unit = calls += 1
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
package sbt.internal.inc.consistent

import java.util.Arrays
import scala.collection.{ MapLike, SetLike, SortedMap, SortedMapLike }
import scala.collection.generic.{
CanBuildFrom,
GenericTraversableTemplate,
MapFactory,
SeqFactory,
SetFactory,
SortedMapFactory
}

// some simple compatibility shims for 2.12 so we don't need to depend on collection-compat
object Compat {
// On 2.12 there is no `scala.collection.Factory`; a `CanBuildFrom` whose
// `From` type is fixed to `Nothing` provides the equivalent "build a C from
// A elements" capability.
type Factory[-A, +C] = CanBuildFrom[Nothing, A, C]

// Adapts a 2.12 SortedMapFactory companion (e.g. TreeMap) to the Factory shape.
implicit def sortedMapFactoryToCBF[CC[A, B] <: SortedMap[A, B] with SortedMapLike[
A,
B,
CC[A, B]
], K: Ordering, V](f: SortedMapFactory[CC]): Factory[(K, V), CC[K, V]] =
new f.SortedMapCanBuildFrom

// Adapts an unsorted MapFactory companion (e.g. HashMap) to the Factory shape.
implicit def mapFactoryToCBF[CC[A, B] <: Map[A, B] with MapLike[A, B, CC[A, B]], K, V](
f: MapFactory[CC]
): Factory[(K, V), CC[K, V]] =
new f.MapCanBuildFrom

// Adapts a SeqFactory companion (e.g. Vector, List) to the Factory shape.
implicit def seqFactoryToCBF[CC[X] <: Seq[X] with GenericTraversableTemplate[X, CC], E](
f: SeqFactory[CC]
): Factory[E, CC[E]] =
new f.GenericCanBuildFrom

// Adapts a SetFactory companion (e.g. HashSet) to the Factory shape.
implicit def setFactoryToCBF[CC[X] <: Set[X] with SetLike[X, CC[X]], E](f: SetFactory[CC])
: Factory[E, CC[E]] =
f.setCanBuildFrom

// Backports the 2.13 `factory.newBuilder` API; on 2.12 a CanBuildFrom applied
// with no arguments produces a fresh Builder.
implicit class FactoryOps[-A, +C](private val factory: Factory[A, C]) {
def newBuilder: scala.collection.mutable.Builder[A, C] = factory()
}

// 2.13 renamed TraversableOnce to IterableOnce; alias the old name to the new one.
type IterableOnce[+E] = TraversableOnce[E]

// Backports the 2.13 `IterableOnce.iterator` method (on 2.12, Iterator itself
// has no `iterator` member, so dispatch on the runtime type).
implicit class IterableOnceOps[+E](private val it: IterableOnce[E]) {
def iterator: Iterator[E] = it match {
case it: Iterator[_] => it.asInstanceOf[Iterator[E]]
case it => it.asInstanceOf[Iterable[E]].iterator
}
}

// Backports the 2.13 `sortInPlaceBy` for reference arrays via java.util.Arrays.sort.
implicit class ArrayOps[A <: AnyRef](private val a: Array[A]) {
def sortInPlaceBy[B](f: A => B)(implicit ord: Ordering[B]): Unit = Arrays.sort(a, ord on f)
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
package sbt.internal.inc.consistent

// On 2.13+ the standard library provides `Factory` directly; this alias lets
// code written against the 2.12 compatibility shim compile unchanged.
object Compat {
type Factory[-A, +C] = scala.collection.Factory[A, C]
}
Loading

0 comments on commit d08b03c

Please sign in to comment.