Skip to content

Commit

Permalink
Remove secondary string table
Browse files Browse the repository at this point in the history
I realized how stupid the idea of keeping two string tables
instead of one was, since it's unlikely we'll ever have more than
2147483647 unique string instances and the JVM doesn't allow
instantiating such a big array in normal heap sizes. So we remove this
part of the design that we inherited from pprof.
  • Loading branch information
jvican committed Aug 27, 2018
1 parent 90b56cd commit 11092ad
Show file tree
Hide file tree
Showing 2 changed files with 42 additions and 49 deletions.
36 changes: 20 additions & 16 deletions internal/zinc-core/src/main/protobuf/zprof.proto
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,10 @@ package sbt.internal.inc;
///////////////////////////////////////// ZINC PROF ///////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////////////////////

// This protobuf scheme is inspired by https://github.com/google/pprof/blob/master/proto/profile.proto
// Like pprof, it uses a string table, and the values that would otherwise be strings in the format
// are represented as an index (int32) into that string table. This is done to minimize overhead in
// memory and on disk.

message Profile {
repeated ZincRun runs = 1;
repeated string string_table = 2;
Expand All @@ -17,52 +21,52 @@ message ZincRun {
}

message CycleInvalidation {
repeated int64 invalidated = 1;
repeated int64 invalidatedByPackageObjects = 2;
repeated int64 initialSources = 3;
repeated int64 invalidatedSources = 4;
repeated int64 recompiledClasses = 5;
repeated int32 invalidated = 1;
repeated int32 invalidatedByPackageObjects = 2;
repeated int32 initialSources = 3;
repeated int32 invalidatedSources = 4;
repeated int32 recompiledClasses = 5;

int64 startTimeNanos = 6; // Start time of compilation (UTC) as nanoseconds past the epoch
int64 compilationDurationNanos = 7; // Duration of the compilation profile in nanoseconds
repeated ApiChange changesAfterRecompilation = 8;

repeated InvalidationEvent events = 9;
repeated int64 nextInvalidations = 10;
repeated int32 nextInvalidations = 10;
bool shouldCompileIncrementally = 11;
}

message InvalidationEvent {
string kind = 1;
repeated int64 inputs = 2;
repeated int64 outputs = 3;
repeated int32 inputs = 2;
repeated int32 outputs = 3;
string reason = 4;
}

message Changes {
repeated int64 added = 1;
repeated int64 removed = 2;
repeated int64 modified = 3;
repeated int32 added = 1;
repeated int32 removed = 2;
repeated int32 modified = 3;
}

message ApiChange {
int64 modifiedClass = 1;
int32 modifiedClass = 1;
string reason = 2;
repeated UsedName usedNames = 3; // Can be empty if the change is not related to names
}

message InitialChanges {
Changes changes = 1;
repeated int64 removedProducts = 2;
repeated int64 binaryDependencies = 3;
repeated int32 removedProducts = 2;
repeated int32 binaryDependencies = 3;
repeated ApiChange externalChanges = 4;
}

message UsedName {
int64 name = 1;
int32 name = 1;
repeated Scope scopes = 2;
}

message Scope {
int64 kind = 1;
int32 kind = 1;
}
Original file line number Diff line number Diff line change
Expand Up @@ -29,19 +29,18 @@ object InvalidationProfiler {
}

class ZincInvalidationProfiler extends InvalidationProfiler {
private final var lastKnownIndex: Long = -1L
/* These string tables contain any kind of repeated string that is likely to occur
private final var lastKnownIndex: Int = -1
/* The string table contains any kind of repeated string that is likely to occur
* in the protobuf profiling data. This includes used names, class names, source
* files and class files (their paths), as well as other repeated strings. This is
* done to keep the memory overhead of the profiler to a minimum. */
private final val stringTable1: ArrayBuffer[String] = new ArrayBuffer[String](1000)
private final val stringTable2: ArrayBuffer[String] = new ArrayBuffer[String](0)

private final val stringTable: ArrayBuffer[String] = new ArrayBuffer[String](1000)

/* Maps strings to their indices in the string table. The indices are ints because the
* string table is a single array-backed buffer, which cannot hold more than
* Integer.MAX_VALUE entries, so a wider index type would never be needed. */
private final val stringTableIndices: mutable.HashMap[String, Long] =
new mutable.HashMap[String, Long]
private final val stringTableIndices: mutable.HashMap[String, Int] =
new mutable.HashMap[String, Int]

def profileRun: RunProfiler = new ZincProfilerImplementation

Expand All @@ -56,46 +55,36 @@ class ZincInvalidationProfiler extends InvalidationProfiler {
*
* It is recommended to only perform this operation when we are
* going to persist the profiled protobuf data to disk. Do not
* call this function after every compiler iteration as the aggregation
* of the symbol tables may be expensive, it's recommended to
* persist this file periodically.
* call this function after every compiler iteration, as you would
* write the string table in every persisted protobuf file. It's
* better to persist this file periodically after several runs
* so that the overhead on disk is not high.
*
* @return An immutable zprof profile that can be persisted via protobuf.
*/
def toProfile: zprof.Profile = zprof.Profile(
runs = runs,
stringTable = stringTable1 ++ stringTable2
stringTable = stringTable
)

private[inc] class ZincProfilerImplementation extends RunProfiler {
private def toStringTableIndex(string: String): Long = {
private def toStringTableIndex(string: String): Int = {
stringTableIndices.get(string) match {
case Some(index) =>
if (index <= Integer.MAX_VALUE) {
val newIndex = index.toInt
stringTable1.apply(newIndex)
newIndex
} else {
val newIndex = (index - Integer.MAX_VALUE.toLong).toInt
stringTable2.apply(newIndex)
newIndex
}
val newIndex = index.toInt
stringTable.apply(newIndex)
newIndex
case None =>
val newIndex = lastKnownIndex + 1
// Depending on the size of the index, use the first or second symbol table
if (newIndex <= Integer.MAX_VALUE) {
stringTable1.insert(newIndex.toInt, string)
} else {
val newIndex2 = (newIndex - Integer.MAX_VALUE.toLong).toInt
stringTable2.insert(newIndex2, string)
}
stringTable.insert(newIndex.toInt, string)
stringTableIndices.put(string, newIndex)
lastKnownIndex = lastKnownIndex + 1
newIndex
}
}

private def toStringTableIndices(strings: Iterable[String]): Iterable[Long] =
private def toStringTableIndices(strings: Iterable[String]): Iterable[Int] =
strings.map(toStringTableIndex(_))

private final var compilationStartNanos: Long = 0L
Expand Down Expand Up @@ -210,11 +199,11 @@ class ZincInvalidationProfiler extends InvalidationProfiler {
}

/**
* Defines the interface of a profiler. This interface is used in the guts of
* [[IncrementalCommon]] and [[IncrementalNameHashing]]. A profiler of a run
* is instantiated afresh in `Incremental.compile` and then added to the profiler
* instance managed by the client.
*/
* Defines the interface of a profiler. This interface is used in the guts of
* [[IncrementalCommon]] and [[IncrementalNameHashing]]. A profiler of a run
* is instantiated afresh in `Incremental.compile` and then added to the profiler
* instance managed by the client.
*/
abstract class RunProfiler {
def timeCompilation(
startNanos: Long,
Expand Down

0 comments on commit 11092ad

Please sign in to comment.