From d8b3605a85c6996db5cd6475ae7676b01b1c0d2c Mon Sep 17 00:00:00 2001
From: Leandro Motta Barros <lmb@stackedboxes.org>
Date: Tue, 13 Jun 2023 15:01:01 -0300
Subject: [PATCH 1/2] Add script to run delta benchmarks

The script is not extremely high-quality, but keeping it in the repo
will make it easier to improve it. And, of course, it is already useful
and may help us when working on more delta changes in the future.

Signed-off-by: Leandro Motta Barros <leandro@balena.io>
Change-type: patch
---
 .dockerignore                           |   1 +
 balena-benchmarking/README.md           |  34 +++++
 balena-benchmarking/delta-benchmarks.sh | 185 ++++++++++++++++++++++++
 3 files changed, 220 insertions(+)
 create mode 100644 balena-benchmarking/README.md
 create mode 100755 balena-benchmarking/delta-benchmarks.sh

diff --git a/.dockerignore b/.dockerignore
index df466067e1..8c08126b3c 100644
--- a/.dockerignore
+++ b/.dockerignore
@@ -3,3 +3,4 @@
 .gopath
 bundles
 vendor/pkg
+balena-benchmarking
diff --git a/balena-benchmarking/README.md b/balena-benchmarking/README.md
new file mode 100644
index 0000000000..1c29e47fc7
--- /dev/null
+++ b/balena-benchmarking/README.md
@@ -0,0 +1,34 @@
+# balenaEngine Benchmarking
+
+This directory contains some scripts we use to benchmark balenaEngine. They are
+not super stable and ready for public consumption, but they are good enough for
+informing decisions when working on improvements.
+
+Currently, there's actually just one script here.
+
+## `delta-benchmarks.sh`
+
+This script collects some metrics on the generation of deltas. Namely, it
+measures how long it takes, how much memory it uses, and how large the
+resulting deltas are.
+
+The script does this for a list of references (branches, tags, commits) defined
+in the `branches` variable at its start, and for each of the test cases defined
+in `testCases`. You can customize these two variables as you need.
+
+You need to run this as the superuser (`root`), from the root of the
+balena-engine repository. Something like this should work:
+
+```sh
+sudo ./balena-benchmarking/delta-benchmarks.sh
+```
+
+All required images will be pulled into
+`./balena-benchmarking/balenad-data-root`. If you already have pulled all
+images, you can run a bit faster by using this:
+
+```sh
+sudo SKIP_PULL=y ./balena-benchmarking/delta-benchmarks.sh
+```
+
+Results are written to `./balena-benchmarking/delta.csv`.
diff --git a/balena-benchmarking/delta-benchmarks.sh b/balena-benchmarking/delta-benchmarks.sh
new file mode 100755
index 0000000000..6c4220cb22
--- /dev/null
+++ b/balena-benchmarking/delta-benchmarks.sh
@@ -0,0 +1,185 @@
+#!/bin/bash
+
+# Environment variables affecting the script behavior:
+#
+# - SKIP_PULL: if set to 'y', images will not be pulled before running the
+#   benchmarks. Useful to spare some time if you are sure you already have all
+#   needed images locally.
+
+set -e
+
+# Branches to benchmark.
+branches=(
+    "master"
+    "lmb/librsync-memory"
+)
+
+# Test cases to benchmark (test case name, basis image, target image).
+testCases=(
+    "busybox-1.36.0-1.36.1          busybox:1.36.0                                   busybox:1.36.1"
+    "busybox-1.25.0-1.36.1          busybox:1.25.0                                   busybox:1.36.1"
+    "debian-10.0-11.7               debian:10.0                                      debian:11.7"
+    "debian-11.6-11.7               debian:11.6                                      debian:11.7"
+    "debian-slim-11.7               debian:11.7-slim                                 debian:11.7"
+    "debian-11.7-slim               debian:11.7                                      debian:11.7-slim"
+    "ubuntu-18.04-23.04             ubuntu:18.04                                     ubuntu:23.04"
+    "alpine-3.7-3.18                alpine:3.7                                       alpine:3.18"
+    "audio-aarch64-0.5.5-0.5.6      bh.cr/balenalabs/audio-aarch64/0.5.5             bh.cr/balenalabs/audio-aarch64/0.5.6"
+    "audio-amd64-0.5.5-0.5.6        bh.cr/balenalabs/audio-amd64/0.5.5               bh.cr/balenalabs/audio-amd64/0.5.6"
+    "browser-aarch64-2.3.7-2.4.7    bh.cr/balenalabs/browser-aarch64/2.3.7           bh.cr/balenalabs/browser-aarch64/2.4.7"
+    "browser-amd64-2.3.7-2.4.7      bh.cr/balenalabs/browser-amd64/2.3.7             bh.cr/balenalabs/browser-amd64/2.4.7"
+    "nodered-aarch64-2.4.0-2.4.1    bh.cr/balenalabs/balena-node-red-aarch64/2.4.0   bh.cr/balenalabs/balena-node-red-aarch64/2.4.1"
+    "ca-priv-amd64-0.0.12-0.0.13    bh.cr/balena/ca-private-amd64/0.0.12             bh.cr/balena/ca-private-amd64/0.0.13"
+    "ca-priv-amd64-0.0.13-0.0.12    bh.cr/balena/ca-private-amd64/0.0.13             bh.cr/balena/ca-private-amd64/0.0.12"
+    "unzoner-armv7hf-1.2.0-1.2.23   bh.cr/belodetek/unzoner-armv7hf/1.2.0            bh.cr/belodetek/unzoner-armv7hf/1.2.23"
+    "unzoner-armv7hf-1.2.23-1.2.0   bh.cr/belodetek/unzoner-armv7hf/1.2.23           bh.cr/belodetek/unzoner-armv7hf/1.2.0"
+    # TODO maybe: https://gitlab.com/nvidia/container-images/l4t-base
+)
+
+balenadDataRoot="./balena-benchmarking/balenad-data-root"  # passed to balenad as --data-root
+balenadPIDFile="/var/run/balena-engine.pid"                # passed to balenad as --pidfile
+deltaTag="balena-engine-delta-benchmark-image"             # tag given to every generated delta
+
+# Abort unless the current directory has a Makefile (taken as the repo root).
+function assertRunningFromRepoRoot() {
+    [ -f "Makefile" ] && return
+    echo "Please run from the root of the balena-engine repository."
+    exit 1
+}
+
+# Check out git ref $1 and build balenaEngine from it with `make dynbinary`.
+function buildBalenaEngine() {
+    local ref="$1"
+    printf '\nBUILDING BALENA ENGINE FROM BRANCH %s\n\n' "$ref"
+
+    git checkout "$ref"
+    make dynbinary
+}
+
+# Start balenad in the background and block until it answers API requests.
+function startBalenad() {
+    echo "Starting balenad..."
+    mkdir -p "$balenadDataRoot"
+    # A leftover PID file suggests an older daemon; stop it first.
+    if [ -f "$balenadPIDFile" ]; then
+        killBalenad
+    fi
+
+    balenad --data-root "$balenadDataRoot" --pidfile "$balenadPIDFile" &> /dev/null &
+    echo -n "Waiting for balenad to start... "
+    while [ ! -f "$balenadPIDFile" ]; do
+        sleep 1
+    done
+    # Run the command itself as the condition; inside `[ ... ]` it is never executed.
+    while ! balena-engine info &> /dev/null; do
+        sleep 1
+    done
+    echo " done! (PID = $(cat "$balenadPIDFile"))"
+}
+
+# Stop balenad: SIGTERM first, then SIGKILL if its PID file is still there.
+function killBalenad() {
+    echo "Killing balenad..."
+    [ -f "$balenadPIDFile" ] || return 0
+    # `|| true`: a stale PID file (dead process) must not abort us via `set -e`.
+    kill "$(cat "$balenadPIDFile")" || true
+    sleep 5
+    if [ -f "$balenadPIDFile" ]; then
+        echo "balenaEngine still running, killing with -KILL"
+        kill -KILL "$(cat "$balenadPIDFile")" || true
+        rm -f "$balenadPIDFile" # a SIGKILLed daemon cannot remove it itself
+    fi
+}
+
+# Pull the basis and target images of every test case.
+# Uses an Engine built from the master branch; does nothing when SKIP_PULL
+# is set to 'y'.
+function pullAllImages() {
+    if [ "$SKIP_PULL" == "y" ]; then
+        return
+    fi
+
+    printf '\n%s\n\n' "PULLING ALL IMAGES"
+
+    buildBalenaEngine "master"
+    startBalenad
+
+    for testCase in "${testCases[@]}"; do
+        # Each entry is "<name> <basis> <target>"; the name is not needed here.
+        read -r _ tcBasis tcTarget <<< "$testCase"
+
+        balena-engine pull "$tcBasis"
+        balena-engine pull "$tcTarget"
+    done
+
+    killBalenad
+}
+
+# Print the VmHWM value (peak resident set size, in kB) of the balenad process.
+function balenadMaxMemory() {
+    if [ ! -f "$balenadPIDFile" ]; then
+        # Callers capture stdout, so the diagnostic has to go to stderr.
+        echo "balenad not running!" >&2
+        exit 1
+    fi
+    awk '/VmHWM/ {print $2}' "/proc/$(cat "$balenadPIDFile")/status"
+}
+
+export PATH="$(pwd)/bundles/dynbinary-daemon:$PATH"
+
+assertRunningFromRepoRoot
+
+# Remember the current branch so we can switch back to it later.
+originalBranch=$(git rev-parse --abbrev-ref HEAD)
+echo "Running from this branch: $originalBranch"
+
+pullAllImages
+
+# The CSV file where results will be stored.
+csvResults="./balena-benchmarking/delta.csv"
+tmpResults="./balena-benchmarking/delta.tmp"
+
+# Initialize the CSV files with headers.
+echo "Case,Branch,BasisSize,DeltaSize,DeltaTime,DeltaMem" > "$csvResults"
+rm -f "$tmpResults"
+
+for branch in "${branches[@]}"; do
+    echo "Running benchmarks for branch $branch"
+
+    buildBalenaEngine "$branch"
+
+    for testCase in "${testCases[@]}"; do
+        tcName=$(echo $testCase | awk '{print $1}')
+        tcBasis=$(echo $testCase | awk '{print $2}')
+        tcTarget=$(echo $testCase | awk '{print $3}')
+
+        echo "Running benchmark for $branch / $tcName"
+
+        startBalenad
+
+        # baselineMemInKB=$(balenadMaxMemory)
+        deltaTimeInSecs=$(\time -f%e balena-engine image delta "$tcBasis" "$tcTarget" --tag "$deltaTag" 2>&1 | tail -n 1)
+        usedMemInKB=$(balenadMaxMemory)
+        usedMemInBytes=$((usedMemInKB * 1024))
+        basisSizeInBytes=$(balena-engine inspect "$tcBasis" --format "{{.Size}}")
+        deltaSizeInBytes=$(balena-engine inspect "$deltaTag" --format "{{.Size}}")
+
+        # Collect data.
+        echo "$tcName,$branch,$basisSizeInBytes,$deltaSizeInBytes,$deltaTimeInSecs,$usedMemInBytes" >> "$tmpResults"
+
+        # Thanks Engine, you may go now.
+        killBalenad
+    done
+
+    echo "Done with branch $branch"
+done
+
+echo "Preparing final results..."
+sort "$tmpResults" >> "$csvResults"   # rows sorted as whole lines (case name first), appended after the header
+rm -f "$tmpResults"
+
+# Switch back to the branch that was checked out when the script started.
+echo "Restoring original branch $originalBranch..."
+git checkout "$originalBranch"
+
+echo "Done with everything!"

From f0b580115aa2b75e80ce18c140cf716a8b9eb91b Mon Sep 17 00:00:00 2001
From: Leandro Motta Barros <lmb@stackedboxes.org>
Date: Fri, 7 Jul 2023 11:09:43 -0300
Subject: [PATCH 2/2] Use newer librsync-go with slightly better memory
 handling

Signed-off-by: Leandro Motta Barros <leandro@balena.io>
Change-type: patch
---
 daemon/images/image_delta.go                         |  5 ++++-
 vendor.conf                                          |  2 +-
 vendor/github.com/balena-os/librsync-go/signature.go | 10 +++++++++-
 3 files changed, 14 insertions(+), 3 deletions(-)

diff --git a/daemon/images/image_delta.go b/daemon/images/image_delta.go
index 05c4051f76..1e61ad804a 100644
--- a/daemon/images/image_delta.go
+++ b/daemon/images/image_delta.go
@@ -65,7 +65,10 @@ func (i *ImageService) DeltaCreate(deltaSrc, deltaDest string, options types.Ima
 	defer progressReader.Close()
 
 	sigStart := time.Now()
-	srcSig, err := librsync.Signature(bufio.NewReaderSize(progressReader, 65536), ioutil.Discard, 512, 32, librsync.BLAKE2_SIG_MAGIC)
+
+	bufioReader := bufio.NewReaderSize(progressReader, 65536)
+	numBlocks := int(srcDataLen/512) + 1
+	srcSig, err := librsync.SignatureWithBlockCount(bufioReader, ioutil.Discard, 512, 32, librsync.BLAKE2_SIG_MAGIC, numBlocks)
 	if err != nil {
 		return err
 	}
diff --git a/vendor.conf b/vendor.conf
index ebff4f8499..9a2e722ded 100644
--- a/vendor.conf
+++ b/vendor.conf
@@ -195,7 +195,7 @@ github.com/willf/bitset                             559910e8471e48d76d9e5a1ba158
 github.com/urfave/cli                               c71fbcefd21552b70cd625b2c54466006e258ad7 # v1.22.1
 
 # balena deltas
-github.com/balena-os/librsync-go                    7b435f8f590637e8ba24e72f7cfc2d62f17e3848 # v0.8.5
+github.com/balena-os/librsync-go                    114605554123d206908a21541aad3dc1fbb6ac0e # lmb/prealloc-sig-stuff
 github.com/balena-os/circbuf                        2d080deeceffbd01dea0fcfa165dce64d2d6c9fc # v0.1.3
 
 # runc
diff --git a/vendor/github.com/balena-os/librsync-go/signature.go b/vendor/github.com/balena-os/librsync-go/signature.go
index e47ac79025..a88b6d51b2 100644
--- a/vendor/github.com/balena-os/librsync-go/signature.go
+++ b/vendor/github.com/balena-os/librsync-go/signature.go
@@ -37,6 +37,13 @@ func CalcStrongSum(data []byte, sigType MagicNumber, strongLen uint32) ([]byte,
 }
 
 func Signature(input io.Reader, output io.Writer, blockLen, strongLen uint32, sigType MagicNumber) (*SignatureType, error) {
+	return SignatureWithBlockCount(input, output, blockLen, strongLen, sigType, 0)
+}
+
+// SignatureWithBlockCount is a version of Signature that allows the caller to
+// pass in the expected number of blocks in the Signature. This is used to
+// pre-allocate the internal data structures.
+func SignatureWithBlockCount(input io.Reader, output io.Writer, blockLen, strongLen uint32, sigType MagicNumber, blockCount int) (*SignatureType, error) {
 	var maxStrongLen uint32
 
 	switch sigType {
@@ -68,7 +75,8 @@ func Signature(input io.Reader, output io.Writer, blockLen, strongLen uint32, si
 	block := make([]byte, blockLen)
 
 	var ret SignatureType
-	ret.weak2block = make(map[uint32]int)
+	ret.weak2block = make(map[uint32]int, blockCount)
+	ret.strongSigs = make([][]byte, 0, blockCount)
 	ret.sigType = sigType
 	ret.strongLen = strongLen
 	ret.blockLen = blockLen