From dc632350eee933c8659fb03b9b82a90169d9d314 Mon Sep 17 00:00:00 2001 From: Leandro Motta Barros Date: Thu, 29 Jul 2021 18:40:59 -0300 Subject: [PATCH] Use optimal block length to generate deltas Previously, we used a block length hardcoded to 512 bytes. Our measurements have shown that this value was generally inadequate: it produced relatively large deltas and took a relatively long time to do so. librsync, by default, uses a block length equal to the square root of the size of the old (basis) file. This value results in significantly smaller deltas and shorter run times. In this commit, we do one more optimization and round this value up to the next power of two. Since librsync-go has a code path optimized for buffers with sizes that are powers of two, this gives us another performance gain. --- daemon/images/image_delta.go | 29 +++++++++++++++++++++- daemon/images/image_delta_test.go | 41 +++++++++++++++++++++++++++++++ 2 files changed, 69 insertions(+), 1 deletion(-) create mode 100644 daemon/images/image_delta_test.go diff --git a/daemon/images/image_delta.go b/daemon/images/image_delta.go index 05c4051f76..0b35afc603 100644 --- a/daemon/images/image_delta.go +++ b/daemon/images/image_delta.go @@ -6,6 +6,7 @@ import ( "encoding/json" "io" "io/ioutil" + "math" "os" "time" @@ -64,8 +65,9 @@ func (i *ImageService) DeltaCreate(deltaSrc, deltaDest string, options types.Ima progressReader := progress.NewProgressReader(srcData, progressOutput, srcDataLen, deltaSrc, "Fingerprinting") defer progressReader.Close() + blockLen := deltaBlockSize(srcDataLen) sigStart := time.Now() - srcSig, err := librsync.Signature(bufio.NewReaderSize(progressReader, 65536), ioutil.Discard, 512, 32, librsync.BLAKE2_SIG_MAGIC) + srcSig, err := librsync.Signature(bufio.NewReaderSize(progressReader, 65536), ioutil.Discard, blockLen, 32, librsync.BLAKE2_SIG_MAGIC) if err != nil { return err } @@ -285,3 +287,28 @@ func (lock *imglock) unlock(ls layer.Store) { layer.ReleaseAndLog(ls, l) } } + +// 
deltaBlockSize returns the block size to use when generating a delta for a +// basis file that is basisSize bytes long: the truncated square root of basisSize, rounded up to a power of two and kept within [1, 2147483648]. +func deltaBlockSize(basisSize int64) uint32 { + // Start with the "ideal" size recommended by the librsync devs: the square root of the basis size, truncated to an integer. NOTE(review): a negative basisSize would make math.Sqrt return NaN, and converting NaN to uint32 is implementation-dependent; presumably callers only pass non-negative sizes (confirm). + x := uint32(math.Sqrt(float64(basisSize))) + + // Remain within reasonable limits. + if x == 0 { + return 1 // always return a positive block size. + } else if x >= 2147483648 { + return 2147483648 // the largest power of two that fits in a uint32. + } + + // Round up to the next power of two (because librsync-go has an optimized code + // path for power-of-two blocks). This algorithm is from Hacker's Delight, + // 2nd Edition, p.62. An x that is already a power of two is returned unchanged. + x -= 1 + x |= x >> 1 + x |= x >> 2 + x |= x >> 4 + x |= x >> 8 + x |= x >> 16 + return x + 1 +} diff --git a/daemon/images/image_delta_test.go b/daemon/images/image_delta_test.go new file mode 100644 index 0000000000..8871286e7f --- /dev/null +++ b/daemon/images/image_delta_test.go @@ -0,0 +1,41 @@ +package images + +import ( + "fmt" + "math" + "testing" +) + +func Test_deltaBlockSize(t *testing.T) { + tests := []struct { + x int64 + want uint32 + }{ + {0, 1}, // a zero square root is bumped to the minimum block size of 1 + {1, 1}, + {100, 16}, + {256, 16}, + {288, 16}, // sqrt(288) is about 16.97, truncated to 16 + {289, 32}, // sqrt(289) = 17, rounded up to 32 + {1_024, 32}, + {33_333, 256}, + {88_887, 512}, + {262_144, 512}, + {262_145, 512}, // sqrt is just above 512 and truncates back to it + {777_111, 1024}, + {22_654_123, 8192}, + {1_333_555_888, 65536}, + {35_000_000_000, 262144}, + {123_456_678_901, 524288}, + {4_611_686_018_427_387_904, 2147483648}, // 2^62: the square root is exactly 2^31 + {5_000_000_000_000_000_000, 2147483648}, + {math.MaxInt64, 2147483648}, // square root exceeds 2^31, so the upper limit applies + } + for _, tt := range tests { + t.Run(fmt.Sprintf("deltaBlockSize(%v)", tt.x), func(t *testing.T) { + if got := deltaBlockSize(tt.x); got != tt.want { + t.Errorf("got deltaBlockSize(%v) = %v, want %v", tt.x, got, tt.want) + } + }) + } +}