From e1be80e37f5818b02d29ba2aa23f330fdc31bcbf Mon Sep 17 00:00:00 2001 From: Leandro Motta Barros Date: Thu, 29 Jul 2021 18:40:59 -0300 Subject: [PATCH] Use optimal block length to generate deltas Previously, we used a block length hardcoded to 512 bytes. Our measurements have shown that this value was generally inadequate: it produced relatively large deltas and took relatively long times to do that. librsync, by default, uses a block length equal to the square root of the size of the old (basis) file (with a minimum of 256). This value results in significantly smaller deltas and shorter run times. In this commit, we do one more optimization and round this value up to the next power of two. Since librsync-go has a code path optimized for buffers with sizes that are powers of two, this gives us another performance gain. --- daemon/images/image_delta.go | 31 +++++++++++++++++++++++- daemon/images/image_delta_test.go | 39 +++++++++++++++++++++++++++++++ 2 files changed, 69 insertions(+), 1 deletion(-) create mode 100644 daemon/images/image_delta_test.go diff --git a/daemon/images/image_delta.go b/daemon/images/image_delta.go index 05c4051f76..2a6d7e25e8 100644 --- a/daemon/images/image_delta.go +++ b/daemon/images/image_delta.go @@ -6,6 +6,7 @@ import ( "encoding/json" "io" "io/ioutil" + "math" "os" "time" @@ -64,8 +65,9 @@ func (i *ImageService) DeltaCreate(deltaSrc, deltaDest string, options types.Ima progressReader := progress.NewProgressReader(srcData, progressOutput, srcDataLen, deltaSrc, "Fingerprinting") defer progressReader.Close() + blockLen := deltaBlockSize(srcDataLen) sigStart := time.Now() - srcSig, err := librsync.Signature(bufio.NewReaderSize(progressReader, 65536), ioutil.Discard, 512, 32, librsync.BLAKE2_SIG_MAGIC) + srcSig, err := librsync.Signature(bufio.NewReaderSize(progressReader, 65536), ioutil.Discard, blockLen, 32, librsync.BLAKE2_SIG_MAGIC) if err != nil { return err } @@ -285,3 +287,30 @@ func (lock *imglock) unlock(ls layer.Store) { 
// deltaBlockSize returns the block size to use when generating a delta for a
// basis file that is basisSize bytes long.
//
// The result is the integer square root of basisSize, rounded up to the next
// power of two. It is never smaller than 256 and never larger than 2147483648
// (the largest power of two that fits into a uint32). Any basisSize of
// 256*256 bytes or less, including zero and negative values, yields 256.
func deltaBlockSize(basisSize int64) uint32 {
	// The smallest block length ever returned.
	const minBlockLen = 256
	// The largest power of two that fits into a uint32.
	const maxBlockLen = 1 << 31

	// Start with the "ideal" size recommended by the librsync devs. See
	// https://github.com/librsync/librsync/pull/109/files#diff-7a3cd9075c1eaa0d219f7c0a516a10679dae11922bd1dd0ff54a10cdaa457f6fR147
	if basisSize <= minBlockLen*minBlockLen {
		return minBlockLen
	}

	// math.Sqrt works on float64, so for very large basis sizes its result can
	// be off by one from the exact integer square root (for example, it yields
	// 2^30+1 for 2^60+2^31, whose exact root is 2^30). Such an error would
	// double the block size at a power-of-two boundary, so correct the
	// estimate with exact integer arithmetic. root stays below 2^32 for any
	// int64 input, so the squares below cannot overflow a uint64.
	n := uint64(basisSize)
	root := uint64(math.Sqrt(float64(basisSize)))
	for root*root > n {
		root--
	}
	for (root+1)*(root+1) <= n {
		root++
	}

	// Avoid overflowing: anything at or above the cap cannot be rounded up to
	// a larger power of two within a uint32.
	if root >= maxBlockLen {
		return maxBlockLen
	}

	// Round up to the next power of two (because librsync-go has an optimized
	// code path for power of two blocks). This algorithm is from Hacker's
	// Delight, 2nd Edition, p.62: smear the highest set bit of x-1 into every
	// lower position, then add one.
	x := uint32(root)
	x--
	x |= x >> 1
	x |= x >> 2
	x |= x >> 4
	x |= x >> 8
	x |= x >> 16
	return x + 1
}

// Test_deltaBlockSize checks deltaBlockSize against a table of basis sizes,
// covering the 256 minimum, the power-of-two rounding, the 2147483648 cap, and
// a boundary where a float64-only square root would be off by one.
func Test_deltaBlockSize(t *testing.T) {
	tests := []struct {
		x    int64
		want uint32
	}{
		{0, 256},
		{1, 256},
		{100, 256},
		{1_024, 256},
		{33_333, 256},
		{65_536, 256},
		{88_887, 512},
		{262_144, 512},
		{262_145, 512},
		{777_111, 1024},
		{22_654_123, 8192},
		{1_333_555_888, 65536},
		{35_000_000_000, 262144},
		{123_456_678_901, 524288},
		// (2^30+1)^2 - 1: the exact integer square root is still 2^30.
		{(1 << 60) + (1 << 31), 1073741824},
		// (2^30+1)^2: the first size whose root exceeds 2^30.
		{(1 << 60) + (1 << 31) + 1, 2147483648},
		{4_611_686_018_427_387_904, 2147483648},
		{5_000_000_000_000_000_000, 2147483648},
		{math.MaxInt64, 2147483648},
	}
	for _, tt := range tests {
		t.Run(fmt.Sprintf("deltaBlockSize(%v)", tt.x), func(t *testing.T) {
			if got := deltaBlockSize(tt.x); got != tt.want {
				t.Errorf("got deltaBlockSize(%v) = %v, want %v", tt.x, got, tt.want)
			}
		})
	}
}