Skip to content

Commit

Permalink
pack-bitmap: enable reuse from all bitmapped packs
Browse files Browse the repository at this point in the history
Now that both the pack-bitmap and pack-objects code are prepared to
handle marking and using objects from multiple bitmapped packs for
verbatim reuse, allow marking objects from all bitmapped packs as
eligible for reuse.

Within the `reuse_partial_packfile_from_bitmap()` function, we no longer
only mark the pack whose first object is at bit position zero for reuse,
and instead mark any pack contained in the MIDX as a reuse candidate.

Provide a handful of test cases in a new script (t5332) exercising
interesting behavior for multi-pack reuse to ensure that we performed
all of the previous steps correctly.

Signed-off-by: Taylor Blau <[email protected]>
Signed-off-by: Junio C Hamano <[email protected]>
  • Loading branch information
ttaylorr authored and gitster committed Dec 14, 2023
1 parent 9410741 commit af626ac
Show file tree
Hide file tree
Showing 5 changed files with 245 additions and 17 deletions.
16 changes: 11 additions & 5 deletions Documentation/config/pack.txt
Original file line number Diff line number Diff line change
Expand Up @@ -28,11 +28,17 @@ all existing objects. You can force recompression by passing the -F option
to linkgit:git-repack[1].

pack.allowPackReuse::
When true or "single", and when reachability bitmaps are enabled,
pack-objects will try to send parts of the bitmapped packfile
verbatim. This can reduce memory and CPU usage to serve fetches,
but might result in sending a slightly larger pack. Defaults to
true.
When true or "single", and when reachability bitmaps are
enabled, pack-objects will try to send parts of the bitmapped
packfile verbatim. When "multi", and when a multi-pack
reachability bitmap is available, pack-objects will try to send
parts of all packs in the MIDX.
+
If only a single pack bitmap is available, and
`pack.allowPackReuse` is set to "multi", reuse parts of just the
bitmapped packfile. This can reduce memory and CPU usage to
serve fetches, but might result in sending a slightly larger
pack. Defaults to true.

pack.island::
An extended regular expression configuring a set of delta
Expand Down
6 changes: 5 additions & 1 deletion builtin/pack-objects.c
Original file line number Diff line number Diff line change
Expand Up @@ -232,6 +232,7 @@ static int use_bitmap_index = -1;
static enum {
NO_PACK_REUSE = 0,
SINGLE_PACK_REUSE,
MULTI_PACK_REUSE,
} allow_pack_reuse = SINGLE_PACK_REUSE;
static enum {
WRITE_BITMAP_FALSE = 0,
Expand Down Expand Up @@ -3251,6 +3252,8 @@ static int git_pack_config(const char *k, const char *v,
if (res < 0) {
if (!strcasecmp(v, "single"))
allow_pack_reuse = SINGLE_PACK_REUSE;
else if (!strcasecmp(v, "multi"))
allow_pack_reuse = MULTI_PACK_REUSE;
else
die(_("invalid pack.allowPackReuse value: '%s'"), v);
} else if (res) {
Expand Down Expand Up @@ -4029,7 +4032,8 @@ static int get_object_list_from_bitmap(struct rev_info *revs)
reuse_partial_packfile_from_bitmap(bitmap_git,
&reuse_packfiles,
&reuse_packfiles_nr,
&reuse_packfile_bitmap);
&reuse_packfile_bitmap,
allow_pack_reuse == MULTI_PACK_REUSE);

if (reuse_packfiles) {
reuse_packfile_objects = bitmap_popcount(reuse_packfile_bitmap);
Expand Down
34 changes: 24 additions & 10 deletions pack-bitmap.c
Original file line number Diff line number Diff line change
Expand Up @@ -2040,7 +2040,8 @@ static int bitmapped_pack_cmp(const void *va, const void *vb)
void reuse_partial_packfile_from_bitmap(struct bitmap_index *bitmap_git,
struct bitmapped_pack **packs_out,
size_t *packs_nr_out,
struct bitmap **reuse_out)
struct bitmap **reuse_out,
int multi_pack_reuse)
{
struct repository *r = the_repository;
struct bitmapped_pack *packs = NULL;
Expand All @@ -2064,37 +2065,50 @@ void reuse_partial_packfile_from_bitmap(struct bitmap_index *bitmap_git,
free(packs);
return;
}

if (!pack.bitmap_nr)
continue; /* no objects from this pack */
if (pack.bitmap_pos)
continue; /* not preferred pack */
continue;

if (!multi_pack_reuse && pack.bitmap_pos) {
/*
* If we're only reusing a single pack, skip
* over any packs which are not positioned at
* the beginning of the MIDX bitmap.
*
* This is consistent with the existing
* single-pack reuse behavior, which only reuses
* parts of the MIDX's preferred pack.
*/
continue;
}

ALLOC_GROW(packs, packs_nr + 1, packs_alloc);
memcpy(&packs[packs_nr++], &pack, sizeof(pack));

objects_nr += pack.p->num_objects;

if (!multi_pack_reuse)
break;
}

QSORT(packs, packs_nr, bitmapped_pack_cmp);
} else {
ALLOC_GROW(packs, packs_nr + 1, packs_alloc);

packs[packs_nr].p = bitmap_git->pack;
packs[packs_nr].bitmap_pos = 0;
packs[packs_nr].bitmap_nr = bitmap_git->pack->num_objects;
packs[packs_nr].bitmap_pos = 0;

objects_nr = packs[packs_nr++].p->num_objects;
objects_nr = packs[packs_nr++].bitmap_nr;
}

word_alloc = objects_nr / BITS_IN_EWORD;
if (objects_nr % BITS_IN_EWORD)
word_alloc++;
reuse = bitmap_word_alloc(word_alloc);

if (packs_nr != 1)
BUG("pack reuse not yet implemented for multiple packs");

reuse_partial_packfile_from_bitmap_1(bitmap_git, packs, reuse);
for (i = 0; i < packs_nr; i++)
reuse_partial_packfile_from_bitmap_1(bitmap_git, &packs[i], reuse);

if (bitmap_is_empty(reuse)) {
free(packs);
Expand Down
3 changes: 2 additions & 1 deletion pack-bitmap.h
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,8 @@ struct bitmap_index *prepare_bitmap_walk(struct rev_info *revs,
void reuse_partial_packfile_from_bitmap(struct bitmap_index *bitmap_git,
struct bitmapped_pack **packs_out,
size_t *packs_nr_out,
struct bitmap **reuse_out);
struct bitmap **reuse_out,
int multi_pack_reuse);
int rebuild_existing_bitmaps(struct bitmap_index *, struct packing_data *mapping,
kh_oid_map_t *reused_bitmaps, int show_progress);
void free_bitmap_index(struct bitmap_index *);
Expand Down
203 changes: 203 additions & 0 deletions t/t5332-multi-pack-reuse.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,203 @@
#!/bin/sh

test_description='pack-objects multi-pack reuse'

. ./test-lib.sh
. "$TEST_DIRECTORY"/lib-bitmap.sh

objdir=.git/objects
packdir=$objdir/pack

test_pack_reused () {
test_trace2_data pack-objects pack-reused "$1"
}

test_packs_reused () {
test_trace2_data pack-objects packs-reused "$1"
}


# pack_position <object> </path/to/pack.idx
pack_position () {
git show-index >objects &&
grep "$1" objects | cut -d" " -f1
}

test_expect_success 'preferred pack is reused for single-pack reuse' '
test_config pack.allowPackReuse single &&
for i in A B
do
test_commit "$i" &&
git repack -d || return 1
done &&
git multi-pack-index write --bitmap &&
: >trace2.txt &&
GIT_TRACE2_EVENT="$PWD/trace2.txt" \
git pack-objects --stdout --revs --all >/dev/null &&
test_pack_reused 3 <trace2.txt &&
test_packs_reused 1 <trace2.txt
'

test_expect_success 'enable multi-pack reuse' '
git config pack.allowPackReuse multi
'

test_expect_success 'reuse all objects from subset of bitmapped packs' '
test_commit C &&
git repack -d &&
git multi-pack-index write --bitmap &&
cat >in <<-EOF &&
$(git rev-parse C)
^$(git rev-parse A)
EOF
: >trace2.txt &&
GIT_TRACE2_EVENT="$PWD/trace2.txt" \
git pack-objects --stdout --revs <in >/dev/null &&
test_pack_reused 6 <trace2.txt &&
test_packs_reused 2 <trace2.txt
'

test_expect_success 'reuse all objects from all packs' '
: >trace2.txt &&
GIT_TRACE2_EVENT="$PWD/trace2.txt" \
git pack-objects --stdout --revs --all >/dev/null &&
test_pack_reused 9 <trace2.txt &&
test_packs_reused 3 <trace2.txt
'

test_expect_success 'reuse objects from first pack with middle gap' '
for i in D E F
do
test_commit "$i" || return 1
done &&
# Set "pack.window" to zero to ensure that we do not create any
# deltas, which could alter the amount of pack reuse we perform
# (if, for e.g., we are not sending one or more bases).
D="$(git -c pack.window=0 pack-objects --all --unpacked $packdir/pack)" &&
d_pos="$(pack_position $(git rev-parse D) <$packdir/pack-$D.idx)" &&
e_pos="$(pack_position $(git rev-parse E) <$packdir/pack-$D.idx)" &&
f_pos="$(pack_position $(git rev-parse F) <$packdir/pack-$D.idx)" &&
# commits F, E, and D, should appear in that order at the
# beginning of the pack
test $f_pos -lt $e_pos &&
test $e_pos -lt $d_pos &&
# Ensure that the pack we are constructing sorts ahead of any
# other packs in lexical/bitmap order by choosing it as the
# preferred pack.
git multi-pack-index write --bitmap --preferred-pack="pack-$D.idx" &&
cat >in <<-EOF &&
$(git rev-parse E)
^$(git rev-parse D)
EOF
: >trace2.txt &&
GIT_TRACE2_EVENT="$PWD/trace2.txt" \
git pack-objects --stdout --delta-base-offset --revs <in >/dev/null &&
test_pack_reused 3 <trace2.txt &&
test_packs_reused 1 <trace2.txt
'

test_expect_success 'reuse objects from middle pack with middle gap' '
rm -fr $packdir/multi-pack-index* &&
# Ensure that the pack we are constructing sort into any
# position *but* the first one, by choosing a different pack as
# the preferred one.
git multi-pack-index write --bitmap --preferred-pack="pack-$A.idx" &&
cat >in <<-EOF &&
$(git rev-parse E)
^$(git rev-parse D)
EOF
: >trace2.txt &&
GIT_TRACE2_EVENT="$PWD/trace2.txt" \
git pack-objects --stdout --delta-base-offset --revs <in >/dev/null &&
test_pack_reused 3 <trace2.txt &&
test_packs_reused 1 <trace2.txt
'

test_expect_success 'omit delta with uninteresting base (same pack)' '
git repack -adk &&
test_seq 32 >f &&
git add f &&
test_tick &&
git commit -m "delta" &&
delta="$(git rev-parse HEAD)" &&
test_seq 64 >f &&
test_tick &&
git commit -a -m "base" &&
base="$(git rev-parse HEAD)" &&
test_commit other &&
git repack -d &&
have_delta "$(git rev-parse $delta:f)" "$(git rev-parse $base:f)" &&
git multi-pack-index write --bitmap &&
cat >in <<-EOF &&
$(git rev-parse other)
^$base
EOF
: >trace2.txt &&
GIT_TRACE2_EVENT="$PWD/trace2.txt" \
git pack-objects --stdout --delta-base-offset --revs <in >/dev/null &&
# We can only reuse the 3 objects corresponding to "other" from
# the latest pack.
#
# This is because even though we want "delta", we do not want
# "base", meaning that we have to inflate the delta/base-pair
# corresponding to the blob in commit "delta", which bypasses
# the pack-reuse mechanism.
#
# The remaining objects from the other pack are similarly not
# reused because their objects are on the uninteresting side of
# the query.
test_pack_reused 3 <trace2.txt &&
test_packs_reused 1 <trace2.txt
'

test_expect_success 'omit delta from uninteresting base (cross pack)' '
cat >in <<-EOF &&
$(git rev-parse $base)
^$(git rev-parse $delta)
EOF
P="$(git pack-objects --revs $packdir/pack <in)" &&
git multi-pack-index write --bitmap --preferred-pack="pack-$P.idx" &&
: >trace2.txt &&
GIT_TRACE2_EVENT="$PWD/trace2.txt" \
git pack-objects --stdout --delta-base-offset --all >/dev/null &&
packs_nr="$(find $packdir -type f -name "pack-*.pack" | wc -l)" &&
objects_nr="$(git rev-list --count --all --objects)" &&
test_pack_reused $(($objects_nr - 1)) <trace2.txt &&
test_packs_reused $packs_nr <trace2.txt
'

test_done

0 comments on commit af626ac

Please sign in to comment.