diff --git a/Documentation/config/pack.txt b/Documentation/config/pack.txt index f50df9dbce8989..9c630863e6ff18 100644 --- a/Documentation/config/pack.txt +++ b/Documentation/config/pack.txt @@ -28,11 +28,17 @@ all existing objects. You can force recompression by passing the -F option to linkgit:git-repack[1]. pack.allowPackReuse:: - When true, and when reachability bitmaps are enabled, - pack-objects will try to send parts of the bitmapped packfile - verbatim. This can reduce memory and CPU usage to serve fetches, - but might result in sending a slightly larger pack. Defaults to - true. + When true or "single", and when reachability bitmaps are + enabled, pack-objects will try to send parts of the bitmapped + packfile verbatim. When "multi", and when a multi-pack + reachability bitmap is available, pack-objects will try to send + parts of all packs in the MIDX. ++ + If only a single pack bitmap is available, and + `pack.allowPackReuse` is set to "multi", reuse parts of just the + bitmapped packfile. This can reduce memory and CPU usage to + serve fetches, but might result in sending a slightly larger + pack. Defaults to true. pack.island:: An extended regular expression configuring a set of delta diff --git a/Documentation/gitformat-pack.txt b/Documentation/gitformat-pack.txt index 9fcb29a9c844cc..d6ae229be56859 100644 --- a/Documentation/gitformat-pack.txt +++ b/Documentation/gitformat-pack.txt @@ -396,6 +396,15 @@ CHUNK DATA: is padded at the end with between 0 and 3 NUL bytes to make the chunk size a multiple of 4 bytes. + Bitmapped Packfiles (ID: {'B', 'T', 'M', 'P'}) + Stores a table of two 4-byte unsigned integers in network order. + Each table entry corresponds to a single pack (in the order that + they appear above in the `PNAM` chunk). The values for each table + entry are as follows: + - The first bit position (in pseudo-pack order, see below) to + contain an object from that pack. + - The number of bits whose objects are selected from that pack. 
+ OID Fanout (ID: {'O', 'I', 'D', 'F'}) The ith entry, F[i], stores the number of OIDs with first byte at most i. Thus F[255] stores the total @@ -509,6 +518,73 @@ packs arranged in MIDX order (with the preferred pack coming first). The MIDX's reverse index is stored in the optional 'RIDX' chunk within the MIDX itself. +=== `BTMP` chunk + +The Bitmapped Packfiles (`BTMP`) chunk encodes additional information +about the objects in the multi-pack index's reachability bitmap. Recall +that objects from the MIDX are arranged in "pseudo-pack" order (see +above) for reachability bitmaps. + +From the example above, suppose we have packs "a", "b", and "c", with +10, 15, and 20 objects, respectively. In pseudo-pack order, those would +be arranged as follows: + + |a,0|a,1|...|a,9|b,0|b,1|...|b,14|c,0|c,1|...|c,19| + +When working with single-pack bitmaps (or, equivalently, multi-pack +reachability bitmaps with a preferred pack), linkgit:git-pack-objects[1] +performs ``verbatim'' reuse, attempting to reuse chunks of the bitmapped +or preferred packfile instead of adding objects to the packing list. + +When a chunk of bytes is reused from an existing pack, any objects +contained therein do not need to be added to the packing list, saving +memory and CPU time. But a chunk from an existing packfile can only be +reused when the following conditions are met: + + - The chunk contains only objects which were requested by the caller + (i.e. does not contain any objects which the caller didn't ask for + explicitly or implicitly). + + - All objects stored in non-thin packs as offset- or reference-deltas + also include their base object in the resulting pack. + +The `BTMP` chunk encodes the necessary information in order to implement +multi-pack reuse over a set of packfiles as described above. 
+Specifically, the `BTMP` chunk encodes two pieces of information (all +32-bit unsigned integers in network byte-order) for each packfile `p` +that is stored in the MIDX, as follows: + +`bitmap_pos`:: The first bit position (in pseudo-pack order) in the + multi-pack index's reachability bitmap occupied by an object from `p`. + +`bitmap_nr`:: The number of bit positions (including the one at + `bitmap_pos`) that encode objects from that pack `p`. + +For example, the `BTMP` chunk corresponding to the above example (with +packs ``a'', ``b'', and ``c'') would look like: + +[cols="1,2,2"] +|=== +| |`bitmap_pos` |`bitmap_nr` + +|packfile ``a'' +|`0` +|`10` + +|packfile ``b'' +|`10` +|`15` + +|packfile ``c'' +|`25` +|`20` +|=== + +With this information in place, we can treat each packfile as +individually reusable in the same fashion as verbatim pack reuse is +performed on individual packs prior to the implementation of the `BTMP` +chunk. + == cruft packs The cruft packs feature offer an alternative to Git's traditional mechanism of diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c index 5c8bfe1035feca..d8c2128a979282 100644 --- a/builtin/pack-objects.c +++ b/builtin/pack-objects.c @@ -218,13 +218,19 @@ static int thin; static int num_preferred_base; static struct progress *progress_state; -static struct packed_git *reuse_packfile; +static struct bitmapped_pack *reuse_packfiles; +static size_t reuse_packfiles_nr; +static size_t reuse_packfiles_used_nr; static uint32_t reuse_packfile_objects; static struct bitmap *reuse_packfile_bitmap; static int use_bitmap_index_default = 1; static int use_bitmap_index = -1; -static int allow_pack_reuse = 1; +static enum { + NO_PACK_REUSE = 0, + SINGLE_PACK_REUSE, + MULTI_PACK_REUSE, +} allow_pack_reuse = SINGLE_PACK_REUSE; static enum { WRITE_BITMAP_FALSE = 0, WRITE_BITMAP_QUIET, @@ -1010,7 +1016,9 @@ static off_t find_reused_offset(off_t where) return reused_chunks[lo-1].difference; } -static void 
write_reused_pack_one(size_t pos, struct hashfile *out, +static void write_reused_pack_one(struct packed_git *reuse_packfile, + size_t pos, struct hashfile *out, + off_t pack_start, struct pack_window **w_curs) { off_t offset, next, cur; @@ -1020,7 +1028,8 @@ static void write_reused_pack_one(size_t pos, struct hashfile *out, offset = pack_pos_to_offset(reuse_packfile, pos); next = pack_pos_to_offset(reuse_packfile, pos + 1); - record_reused_object(offset, offset - hashfile_total(out)); + record_reused_object(offset, + offset - (hashfile_total(out) - pack_start)); cur = offset; type = unpack_object_header(reuse_packfile, w_curs, &cur, &size); @@ -1088,41 +1097,93 @@ static void write_reused_pack_one(size_t pos, struct hashfile *out, copy_pack_data(out, reuse_packfile, w_curs, offset, next - offset); } -static size_t write_reused_pack_verbatim(struct hashfile *out, +static size_t write_reused_pack_verbatim(struct bitmapped_pack *reuse_packfile, + struct hashfile *out, + off_t pack_start, struct pack_window **w_curs) { - size_t pos = 0; + size_t pos = reuse_packfile->bitmap_pos; + size_t end; + + if (pos % BITS_IN_EWORD) { + size_t word_pos = (pos / BITS_IN_EWORD); + size_t offset = pos % BITS_IN_EWORD; + size_t last; + eword_t word = reuse_packfile_bitmap->words[word_pos]; + + if (offset + reuse_packfile->bitmap_nr < BITS_IN_EWORD) + last = offset + reuse_packfile->bitmap_nr; + else + last = BITS_IN_EWORD; + + for (; offset < last; offset++) { + if (word >> offset == 0) + return word_pos; + if (!bitmap_get(reuse_packfile_bitmap, + word_pos * BITS_IN_EWORD + offset)) + return word_pos; + } - while (pos < reuse_packfile_bitmap->word_alloc && - reuse_packfile_bitmap->words[pos] == (eword_t)~0) - pos++; + pos += BITS_IN_EWORD - (pos % BITS_IN_EWORD); + } + + /* + * Now we're going to copy as many whole eword_t's as possible. + * "end" is the index of the last whole eword_t we copy, but + * there may be additional bits to process. 
Those are handled + * individually by write_reused_pack(). + * + * Begin by advancing to the first word boundary in range of the + * bit positions occupied by objects in "reuse_packfile". Then + * pick the last word boundary in the same range. If we have at + * least one word's worth of bits to process, continue on. + */ + end = reuse_packfile->bitmap_pos + reuse_packfile->bitmap_nr; + if (end % BITS_IN_EWORD) + end -= end % BITS_IN_EWORD; + if (pos >= end) + return reuse_packfile->bitmap_pos / BITS_IN_EWORD; - if (pos) { - off_t to_write; + while (pos < end && + reuse_packfile_bitmap->words[pos / BITS_IN_EWORD] == (eword_t)~0) + pos += BITS_IN_EWORD; - written = (pos * BITS_IN_EWORD); - to_write = pack_pos_to_offset(reuse_packfile, written) - - sizeof(struct pack_header); + if (pos > end) + pos = end; + + if (reuse_packfile->bitmap_pos < pos) { + off_t pack_start_off = pack_pos_to_offset(reuse_packfile->p, 0); + off_t pack_end_off = pack_pos_to_offset(reuse_packfile->p, + pos - reuse_packfile->bitmap_pos); + + written += pos - reuse_packfile->bitmap_pos; /* We're recording one chunk, not one object. 
*/ - record_reused_object(sizeof(struct pack_header), 0); + record_reused_object(pack_start_off, + pack_start_off - (hashfile_total(out) - pack_start)); hashflush(out); - copy_pack_data(out, reuse_packfile, w_curs, - sizeof(struct pack_header), to_write); + copy_pack_data(out, reuse_packfile->p, w_curs, + pack_start_off, pack_end_off - pack_start_off); display_progress(progress_state, written); } - return pos; + if (pos % BITS_IN_EWORD) + BUG("attempted to jump past a word boundary to %"PRIuMAX, + (uintmax_t)pos); + return pos / BITS_IN_EWORD; } -static void write_reused_pack(struct hashfile *f) +static void write_reused_pack(struct bitmapped_pack *reuse_packfile, + struct hashfile *f) { - size_t i = 0; + size_t i = reuse_packfile->bitmap_pos / BITS_IN_EWORD; uint32_t offset; + off_t pack_start = hashfile_total(f) - sizeof(struct pack_header); struct pack_window *w_curs = NULL; if (allow_ofs_delta) - i = write_reused_pack_verbatim(f, &w_curs); + i = write_reused_pack_verbatim(reuse_packfile, f, pack_start, + &w_curs); for (; i < reuse_packfile_bitmap->word_alloc; ++i) { eword_t word = reuse_packfile_bitmap->words[i]; @@ -1133,16 +1194,23 @@ static void write_reused_pack(struct hashfile *f) break; offset += ewah_bit_ctz64(word >> offset); + if (pos + offset < reuse_packfile->bitmap_pos) + continue; + if (pos + offset >= reuse_packfile->bitmap_pos + reuse_packfile->bitmap_nr) + goto done; /* * Can use bit positions directly, even for MIDX * bitmaps. See comment in try_partial_reuse() * for why. 
*/ - write_reused_pack_one(pos + offset, f, &w_curs); + write_reused_pack_one(reuse_packfile->p, + pos + offset - reuse_packfile->bitmap_pos, + f, pack_start, &w_curs); display_progress(progress_state, ++written); } } +done: unuse_pack(&w_curs); } @@ -1194,9 +1262,14 @@ static void write_pack_file(void) offset = write_pack_header(f, nr_remaining); - if (reuse_packfile) { + if (reuse_packfiles_nr) { assert(pack_to_stdout); - write_reused_pack(f); + for (j = 0; j < reuse_packfiles_nr; j++) { + reused_chunks_nr = 0; + write_reused_pack(&reuse_packfiles[j], f); + if (reused_chunks_nr) + reuse_packfiles_used_nr++; + } offset = hashfile_total(f); } @@ -3172,7 +3245,19 @@ static int git_pack_config(const char *k, const char *v, return 0; } if (!strcmp(k, "pack.allowpackreuse")) { - allow_pack_reuse = git_config_bool(k, v); + int res = git_parse_maybe_bool_text(v); + if (res < 0) { + if (!strcasecmp(v, "single")) + allow_pack_reuse = SINGLE_PACK_REUSE; + else if (!strcasecmp(v, "multi")) + allow_pack_reuse = MULTI_PACK_REUSE; + else + die(_("invalid pack.allowPackReuse value: '%s'"), v); + } else if (res) { + allow_pack_reuse = SINGLE_PACK_REUSE; + } else { + allow_pack_reuse = NO_PACK_REUSE; + } return 0; } if (!strcmp(k, "pack.threads")) { @@ -3931,7 +4016,7 @@ static void loosen_unused_packed_objects(void) */ static int pack_options_allow_reuse(void) { - return allow_pack_reuse && + return allow_pack_reuse != NO_PACK_REUSE && pack_to_stdout && !ignore_packed_keep_on_disk && !ignore_packed_keep_in_core && @@ -3944,13 +4029,18 @@ static int get_object_list_from_bitmap(struct rev_info *revs) if (!(bitmap_git = prepare_bitmap_walk(revs, 0))) return -1; - if (pack_options_allow_reuse() && - !reuse_partial_packfile_from_bitmap( - bitmap_git, - &reuse_packfile, - &reuse_packfile_objects, - &reuse_packfile_bitmap)) { - assert(reuse_packfile_objects); + if (pack_options_allow_reuse()) + reuse_partial_packfile_from_bitmap(bitmap_git, + &reuse_packfiles, + &reuse_packfiles_nr, + 
&reuse_packfile_bitmap, + allow_pack_reuse == MULTI_PACK_REUSE); + + if (reuse_packfiles) { + reuse_packfile_objects = bitmap_popcount(reuse_packfile_bitmap); + if (!reuse_packfile_objects) + BUG("expected non-empty reuse bitmap"); + nr_result += reuse_packfile_objects; nr_seen += reuse_packfile_objects; display_progress(progress_state, nr_seen); @@ -4518,11 +4608,20 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix) fprintf_ln(stderr, _("Total %"PRIu32" (delta %"PRIu32")," " reused %"PRIu32" (delta %"PRIu32")," - " pack-reused %"PRIu32), + " pack-reused %"PRIu32" (from %"PRIuMAX")"), written, written_delta, reused, reused_delta, - reuse_packfile_objects); + reuse_packfile_objects, + (uintmax_t)reuse_packfiles_used_nr); + + trace2_data_intmax("pack-objects", the_repository, "written", written); + trace2_data_intmax("pack-objects", the_repository, "written/delta", written_delta); + trace2_data_intmax("pack-objects", the_repository, "reused", reused); + trace2_data_intmax("pack-objects", the_repository, "reused/delta", reused_delta); + trace2_data_intmax("pack-objects", the_repository, "pack-reused", reuse_packfile_objects); + trace2_data_intmax("pack-objects", the_repository, "packs-reused", reuse_packfiles_used_nr); cleanup: + clear_packing_data(&to_pack); list_objects_filter_release(&filter_options); strvec_clear(&rp); diff --git a/ewah/bitmap.c b/ewah/bitmap.c index 7b525b1ecd896e..ac7e0af622a8fc 100644 --- a/ewah/bitmap.c +++ b/ewah/bitmap.c @@ -169,6 +169,15 @@ size_t bitmap_popcount(struct bitmap *self) return count; } +int bitmap_is_empty(struct bitmap *self) +{ + size_t i; + for (i = 0; i < self->word_alloc; i++) + if (self->words[i]) + return 0; + return 1; +} + int bitmap_equals(struct bitmap *self, struct bitmap *other) { struct bitmap *big, *small; diff --git a/ewah/ewok.h b/ewah/ewok.h index 7eb8b9b63013da..c11d76c6f33693 100644 --- a/ewah/ewok.h +++ b/ewah/ewok.h @@ -189,5 +189,6 @@ void bitmap_or_ewah(struct bitmap *self, struct 
ewah_bitmap *other); void bitmap_or(struct bitmap *self, const struct bitmap *other); size_t bitmap_popcount(struct bitmap *self); +int bitmap_is_empty(struct bitmap *self); #endif diff --git a/git-compat-util.h b/git-compat-util.h index 603c97e3b3f096..7c2a6538e5afea 100644 --- a/git-compat-util.h +++ b/git-compat-util.h @@ -1015,6 +1015,15 @@ static inline unsigned long cast_size_t_to_ulong(size_t a) return (unsigned long)a; } +static inline uint32_t cast_size_t_to_uint32_t(size_t a) +{ + if (a != (uint32_t)a) + die("object too large to read on this platform: %" + PRIuMAX" is cut off to %u", + (uintmax_t)a, (uint32_t)a); + return (uint32_t)a; +} + static inline int cast_size_t_to_int(size_t a) { if (a > INT_MAX) diff --git a/midx.c b/midx.c index 1d14661dade4a6..85e1c2cd1287b3 100644 --- a/midx.c +++ b/midx.c @@ -21,6 +21,7 @@ #include "refs.h" #include "revision.h" #include "list-objects.h" +#include "pack-revindex.h" #define MIDX_SIGNATURE 0x4d494458 /* "MIDX" */ #define MIDX_VERSION 1 @@ -33,6 +34,7 @@ #define MIDX_CHUNK_ALIGNMENT 4 #define MIDX_CHUNKID_PACKNAMES 0x504e414d /* "PNAM" */ +#define MIDX_CHUNKID_BITMAPPEDPACKS 0x42544d50 /* "BTMP" */ #define MIDX_CHUNKID_OIDFANOUT 0x4f494446 /* "OIDF" */ #define MIDX_CHUNKID_OIDLOOKUP 0x4f49444c /* "OIDL" */ #define MIDX_CHUNKID_OBJECTOFFSETS 0x4f4f4646 /* "OOFF" */ @@ -41,6 +43,7 @@ #define MIDX_CHUNK_FANOUT_SIZE (sizeof(uint32_t) * 256) #define MIDX_CHUNK_OFFSET_WIDTH (2 * sizeof(uint32_t)) #define MIDX_CHUNK_LARGE_OFFSET_WIDTH (sizeof(uint64_t)) +#define MIDX_CHUNK_BITMAPPED_PACKS_WIDTH (2 * sizeof(uint32_t)) #define MIDX_LARGE_OFFSET_NEEDED 0x80000000 #define PACK_EXPIRED UINT_MAX @@ -175,6 +178,8 @@ struct multi_pack_index *load_multi_pack_index(const char *object_dir, int local m->num_packs = get_be32(m->data + MIDX_BYTE_NUM_PACKS); + m->preferred_pack_idx = -1; + cf = init_chunkfile(NULL); if (read_table_of_contents(cf, m->data, midx_size, @@ -193,6 +198,9 @@ struct multi_pack_index 
*load_multi_pack_index(const char *object_dir, int local pair_chunk(cf, MIDX_CHUNKID_LARGEOFFSETS, &m->chunk_large_offsets, &m->chunk_large_offsets_len); + pair_chunk(cf, MIDX_CHUNKID_BITMAPPEDPACKS, + (const unsigned char **)&m->chunk_bitmapped_packs, + &m->chunk_bitmapped_packs_len); if (git_env_bool("GIT_TEST_MIDX_READ_RIDX", 1)) pair_chunk(cf, MIDX_CHUNKID_REVINDEX, &m->chunk_revindex, @@ -286,6 +294,26 @@ int prepare_midx_pack(struct repository *r, struct multi_pack_index *m, uint32_t return 0; } +int nth_bitmapped_pack(struct repository *r, struct multi_pack_index *m, + struct bitmapped_pack *bp, uint32_t pack_int_id) +{ + if (!m->chunk_bitmapped_packs) + return error(_("MIDX does not contain the BTMP chunk")); + + if (prepare_midx_pack(r, m, pack_int_id)) + return error(_("could not load bitmapped pack %"PRIu32), pack_int_id); + + bp->p = m->packs[pack_int_id]; + bp->bitmap_pos = get_be32((char *)m->chunk_bitmapped_packs + + MIDX_CHUNK_BITMAPPED_PACKS_WIDTH * pack_int_id); + bp->bitmap_nr = get_be32((char *)m->chunk_bitmapped_packs + + MIDX_CHUNK_BITMAPPED_PACKS_WIDTH * pack_int_id + + sizeof(uint32_t)); + bp->pack_int_id = pack_int_id; + + return 0; +} + int bsearch_midx(const struct object_id *oid, struct multi_pack_index *m, uint32_t *result) { return bsearch_hash(oid->hash, m->chunk_oid_fanout, m->chunk_oid_lookup, @@ -403,7 +431,8 @@ static int cmp_idx_or_pack_name(const char *idx_or_pack_name, return strcmp(idx_or_pack_name, idx_name); } -int midx_contains_pack(struct multi_pack_index *m, const char *idx_or_pack_name) +int midx_locate_pack(struct multi_pack_index *m, const char *idx_or_pack_name, + uint32_t *pos) { uint32_t first = 0, last = m->num_packs; @@ -414,8 +443,11 @@ int midx_contains_pack(struct multi_pack_index *m, const char *idx_or_pack_name) current = m->pack_names[mid]; cmp = cmp_idx_or_pack_name(idx_or_pack_name, current); - if (!cmp) + if (!cmp) { + if (pos) + *pos = mid; return 1; + } if (cmp > 0) { first = mid + 1; continue; @@ 
-426,6 +458,28 @@ int midx_contains_pack(struct multi_pack_index *m, const char *idx_or_pack_name) return 0; } +int midx_contains_pack(struct multi_pack_index *m, const char *idx_or_pack_name) +{ + return midx_locate_pack(m, idx_or_pack_name, NULL); +} + +int midx_preferred_pack(struct multi_pack_index *m, uint32_t *pack_int_id) +{ + if (m->preferred_pack_idx == -1) { + if (load_midx_revindex(m) < 0) { + m->preferred_pack_idx = -2; + return -1; + } + + m->preferred_pack_idx = + nth_midxed_pack_int_id(m, pack_pos_to_midx(m, 0)); + } else if (m->preferred_pack_idx == -2) + return -1; /* no revindex */ + + *pack_int_id = m->preferred_pack_idx; + return 0; +} + int prepare_multi_pack_index_one(struct repository *r, const char *object_dir, int local) { struct multi_pack_index *m; @@ -468,13 +522,31 @@ static size_t write_midx_header(struct hashfile *f, return MIDX_HEADER_SIZE; } +#define BITMAP_POS_UNKNOWN (~((uint32_t)0)) + struct pack_info { uint32_t orig_pack_int_id; char *pack_name; struct packed_git *p; + + uint32_t bitmap_pos; + uint32_t bitmap_nr; + unsigned expired : 1; }; +static void fill_pack_info(struct pack_info *info, + struct packed_git *p, const char *pack_name, + uint32_t orig_pack_int_id) +{ + memset(info, 0, sizeof(struct pack_info)); + + info->orig_pack_int_id = orig_pack_int_id; + info->pack_name = xstrdup(pack_name); + info->p = p; + info->bitmap_pos = BITMAP_POS_UNKNOWN; +} + static int pack_info_compare(const void *_a, const void *_b) { struct pack_info *a = (struct pack_info *)_a; @@ -515,6 +587,7 @@ static void add_pack_to_midx(const char *full_path, size_t full_path_len, const char *file_name, void *data) { struct write_midx_context *ctx = data; + struct packed_git *p; if (ends_with(file_name, ".idx")) { display_progress(ctx->progress, ++ctx->pack_paths_checked); @@ -541,27 +614,22 @@ static void add_pack_to_midx(const char *full_path, size_t full_path_len, ALLOC_GROW(ctx->info, ctx->nr + 1, ctx->alloc); - ctx->info[ctx->nr].p = 
add_packed_git(full_path, - full_path_len, - 0); - - if (!ctx->info[ctx->nr].p) { + p = add_packed_git(full_path, full_path_len, 0); + if (!p) { warning(_("failed to add packfile '%s'"), full_path); return; } - if (open_pack_index(ctx->info[ctx->nr].p)) { + if (open_pack_index(p)) { warning(_("failed to open pack-index '%s'"), full_path); - close_pack(ctx->info[ctx->nr].p); - FREE_AND_NULL(ctx->info[ctx->nr].p); + close_pack(p); + free(p); return; } - ctx->info[ctx->nr].pack_name = xstrdup(file_name); - ctx->info[ctx->nr].orig_pack_int_id = ctx->nr; - ctx->info[ctx->nr].expired = 0; + fill_pack_info(&ctx->info[ctx->nr], p, file_name, ctx->nr); ctx->nr++; } } @@ -817,6 +885,26 @@ static int write_midx_pack_names(struct hashfile *f, void *data) return 0; } +static int write_midx_bitmapped_packs(struct hashfile *f, void *data) +{ + struct write_midx_context *ctx = data; + size_t i; + + for (i = 0; i < ctx->nr; i++) { + struct pack_info *pack = &ctx->info[i]; + if (pack->expired) + continue; + + if (pack->bitmap_pos == BITMAP_POS_UNKNOWN && pack->bitmap_nr) + BUG("pack '%s' has no bitmap position, but has %d bitmapped object(s)", + pack->pack_name, pack->bitmap_nr); + + hashwrite_be32(f, pack->bitmap_pos); + hashwrite_be32(f, pack->bitmap_nr); + } + return 0; +} + static int write_midx_oid_fanout(struct hashfile *f, void *data) { @@ -984,8 +1072,19 @@ static uint32_t *midx_pack_order(struct write_midx_context *ctx) QSORT(data, ctx->entries_nr, midx_pack_order_cmp); ALLOC_ARRAY(pack_order, ctx->entries_nr); - for (i = 0; i < ctx->entries_nr; i++) + for (i = 0; i < ctx->entries_nr; i++) { + struct pack_midx_entry *e = &ctx->entries[data[i].nr]; + struct pack_info *pack = &ctx->info[ctx->pack_perm[e->pack_int_id]]; + if (pack->bitmap_pos == BITMAP_POS_UNKNOWN) + pack->bitmap_pos = i; + pack->bitmap_nr++; pack_order[i] = data[i].nr; + } + for (i = 0; i < ctx->nr; i++) { + struct pack_info *pack = &ctx->info[ctx->pack_perm[i]]; + if (pack->bitmap_pos == BITMAP_POS_UNKNOWN) 
+ pack->bitmap_pos = 0; + } free(data); trace2_region_leave("midx", "midx_pack_order", the_repository); @@ -1286,6 +1385,7 @@ static int write_midx_internal(const char *object_dir, struct hashfile *f = NULL; struct lock_file lk; struct write_midx_context ctx = { 0 }; + int bitmapped_packs_concat_len = 0; int pack_name_concat_len = 0; int dropped_packs = 0; int result = 0; @@ -1321,11 +1421,6 @@ static int write_midx_internal(const char *object_dir, for (i = 0; i < ctx.m->num_packs; i++) { ALLOC_GROW(ctx.info, ctx.nr + 1, ctx.alloc); - ctx.info[ctx.nr].orig_pack_int_id = i; - ctx.info[ctx.nr].pack_name = xstrdup(ctx.m->pack_names[i]); - ctx.info[ctx.nr].p = ctx.m->packs[i]; - ctx.info[ctx.nr].expired = 0; - if (flags & MIDX_WRITE_REV_INDEX) { /* * If generating a reverse index, need to have @@ -1341,10 +1436,10 @@ static int write_midx_internal(const char *object_dir, if (open_pack_index(ctx.m->packs[i])) die(_("could not open index for %s"), ctx.m->packs[i]->pack_name); - ctx.info[ctx.nr].p = ctx.m->packs[i]; } - ctx.nr++; + fill_pack_info(&ctx.info[ctx.nr++], ctx.m->packs[i], + ctx.m->pack_names[i], i); } } @@ -1503,8 +1598,10 @@ static int write_midx_internal(const char *object_dir, } for (i = 0; i < ctx.nr; i++) { - if (!ctx.info[i].expired) - pack_name_concat_len += strlen(ctx.info[i].pack_name) + 1; + if (ctx.info[i].expired) + continue; + pack_name_concat_len += strlen(ctx.info[i].pack_name) + 1; + bitmapped_packs_concat_len += 2 * sizeof(uint32_t); } /* Check that the preferred pack wasn't expired (if given). 
*/ @@ -1564,6 +1661,9 @@ static int write_midx_internal(const char *object_dir, add_chunk(cf, MIDX_CHUNKID_REVINDEX, st_mult(ctx.entries_nr, sizeof(uint32_t)), write_midx_revindex); + add_chunk(cf, MIDX_CHUNKID_BITMAPPEDPACKS, + bitmapped_packs_concat_len, + write_midx_bitmapped_packs); } write_midx_header(f, get_num_chunks(cf), ctx.nr - dropped_packs); @@ -1603,8 +1703,13 @@ static int write_midx_internal(const char *object_dir, flags) < 0) { error(_("could not write multi-pack bitmap")); result = 1; + clear_packing_data(&pdata); + free(commits); goto cleanup; } + + clear_packing_data(&pdata); + free(commits); } /* * NOTE: Do not use ctx.entries beyond this point, since it might diff --git a/midx.h b/midx.h index eb57a37519ce01..b374a7afafb867 100644 --- a/midx.h +++ b/midx.h @@ -6,6 +6,7 @@ struct object_id; struct pack_entry; struct repository; +struct bitmapped_pack; #define GIT_TEST_MULTI_PACK_INDEX "GIT_TEST_MULTI_PACK_INDEX" #define GIT_TEST_MULTI_PACK_INDEX_WRITE_BITMAP \ @@ -27,11 +28,14 @@ struct multi_pack_index { unsigned char num_chunks; uint32_t num_packs; uint32_t num_objects; + int preferred_pack_idx; int local; const unsigned char *chunk_pack_names; size_t chunk_pack_names_len; + const uint32_t *chunk_bitmapped_packs; + size_t chunk_bitmapped_packs_len; const uint32_t *chunk_oid_fanout; const unsigned char *chunk_oid_lookup; const unsigned char *chunk_object_offsets; @@ -57,6 +61,8 @@ void get_midx_rev_filename(struct strbuf *out, struct multi_pack_index *m); struct multi_pack_index *load_multi_pack_index(const char *object_dir, int local); int prepare_midx_pack(struct repository *r, struct multi_pack_index *m, uint32_t pack_int_id); +int nth_bitmapped_pack(struct repository *r, struct multi_pack_index *m, + struct bitmapped_pack *bp, uint32_t pack_int_id); int bsearch_midx(const struct object_id *oid, struct multi_pack_index *m, uint32_t *result); off_t nth_midxed_offset(struct multi_pack_index *m, uint32_t pos); uint32_t 
nth_midxed_pack_int_id(struct multi_pack_index *m, uint32_t pos); @@ -64,7 +70,11 @@ struct object_id *nth_midxed_object_oid(struct object_id *oid, struct multi_pack_index *m, uint32_t n); int fill_midx_entry(struct repository *r, const struct object_id *oid, struct pack_entry *e, struct multi_pack_index *m); -int midx_contains_pack(struct multi_pack_index *m, const char *idx_or_pack_name); +int midx_contains_pack(struct multi_pack_index *m, + const char *idx_or_pack_name); +int midx_locate_pack(struct multi_pack_index *m, const char *idx_or_pack_name, + uint32_t *pos); +int midx_preferred_pack(struct multi_pack_index *m, uint32_t *pack_int_id); int prepare_multi_pack_index_one(struct repository *r, const char *object_dir, int local); /* diff --git a/pack-bitmap-write.c b/pack-bitmap-write.c index be4733e3bdcff5..990a9498d73194 100644 --- a/pack-bitmap-write.c +++ b/pack-bitmap-write.c @@ -195,6 +195,13 @@ struct bb_commit { unsigned idx; /* within selected array */ }; +static void clear_bb_commit(struct bb_commit *commit) +{ + free_commit_list(commit->reverse_edges); + bitmap_free(commit->commit_mask); + bitmap_free(commit->bitmap); +} + define_commit_slab(bb_data, struct bb_commit); struct bitmap_builder { @@ -336,7 +343,7 @@ static void bitmap_builder_init(struct bitmap_builder *bb, static void bitmap_builder_clear(struct bitmap_builder *bb) { - clear_bb_data(&bb->data); + deep_clear_bb_data(&bb->data, clear_bb_commit); free(bb->commits); bb->commits_nr = bb->commits_alloc = 0; } diff --git a/pack-bitmap.c b/pack-bitmap.c index 0260890341b5a3..229a11fb00fc20 100644 --- a/pack-bitmap.c +++ b/pack-bitmap.c @@ -338,7 +338,7 @@ static int open_midx_bitmap_1(struct bitmap_index *bitmap_git, struct stat st; char *bitmap_name = midx_bitmap_filename(midx); int fd = git_open(bitmap_name); - uint32_t i; + uint32_t i, preferred_pack; struct packed_git *preferred; if (fd < 0) { @@ -393,7 +393,12 @@ static int open_midx_bitmap_1(struct bitmap_index *bitmap_git, } } - 
preferred = bitmap_git->midx->packs[midx_preferred_pack(bitmap_git)]; + if (midx_preferred_pack(bitmap_git->midx, &preferred_pack) < 0) { + warning(_("could not determine MIDX preferred pack")); + goto cleanup; + } + + preferred = bitmap_git->midx->packs[preferred_pack]; if (!is_pack_valid(preferred)) { warning(_("preferred pack (%s) is invalid"), preferred->pack_name); @@ -1280,6 +1285,8 @@ static struct bitmap *find_objects(struct bitmap_index *bitmap_git, base = fill_in_bitmap(bitmap_git, revs, base, seen); } + object_list_free(&not_mapped); + return base; } @@ -1834,8 +1841,10 @@ struct bitmap_index *prepare_bitmap_walk(struct rev_info *revs, * -1 means "stop trying further objects"; 0 means we may or may not have * reused, but you can keep feeding bits. */ -static int try_partial_reuse(struct packed_git *pack, - size_t pos, +static int try_partial_reuse(struct bitmap_index *bitmap_git, + struct bitmapped_pack *pack, + size_t bitmap_pos, + uint32_t pack_pos, struct bitmap *reuse, struct pack_window **w_curs) { @@ -1843,40 +1852,18 @@ static int try_partial_reuse(struct packed_git *pack, enum object_type type; unsigned long size; - /* - * try_partial_reuse() is called either on (a) objects in the - * bitmapped pack (in the case of a single-pack bitmap) or (b) - * objects in the preferred pack of a multi-pack bitmap. - * Importantly, the latter can pretend as if only a single pack - * exists because: - * - * - The first pack->num_objects bits of a MIDX bitmap are - * reserved for the preferred pack, and - * - * - Ties due to duplicate objects are always resolved in - * favor of the preferred pack. - * - * Therefore we do not need to ever ask the MIDX for its copy of - * an object by OID, since it will always select it from the - * preferred pack. Likewise, the selected copy of the base - * object for any deltas will reside in the same pack. 
- * - * This means that we can reuse pos when looking up the bit in - * the reuse bitmap, too, since bits corresponding to the - * preferred pack precede all bits from other packs. - */ - - if (pos >= pack->num_objects) - return -1; /* not actually in the pack or MIDX preferred pack */ + if (pack_pos >= pack->p->num_objects) + return -1; /* not actually in the pack */ - offset = delta_obj_offset = pack_pos_to_offset(pack, pos); - type = unpack_object_header(pack, w_curs, &offset, &size); + offset = delta_obj_offset = pack_pos_to_offset(pack->p, pack_pos); + type = unpack_object_header(pack->p, w_curs, &offset, &size); if (type < 0) return -1; /* broken packfile, punt */ if (type == OBJ_REF_DELTA || type == OBJ_OFS_DELTA) { off_t base_offset; uint32_t base_pos; + uint32_t base_bitmap_pos; /* * Find the position of the base object so we can look it up @@ -1886,24 +1873,48 @@ static int try_partial_reuse(struct packed_git *pack, * and the normal slow path will complain about it in * more detail. */ - base_offset = get_delta_base(pack, w_curs, &offset, type, + base_offset = get_delta_base(pack->p, w_curs, &offset, type, delta_obj_offset); if (!base_offset) return 0; - if (offset_to_pack_pos(pack, base_offset, &base_pos) < 0) - return 0; - /* - * We assume delta dependencies always point backwards. This - * lets us do a single pass, and is basically always true - * due to the way OFS_DELTAs work. You would not typically - * find REF_DELTA in a bitmapped pack, since we only bitmap - * packs we write fresh, and OFS_DELTA is the default). But - * let's double check to make sure the pack wasn't written with - * odd parameters. - */ - if (base_pos >= pos) - return 0; + offset_to_pack_pos(pack->p, base_offset, &base_pos); + + if (bitmap_is_midx(bitmap_git)) { + /* + * Cross-pack deltas are rejected for now, but could + * theoretically be supported in the future. 
+ * + * We would need to ensure that we're sending both + * halves of the delta/base pair, regardless of whether + * or not the two cross a pack boundary. If they do, + * then we must convert the delta to an REF_DELTA to + * refer back to the base in the other pack. + * */ + if (midx_pair_to_pack_pos(bitmap_git->midx, + pack->pack_int_id, + base_offset, + &base_bitmap_pos) < 0) { + return 0; + } + } else { + if (offset_to_pack_pos(pack->p, base_offset, + &base_pos) < 0) + return 0; + /* + * We assume delta dependencies always point backwards. + * This lets us do a single pass, and is basically + * always true due to the way OFS_DELTAs work. You would + * not typically find REF_DELTA in a bitmapped pack, + * since we only bitmap packs we write fresh, and + * OFS_DELTA is the default). But let's double check to + * make sure the pack wasn't written with odd + * parameters. + */ + if (base_pos >= pack_pos) + return 0; + base_bitmap_pos = pack->bitmap_pos + base_pos; + } /* * And finally, if we're not sending the base as part of our @@ -1913,77 +1924,89 @@ static int try_partial_reuse(struct packed_git *pack, * to REF_DELTA on the fly. Better to just let the normal * object_entry code path handle it. */ - if (!bitmap_get(reuse, base_pos)) + if (!bitmap_get(reuse, base_bitmap_pos)) return 0; } /* * If we got here, then the object is OK to reuse. Mark it. 
*/ - bitmap_set(reuse, pos); + bitmap_set(reuse, bitmap_pos); return 0; } -uint32_t midx_preferred_pack(struct bitmap_index *bitmap_git) +static void reuse_partial_packfile_from_bitmap_1(struct bitmap_index *bitmap_git, + struct bitmapped_pack *pack, + struct bitmap *reuse) { - struct multi_pack_index *m = bitmap_git->midx; - if (!m) - BUG("midx_preferred_pack: requires non-empty MIDX"); - return nth_midxed_pack_int_id(m, pack_pos_to_midx(bitmap_git->midx, 0)); -} - -int reuse_partial_packfile_from_bitmap(struct bitmap_index *bitmap_git, - struct packed_git **packfile_out, - uint32_t *entries, - struct bitmap **reuse_out) -{ - struct repository *r = the_repository; - struct packed_git *pack; struct bitmap *result = bitmap_git->result; - struct bitmap *reuse; struct pack_window *w_curs = NULL; - size_t i = 0; - uint32_t offset; - uint32_t objects_nr; + size_t pos = pack->bitmap_pos / BITS_IN_EWORD; - assert(result); + if (!pack->bitmap_pos) { + /* + * If we're processing the first (in the case of a MIDX, the + * preferred pack) or the only (in the case of single-pack + * bitmaps) pack, then we can reuse whole words at a time. + * + * This is because we know that any deltas in this range *must* + * have their bases chosen from the same pack, since: + * + * - In the single pack case, there is no other pack to choose + * them from. + * + * - In the MIDX case, the first pack is the preferred pack, so + * all ties are broken in favor of that pack (i.e. the one + * we're currently processing). So any duplicate bases will be + * resolved in favor of the pack we're processing. 
+ */ + while (pos < result->word_alloc && + pos < pack->bitmap_nr / BITS_IN_EWORD && + result->words[pos] == (eword_t)~0) + pos++; + memset(reuse->words, 0xFF, pos * sizeof(eword_t)); + } - load_reverse_index(r, bitmap_git); + for (; pos < result->word_alloc; pos++) { + eword_t word = result->words[pos]; + size_t offset; - if (bitmap_is_midx(bitmap_git)) - pack = bitmap_git->midx->packs[midx_preferred_pack(bitmap_git)]; - else - pack = bitmap_git->pack; - objects_nr = pack->num_objects; + for (offset = 0; offset < BITS_IN_EWORD; offset++) { + size_t bit_pos; + uint32_t pack_pos; - while (i < result->word_alloc && result->words[i] == (eword_t)~0) - i++; + if (word >> offset == 0) + break; - /* - * Don't mark objects not in the packfile or preferred pack. This bitmap - * marks objects eligible for reuse, but the pack-reuse code only - * understands how to reuse a single pack. Since the preferred pack is - * guaranteed to have all bases for its deltas (in a multi-pack bitmap), - * we use it instead of another pack. In single-pack bitmaps, the choice - * is made for us. 
- */ - if (i > objects_nr / BITS_IN_EWORD) - i = objects_nr / BITS_IN_EWORD; + offset += ewah_bit_ctz64(word >> offset); - reuse = bitmap_word_alloc(i); - memset(reuse->words, 0xFF, i * sizeof(eword_t)); + bit_pos = pos * BITS_IN_EWORD + offset; + if (bit_pos < pack->bitmap_pos) + continue; + if (bit_pos >= pack->bitmap_pos + pack->bitmap_nr) + goto done; - for (; i < result->word_alloc; ++i) { - eword_t word = result->words[i]; - size_t pos = (i * BITS_IN_EWORD); + if (bitmap_is_midx(bitmap_git)) { + uint32_t midx_pos; + off_t ofs; - for (offset = 0; offset < BITS_IN_EWORD; ++offset) { - if ((word >> offset) == 0) - break; + midx_pos = pack_pos_to_midx(bitmap_git->midx, bit_pos); + ofs = nth_midxed_offset(bitmap_git->midx, midx_pos); - offset += ewah_bit_ctz64(word >> offset); - if (try_partial_reuse(pack, pos + offset, - reuse, &w_curs) < 0) { + if (offset_to_pack_pos(pack->p, ofs, &pack_pos) < 0) + BUG("could not find object in pack %s " + "at offset %"PRIuMAX" in MIDX", + pack_basename(pack->p), (uintmax_t)ofs); + } else { + pack_pos = cast_size_t_to_uint32_t(st_sub(bit_pos, pack->bitmap_pos)); + if (pack_pos >= pack->p->num_objects) + BUG("advanced beyond the end of pack %s (%"PRIuMAX" > %"PRIu32")", + pack_basename(pack->p), (uintmax_t)pack_pos, + pack->p->num_objects); + } + + if (try_partial_reuse(bitmap_git, pack, bit_pos, + pack_pos, reuse, &w_curs) < 0) { /* * try_partial_reuse indicated we couldn't reuse * any bits, so there is no point in trying more @@ -2000,11 +2023,97 @@ int reuse_partial_packfile_from_bitmap(struct bitmap_index *bitmap_git, done: unuse_pack(&w_curs); +} - *entries = bitmap_popcount(reuse); - if (!*entries) { - bitmap_free(reuse); +static int bitmapped_pack_cmp(const void *va, const void *vb) +{ + const struct bitmapped_pack *a = va; + const struct bitmapped_pack *b = vb; + + if (a->bitmap_pos < b->bitmap_pos) return -1; + if (a->bitmap_pos > b->bitmap_pos) + return 1; + return 0; +} + +void reuse_partial_packfile_from_bitmap(struct 
bitmap_index *bitmap_git, + struct bitmapped_pack **packs_out, + size_t *packs_nr_out, + struct bitmap **reuse_out, + int multi_pack_reuse) +{ + struct repository *r = the_repository; + struct bitmapped_pack *packs = NULL; + struct bitmap *result = bitmap_git->result; + struct bitmap *reuse; + size_t i; + size_t packs_nr = 0, packs_alloc = 0; + size_t word_alloc; + uint32_t objects_nr = 0; + + assert(result); + + load_reverse_index(r, bitmap_git); + + if (bitmap_is_midx(bitmap_git)) { + for (i = 0; i < bitmap_git->midx->num_packs; i++) { + struct bitmapped_pack pack; + if (nth_bitmapped_pack(r, bitmap_git->midx, &pack, i) < 0) { + warning(_("unable to load pack: '%s', disabling pack-reuse"), + bitmap_git->midx->pack_names[i]); + free(packs); + return; + } + + if (!pack.bitmap_nr) + continue; + + if (!multi_pack_reuse && pack.bitmap_pos) { + /* + * If we're only reusing a single pack, skip + * over any packs which are not positioned at + * the beginning of the MIDX bitmap. + * + * This is consistent with the existing + * single-pack reuse behavior, which only reuses + * parts of the MIDX's preferred pack. 
+ */ + continue; + } + + ALLOC_GROW(packs, packs_nr + 1, packs_alloc); + memcpy(&packs[packs_nr++], &pack, sizeof(pack)); + + objects_nr += pack.p->num_objects; + + if (!multi_pack_reuse) + break; + } + + QSORT(packs, packs_nr, bitmapped_pack_cmp); + } else { + ALLOC_GROW(packs, packs_nr + 1, packs_alloc); + + packs[packs_nr].p = bitmap_git->pack; + packs[packs_nr].bitmap_nr = bitmap_git->pack->num_objects; + packs[packs_nr].bitmap_pos = 0; + + objects_nr = packs[packs_nr++].bitmap_nr; + } + + word_alloc = objects_nr / BITS_IN_EWORD; + if (objects_nr % BITS_IN_EWORD) + word_alloc++; + reuse = bitmap_word_alloc(word_alloc); + + for (i = 0; i < packs_nr; i++) + reuse_partial_packfile_from_bitmap_1(bitmap_git, &packs[i], reuse); + + if (bitmap_is_empty(reuse)) { + free(packs); + bitmap_free(reuse); + return; } /* @@ -2012,9 +2121,9 @@ int reuse_partial_packfile_from_bitmap(struct bitmap_index *bitmap_git, * need to be handled separately. */ bitmap_and_not(result, reuse); - *packfile_out = pack; + *packs_out = packs; + *packs_nr_out = packs_nr; *reuse_out = reuse; - return 0; } int bitmap_walk_contains(struct bitmap_index *bitmap_git, diff --git a/pack-bitmap.h b/pack-bitmap.h index 5273a6a019708c..c7dea13217a00e 100644 --- a/pack-bitmap.h +++ b/pack-bitmap.h @@ -52,6 +52,15 @@ typedef int (*show_reachable_fn)( struct bitmap_index; +struct bitmapped_pack { + struct packed_git *p; + + uint32_t bitmap_pos; + uint32_t bitmap_nr; + + uint32_t pack_int_id; /* MIDX only */ +}; + struct bitmap_index *prepare_bitmap_git(struct repository *r); struct bitmap_index *prepare_midx_bitmap_git(struct multi_pack_index *midx); void count_bitmap_commit_list(struct bitmap_index *, uint32_t *commits, @@ -68,11 +77,11 @@ int test_bitmap_hashes(struct repository *r); struct bitmap_index *prepare_bitmap_walk(struct rev_info *revs, int filter_provided_objects); -uint32_t midx_preferred_pack(struct bitmap_index *bitmap_git); -int reuse_partial_packfile_from_bitmap(struct bitmap_index *, - 
struct packed_git **packfile, - uint32_t *entries, - struct bitmap **reuse_out); +void reuse_partial_packfile_from_bitmap(struct bitmap_index *bitmap_git, + struct bitmapped_pack **packs_out, + size_t *packs_nr_out, + struct bitmap **reuse_out, + int multi_pack_reuse); int rebuild_existing_bitmaps(struct bitmap_index *, struct packing_data *mapping, kh_oid_map_t *reused_bitmaps, int show_progress); void free_bitmap_index(struct bitmap_index *); diff --git a/pack-objects.c b/pack-objects.c index f403ca6986a9d4..a9d9855063aea8 100644 --- a/pack-objects.c +++ b/pack-objects.c @@ -151,6 +151,21 @@ void prepare_packing_data(struct repository *r, struct packing_data *pdata) init_recursive_mutex(&pdata->odb_lock); } +void clear_packing_data(struct packing_data *pdata) +{ + if (!pdata) + return; + + free(pdata->cruft_mtime); + free(pdata->in_pack); + free(pdata->in_pack_by_idx); + free(pdata->in_pack_pos); + free(pdata->index); + free(pdata->layer); + free(pdata->objects); + free(pdata->tree_depth); +} + struct object_entry *packlist_alloc(struct packing_data *pdata, const struct object_id *oid) { diff --git a/pack-objects.h b/pack-objects.h index 0d78db40cb2f11..b9898a4e64b8b4 100644 --- a/pack-objects.h +++ b/pack-objects.h @@ -169,6 +169,7 @@ struct packing_data { }; void prepare_packing_data(struct repository *r, struct packing_data *pdata); +void clear_packing_data(struct packing_data *pdata); /* Protect access to object database */ static inline void packing_data_lock(struct packing_data *pdata) diff --git a/pack-revindex.c b/pack-revindex.c index acf1dd9786cd3c..a7624d8be8e58e 100644 --- a/pack-revindex.c +++ b/pack-revindex.c @@ -520,19 +520,12 @@ static int midx_pack_order_cmp(const void *va, const void *vb) return 0; } -int midx_to_pack_pos(struct multi_pack_index *m, uint32_t at, uint32_t *pos) +static int midx_key_to_pack_pos(struct multi_pack_index *m, + struct midx_pack_key *key, + uint32_t *pos) { - struct midx_pack_key key; uint32_t *found; - if 
(!m->revindex_data) - BUG("midx_to_pack_pos: reverse index not yet loaded"); - if (m->num_objects <= at) - BUG("midx_to_pack_pos: out-of-bounds object at %"PRIu32, at); - - key.pack = nth_midxed_pack_int_id(m, at); - key.offset = nth_midxed_offset(m, at); - key.midx = m; /* * The preferred pack sorts first, so determine its identifier by * looking at the first object in pseudo-pack order. @@ -542,14 +535,43 @@ int midx_to_pack_pos(struct multi_pack_index *m, uint32_t at, uint32_t *pos) * implicitly is preferred (and includes all its objects, since ties are * broken first by pack identifier). */ - key.preferred_pack = nth_midxed_pack_int_id(m, pack_pos_to_midx(m, 0)); + if (midx_preferred_pack(key->midx, &key->preferred_pack) < 0) + return error(_("could not determine preferred pack")); - found = bsearch(&key, m->revindex_data, m->num_objects, - sizeof(*m->revindex_data), midx_pack_order_cmp); + found = bsearch(key, m->revindex_data, m->num_objects, + sizeof(*m->revindex_data), + midx_pack_order_cmp); if (!found) - return error("bad offset for revindex"); + return -1; *pos = found - m->revindex_data; return 0; } + +int midx_to_pack_pos(struct multi_pack_index *m, uint32_t at, uint32_t *pos) +{ + struct midx_pack_key key; + + if (!m->revindex_data) + BUG("midx_to_pack_pos: reverse index not yet loaded"); + if (m->num_objects <= at) + BUG("midx_to_pack_pos: out-of-bounds object at %"PRIu32, at); + + key.pack = nth_midxed_pack_int_id(m, at); + key.offset = nth_midxed_offset(m, at); + key.midx = m; + + return midx_key_to_pack_pos(m, &key, pos); +} + +int midx_pair_to_pack_pos(struct multi_pack_index *m, uint32_t pack_int_id, + off_t ofs, uint32_t *pos) +{ + struct midx_pack_key key = { + .pack = pack_int_id, + .offset = ofs, + .midx = m, + }; + return midx_key_to_pack_pos(m, &key, pos); +} diff --git a/pack-revindex.h b/pack-revindex.h index 6dd47efea10ec6..422c2487ae32d8 100644 --- a/pack-revindex.h +++ b/pack-revindex.h @@ -142,4 +142,7 @@ uint32_t 
pack_pos_to_midx(struct multi_pack_index *m, uint32_t pos); */ int midx_to_pack_pos(struct multi_pack_index *midx, uint32_t at, uint32_t *pos); +int midx_pair_to_pack_pos(struct multi_pack_index *midx, uint32_t pack_id, + off_t ofs, uint32_t *pos); + #endif diff --git a/t/helper/test-read-midx.c b/t/helper/test-read-midx.c index e9a444ddba55b4..4acae41bb993c8 100644 --- a/t/helper/test-read-midx.c +++ b/t/helper/test-read-midx.c @@ -6,6 +6,7 @@ #include "pack-bitmap.h" #include "packfile.h" #include "setup.h" +#include "gettext.h" static int read_midx_file(const char *object_dir, int show_objects) { @@ -79,7 +80,7 @@ static int read_midx_checksum(const char *object_dir) static int read_midx_preferred_pack(const char *object_dir) { struct multi_pack_index *midx = NULL; - struct bitmap_index *bitmap = NULL; + uint32_t preferred_pack; setup_git_directory(); @@ -87,23 +88,45 @@ static int read_midx_preferred_pack(const char *object_dir) if (!midx) return 1; - bitmap = prepare_bitmap_git(the_repository); - if (!bitmap) + if (midx_preferred_pack(midx, &preferred_pack) < 0) { + warning(_("could not determine MIDX preferred pack")); return 1; - if (!bitmap_is_midx(bitmap)) { - free_bitmap_index(bitmap); + } + + printf("%s\n", midx->pack_names[preferred_pack]); + return 0; +} + +static int read_midx_bitmapped_packs(const char *object_dir) +{ + struct multi_pack_index *midx = NULL; + struct bitmapped_pack pack; + uint32_t i; + + setup_git_directory(); + + midx = load_multi_pack_index(object_dir, 1); + if (!midx) return 1; + + for (i = 0; i < midx->num_packs; i++) { + if (nth_bitmapped_pack(the_repository, midx, &pack, i) < 0) + return 1; + + printf("%s\n", pack_basename(pack.p)); + printf(" bitmap_pos: %"PRIuMAX"\n", (uintmax_t)pack.bitmap_pos); + printf(" bitmap_nr: %"PRIuMAX"\n", (uintmax_t)pack.bitmap_nr); } - printf("%s\n", midx->pack_names[midx_preferred_pack(bitmap)]); - free_bitmap_index(bitmap); + close_midx(midx); + return 0; } int cmd__read_midx(int argc, const 
char **argv) { if (!(argc == 2 || argc == 3)) - usage("read-midx [--show-objects|--checksum|--preferred-pack] "); + usage("read-midx [--show-objects|--checksum|--preferred-pack|--bitmap] "); if (!strcmp(argv[1], "--show-objects")) return read_midx_file(argv[2], 1); @@ -111,5 +134,7 @@ int cmd__read_midx(int argc, const char **argv) return read_midx_checksum(argv[2]); else if (!strcmp(argv[1], "--preferred-pack")) return read_midx_preferred_pack(argv[2]); + else if (!strcmp(argv[1], "--bitmap")) + return read_midx_bitmapped_packs(argv[2]); return read_midx_file(argv[1], 0); } diff --git a/t/perf/p5332-multi-pack-reuse.sh b/t/perf/p5332-multi-pack-reuse.sh new file mode 100755 index 00000000000000..5c6c575d62c64b --- /dev/null +++ b/t/perf/p5332-multi-pack-reuse.sh @@ -0,0 +1,81 @@ +#!/bin/sh + +test_description='tests pack performance with multi-pack reuse' + +. ./perf-lib.sh +. "${TEST_DIRECTORY}/perf/lib-pack.sh" + +packdir=.git/objects/pack + +test_perf_large_repo + +find_pack () { + for idx in $packdir/pack-*.idx + do + if git show-index <$idx | grep -q "$1" + then + basename $idx + fi || return 1 + done +} + +repack_into_n_chunks () { + git repack -adk && + + test "$1" -eq 1 && return || + + find $packdir -type f | sort >packs.before && + + # partition the repository into $1 chunks of consecutive commits, and + # then create $1 packs with the objects reachable from each chunk + # (excluding any objects reachable from the previous chunks) + sz="$(($(git rev-list --count --all) / $1))" + for rev in $(git rev-list --all | awk "NR % $sz == 0" | tac) + do + pack="$(echo "$rev" | git pack-objects --revs \ + --honor-pack-keep --delta-base-offset $packdir/pack)" && + touch $packdir/pack-$pack.keep || return 1 + done + + # grab any remaining objects not packed by the previous step(s) + git pack-objects --revs --all --honor-pack-keep --delta-base-offset \ + $packdir/pack && + + find $packdir -type f | sort >packs.after && + + # and install the whole thing + for f in 
$(comm -12 packs.before packs.after) + do + rm -f "$f" || return 1 + done + rm -fr $packdir/*.keep +} + +for nr_packs in 1 10 100 +do + test_expect_success "create $nr_packs-pack scenario" ' + repack_into_n_chunks $nr_packs + ' + + test_expect_success "setup bitmaps for $nr_packs-pack scenario" ' + find $packdir -type f -name "*.idx" | sed -e "s/.*\/\(.*\)$/+\1/g" | + git multi-pack-index write --stdin-packs --bitmap \ + --preferred-pack="$(find_pack $(git rev-parse HEAD))" + ' + + for reuse in single multi + do + test_perf "clone for $nr_packs-pack scenario ($reuse-pack reuse)" " + git for-each-ref --format='%(objectname)' refs/heads refs/tags >in && + git -c pack.allowPackReuse=$reuse pack-objects \ + --revs --delta-base-offset --use-bitmap-index \ + --stdout result + " + + test_size "clone size for $nr_packs-pack scenario ($reuse-pack reuse)" ' + wc -c packs && + + git multi-pack-index write --stdin-packs err && + cat >expect <<-\EOF && + error: MIDX does not contain the BTMP chunk + EOF + test_cmp expect err && + + git multi-pack-index write --stdin-packs --bitmap \ + --preferred-pack="$(head -n1 actual && + for i in $(test_seq $(wc -l expect && + test_cmp expect actual + ) +' + test_done diff --git a/t/t5332-multi-pack-reuse.sh b/t/t5332-multi-pack-reuse.sh new file mode 100755 index 00000000000000..2ba788b0421b6a --- /dev/null +++ b/t/t5332-multi-pack-reuse.sh @@ -0,0 +1,203 @@ +#!/bin/sh + +test_description='pack-objects multi-pack reuse' + +. ./test-lib.sh +. 
"$TEST_DIRECTORY"/lib-bitmap.sh + +objdir=.git/objects +packdir=$objdir/pack + +test_pack_reused () { + test_trace2_data pack-objects pack-reused "$1" +} + +test_packs_reused () { + test_trace2_data pack-objects packs-reused "$1" +} + + +# pack_position objects && + grep "$1" objects | cut -d" " -f1 +} + +test_expect_success 'preferred pack is reused for single-pack reuse' ' + test_config pack.allowPackReuse single && + + for i in A B + do + test_commit "$i" && + git repack -d || return 1 + done && + + git multi-pack-index write --bitmap && + + : >trace2.txt && + GIT_TRACE2_EVENT="$PWD/trace2.txt" \ + git pack-objects --stdout --revs --all >/dev/null && + + test_pack_reused 3 in <<-EOF && + $(git rev-parse C) + ^$(git rev-parse A) + EOF + + : >trace2.txt && + GIT_TRACE2_EVENT="$PWD/trace2.txt" \ + git pack-objects --stdout --revs /dev/null && + + test_pack_reused 6 trace2.txt && + GIT_TRACE2_EVENT="$PWD/trace2.txt" \ + git pack-objects --stdout --revs --all >/dev/null && + + test_pack_reused 9 in <<-EOF && + $(git rev-parse E) + ^$(git rev-parse D) + EOF + + : >trace2.txt && + GIT_TRACE2_EVENT="$PWD/trace2.txt" \ + git pack-objects --stdout --delta-base-offset --revs /dev/null && + + test_pack_reused 3 in <<-EOF && + $(git rev-parse E) + ^$(git rev-parse D) + EOF + + : >trace2.txt && + GIT_TRACE2_EVENT="$PWD/trace2.txt" \ + git pack-objects --stdout --delta-base-offset --revs /dev/null && + + test_pack_reused 3 f && + git add f && + test_tick && + git commit -m "delta" && + delta="$(git rev-parse HEAD)" && + + test_seq 64 >f && + test_tick && + git commit -a -m "base" && + base="$(git rev-parse HEAD)" && + + test_commit other && + + git repack -d && + + have_delta "$(git rev-parse $delta:f)" "$(git rev-parse $base:f)" && + + git multi-pack-index write --bitmap && + + cat >in <<-EOF && + $(git rev-parse other) + ^$base + EOF + + : >trace2.txt && + GIT_TRACE2_EVENT="$PWD/trace2.txt" \ + git pack-objects --stdout --delta-base-offset --revs /dev/null && + + # We can 
only reuse the 3 objects corresponding to "other" from + # the latest pack. + # + # This is because even though we want "delta", we do not want + # "base", meaning that we have to inflate the delta/base-pair + # corresponding to the blob in commit "delta", which bypasses + # the pack-reuse mechanism. + # + # The remaining objects from the other pack are similarly not + # reused because their objects are on the uninteresting side of + # the query. + test_pack_reused 3 in <<-EOF && + $(git rev-parse $base) + ^$(git rev-parse $delta) + EOF + + P="$(git pack-objects --revs $packdir/pack trace2.txt && + GIT_TRACE2_EVENT="$PWD/trace2.txt" \ + git pack-objects --stdout --delta-base-offset --all >/dev/null && + + packs_nr="$(find $packdir -type f -name "pack-*.pack" | wc -l)" && + objects_nr="$(git rev-list --count --all --objects)" && + + test_pack_reused $(($objects_nr - 1)) +# +# For example, to look for trace2_data_intmax("pack-objects", repo, +# "reused", N) in an invocation of "git pack-objects", run: +# +# GIT_TRACE2_EVENT="$(pwd)/trace.txt" git pack-objects ... && +# test_trace2_data pack-objects reused N