Skip to content

Commit

Permalink
builtin/repack.c: support generating a cruft pack
Browse files Browse the repository at this point in the history
Expose a way to split the contents of a repository into a main and cruft
pack when doing an all-into-one repack with `git repack --cruft -d`, and
a complementary configuration variable.

Signed-off-by: Taylor Blau <[email protected]>
Signed-off-by: Junio C Hamano <[email protected]>
  • Loading branch information
ttaylorr authored and gitster committed May 26, 2022
1 parent a7d4938 commit f9825d1
Show file tree
Hide file tree
Showing 4 changed files with 319 additions and 6 deletions.
11 changes: 11 additions & 0 deletions Documentation/git-repack.txt
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,17 @@ to the new separate pack will be written.
Also run 'git prune-packed' to remove redundant
loose object files.

--cruft::
Same as `-a`, unless `-d` is used. Then any unreachable objects
are packed into a separate cruft pack. Unreachable objects can
be pruned using the normal expiry rules with the next `git gc`
invocation (see linkgit:git-gc[1]). Incompatible with `-k`.

--cruft-expiration=<approxidate>::
Expire unreachable objects older than `<approxidate>`
immediately instead of waiting for the next `git gc` invocation.
Only useful with `--cruft -d`.

-l::
Pass the `--local` option to 'git pack-objects'. See
linkgit:git-pack-objects[1].
Expand Down
2 changes: 1 addition & 1 deletion Documentation/technical/cruft-packs.txt
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ pruned according to normal expiry rules with the next 'git gc' invocation.

Unreachable objects aren't removed immediately, since doing so could race with
an incoming push which may reference an object which is about to be deleted.
Instead, those unreachable objects are stored as loose object and stay that way
Instead, those unreachable objects are stored as loose objects and stay that way
until they are older than the expiration window, at which point they are removed
by linkgit:git-prune[1].

Expand Down
105 changes: 100 additions & 5 deletions builtin/repack.c
Original file line number Diff line number Diff line change
Expand Up @@ -18,12 +18,18 @@
#include "pack-bitmap.h"
#include "refs.h"

#define ALL_INTO_ONE 1
#define LOOSEN_UNREACHABLE 2
#define PACK_CRUFT 4

static int pack_everything;
static int delta_base_offset = 1;
static int pack_kept_objects = -1;
static int write_bitmaps = -1;
static int use_delta_islands;
static int run_update_server_info = 1;
static char *packdir, *packtmp_name, *packtmp;
static char *cruft_expiration;

static const char *const git_repack_usage[] = {
N_("git repack [<options>]"),
Expand Down Expand Up @@ -305,9 +311,6 @@ static void repack_promisor_objects(const struct pack_objects_args *args,
die(_("could not finish pack-objects to repack promisor objects"));
}

#define ALL_INTO_ONE 1
#define LOOSEN_UNREACHABLE 2

struct pack_geometry {
struct packed_git **pack;
uint32_t pack_nr, pack_alloc;
Expand Down Expand Up @@ -344,6 +347,8 @@ static void init_pack_geometry(struct pack_geometry **geometry_p)
for (p = get_all_packs(the_repository); p; p = p->next) {
if (!pack_kept_objects && p->pack_keep)
continue;
if (p->is_cruft)
continue;

ALLOC_GROW(geometry->pack,
geometry->pack_nr + 1,
Expand Down Expand Up @@ -605,6 +610,67 @@ static int write_midx_included_packs(struct string_list *include,
return finish_command(&cmd);
}

static int write_cruft_pack(const struct pack_objects_args *args,
const char *pack_prefix,
struct string_list *names,
struct string_list *existing_packs,
struct string_list *existing_kept_packs)
{
struct child_process cmd = CHILD_PROCESS_INIT;
struct strbuf line = STRBUF_INIT;
struct string_list_item *item;
FILE *in, *out;
int ret;

prepare_pack_objects(&cmd, args);

strvec_push(&cmd.args, "--cruft");
if (cruft_expiration)
strvec_pushf(&cmd.args, "--cruft-expiration=%s",
cruft_expiration);

strvec_push(&cmd.args, "--honor-pack-keep");
strvec_push(&cmd.args, "--non-empty");
strvec_push(&cmd.args, "--max-pack-size=0");

cmd.in = -1;

ret = start_command(&cmd);
if (ret)
return ret;

/*
* names has a confusing double use: it both provides the list
* of just-written new packs, and accepts the name of the cruft
* pack we are writing.
*
* By the time it is read here, it contains only the pack(s)
* that were just written, which is exactly the set of packs we
* want to consider kept.
*/
in = xfdopen(cmd.in, "w");
for_each_string_list_item(item, names)
fprintf(in, "%s-%s.pack\n", pack_prefix, item->string);
for_each_string_list_item(item, existing_packs)
fprintf(in, "-%s.pack\n", item->string);
for_each_string_list_item(item, existing_kept_packs)
fprintf(in, "%s.pack\n", item->string);
fclose(in);

out = xfdopen(cmd.out, "r");
while (strbuf_getline_lf(&line, out) != EOF) {
if (line.len != the_hash_algo->hexsz)
die(_("repack: Expecting full hex object ID lines only "
"from pack-objects."));
string_list_append(names, line.buf);
}
fclose(out);

strbuf_release(&line);

return finish_command(&cmd);
}

int cmd_repack(int argc, const char **argv, const char *prefix)
{
struct child_process cmd = CHILD_PROCESS_INIT;
Expand All @@ -621,7 +687,6 @@ int cmd_repack(int argc, const char **argv, const char *prefix)
int show_progress;

/* variables to be filled by option parsing */
int pack_everything = 0;
int delete_redundant = 0;
const char *unpack_unreachable = NULL;
int keep_unreachable = 0;
Expand All @@ -636,6 +701,11 @@ int cmd_repack(int argc, const char **argv, const char *prefix)
OPT_BIT('A', NULL, &pack_everything,
N_("same as -a, and turn unreachable objects loose"),
LOOSEN_UNREACHABLE | ALL_INTO_ONE),
OPT_BIT(0, "cruft", &pack_everything,
N_("same as -a, pack unreachable cruft objects separately"),
PACK_CRUFT),
/*
 * Help text names the long option: --cruft is registered without a short
 * name, so there is no -C for users to pair this with.
 */
OPT_STRING(0, "cruft-expiration", &cruft_expiration, N_("approxidate"),
	N_("with --cruft, expire objects older than this")),
OPT_BOOL('d', NULL, &delete_redundant,
N_("remove redundant packs, and run git-prune-packed")),
OPT_BOOL('f', NULL, &po_args.no_reuse_delta,
Expand Down Expand Up @@ -688,6 +758,15 @@ int cmd_repack(int argc, const char **argv, const char *prefix)
(unpack_unreachable || (pack_everything & LOOSEN_UNREACHABLE)))
die(_("options '%s' and '%s' cannot be used together"), "--keep-unreachable", "-A");

if (pack_everything & PACK_CRUFT) {
pack_everything |= ALL_INTO_ONE;

if (unpack_unreachable || (pack_everything & LOOSEN_UNREACHABLE))
die(_("options '%s' and '%s' cannot be used together"), "--cruft", "-A");
if (keep_unreachable)
die(_("options '%s' and '%s' cannot be used together"), "--cruft", "-k");
}

if (write_bitmaps < 0) {
if (!write_midx &&
(!(pack_everything & ALL_INTO_ONE) || !is_bare_repository()))
Expand Down Expand Up @@ -771,7 +850,8 @@ int cmd_repack(int argc, const char **argv, const char *prefix)
if (pack_everything & ALL_INTO_ONE) {
repack_promisor_objects(&po_args, &names);

if (existing_nonkept_packs.nr && delete_redundant) {
if (existing_nonkept_packs.nr && delete_redundant &&
!(pack_everything & PACK_CRUFT)) {
for_each_string_list_item(item, &names) {
strvec_pushf(&cmd.args, "--keep-pack=%s-%s.pack",
packtmp_name, item->string);
Expand Down Expand Up @@ -833,6 +913,21 @@ int cmd_repack(int argc, const char **argv, const char *prefix)
if (!names.nr && !po_args.quiet)
printf_ln(_("Nothing new to pack."));

if (pack_everything & PACK_CRUFT) {
const char *pack_prefix;
if (!skip_prefix(packtmp, packdir, &pack_prefix))
die(_("pack prefix %s does not begin with objdir %s"),
packtmp, packdir);
if (*pack_prefix == '/')
pack_prefix++;

ret = write_cruft_pack(&po_args, pack_prefix, &names,
&existing_nonkept_packs,
&existing_kept_packs);
if (ret)
return ret;
}

for_each_string_list_item(item, &names) {
item->util = (void *)(uintptr_t)populate_pack_exts(item->string);
}
Expand Down
Loading

0 comments on commit f9825d1

Please sign in to comment.