Skip to content

Commit

Permalink
survey: collect stats from blobs during treewalk
Browse files Browse the repository at this point in the history
Signed-off-by: Jeff Hostetler <[email protected]>
  • Loading branch information
jeffhostetler committed Apr 30, 2024
1 parent 02dc3a8 commit 3c9cfb8
Showing 1 changed file with 113 additions and 1 deletion.
114 changes: 113 additions & 1 deletion builtin/survey.c
Original file line number Diff line number Diff line change
Expand Up @@ -340,10 +340,47 @@ struct survey_stats_trees {
struct obj_hist_bin entry_qbin[QBIN_LEN];
};

/*
 * Stats for reachable blobs.
 *
 * These counters are filled in by traverse_object_cb_blob() as blobs
 * are visited during the treewalk and are reported under the "blobs"
 * key by survey_json().
 */
struct survey_stats_blobs {
uint32_t cnt_seen; /* nr_blobs -- number of blobs observed */

/*
 * Blobs whose object info lookup failed or did not report a
 * blob type; we may have a partial clone.  Such blobs are
 * counted in cnt_seen but in none of the fields below.
 */
uint32_t cnt_missing;

/*
 * nr_blobs grouped by where they are stored on disk. This is
 * a function of how the ODB is packed.
 */
uint32_t cnt_cached; /* oi.whence == OI_CACHED */
uint32_t cnt_loose; /* oi.whence == OI_LOOSE */
uint32_t cnt_packed; /* oi.whence == OI_PACKED */
uint32_t cnt_dbcached; /* oi.whence == OI_DBCACHED */

/*
 * In the following, blob_size is the expanded size of the blob
 * object. disk_size refers to the compressed and/or delta-fied
 * representation on disk.
 *
 * In both cases we group blobs by the blob_size so that we have
 * the same set in each histogram and therefore can get a feel
 * for the effectiveness of the compression and delta-chaining.
 */
uint64_t sum_size; /* sum(blob_size) */
uint64_t sum_disk_size; /* sum(disk_size) */

/*
 * A histogram of the count of blobs, the observed size, and
 * the on-disk size grouped by the observed size.  The bin is
 * selected with hbin(blob_size).
 */
struct obj_hist_bin size_hbin[HBIN_LEN];
};

/*
 * Top-level container for everything the survey collects, with one
 * sub-structure per category.
 */
struct survey_stats {
struct survey_stats_refs refs;
struct survey_stats_commits commits;
struct survey_stats_trees trees;
struct survey_stats_blobs blobs; /* updated by traverse_object_cb_blob() */
};

/*
 * The single file-scope instance, zero-initialized here.  The traversal
 * callbacks update it in place and survey_json() reads it when writing
 * the report.
 */
static struct survey_stats survey_stats = { 0 };
Expand Down Expand Up @@ -528,6 +565,54 @@ static void traverse_object_cb_tree(struct object *obj)
incr_obj_hist_bin(&pst->entry_qbin[qb], object_length, disk_sizep);
}

static void traverse_object_cb_blob(struct object *obj)
{
struct survey_stats_blobs *psb = &survey_stats.blobs;
struct object_info oi = OBJECT_INFO_INIT;
unsigned oi_flags = OBJECT_INFO_FOR_PREFETCH;
unsigned long object_length;
off_t disk_sizep;
enum object_type type;
struct tree_desc desc;

Check failure on line 576 in builtin/survey.c

View workflow job for this annotation

GitHub Actions / linux-leaks (ubuntu-latest)

builtin/survey.c:576:26: unused variable ‘desc’ [-Werror=unused-variable]

Check failure on line 576 in builtin/survey.c

View workflow job for this annotation

GitHub Actions / linux-reftable-leaks (ubuntu-latest)

builtin/survey.c:576:26: unused variable ‘desc’ [-Werror=unused-variable]

Check failure on line 576 in builtin/survey.c

View workflow job for this annotation

GitHub Actions / linux-gcc (ubuntu-20.04)

builtin/survey.c:576:19: unused variable ‘desc’ [-Werror=unused-variable]

Check failure on line 576 in builtin/survey.c

View workflow job for this annotation

GitHub Actions / linux-TEST-vars (ubuntu-20.04)

builtin/survey.c:576:19: unused variable ‘desc’ [-Werror=unused-variable]

Check failure on line 576 in builtin/survey.c

View workflow job for this annotation

GitHub Actions / linux-gcc-default (ubuntu-latest)

builtin/survey.c:576:26: unused variable ‘desc’ [-Werror=unused-variable]

Check failure on line 576 in builtin/survey.c

View workflow job for this annotation

GitHub Actions / linux-sha256 (ubuntu-latest)

builtin/survey.c:576:19: unused variable 'desc' [-Werror,-Wunused-variable]

Check failure on line 576 in builtin/survey.c

View workflow job for this annotation

GitHub Actions / linux-reftable (ubuntu-latest)

builtin/survey.c:576:19: unused variable 'desc' [-Werror,-Wunused-variable]

Check failure on line 576 in builtin/survey.c

View workflow job for this annotation

GitHub Actions / linux-musl (alpine)

builtin/survey.c:576:26: unused variable 'desc' [-Werror=unused-variable]

Check failure on line 576 in builtin/survey.c

View workflow job for this annotation

GitHub Actions / linux32 (daald/ubuntu32:xenial)

builtin/survey.c:576:19: unused variable 'desc' [-Werror=unused-variable]

Check failure on line 576 in builtin/survey.c

View workflow job for this annotation

GitHub Actions / pedantic (fedora)

builtin/survey.c:576:26: unused variable 'desc' [-Werror=unused-variable]

Check failure on line 576 in builtin/survey.c

View workflow job for this annotation

GitHub Actions / linux-asan-ubsan (ubuntu-latest)

builtin/survey.c:576:19: unused variable 'desc' [-Werror,-Wunused-variable]

Check failure on line 576 in builtin/survey.c

View workflow job for this annotation

GitHub Actions / win build

builtin/survey.c:576:26: unused variable 'desc' [-Werror=unused-variable]
struct name_entry entry;

Check failure on line 577 in builtin/survey.c

View workflow job for this annotation

GitHub Actions / linux-leaks (ubuntu-latest)

builtin/survey.c:577:27: unused variable ‘entry’ [-Werror=unused-variable]

Check failure on line 577 in builtin/survey.c

View workflow job for this annotation

GitHub Actions / linux-reftable-leaks (ubuntu-latest)

builtin/survey.c:577:27: unused variable ‘entry’ [-Werror=unused-variable]

Check failure on line 577 in builtin/survey.c

View workflow job for this annotation

GitHub Actions / linux-gcc (ubuntu-20.04)

builtin/survey.c:577:20: unused variable ‘entry’ [-Werror=unused-variable]

Check failure on line 577 in builtin/survey.c

View workflow job for this annotation

GitHub Actions / linux-TEST-vars (ubuntu-20.04)

builtin/survey.c:577:20: unused variable ‘entry’ [-Werror=unused-variable]

Check failure on line 577 in builtin/survey.c

View workflow job for this annotation

GitHub Actions / linux-gcc-default (ubuntu-latest)

builtin/survey.c:577:27: unused variable ‘entry’ [-Werror=unused-variable]

Check failure on line 577 in builtin/survey.c

View workflow job for this annotation

GitHub Actions / linux-sha256 (ubuntu-latest)

builtin/survey.c:577:20: unused variable 'entry' [-Werror,-Wunused-variable]

Check failure on line 577 in builtin/survey.c

View workflow job for this annotation

GitHub Actions / linux-reftable (ubuntu-latest)

builtin/survey.c:577:20: unused variable 'entry' [-Werror,-Wunused-variable]

Check failure on line 577 in builtin/survey.c

View workflow job for this annotation

GitHub Actions / linux-musl (alpine)

builtin/survey.c:577:27: unused variable 'entry' [-Werror=unused-variable]

Check failure on line 577 in builtin/survey.c

View workflow job for this annotation

GitHub Actions / linux32 (daald/ubuntu32:xenial)

builtin/survey.c:577:20: unused variable 'entry' [-Werror=unused-variable]

Check failure on line 577 in builtin/survey.c

View workflow job for this annotation

GitHub Actions / pedantic (fedora)

builtin/survey.c:577:27: unused variable 'entry' [-Werror=unused-variable]

Check failure on line 577 in builtin/survey.c

View workflow job for this annotation

GitHub Actions / linux-asan-ubsan (ubuntu-latest)

builtin/survey.c:577:20: unused variable 'entry' [-Werror,-Wunused-variable]

Check failure on line 577 in builtin/survey.c

View workflow job for this annotation

GitHub Actions / win build

builtin/survey.c:577:27: unused variable 'entry' [-Werror=unused-variable]
int hb;

psb->cnt_seen++;

oi.typep = &type;
oi.sizep = &object_length;
oi.disk_sizep = &disk_sizep;

if (oid_object_info_extended(the_repository, &obj->oid, &oi, oi_flags) < 0 ||
type != OBJ_BLOB) {
psb->cnt_missing++;
return;
}

switch (oi.whence) {
case OI_CACHED:
psb->cnt_cached++;
break;
case OI_LOOSE:
psb->cnt_loose++;
break;
case OI_PACKED:
psb->cnt_packed++;
break;
case OI_DBCACHED:
psb->cnt_dbcached++;
break;
default:
break;
}

psb->sum_size += object_length;
psb->sum_disk_size += disk_sizep;

hb = hbin(object_length);
incr_obj_hist_bin(&psb->size_hbin[hb], object_length, disk_sizep);
}

static void traverse_object_cb(struct object *obj, const char *name, void *data)
{
display_progress(survey_progress, ++survey_progress_total);
Expand All @@ -537,7 +622,7 @@ static void traverse_object_cb(struct object *obj, const char *name, void *data)
traverse_object_cb_tree(obj);
return;
case OBJ_BLOB:
// traverse_object_cb_blob(obj);
traverse_object_cb_blob(obj);
return;
case OBJ_TAG: /* ignore -- counted when loading REFS */
case OBJ_COMMIT: /* ignore/bug -- seen in the other callback */
Expand Down Expand Up @@ -747,6 +832,7 @@ static void survey_json(struct json_writer *jw, int pretty)
struct survey_stats_refs *prs = &survey_stats.refs;
struct survey_stats_commits *psc = &survey_stats.commits;
struct survey_stats_trees *pst = &survey_stats.trees;
struct survey_stats_blobs *psb = &survey_stats.blobs;
struct strbuf buf = STRBUF_INIT;

Check failure on line 836 in builtin/survey.c

View workflow job for this annotation

GitHub Actions / linux-leaks (ubuntu-latest)

builtin/survey.c:836:23: unused variable ‘buf’ [-Werror=unused-variable]

Check failure on line 836 in builtin/survey.c

View workflow job for this annotation

GitHub Actions / linux-reftable-leaks (ubuntu-latest)

builtin/survey.c:836:23: unused variable ‘buf’ [-Werror=unused-variable]

Check failure on line 836 in builtin/survey.c

View workflow job for this annotation

GitHub Actions / linux-gcc (ubuntu-20.04)

builtin/survey.c:836:16: unused variable ‘buf’ [-Werror=unused-variable]

Check failure on line 836 in builtin/survey.c

View workflow job for this annotation

GitHub Actions / linux-TEST-vars (ubuntu-20.04)

builtin/survey.c:836:16: unused variable ‘buf’ [-Werror=unused-variable]

Check failure on line 836 in builtin/survey.c

View workflow job for this annotation

GitHub Actions / linux-gcc-default (ubuntu-latest)

builtin/survey.c:836:23: unused variable ‘buf’ [-Werror=unused-variable]

Check failure on line 836 in builtin/survey.c

View workflow job for this annotation

GitHub Actions / linux-sha256 (ubuntu-latest)

builtin/survey.c:836:16: unused variable 'buf' [-Werror,-Wunused-variable]

Check failure on line 836 in builtin/survey.c

View workflow job for this annotation

GitHub Actions / linux-reftable (ubuntu-latest)

builtin/survey.c:836:16: unused variable 'buf' [-Werror,-Wunused-variable]

Check failure on line 836 in builtin/survey.c

View workflow job for this annotation

GitHub Actions / linux-musl (alpine)

builtin/survey.c:836:23: unused variable 'buf' [-Werror=unused-variable]

Check failure on line 836 in builtin/survey.c

View workflow job for this annotation

GitHub Actions / linux32 (daald/ubuntu32:xenial)

builtin/survey.c:836:16: unused variable 'buf' [-Werror=unused-variable]

Check failure on line 836 in builtin/survey.c

View workflow job for this annotation

GitHub Actions / pedantic (fedora)

builtin/survey.c:836:23: unused variable 'buf' [-Werror=unused-variable]

Check failure on line 836 in builtin/survey.c

View workflow job for this annotation

GitHub Actions / linux-asan-ubsan (ubuntu-latest)

builtin/survey.c:836:16: unused variable 'buf' [-Werror,-Wunused-variable]

Check failure on line 836 in builtin/survey.c

View workflow job for this annotation

GitHub Actions / win build

builtin/survey.c:836:23: unused variable 'buf' [-Werror=unused-variable]
int k;

Expand Down Expand Up @@ -852,6 +938,32 @@ static void survey_json(struct json_writer *jw, int pretty)
write_hbin_json(jw, "dist_by_size", pst->size_hbin);
}
jw_end(jw);


jw_object_inline_begin_object(jw, "blobs");
{
jw_object_intmax(jw, "count", psb->cnt_seen);
jw_object_intmax(jw, "sum_size", psb->sum_size);
jw_object_intmax(jw, "sum_disk_size", psb->sum_disk_size);

jw_object_inline_begin_object(jw, "count_by_whence");
{
/*
* Missing is not technically a "whence" value, but
* we don't need to clutter up the results with that
* distinction.
*/
JW_OBJ_INT_NZ(jw, "missing", psb->cnt_missing);
JW_OBJ_INT_NZ(jw, "cached", psb->cnt_cached);
JW_OBJ_INT_NZ(jw, "loose", psb->cnt_loose);
JW_OBJ_INT_NZ(jw, "packed", psb->cnt_packed);
JW_OBJ_INT_NZ(jw, "dbcached", psb->cnt_dbcached);
}
jw_end(jw);

write_hbin_json(jw, "dist_by_size", psb->size_hbin);
}
jw_end(jw);
}
jw_end(jw);
}
Expand Down

0 comments on commit 3c9cfb8

Please sign in to comment.