Skip to content

Commit

Permalink
chunk-format: create read chunk API
Browse files Browse the repository at this point in the history
Add the capability to read the table of contents, then pair the chunks
with necessary logic using read_chunk_fn pointers. Callers will be added
in future changes, but the typical outline will be:

 1. initialize a 'struct chunkfile' with init_chunkfile(NULL).
 2. call read_table_of_contents().
 3. for each chunk to parse,
    a. call pair_chunk() to assign a pointer with the chunk position, or
    b. call read_chunk() to run a callback on the chunk start and size.
 4. call free_chunkfile() to clear the 'struct chunkfile' data.

We are re-using the anonymous 'struct chunkfile' data, as it is internal
to the chunk-format API. This gives it essentially two modes: write and
read. If the same struct instance was used for both reads and writes,
then there would be failures.

Helped-by: Junio C Hamano <[email protected]>
Signed-off-by: Derrick Stolee <[email protected]>
Signed-off-by: Junio C Hamano <[email protected]>
  • Loading branch information
derrickstolee authored and gitster committed Feb 18, 2021
1 parent 63a8f0e commit 5f0879f
Show file tree
Hide file tree
Showing 2 changed files with 127 additions and 0 deletions.
80 changes: 80 additions & 0 deletions chunk-format.c
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@ struct chunk_info {
uint32_t id;
uint64_t size;
chunk_write_fn write_fn;

const void *start;
};

struct chunkfile {
Expand Down Expand Up @@ -88,3 +90,81 @@ int write_chunkfile(struct chunkfile *cf, void *data)

return 0;
}

int read_table_of_contents(struct chunkfile *cf,
const unsigned char *mfile,
size_t mfile_size,
uint64_t toc_offset,
int toc_length)
{
uint32_t chunk_id;
const unsigned char *table_of_contents = mfile + toc_offset;

ALLOC_GROW(cf->chunks, toc_length, cf->chunks_alloc);

while (toc_length--) {
uint64_t chunk_offset, next_chunk_offset;

chunk_id = get_be32(table_of_contents);
chunk_offset = get_be64(table_of_contents + 4);

if (!chunk_id) {
error(_("terminating chunk id appears earlier than expected"));
return 1;
}

table_of_contents += CHUNK_TOC_ENTRY_SIZE;
next_chunk_offset = get_be64(table_of_contents + 4);

if (next_chunk_offset < chunk_offset ||
next_chunk_offset > mfile_size - the_hash_algo->rawsz) {
error(_("improper chunk offset(s) %"PRIx64" and %"PRIx64""),
chunk_offset, next_chunk_offset);
return -1;
}

cf->chunks[cf->chunks_nr].id = chunk_id;
cf->chunks[cf->chunks_nr].start = mfile + chunk_offset;
cf->chunks[cf->chunks_nr].size = next_chunk_offset - chunk_offset;
cf->chunks_nr++;
}

chunk_id = get_be32(table_of_contents);
if (chunk_id) {
error(_("final chunk has non-zero id %"PRIx32""), chunk_id);
return -1;
}

return 0;
}

static int pair_chunk_fn(const unsigned char *chunk_start,
size_t chunk_size,
void *data)
{
const unsigned char **p = data;
*p = chunk_start;
return 0;
}

int pair_chunk(struct chunkfile *cf,
uint32_t chunk_id,
const unsigned char **p)
{
return read_chunk(cf, chunk_id, pair_chunk_fn, p);
}

int read_chunk(struct chunkfile *cf,
uint32_t chunk_id,
chunk_read_fn fn,
void *data)
{
int i;

for (i = 0; i < cf->chunks_nr; i++) {
if (cf->chunks[i].id == chunk_id)
return fn(cf->chunks[i].start, cf->chunks[i].size, data);
}

return CHUNK_NOT_FOUND;
}
47 changes: 47 additions & 0 deletions chunk-format.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,20 @@ struct chunkfile;

#define CHUNK_TOC_ENTRY_SIZE (sizeof(uint32_t) + sizeof(uint64_t))

/*
* Initialize a 'struct chunkfile' for writing _or_ reading a file
* with the chunk format.
*
* If writing a file, supply a non-NULL 'struct hashfile *' that will
* be used to write.
*
* If reading a file, use a NULL 'struct hashfile *' and then call
* read_table_of_contents(). Supply the memory-mapped data to the
* pair_chunk() or read_chunk() methods, as appropriate.
*
* DO NOT MIX THESE MODES. Use different 'struct chunkfile' instances
* for reading and writing.
*/
struct chunkfile *init_chunkfile(struct hashfile *f);
void free_chunkfile(struct chunkfile *cf);
int get_num_chunks(struct chunkfile *cf);
Expand All @@ -18,4 +32,37 @@ void add_chunk(struct chunkfile *cf,
chunk_write_fn fn);
int write_chunkfile(struct chunkfile *cf, void *data);

int read_table_of_contents(struct chunkfile *cf,
const unsigned char *mfile,
size_t mfile_size,
uint64_t toc_offset,
int toc_length);

#define CHUNK_NOT_FOUND (-2)

/*
* Find 'chunk_id' in the given chunkfile and assign the
* given pointer to the position in the mmap'd file where
* that chunk begins.
*
* Returns CHUNK_NOT_FOUND if the chunk does not exist.
*/
int pair_chunk(struct chunkfile *cf,
uint32_t chunk_id,
const unsigned char **p);

typedef int (*chunk_read_fn)(const unsigned char *chunk_start,
size_t chunk_size, void *data);
/*
* Find 'chunk_id' in the given chunkfile and call the
* given chunk_read_fn method with the information for
* that chunk.
*
* Returns CHUNK_NOT_FOUND if the chunk does not exist.
*/
int read_chunk(struct chunkfile *cf,
uint32_t chunk_id,
chunk_read_fn fn,
void *data);

#endif

0 comments on commit 5f0879f

Please sign in to comment.