[thirdparty/git.git] / chunk-format.c

#include "git-compat-util.h"
#include "alloc.h"
#include "chunk-format.h"
#include "csum-file.h"
#include "gettext.h"
#include "hash.h"
#include "trace2.h"

/*
 * Bookkeeping for a single chunk of a chunk-based file.
 *
 * When writing a chunk-based file format, collect the chunks in
 * an array of chunk_info structs. The size stores the _expected_
 * amount of data that will be written by write_fn.
 *
 * When reading, read_table_of_contents() fills in id, start and size
 * from the on-disk table of contents and does not touch write_fn.
 */
struct chunk_info {
	/* Chunk identifier, as stored in the table of contents. */
	uint32_t id;
	/*
	 * Length of the chunk in bytes: the caller-declared length on the
	 * write path (checked by write_chunkfile()), or the distance to the
	 * next table-of-contents offset on the read path.
	 */
	uint64_t size;
	/* Callback that emits the chunk's data; assigned by add_chunk(). */
	chunk_write_fn write_fn;

	/*
	 * First byte of the chunk inside the caller's file buffer; assigned
	 * only by read_table_of_contents() and handed to read callbacks.
	 */
	const void *start;
};

/*
 * State for writing or reading one chunk-based file: the output stream
 * (used by write_chunkfile()) plus a growable array of chunk records.
 */
struct chunkfile {
	/* Stream passed to init_chunkfile(); write_chunkfile() writes to it. */
	struct hashfile *f;

	/* Array of chunk records, grown with ALLOC_GROW(). */
	struct chunk_info *chunks;
	/* Number of entries of chunks[] currently in use. */
	size_t chunks_nr;
	/* Allocated capacity of chunks[], maintained by ALLOC_GROW(). */
	size_t chunks_alloc;
};

/*
 * Allocate a zero-initialized chunkfile and attach the hashfile `f` to
 * it. The chunk list starts out empty. Release the result with
 * free_chunkfile().
 */
struct chunkfile *init_chunkfile(struct hashfile *f)
{
	struct chunkfile *fresh;

	fresh = xcalloc(1, sizeof(*fresh));
	fresh->f = f;

	return fresh;
}

/*
 * Release a chunkfile and its chunk array. Passing NULL is a no-op.
 * The attached hashfile (cf->f) is not touched here.
 */
void free_chunkfile(struct chunkfile *cf)
{
	if (cf) {
		free(cf->chunks);
		free(cf);
	}
}

/*
 * Return how many chunks are currently recorded in `cf`.
 * The size_t counter is narrowed to int for the caller.
 */
int get_num_chunks(struct chunkfile *cf)
{
	int count = cf->chunks_nr;

	return count;
}

/*
 * Append a chunk to the list that write_chunkfile() will emit.
 *
 *   id   - identifier stored in the table of contents
 *   size - number of bytes `fn` is expected to write
 *   fn   - callback that writes the chunk's data
 *
 * The new record's `start` member is not assigned here.
 */
void add_chunk(struct chunkfile *cf,
	       uint32_t id,
	       size_t size,
	       chunk_write_fn fn)
{
	struct chunk_info *slot;

	ALLOC_GROW(cf->chunks, cf->chunks_nr + 1, cf->chunks_alloc);

	slot = &cf->chunks[cf->chunks_nr];
	slot->id = id;
	slot->write_fn = fn;
	slot->size = size;

	cf->chunks_nr++;
}

/*
 * Write the table of contents and then every chunk registered with
 * add_chunk() to cf->f.
 *
 * The table of contents holds one (32-bit id, 64-bit offset) entry per
 * chunk, followed by a terminating entry with id 0 whose offset marks
 * the end of the last chunk. Offsets are derived from the stream
 * position on entry plus the sizes declared through add_chunk().
 *
 * `data` is handed unchanged to each chunk's write_fn.
 *
 * Returns 0 on success, or the first non-zero value returned by a
 * write_fn (remaining chunks are then skipped). Calls BUG() when a
 * write_fn emits a different number of bytes than was declared.
 */
int write_chunkfile(struct chunkfile *cf, void *data)
{
	size_t i;
	int result = 0;
	uint64_t cur_offset = hashfile_total(cf->f);

	trace2_region_enter("chunkfile", "write", the_repository);

	/* Add the table of contents to the current offset */
	cur_offset += (cf->chunks_nr + 1) * CHUNK_TOC_ENTRY_SIZE;

	for (i = 0; i < cf->chunks_nr; i++) {
		hashwrite_be32(cf->f, cf->chunks[i].id);
		hashwrite_be64(cf->f, cur_offset);

		cur_offset += cf->chunks[i].size;
	}

	/* Trailing entry marks the end of the chunks */
	hashwrite_be32(cf->f, 0);
	hashwrite_be64(cf->f, cur_offset);

	for (i = 0; i < cf->chunks_nr; i++) {
		off_t start_offset = hashfile_total(cf->f);
		uint64_t written;

		result = cf->chunks[i].write_fn(cf->f, data);

		if (result)
			goto cleanup;

		/*
		 * Convert the byte count to the same unsigned 64-bit type as
		 * the declared size so that both the comparison and the
		 * format specifiers below match their operands exactly.
		 */
		written = (uint64_t)(hashfile_total(cf->f) - start_offset);

		if (written != cf->chunks[i].size)
			BUG("expected to write %"PRIu64" bytes to chunk %"PRIx32", but wrote %"PRIu64" instead",
			    cf->chunks[i].size, cf->chunks[i].id,
			    written);
	}

cleanup:
	trace2_region_leave("chunkfile", "write", the_repository);
	return result;
}

int read_table_of_contents(struct chunkfile *cf,
			   const unsigned char *mfile,
			   size_t mfile_size,
			   uint64_t toc_offset,
			   int toc_length)
{
	int i;
	uint32_t chunk_id;
	const unsigned char *table_of_contents = mfile + toc_offset;

	ALLOC_GROW(cf->chunks, toc_length, cf->chunks_alloc);

	while (toc_length--) {
		uint64_t chunk_offset, next_chunk_offset;

		chunk_id = get_be32(table_of_contents);
		chunk_offset = get_be64(table_of_contents + 4);

		if (!chunk_id) {
			error(_("terminating chunk id appears earlier than expected"));
			return 1;
		}

		table_of_contents += CHUNK_TOC_ENTRY_SIZE;
		next_chunk_offset = get_be64(table_of_contents + 4);

		if (next_chunk_offset < chunk_offset ||
		    next_chunk_offset > mfile_size - the_hash_algo->rawsz) {
			error(_("improper chunk offset(s) %"PRIx64" and %"PRIx64""),
			      chunk_offset, next_chunk_offset);
			return -1;
		}

		for (i = 0; i < cf->chunks_nr; i++) {
			if (cf->chunks[i].id == chunk_id) {
				error(_("duplicate chunk ID %"PRIx32" found"),
					chunk_id);
				return -1;
			}
		}

		cf->chunks[cf->chunks_nr].id = chunk_id;
		cf->chunks[cf->chunks_nr].start = mfile + chunk_offset;
		cf->chunks[cf->chunks_nr].size = next_chunk_offset - chunk_offset;
		cf->chunks_nr++;
	}

	chunk_id = get_be32(table_of_contents);
	if (chunk_id) {
		error(_("final chunk has non-zero id %"PRIx32""), chunk_id);
		return -1;
	}

	return 0;
}

/*
 * read_chunk() callback used by pair_chunk(): `data` points at a
 * `const unsigned char *` which is set to the start of the chunk.
 * The chunk size is ignored. Always returns 0.
 */
static int pair_chunk_fn(const unsigned char *chunk_start,
			 size_t chunk_size,
			 void *data)
{
	const unsigned char **dest = data;

	dest[0] = chunk_start;

	return 0;
}

/*
 * Find the chunk with id `chunk_id` and store a pointer to the start of
 * its data in *p. The chunk's size is not reported to the caller.
 *
 * Returns the result of read_chunk(): CHUNK_NOT_FOUND when no such
 * chunk is recorded (in which case *p is left untouched), otherwise the
 * value returned by pair_chunk_fn(), which is always 0.
 */
int pair_chunk(struct chunkfile *cf,
	       uint32_t chunk_id,
	       const unsigned char **p)
{
	int status;

	status = read_chunk(cf, chunk_id, pair_chunk_fn, p);

	return status;
}

/*
 * Search the recorded chunks for `chunk_id` and, on the first match,
 * invoke `fn` with the chunk's start pointer, its size and `data`.
 *
 * Returns the value returned by `fn`, or CHUNK_NOT_FOUND when no chunk
 * with that id is recorded (in which case `fn` is never called).
 */
int read_chunk(struct chunkfile *cf,
	       uint32_t chunk_id,
	       chunk_read_fn fn,
	       void *data)
{
	int pos = 0;

	while (pos < cf->chunks_nr) {
		const struct chunk_info *entry = &cf->chunks[pos];

		pos++;
		if (entry->id != chunk_id)
			continue;

		return fn(entry->start, entry->size, data);
	}

	return CHUNK_NOT_FOUND;
}

/*
 * Map a hash algorithm to the version number this file's callers use
 * for it: 1 for SHA-1, 2 for SHA-256. Any other algorithm ends in
 * die().
 */
uint8_t oid_version(const struct git_hash_algo *algop)
{
	uint8_t version;

	switch (hash_algo_by_ptr(algop)) {
	case GIT_HASH_SHA1:
		version = 1;
		break;
	case GIT_HASH_SHA256:
		version = 2;
		break;
	default:
		die(_("invalid hash version"));
	}

	return version;
}
Commit	Line	Data
36bf1958 EN	1	#include "git-compat-util.h"
36bf1958 EN	2	#include "alloc.h"
570df426 DS	3	#include "chunk-format.h"
570df426 DS	4	#include "csum-file.h"
f394e093	5	#include "gettext.h"
d1cbe1e6	6	#include "hash.h"
ec2f0269	7	#include "trace2.h"
570df426 DS	8
	9	/*
	10	* When writing a chunk-based file format, collect the chunks in
	11	* an array of chunk_info structs. The size stores the _expected_
	12	* amount of data that will be written by write_fn.
	13	*/
	14	struct chunk_info {
	15	uint32_t id;
	16	uint64_t size;
	17	chunk_write_fn write_fn;
5f0879f5 DS	18
5f0879f5 DS	19	const void *start;
570df426 DS	20	};
	21
	22	struct chunkfile {
	23	struct hashfile *f;
	24
	25	struct chunk_info *chunks;
	26	size_t chunks_nr;
	27	size_t chunks_alloc;
	28	};
	29
	30	struct chunkfile init_chunkfile(struct hashfile f)
	31	{
	32	struct chunkfile cf = xcalloc(1, sizeof(cf));
	33	cf->f = f;
	34	return cf;
	35	}
	36
	37	void free_chunkfile(struct chunkfile *cf)
	38	{
	39	if (!cf)
	40	return;
	41	free(cf->chunks);
	42	free(cf);
	43	}
	44
	45	int get_num_chunks(struct chunkfile *cf)
	46	{
	47	return cf->chunks_nr;
	48	}
	49
	50	void add_chunk(struct chunkfile *cf,
	51	uint32_t id,
	52	size_t size,
	53	chunk_write_fn fn)
	54	{
	55	ALLOC_GROW(cf->chunks, cf->chunks_nr + 1, cf->chunks_alloc);
	56
	57	cf->chunks[cf->chunks_nr].id = id;
	58	cf->chunks[cf->chunks_nr].write_fn = fn;
	59	cf->chunks[cf->chunks_nr].size = size;
	60	cf->chunks_nr++;
	61	}
	62
	63	int write_chunkfile(struct chunkfile cf, void data)
	64	{
2ca245f8	65	int i, result = 0;
570df426 DS	66	uint64_t cur_offset = hashfile_total(cf->f);
570df426 DS	67
2ca245f8 DS	68	trace2_region_enter("chunkfile", "write", the_repository);
2ca245f8 DS	69
570df426 DS	70	/* Add the table of contents to the current offset */
	71	cur_offset += (cf->chunks_nr + 1) * CHUNK_TOC_ENTRY_SIZE;
	72
	73	for (i = 0; i < cf->chunks_nr; i++) {
	74	hashwrite_be32(cf->f, cf->chunks[i].id);
	75	hashwrite_be64(cf->f, cur_offset);
	76
	77	cur_offset += cf->chunks[i].size;
	78	}
	79
	80	/* Trailing entry marks the end of the chunks */
	81	hashwrite_be32(cf->f, 0);
	82	hashwrite_be64(cf->f, cur_offset);
	83
	84	for (i = 0; i < cf->chunks_nr; i++) {
	85	off_t start_offset = hashfile_total(cf->f);
2ca245f8	86	result = cf->chunks[i].write_fn(cf->f, data);
570df426 DS	87
570df426 DS	88	if (result)
2ca245f8	89	goto cleanup;
570df426 DS	90
	91	if (hashfile_total(cf->f) - start_offset != cf->chunks[i].size)
	92	BUG("expected to write %"PRId64" bytes to chunk %"PRIx32", but wrote %"PRId64" instead",
	93	cf->chunks[i].size, cf->chunks[i].id,
	94	hashfile_total(cf->f) - start_offset);
	95	}
	96
2ca245f8 DS	97	cleanup:
	98	trace2_region_leave("chunkfile", "write", the_repository);
	99	return result;
570df426	100	}
5f0879f5 DS	101
	102	int read_table_of_contents(struct chunkfile *cf,
	103	const unsigned char *mfile,
	104	size_t mfile_size,
	105	uint64_t toc_offset,
	106	int toc_length)
	107	{
5387fefa	108	int i;
5f0879f5 DS	109	uint32_t chunk_id;
	110	const unsigned char *table_of_contents = mfile + toc_offset;
	111
	112	ALLOC_GROW(cf->chunks, toc_length, cf->chunks_alloc);
	113
	114	while (toc_length--) {
	115	uint64_t chunk_offset, next_chunk_offset;
	116
	117	chunk_id = get_be32(table_of_contents);
	118	chunk_offset = get_be64(table_of_contents + 4);
	119
	120	if (!chunk_id) {
	121	error(_("terminating chunk id appears earlier than expected"));
	122	return 1;
	123	}
	124
	125	table_of_contents += CHUNK_TOC_ENTRY_SIZE;
	126	next_chunk_offset = get_be64(table_of_contents + 4);
	127
	128	if (next_chunk_offset < chunk_offset \|\|
	129	next_chunk_offset > mfile_size - the_hash_algo->rawsz) {
	130	error(_("improper chunk offset(s) %"PRIx64" and %"PRIx64""),
	131	chunk_offset, next_chunk_offset);
	132	return -1;
	133	}
	134
5387fefa DS	135	for (i = 0; i < cf->chunks_nr; i++) {
	136	if (cf->chunks[i].id == chunk_id) {
	137	error(_("duplicate chunk ID %"PRIx32" found"),
	138	chunk_id);
	139	return -1;
	140	}
	141	}
	142
5f0879f5 DS	143	cf->chunks[cf->chunks_nr].id = chunk_id;
	144	cf->chunks[cf->chunks_nr].start = mfile + chunk_offset;
	145	cf->chunks[cf->chunks_nr].size = next_chunk_offset - chunk_offset;
	146	cf->chunks_nr++;
	147	}
	148
	149	chunk_id = get_be32(table_of_contents);
	150	if (chunk_id) {
	151	error(_("final chunk has non-zero id %"PRIx32""), chunk_id);
	152	return -1;
	153	}
	154
	155	return 0;
	156	}
	157
	158	static int pair_chunk_fn(const unsigned char *chunk_start,
	159	size_t chunk_size,
	160	void *data)
	161	{
	162	const unsigned char **p = data;
	163	*p = chunk_start;
	164	return 0;
	165	}
	166
	167	int pair_chunk(struct chunkfile *cf,
	168	uint32_t chunk_id,
	169	const unsigned char **p)
	170	{
	171	return read_chunk(cf, chunk_id, pair_chunk_fn, p);
	172	}
	173
	174	int read_chunk(struct chunkfile *cf,
	175	uint32_t chunk_id,
	176	chunk_read_fn fn,
	177	void *data)
	178	{
	179	int i;
	180
	181	for (i = 0; i < cf->chunks_nr; i++) {
	182	if (cf->chunks[i].id == chunk_id)
	183	return fn(cf->chunks[i].start, cf->chunks[i].size, data);
	184	}
	185
	186	return CHUNK_NOT_FOUND;
	187	}
d9fef9d9 TB	188
	189	uint8_t oid_version(const struct git_hash_algo *algop)
	190	{
	191	switch (hash_algo_by_ptr(algop)) {
	192	case GIT_HASH_SHA1:
	193	return 1;
	194	case GIT_HASH_SHA256:
	195	return 2;
	196	default:
	197	die(_("invalid hash version"));
	198	}
	199	}