chunk-format.c

   1 #include "git-compat-util.h"
   2 #include "alloc.h"
   3 #include "chunk-format.h"
   4 #include "csum-file.h"
   5 #include "gettext.h"
   6 #include "hash.h"
   7 #include "trace2.h"
   8
   9 /*
  10  * When writing a chunk-based file format, collect the chunks in
  11  * an array of chunk_info structs. The size stores the _expected_
  12  * amount of data that will be written by write_fn.
  13  */
  14 struct chunk_info {
  15         uint32_t id;
  16         uint64_t size;
  17         chunk_write_fn write_fn;
  18
  19         const void *start;
  20 };
  21
  22 struct chunkfile {
  23         struct hashfile *f;
  24
  25         struct chunk_info *chunks;
  26         size_t chunks_nr;
  27         size_t chunks_alloc;
  28 };
  29
  30 struct chunkfile *init_chunkfile(struct hashfile *f)
  31 {
  32         struct chunkfile *cf = xcalloc(1, sizeof(*cf));
  33         cf->f = f;
  34         return cf;
  35 }
  36
  37 void free_chunkfile(struct chunkfile *cf)
  38 {
  39         if (!cf)
  40                 return;
  41         free(cf->chunks);
  42         free(cf);
  43 }
  44
  45 int get_num_chunks(struct chunkfile *cf)
  46 {
  47         return cf->chunks_nr;
  48 }
  49
  50 void add_chunk(struct chunkfile *cf,
  51                uint32_t id,
  52                size_t size,
  53                chunk_write_fn fn)
  54 {
  55         ALLOC_GROW(cf->chunks, cf->chunks_nr + 1, cf->chunks_alloc);
  56
  57         cf->chunks[cf->chunks_nr].id = id;
  58         cf->chunks[cf->chunks_nr].write_fn = fn;
  59         cf->chunks[cf->chunks_nr].size = size;
  60         cf->chunks_nr++;
  61 }
  62
  63 int write_chunkfile(struct chunkfile *cf, void *data)
  64 {
  65         int i, result = 0;
  66         uint64_t cur_offset = hashfile_total(cf->f);
  67
  68         trace2_region_enter("chunkfile", "write", the_repository);
  69
  70         /* Add the table of contents to the current offset */
  71         cur_offset += (cf->chunks_nr + 1) * CHUNK_TOC_ENTRY_SIZE;
  72
  73         for (i = 0; i < cf->chunks_nr; i++) {
  74                 hashwrite_be32(cf->f, cf->chunks[i].id);
  75                 hashwrite_be64(cf->f, cur_offset);
  76
  77                 cur_offset += cf->chunks[i].size;
  78         }
  79
  80         /* Trailing entry marks the end of the chunks */
  81         hashwrite_be32(cf->f, 0);
  82         hashwrite_be64(cf->f, cur_offset);
  83
  84         for (i = 0; i < cf->chunks_nr; i++) {
  85                 off_t start_offset = hashfile_total(cf->f);
  86                 result = cf->chunks[i].write_fn(cf->f, data);
  87
  88                 if (result)
  89                         goto cleanup;
  90
  91                 if (hashfile_total(cf->f) - start_offset != cf->chunks[i].size)
  92                         BUG("expected to write %"PRId64" bytes to chunk %"PRIx32", but wrote %"PRId64" instead",
  93                             cf->chunks[i].size, cf->chunks[i].id,
  94                             hashfile_total(cf->f) - start_offset);
  95         }
  96
  97 cleanup:
  98         trace2_region_leave("chunkfile", "write", the_repository);
  99         return result;
 100 }
 101
 102 int read_table_of_contents(struct chunkfile *cf,
 103                            const unsigned char *mfile,
 104                            size_t mfile_size,
 105                            uint64_t toc_offset,
 106                            int toc_length)
 107 {
 108         int i;
 109         uint32_t chunk_id;
 110         const unsigned char *table_of_contents = mfile + toc_offset;
 111
 112         ALLOC_GROW(cf->chunks, toc_length, cf->chunks_alloc);
 113
 114         while (toc_length--) {
 115                 uint64_t chunk_offset, next_chunk_offset;
 116
 117                 chunk_id = get_be32(table_of_contents);
 118                 chunk_offset = get_be64(table_of_contents + 4);
 119
 120                 if (!chunk_id) {
 121                         error(_("terminating chunk id appears earlier than expected"));
 122                         return 1;
 123                 }
 124
 125                 table_of_contents += CHUNK_TOC_ENTRY_SIZE;
 126                 next_chunk_offset = get_be64(table_of_contents + 4);
 127
 128                 if (next_chunk_offset < chunk_offset ||
 129                     next_chunk_offset > mfile_size - the_hash_algo->rawsz) {
 130                         error(_("improper chunk offset(s) %"PRIx64" and %"PRIx64""),
 131                               chunk_offset, next_chunk_offset);
 132                         return -1;
 133                 }
 134
 135                 for (i = 0; i < cf->chunks_nr; i++) {
 136                         if (cf->chunks[i].id == chunk_id) {
 137                                 error(_("duplicate chunk ID %"PRIx32" found"),
 138                                         chunk_id);
 139                                 return -1;
 140                         }
 141                 }
 142
 143                 cf->chunks[cf->chunks_nr].id = chunk_id;
 144                 cf->chunks[cf->chunks_nr].start = mfile + chunk_offset;
 145                 cf->chunks[cf->chunks_nr].size = next_chunk_offset - chunk_offset;
 146                 cf->chunks_nr++;
 147         }
 148
 149         chunk_id = get_be32(table_of_contents);
 150         if (chunk_id) {
 151                 error(_("final chunk has non-zero id %"PRIx32""), chunk_id);
 152                 return -1;
 153         }
 154
 155         return 0;
 156 }
 157
 158 static int pair_chunk_fn(const unsigned char *chunk_start,
 159                          size_t chunk_size,
 160                          void *data)
 161 {
 162         const unsigned char **p = data;
 163         *p = chunk_start;
 164         return 0;
 165 }
 166
 167 int pair_chunk(struct chunkfile *cf,
 168                uint32_t chunk_id,
 169                const unsigned char **p)
 170 {
 171         return read_chunk(cf, chunk_id, pair_chunk_fn, p);
 172 }
 173
 174 int read_chunk(struct chunkfile *cf,
 175                uint32_t chunk_id,
 176                chunk_read_fn fn,
 177                void *data)
 178 {
 179         int i;
 180
 181         for (i = 0; i < cf->chunks_nr; i++) {
 182                 if (cf->chunks[i].id == chunk_id)
 183                         return fn(cf->chunks[i].start, cf->chunks[i].size, data);
 184         }
 185
 186         return CHUNK_NOT_FOUND;
 187 }
 188
 189 uint8_t oid_version(const struct git_hash_algo *algop)
 190 {
 191         switch (hash_algo_by_ptr(algop)) {
 192         case GIT_HASH_SHA1:
 193                 return 1;
 194         case GIT_HASH_SHA256:
 195                 return 2;
 196         default:
 197                 die(_("invalid hash version"));
 198         }
 199 }