pack-write.c

   1 #include "cache.h"
   2 #include "pack.h"
   3 #include "csum-file.h"
   4 #include "remote.h"
   5
   6 void reset_pack_idx_option(struct pack_idx_option *opts)
   7 {
   8         memset(opts, 0, sizeof(*opts));
   9         opts->version = 2;
  10         opts->off32_limit = 0x7fffffff;
  11 }
  12
  13 static int sha1_compare(const void *_a, const void *_b)
  14 {
  15         struct pack_idx_entry *a = *(struct pack_idx_entry **)_a;
  16         struct pack_idx_entry *b = *(struct pack_idx_entry **)_b;
  17         return oidcmp(&a->oid, &b->oid);
  18 }
  19
  20 static int cmp_uint32(const void *a_, const void *b_)
  21 {
  22         uint32_t a = *((uint32_t *)a_);
  23         uint32_t b = *((uint32_t *)b_);
  24
  25         return (a < b) ? -1 : (a != b);
  26 }
  27
  28 static int need_large_offset(off_t offset, const struct pack_idx_option *opts)
  29 {
  30         uint32_t ofsval;
  31
  32         if ((offset >> 31) || (opts->off32_limit < offset))
  33                 return 1;
  34         if (!opts->anomaly_nr)
  35                 return 0;
  36         ofsval = offset;
  37         return !!bsearch(&ofsval, opts->anomaly, opts->anomaly_nr,
  38                          sizeof(ofsval), cmp_uint32);
  39 }
  40
  41 /*
  42  * The *sha1 contains the pack content SHA1 hash.
  43  * The objects array passed in will be sorted by SHA1 on exit.
  44  */
  45 const char *write_idx_file(const char *index_name, struct pack_idx_entry **objects,
  46                            int nr_objects, const struct pack_idx_option *opts,
  47                            const unsigned char *sha1)
  48 {
  49         struct hashfile *f;
  50         struct pack_idx_entry **sorted_by_sha, **list, **last;
  51         off_t last_obj_offset = 0;
  52         int i, fd;
  53         uint32_t index_version;
  54
  55         if (nr_objects) {
  56                 sorted_by_sha = objects;
  57                 list = sorted_by_sha;
  58                 last = sorted_by_sha + nr_objects;
  59                 for (i = 0; i < nr_objects; ++i) {
  60                         if (objects[i]->offset > last_obj_offset)
  61                                 last_obj_offset = objects[i]->offset;
  62                 }
  63                 QSORT(sorted_by_sha, nr_objects, sha1_compare);
  64         }
  65         else
  66                 sorted_by_sha = list = last = NULL;
  67
  68         if (opts->flags & WRITE_IDX_VERIFY) {
  69                 assert(index_name);
  70                 f = hashfd_check(index_name);
  71         } else {
  72                 if (!index_name) {
  73                         struct strbuf tmp_file = STRBUF_INIT;
  74                         fd = odb_mkstemp(&tmp_file, "pack/tmp_idx_XXXXXX");
  75                         index_name = strbuf_detach(&tmp_file, NULL);
  76                 } else {
  77                         unlink(index_name);
  78                         fd = open(index_name, O_CREAT|O_EXCL|O_WRONLY, 0600);
  79                         if (fd < 0)
  80                                 die_errno("unable to create '%s'", index_name);
  81                 }
  82                 f = hashfd(fd, index_name);
  83         }
  84
  85         /* if last object's offset is >= 2^31 we should use index V2 */
  86         index_version = need_large_offset(last_obj_offset, opts) ? 2 : opts->version;
  87
  88         /* index versions 2 and above need a header */
  89         if (index_version >= 2) {
  90                 struct pack_idx_header hdr;
  91                 hdr.idx_signature = htonl(PACK_IDX_SIGNATURE);
  92                 hdr.idx_version = htonl(index_version);
  93                 hashwrite(f, &hdr, sizeof(hdr));
  94         }
  95
  96         /*
  97          * Write the first-level table (the list is sorted,
  98          * but we use a 256-entry lookup to be able to avoid
  99          * having to do eight extra binary search iterations).
 100          */
 101         for (i = 0; i < 256; i++) {
 102                 struct pack_idx_entry **next = list;
 103                 while (next < last) {
 104                         struct pack_idx_entry *obj = *next;
 105                         if (obj->oid.hash[0] != i)
 106                                 break;
 107                         next++;
 108                 }
 109                 hashwrite_be32(f, next - sorted_by_sha);
 110                 list = next;
 111         }
 112
 113         /*
 114          * Write the actual SHA1 entries..
 115          */
 116         list = sorted_by_sha;
 117         for (i = 0; i < nr_objects; i++) {
 118                 struct pack_idx_entry *obj = *list++;
 119                 if (index_version < 2)
 120                         hashwrite_be32(f, obj->offset);
 121                 hashwrite(f, obj->oid.hash, the_hash_algo->rawsz);
 122                 if ((opts->flags & WRITE_IDX_STRICT) &&
 123                     (i && oideq(&list[-2]->oid, &obj->oid)))
 124                         die("The same object %s appears twice in the pack",
 125                             oid_to_hex(&obj->oid));
 126         }
 127
 128         if (index_version >= 2) {
 129                 unsigned int nr_large_offset = 0;
 130
 131                 /* write the crc32 table */
 132                 list = sorted_by_sha;
 133                 for (i = 0; i < nr_objects; i++) {
 134                         struct pack_idx_entry *obj = *list++;
 135                         hashwrite_be32(f, obj->crc32);
 136                 }
 137
 138                 /* write the 32-bit offset table */
 139                 list = sorted_by_sha;
 140                 for (i = 0; i < nr_objects; i++) {
 141                         struct pack_idx_entry *obj = *list++;
 142                         uint32_t offset;
 143
 144                         offset = (need_large_offset(obj->offset, opts)
 145                                   ? (0x80000000 | nr_large_offset++)
 146                                   : obj->offset);
 147                         hashwrite_be32(f, offset);
 148                 }
 149
 150                 /* write the large offset table */
 151                 list = sorted_by_sha;
 152                 while (nr_large_offset) {
 153                         struct pack_idx_entry *obj = *list++;
 154                         uint64_t offset = obj->offset;
 155
 156                         if (!need_large_offset(offset, opts))
 157                                 continue;
 158                         hashwrite_be64(f, offset);
 159                         nr_large_offset--;
 160                 }
 161         }
 162
 163         hashwrite(f, sha1, the_hash_algo->rawsz);
 164         finalize_hashfile(f, NULL, CSUM_HASH_IN_STREAM | CSUM_CLOSE |
 165                                     ((opts->flags & WRITE_IDX_VERIFY)
 166                                     ? 0 : CSUM_FSYNC));
 167         return index_name;
 168 }
 169
 170 off_t write_pack_header(struct hashfile *f, uint32_t nr_entries)
 171 {
 172         struct pack_header hdr;
 173
 174         hdr.hdr_signature = htonl(PACK_SIGNATURE);
 175         hdr.hdr_version = htonl(PACK_VERSION);
 176         hdr.hdr_entries = htonl(nr_entries);
 177         hashwrite(f, &hdr, sizeof(hdr));
 178         return sizeof(hdr);
 179 }
 180
 181 /*
 182  * Update pack header with object_count and compute new SHA1 for pack data
 183  * associated to pack_fd, and write that SHA1 at the end.  That new SHA1
 184  * is also returned in new_pack_sha1.
 185  *
 186  * If partial_pack_sha1 is non null, then the SHA1 of the existing pack
 187  * (without the header update) is computed and validated against the
 188  * one provided in partial_pack_sha1.  The validation is performed at
 189  * partial_pack_offset bytes in the pack file.  The SHA1 of the remaining
 190  * data (i.e. from partial_pack_offset to the end) is then computed and
 191  * returned in partial_pack_sha1.
 192  *
 193  * Note that new_pack_sha1 is updated last, so both new_pack_sha1 and
 194  * partial_pack_sha1 can refer to the same buffer if the caller is not
 195  * interested in the resulting SHA1 of pack data above partial_pack_offset.
 196  */
 197 void fixup_pack_header_footer(int pack_fd,
 198                          unsigned char *new_pack_hash,
 199                          const char *pack_name,
 200                          uint32_t object_count,
 201                          unsigned char *partial_pack_hash,
 202                          off_t partial_pack_offset)
 203 {
 204         int aligned_sz, buf_sz = 8 * 1024;
 205         git_hash_ctx old_hash_ctx, new_hash_ctx;
 206         struct pack_header hdr;
 207         char *buf;
 208         ssize_t read_result;
 209
 210         the_hash_algo->init_fn(&old_hash_ctx);
 211         the_hash_algo->init_fn(&new_hash_ctx);
 212
 213         if (lseek(pack_fd, 0, SEEK_SET) != 0)
 214                 die_errno("Failed seeking to start of '%s'", pack_name);
 215         read_result = read_in_full(pack_fd, &hdr, sizeof(hdr));
 216         if (read_result < 0)
 217                 die_errno("Unable to reread header of '%s'", pack_name);
 218         else if (read_result != sizeof(hdr))
 219                 die_errno("Unexpected short read for header of '%s'",
 220                           pack_name);
 221         if (lseek(pack_fd, 0, SEEK_SET) != 0)
 222                 die_errno("Failed seeking to start of '%s'", pack_name);
 223         the_hash_algo->update_fn(&old_hash_ctx, &hdr, sizeof(hdr));
 224         hdr.hdr_entries = htonl(object_count);
 225         the_hash_algo->update_fn(&new_hash_ctx, &hdr, sizeof(hdr));
 226         write_or_die(pack_fd, &hdr, sizeof(hdr));
 227         partial_pack_offset -= sizeof(hdr);
 228
 229         buf = xmalloc(buf_sz);
 230         aligned_sz = buf_sz - sizeof(hdr);
 231         for (;;) {
 232                 ssize_t m, n;
 233                 m = (partial_pack_hash && partial_pack_offset < aligned_sz) ?
 234                         partial_pack_offset : aligned_sz;
 235                 n = xread(pack_fd, buf, m);
 236                 if (!n)
 237                         break;
 238                 if (n < 0)
 239                         die_errno("Failed to checksum '%s'", pack_name);
 240                 the_hash_algo->update_fn(&new_hash_ctx, buf, n);
 241
 242                 aligned_sz -= n;
 243                 if (!aligned_sz)
 244                         aligned_sz = buf_sz;
 245
 246                 if (!partial_pack_hash)
 247                         continue;
 248
 249                 the_hash_algo->update_fn(&old_hash_ctx, buf, n);
 250                 partial_pack_offset -= n;
 251                 if (partial_pack_offset == 0) {
 252                         unsigned char hash[GIT_MAX_RAWSZ];
 253                         the_hash_algo->final_fn(hash, &old_hash_ctx);
 254                         if (!hasheq(hash, partial_pack_hash))
 255                                 die("Unexpected checksum for %s "
 256                                     "(disk corruption?)", pack_name);
 257                         /*
 258                          * Now let's compute the SHA1 of the remainder of the
 259                          * pack, which also means making partial_pack_offset
 260                          * big enough not to matter anymore.
 261                          */
 262                         the_hash_algo->init_fn(&old_hash_ctx);
 263                         partial_pack_offset = ~partial_pack_offset;
 264                         partial_pack_offset -= MSB(partial_pack_offset, 1);
 265                 }
 266         }
 267         free(buf);
 268
 269         if (partial_pack_hash)
 270                 the_hash_algo->final_fn(partial_pack_hash, &old_hash_ctx);
 271         the_hash_algo->final_fn(new_pack_hash, &new_hash_ctx);
 272         write_or_die(pack_fd, new_pack_hash, the_hash_algo->rawsz);
 273         fsync_or_die(pack_fd, pack_name);
 274 }
 275
 276 char *index_pack_lockfile(int ip_out)
 277 {
 278         char packname[GIT_MAX_HEXSZ + 6];
 279         const int len = the_hash_algo->hexsz + 6;
 280
 281         /*
 282          * The first thing we expect from index-pack's output
 283          * is "pack\t%40s\n" or "keep\t%40s\n" (46 bytes) where
 284          * %40s is the newly created pack SHA1 name.  In the "keep"
 285          * case, we need it to remove the corresponding .keep file
 286          * later on.  If we don't get that then tough luck with it.
 287          */
 288         if (read_in_full(ip_out, packname, len) == len && packname[len-1] == '\n') {
 289                 const char *name;
 290                 packname[len-1] = 0;
 291                 if (skip_prefix(packname, "keep\t", &name))
 292                         return xstrfmt("%s/pack/pack-%s.keep",
 293                                        get_object_directory(), name);
 294         }
 295         return NULL;
 296 }
 297
 298 /*
 299  * The per-object header is a pretty dense thing, which is
 300  *  - first byte: low four bits are "size", then three bits of "type",
 301  *    and the high bit is "size continues".
 302  *  - each byte afterwards: low seven bits are size continuation,
 303  *    with the high bit being "size continues"
 304  */
 305 int encode_in_pack_object_header(unsigned char *hdr, int hdr_len,
 306                                  enum object_type type, uintmax_t size)
 307 {
 308         int n = 1;
 309         unsigned char c;
 310
 311         if (type < OBJ_COMMIT || type > OBJ_REF_DELTA)
 312                 die("bad type %d", type);
 313
 314         c = (type << 4) | (size & 15);
 315         size >>= 4;
 316         while (size) {
 317                 if (n == hdr_len)
 318                         die("object size is too enormous to format");
 319                 *hdr++ = c | 0x80;
 320                 c = size & 0x7f;
 321                 size >>= 7;
 322                 n++;
 323         }
 324         *hdr = c;
 325         return n;
 326 }
 327
 328 struct hashfile *create_tmp_packfile(char **pack_tmp_name)
 329 {
 330         struct strbuf tmpname = STRBUF_INIT;
 331         int fd;
 332
 333         fd = odb_mkstemp(&tmpname, "pack/tmp_pack_XXXXXX");
 334         *pack_tmp_name = strbuf_detach(&tmpname, NULL);
 335         return hashfd(fd, *pack_tmp_name);
 336 }
 337
 338 void finish_tmp_packfile(struct strbuf *name_buffer,
 339                          const char *pack_tmp_name,
 340                          struct pack_idx_entry **written_list,
 341                          uint32_t nr_written,
 342                          struct pack_idx_option *pack_idx_opts,
 343                          unsigned char hash[])
 344 {
 345         const char *idx_tmp_name;
 346         int basename_len = name_buffer->len;
 347
 348         if (adjust_shared_perm(pack_tmp_name))
 349                 die_errno("unable to make temporary pack file readable");
 350
 351         idx_tmp_name = write_idx_file(NULL, written_list, nr_written,
 352                                       pack_idx_opts, hash);
 353         if (adjust_shared_perm(idx_tmp_name))
 354                 die_errno("unable to make temporary index file readable");
 355
 356         strbuf_addf(name_buffer, "%s.pack", hash_to_hex(hash));
 357
 358         if (rename(pack_tmp_name, name_buffer->buf))
 359                 die_errno("unable to rename temporary pack file");
 360
 361         strbuf_setlen(name_buffer, basename_len);
 362
 363         strbuf_addf(name_buffer, "%s.idx", hash_to_hex(hash));
 364         if (rename(idx_tmp_name, name_buffer->buf))
 365                 die_errno("unable to rename temporary index file");
 366
 367         strbuf_setlen(name_buffer, basename_len);
 368
 369         free((void *)idx_tmp_name);
 370 }
 371
 372 void write_promisor_file(const char *promisor_name, struct ref **sought, int nr_sought)
 373 {
 374         int i, err;
 375         FILE *output = xfopen(promisor_name, "w");
 376
 377         for (i = 0; i < nr_sought; i++)
 378                 fprintf(output, "%s %s\n", oid_to_hex(&sought[i]->old_oid),
 379                         sought[i]->name);
 380
 381         err = ferror(output);
 382         err |= fclose(output);
 383         if (err)
 384                 die(_("could not write '%s' promisor file"), promisor_name);
 385 }