builtin/pack-objects.c

   1 #include "builtin.h"
   2 #include "alloc.h"
   3 #include "environment.h"
   4 #include "gettext.h"
   5 #include "hex.h"
   6 #include "repository.h"
   7 #include "config.h"
   8 #include "attr.h"
   9 #include "object.h"
  10 #include "blob.h"
  11 #include "commit.h"
  12 #include "tag.h"
  13 #include "tree.h"
  14 #include "delta.h"
  15 #include "pack.h"
  16 #include "pack-revindex.h"
  17 #include "csum-file.h"
  18 #include "tree-walk.h"
  19 #include "diff.h"
  20 #include "revision.h"
  21 #include "list-objects.h"
  22 #include "list-objects-filter.h"
  23 #include "list-objects-filter-options.h"
  24 #include "pack-objects.h"
  25 #include "progress.h"
  26 #include "refs.h"
  27 #include "streaming.h"
  28 #include "thread-utils.h"
  29 #include "pack-bitmap.h"
  30 #include "delta-islands.h"
  31 #include "reachable.h"
  32 #include "oid-array.h"
  33 #include "strvec.h"
  34 #include "list.h"
  35 #include "packfile.h"
  36 #include "object-file.h"
  37 #include "object-store.h"
  38 #include "replace-object.h"
  39 #include "dir.h"
  40 #include "midx.h"
  41 #include "trace2.h"
  42 #include "shallow.h"
  43 #include "promisor-remote.h"
  44 #include "pack-mtimes.h"
  45 #include "parse-options.h"
  46 #include "wrapper.h"
  47
/*
 * Objects we are going to pack are collected in the `to_pack` structure.
 * It contains an array (dynamically expanded) of the object data, and a map
 * that can resolve object IDs to their position in the array (see the
 * packlist_find() calls in this file).
 */
static struct packing_data to_pack;
  54
  55 static inline struct object_entry *oe_delta(
  56                 const struct packing_data *pack,
  57                 const struct object_entry *e)
  58 {
  59         if (!e->delta_idx)
  60                 return NULL;
  61         if (e->ext_base)
  62                 return &pack->ext_bases[e->delta_idx - 1];
  63         else
  64                 return &pack->objects[e->delta_idx - 1];
  65 }
  66
  67 static inline unsigned long oe_delta_size(struct packing_data *pack,
  68                                           const struct object_entry *e)
  69 {
  70         if (e->delta_size_valid)
  71                 return e->delta_size_;
  72
  73         /*
  74          * pack->delta_size[] can't be NULL because oe_set_delta_size()
  75          * must have been called when a new delta is saved with
  76          * oe_set_delta().
  77          * If oe_delta() returns NULL (i.e. default state, which means
  78          * delta_size_valid is also false), then the caller must never
  79          * call oe_delta_size().
  80          */
  81         return pack->delta_size[e - pack->objects];
  82 }
  83
  84 unsigned long oe_get_size_slow(struct packing_data *pack,
  85                                const struct object_entry *e);
  86
  87 static inline unsigned long oe_size(struct packing_data *pack,
  88                                     const struct object_entry *e)
  89 {
  90         if (e->size_valid)
  91                 return e->size_;
  92
  93         return oe_get_size_slow(pack, e);
  94 }
  95
  96 static inline void oe_set_delta(struct packing_data *pack,
  97                                 struct object_entry *e,
  98                                 struct object_entry *delta)
  99 {
 100         if (delta)
 101                 e->delta_idx = (delta - pack->objects) + 1;
 102         else
 103                 e->delta_idx = 0;
 104 }
 105
 106 static inline struct object_entry *oe_delta_sibling(
 107                 const struct packing_data *pack,
 108                 const struct object_entry *e)
 109 {
 110         if (e->delta_sibling_idx)
 111                 return &pack->objects[e->delta_sibling_idx - 1];
 112         return NULL;
 113 }
 114
 115 static inline struct object_entry *oe_delta_child(
 116                 const struct packing_data *pack,
 117                 const struct object_entry *e)
 118 {
 119         if (e->delta_child_idx)
 120                 return &pack->objects[e->delta_child_idx - 1];
 121         return NULL;
 122 }
 123
 124 static inline void oe_set_delta_child(struct packing_data *pack,
 125                                       struct object_entry *e,
 126                                       struct object_entry *delta)
 127 {
 128         if (delta)
 129                 e->delta_child_idx = (delta - pack->objects) + 1;
 130         else
 131                 e->delta_child_idx = 0;
 132 }
 133
 134 static inline void oe_set_delta_sibling(struct packing_data *pack,
 135                                         struct object_entry *e,
 136                                         struct object_entry *delta)
 137 {
 138         if (delta)
 139                 e->delta_sibling_idx = (delta - pack->objects) + 1;
 140         else
 141                 e->delta_sibling_idx = 0;
 142 }
 143
 144 static inline void oe_set_size(struct packing_data *pack,
 145                                struct object_entry *e,
 146                                unsigned long size)
 147 {
 148         if (size < pack->oe_size_limit) {
 149                 e->size_ = size;
 150                 e->size_valid = 1;
 151         } else {
 152                 e->size_valid = 0;
 153                 if (oe_get_size_slow(pack, e) != size)
 154                         BUG("'size' is supposed to be the object size!");
 155         }
 156 }
 157
 158 static inline void oe_set_delta_size(struct packing_data *pack,
 159                                      struct object_entry *e,
 160                                      unsigned long size)
 161 {
 162         if (size < pack->oe_delta_size_limit) {
 163                 e->delta_size_ = size;
 164                 e->delta_size_valid = 1;
 165         } else {
 166                 packing_data_lock(pack);
 167                 if (!pack->delta_size)
 168                         ALLOC_ARRAY(pack->delta_size, pack->nr_alloc);
 169                 packing_data_unlock(pack);
 170
 171                 pack->delta_size[e - pack->objects] = size;
 172                 e->delta_size_valid = 0;
 173         }
 174 }
 175
/*
 * Shorthands that apply the oe_*() accessors to the file-wide
 * `to_pack` instance, so callers only have to name the entry.
 * oe_in_pack() and oe_set_delta_ext() are not defined in this part of
 * the file.
 */
#define IN_PACK(obj) oe_in_pack(&to_pack, obj)
#define SIZE(obj) oe_size(&to_pack, obj)
#define SET_SIZE(obj,size) oe_set_size(&to_pack, obj, size)
#define DELTA_SIZE(obj) oe_delta_size(&to_pack, obj)
#define DELTA(obj) oe_delta(&to_pack, obj)
#define DELTA_CHILD(obj) oe_delta_child(&to_pack, obj)
#define DELTA_SIBLING(obj) oe_delta_sibling(&to_pack, obj)
#define SET_DELTA(obj, val) oe_set_delta(&to_pack, obj, val)
#define SET_DELTA_EXT(obj, oid) oe_set_delta_ext(&to_pack, obj, oid)
#define SET_DELTA_SIZE(obj, val) oe_set_delta_size(&to_pack, obj, val)
#define SET_DELTA_CHILD(obj, val) oe_set_delta_child(&to_pack, obj, val)
#define SET_DELTA_SIBLING(obj, val) oe_set_delta_sibling(&to_pack, obj, val)
 188
/*
 * NULL-terminated list of usage synopsis lines: one for the --stdout
 * form and one for the <base-name> form of the command.
 */
static const char *pack_usage[] = {
        N_("git pack-objects --stdout [<options>] [< <ref-list> | < <object-list>]"),
        N_("git pack-objects [<options>] <base-name> [< <ref-list> | < <object-list>]"),
        NULL
};
 194
/*
 * written_list collects the index entries of objects as they are
 * written; write_one() appends to it and bumps nr_written.
 */
static struct pack_idx_entry **written_list;
static uint32_t nr_result, nr_written, nr_seen;
static struct bitmap_index *bitmap_git;
/* add_to_write_order() only accepts entries whose layer equals this */
static uint32_t write_layer;

static int non_empty;
/* write_object() never reuses existing pack data when reuse_object is 0 */
static int reuse_delta = 1, reuse_object = 1;
static int keep_unreachable, unpack_unreachable, include_tag;
static timestamp_t unpack_unreachable_expiration;
static int pack_loose_unreachable;
static int cruft;
static timestamp_t cruft_expiration;
static int local;
static int have_non_local_packs;
static int incremental;
static int ignore_packed_keep_on_disk;
static int ignore_packed_keep_in_core;
/*
 * When set, a delta whose base already has an offset is written as
 * OBJ_OFS_DELTA instead of OBJ_REF_DELTA.
 */
static int allow_ofs_delta;
static struct pack_idx_option pack_idx_opts;
static const char *base_name;
static int progress = 1;
static int window = 10;
/* zero means "no limit"; see write_object() */
static unsigned long pack_size_limit;
static int depth = 50;
static int delta_search_threads;
/*
 * When set, write_object() does not compute per-object CRCs and
 * write_reuse_object() skips its integrity checks on reused data.
 */
static int pack_to_stdout;
static int sparse;
static int thin;
static int num_preferred_base;
static struct progress *progress_state;

static struct packed_git *reuse_packfile;
static uint32_t reuse_packfile_objects;
static struct bitmap *reuse_packfile_bitmap;

static int use_bitmap_index_default = 1;
/* NOTE(review): -1 presumably means "not decided yet" -- confirm in the option handling */
static int use_bitmap_index = -1;
static int allow_pack_reuse = 1;
static enum {
        WRITE_BITMAP_FALSE = 0,
        WRITE_BITMAP_QUIET,
        WRITE_BITMAP_TRUE,
} write_bitmap_index;
static uint16_t write_bitmap_options = BITMAP_OPT_HASH_CACHE;

static int exclude_promisor_objects;

static int use_delta_islands;

static unsigned long delta_cache_size = 0;
static unsigned long max_delta_cache_size = DEFAULT_DELTA_CACHE_SIZE;
static unsigned long cache_max_small_delta_size = 1000;

static unsigned long window_memory_limit = 0;

static struct string_list uri_protocols = STRING_LIST_INIT_NODUP;

/* What to do about objects that turn out to be missing. */
enum missing_action {
        MA_ERROR = 0,      /* fail if any missing objects are encountered */
        MA_ALLOW_ANY,      /* silently allow ALL missing objects */
        MA_ALLOW_PROMISOR, /* silently allow all missing PROMISOR objects */
};
static enum missing_action arg_missing_action;
static show_object_fn fn_show_object;
 259
/*
 * An oidmap entry carrying a pack hash (as hex) and a URI.
 * NOTE(review): presumably describes an object excluded from the pack
 * in favor of a packfile URI -- confirm against the code that fills
 * configured_exclusions, which is not in this part of the file.
 */
struct configured_exclusion {
        struct oidmap_entry e;
        char *pack_hash_hex;
        char *uri;
};
static struct oidmap configured_exclusions;

static struct oidset excluded_by_config;

/*
 * stats
 *
 * write_object() bumps `written` for every object it writes and
 * `written_delta` when that object went out as a delta;
 * write_reuse_object() bumps `reused` and `reused_delta` likewise for
 * data copied from an existing pack.
 */
static uint32_t written, written_delta;
static uint32_t reused, reused_delta;

/*
 * Indexed commits
 *
 * Growable array filled by index_commit_for_bitmap(): `_nr` is the
 * number of slots in use, `_alloc` the number allocated.
 */
static struct commit **indexed_commits;
static unsigned int indexed_commits_nr;
static unsigned int indexed_commits_alloc;
 281
 282 static void index_commit_for_bitmap(struct commit *commit)
 283 {
 284         if (indexed_commits_nr >= indexed_commits_alloc) {
 285                 indexed_commits_alloc = (indexed_commits_alloc + 32) * 2;
 286                 REALLOC_ARRAY(indexed_commits, indexed_commits_alloc);
 287         }
 288
 289         indexed_commits[indexed_commits_nr++] = commit;
 290 }
 291
 292 static void *get_delta(struct object_entry *entry)
 293 {
 294         unsigned long size, base_size, delta_size;
 295         void *buf, *base_buf, *delta_buf;
 296         enum object_type type;
 297
 298         buf = repo_read_object_file(the_repository, &entry->idx.oid, &type,
 299                                     &size);
 300         if (!buf)
 301                 die(_("unable to read %s"), oid_to_hex(&entry->idx.oid));
 302         base_buf = repo_read_object_file(the_repository,
 303                                          &DELTA(entry)->idx.oid, &type,
 304                                          &base_size);
 305         if (!base_buf)
 306                 die("unable to read %s",
 307                     oid_to_hex(&DELTA(entry)->idx.oid));
 308         delta_buf = diff_delta(base_buf, base_size,
 309                                buf, size, &delta_size, 0);
 310         /*
 311          * We successfully computed this delta once but dropped it for
 312          * memory reasons. Something is very wrong if this time we
 313          * recompute and create a different delta.
 314          */
 315         if (!delta_buf || delta_size != DELTA_SIZE(entry))
 316                 BUG("delta size changed");
 317         free(buf);
 318         free(base_buf);
 319         return delta_buf;
 320 }
 321
 322 static unsigned long do_compress(void **pptr, unsigned long size)
 323 {
 324         git_zstream stream;
 325         void *in, *out;
 326         unsigned long maxsize;
 327
 328         git_deflate_init(&stream, pack_compression_level);
 329         maxsize = git_deflate_bound(&stream, size);
 330
 331         in = *pptr;
 332         out = xmalloc(maxsize);
 333         *pptr = out;
 334
 335         stream.next_in = in;
 336         stream.avail_in = size;
 337         stream.next_out = out;
 338         stream.avail_out = maxsize;
 339         while (git_deflate(&stream, Z_FINISH) == Z_OK)
 340                 ; /* nothing */
 341         git_deflate_end(&stream);
 342
 343         free(in);
 344         return stream.total_out;
 345 }
 346
/*
 * Deflate everything that can be read from the stream `st` and write
 * the compressed bytes to `f`, working through fixed 16 KiB input and
 * output buffers instead of holding the whole object in memory.
 *
 * Dies, naming `oid`, when reading from the stream fails, and dies on
 * a deflate error.  Returns the number of compressed bytes passed to
 * hashwrite().
 */
static unsigned long write_large_blob_data(struct git_istream *st, struct hashfile *f,
                                           const struct object_id *oid)
{
        git_zstream stream;
        unsigned char ibuf[1024 * 16];
        unsigned char obuf[1024 * 16];
        unsigned long olen = 0;

        git_deflate_init(&stream, pack_compression_level);

        for (;;) {
                ssize_t readlen;
                int zret = Z_OK;
                readlen = read_istream(st, ibuf, sizeof(ibuf));
                if (readlen == -1)
                        die(_("unable to read %s"), oid_to_hex(oid));

                stream.next_in = ibuf;
                stream.avail_in = readlen;
                /*
                 * Push this chunk through deflate, flushing obuf to `f`
                 * after every call.  A zero-length read is treated as
                 * end of input: deflate is then driven with Z_FINISH
                 * until it stops returning Z_OK / Z_BUF_ERROR.
                 */
                while ((stream.avail_in || readlen == 0) &&
                       (zret == Z_OK || zret == Z_BUF_ERROR)) {
                        stream.next_out = obuf;
                        stream.avail_out = sizeof(obuf);
                        zret = git_deflate(&stream, readlen ? 0 : Z_FINISH);
                        hashwrite(f, obuf, stream.next_out - obuf);
                        olen += stream.next_out - obuf;
                }
                /* input left over means the loop stopped on an error */
                if (stream.avail_in)
                        die(_("deflate error (%d)"), zret);
                if (readlen == 0) {
                        /* at end of input the stream must have finished */
                        if (zret != Z_STREAM_END)
                                die(_("deflate error (%d)"), zret);
                        break;
                }
        }
        git_deflate_end(&stream);
        return olen;
}
 385
 386 /*
 387  * we are going to reuse the existing object data as is.  make
 388  * sure it is not corrupt.
 389  */
 390 static int check_pack_inflate(struct packed_git *p,
 391                 struct pack_window **w_curs,
 392                 off_t offset,
 393                 off_t len,
 394                 unsigned long expect)
 395 {
 396         git_zstream stream;
 397         unsigned char fakebuf[4096], *in;
 398         int st;
 399
 400         memset(&stream, 0, sizeof(stream));
 401         git_inflate_init(&stream);
 402         do {
 403                 in = use_pack(p, w_curs, offset, &stream.avail_in);
 404                 stream.next_in = in;
 405                 stream.next_out = fakebuf;
 406                 stream.avail_out = sizeof(fakebuf);
 407                 st = git_inflate(&stream, Z_FINISH);
 408                 offset += stream.next_in - in;
 409         } while (st == Z_OK || st == Z_BUF_ERROR);
 410         git_inflate_end(&stream);
 411         return (st == Z_STREAM_END &&
 412                 stream.total_out == expect &&
 413                 stream.total_in == len) ? 0 : -1;
 414 }
 415
 416 static void copy_pack_data(struct hashfile *f,
 417                 struct packed_git *p,
 418                 struct pack_window **w_curs,
 419                 off_t offset,
 420                 off_t len)
 421 {
 422         unsigned char *in;
 423         unsigned long avail;
 424
 425         while (len) {
 426                 in = use_pack(p, w_curs, offset, &avail);
 427                 if (avail > len)
 428                         avail = (unsigned long)len;
 429                 hashwrite(f, in, avail);
 430                 offset += avail;
 431                 len -= avail;
 432         }
 433 }
 434
 435 static inline int oe_size_greater_than(struct packing_data *pack,
 436                                        const struct object_entry *lhs,
 437                                        unsigned long rhs)
 438 {
 439         if (lhs->size_valid)
 440                 return lhs->size_ > rhs;
 441         if (rhs < pack->oe_size_limit) /* rhs < 2^x <= lhs ? */
 442                 return 1;
 443         return oe_get_size_slow(pack, lhs) > rhs;
 444 }
 445
/*
 * Write `entry` to `f` by producing its pack representation afresh
 * rather than copying bytes out of an existing pack.
 *
 * With `usable_delta` zero the whole object is written; a blob larger
 * than big_file_threshold is streamed if a stream can be opened.
 * Otherwise the delta against DELTA(entry) is written, taken from
 * entry->delta_data when cached and recomputed with get_delta() when
 * not.
 *
 * A non-zero `limit` caps what may still be written: if the header,
 * the data and `hashsz` further bytes would reach it, nothing is
 * written and 0 is returned (we would bust the pack-size limit).
 * Otherwise returns the number of bytes written (header plus data).
 */
static unsigned long write_no_reuse_object(struct hashfile *f, struct object_entry *entry,
                                           unsigned long limit, int usable_delta)
{
        unsigned long size, datalen;
        unsigned char header[MAX_PACK_OBJECT_HEADER],
                      dheader[MAX_PACK_OBJECT_HEADER];
        unsigned hdrlen;
        enum object_type type;
        void *buf;
        struct git_istream *st = NULL;
        const unsigned hashsz = the_hash_algo->rawsz;

        if (!usable_delta) {
                if (oe_type(entry) == OBJ_BLOB &&
                    oe_size_greater_than(&to_pack, entry, big_file_threshold) &&
                    (st = open_istream(the_repository, &entry->idx.oid, &type,
                                       &size, NULL)) != NULL)
                        buf = NULL;
                else {
                        buf = repo_read_object_file(the_repository,
                                                    &entry->idx.oid, &type,
                                                    &size);
                        if (!buf)
                                die(_("unable to read %s"),
                                    oid_to_hex(&entry->idx.oid));
                }
                /*
                 * make sure no cached delta data remains from a
                 * previous attempt before a pack split occurred.
                 */
                FREE_AND_NULL(entry->delta_data);
                entry->z_delta_size = 0;
        } else if (entry->delta_data) {
                /* take over the cached delta; it is freed below */
                size = DELTA_SIZE(entry);
                buf = entry->delta_data;
                entry->delta_data = NULL;
                type = (allow_ofs_delta && DELTA(entry)->idx.offset) ?
                        OBJ_OFS_DELTA : OBJ_REF_DELTA;
        } else {
                buf = get_delta(entry);
                size = DELTA_SIZE(entry);
                type = (allow_ofs_delta && DELTA(entry)->idx.offset) ?
                        OBJ_OFS_DELTA : OBJ_REF_DELTA;
        }

        if (st) /* large blob case, just assume we don't compress well */
                datalen = size;
        else if (entry->z_delta_size)
                /* non-zero z_delta_size: buf is written as is, not deflated again */
                datalen = entry->z_delta_size;
        else
                datalen = do_compress(&buf, size);

        /*
         * The object header is a byte of 'type' followed by zero or
         * more bytes of length.
         */
        hdrlen = encode_in_pack_object_header(header, sizeof(header),
                                              type, size);

        if (type == OBJ_OFS_DELTA) {
                /*
                 * Deltas with relative base contain an additional
                 * encoding of the relative offset for the delta
                 * base from this object's position in the pack.
                 */
                off_t ofs = entry->idx.offset - DELTA(entry)->idx.offset;
                unsigned pos = sizeof(dheader) - 1;
                /* the encoding is built backwards from the end of dheader */
                dheader[pos] = ofs & 127;
                while (ofs >>= 7)
                        dheader[--pos] = 128 | (--ofs & 127);
                if (limit && hdrlen + sizeof(dheader) - pos + datalen + hashsz >= limit) {
                        if (st)
                                close_istream(st);
                        free(buf);
                        return 0;
                }
                hashwrite(f, header, hdrlen);
                hashwrite(f, dheader + pos, sizeof(dheader) - pos);
                hdrlen += sizeof(dheader) - pos;
        } else if (type == OBJ_REF_DELTA) {
                /*
                 * Deltas with a base reference contain
                 * additional bytes for the base object ID.
                 */
                if (limit && hdrlen + hashsz + datalen + hashsz >= limit) {
                        if (st)
                                close_istream(st);
                        free(buf);
                        return 0;
                }
                hashwrite(f, header, hdrlen);
                hashwrite(f, DELTA(entry)->idx.oid.hash, hashsz);
                hdrlen += hashsz;
        } else {
                if (limit && hdrlen + datalen + hashsz >= limit) {
                        if (st)
                                close_istream(st);
                        free(buf);
                        return 0;
                }
                hashwrite(f, header, hdrlen);
        }
        if (st) {
                /* replace the size estimate with the bytes actually written */
                datalen = write_large_blob_data(st, f, &entry->idx.oid);
                close_istream(st);
        } else {
                hashwrite(f, buf, datalen);
                free(buf);
        }

        return hdrlen + datalen;
}
 559
/*
 * Write `entry` to `f` by copying its data verbatim out of the pack it
 * currently lives in (IN_PACK(entry)), preceded by a freshly encoded
 * header.
 *
 * Unless we are packing to stdout, the existing data is checked
 * first: by CRC when the pack index version is above 1, by trial
 * inflation when it is exactly 1.  If the check fails, an error is
 * reported and the object is written with write_no_reuse_object()
 * instead.
 *
 * Return 0 if we will bust the pack-size limit (nothing is written in
 * that case); otherwise the number of bytes written, header plus data.
 */
static off_t write_reuse_object(struct hashfile *f, struct object_entry *entry,
                                unsigned long limit, int usable_delta)
{
        struct packed_git *p = IN_PACK(entry);
        struct pack_window *w_curs = NULL;
        uint32_t pos;
        off_t offset;
        enum object_type type = oe_type(entry);
        off_t datalen;
        unsigned char header[MAX_PACK_OBJECT_HEADER],
                      dheader[MAX_PACK_OBJECT_HEADER];
        unsigned hdrlen;
        const unsigned hashsz = the_hash_algo->rawsz;
        unsigned long entry_size = SIZE(entry);

        if (DELTA(entry))
                type = (allow_ofs_delta && DELTA(entry)->idx.offset) ?
                        OBJ_OFS_DELTA : OBJ_REF_DELTA;
        hdrlen = encode_in_pack_object_header(header, sizeof(header),
                                              type, entry_size);

        offset = entry->in_pack_offset;
        if (offset_to_pack_pos(p, offset, &pos) < 0)
                die(_("write_reuse_object: could not locate %s, expected at "
                      "offset %"PRIuMAX" in pack %s"),
                    oid_to_hex(&entry->idx.oid), (uintmax_t)offset,
                    p->pack_name);
        /* the entry extends up to the offset of the next pack position */
        datalen = pack_pos_to_offset(p, pos + 1) - offset;
        if (!pack_to_stdout && p->index_version > 1 &&
            check_pack_crc(p, &w_curs, offset, datalen,
                           pack_pos_to_index(p, pos))) {
                error(_("bad packed object CRC for %s"),
                      oid_to_hex(&entry->idx.oid));
                unuse_pack(&w_curs);
                return write_no_reuse_object(f, entry, limit, usable_delta);
        }

        /* skip the old in-pack header; only the data after it is copied */
        offset += entry->in_pack_header_size;
        datalen -= entry->in_pack_header_size;

        if (!pack_to_stdout && p->index_version == 1 &&
            check_pack_inflate(p, &w_curs, offset, datalen, entry_size)) {
                error(_("corrupt packed object for %s"),
                      oid_to_hex(&entry->idx.oid));
                unuse_pack(&w_curs);
                return write_no_reuse_object(f, entry, limit, usable_delta);
        }

        if (type == OBJ_OFS_DELTA) {
                off_t ofs = entry->idx.offset - DELTA(entry)->idx.offset;
                unsigned pos = sizeof(dheader) - 1;
                /* encode the base offset backwards from the end of dheader */
                dheader[pos] = ofs & 127;
                while (ofs >>= 7)
                        dheader[--pos] = 128 | (--ofs & 127);
                if (limit && hdrlen + sizeof(dheader) - pos + datalen + hashsz >= limit) {
                        unuse_pack(&w_curs);
                        return 0;
                }
                hashwrite(f, header, hdrlen);
                hashwrite(f, dheader + pos, sizeof(dheader) - pos);
                hdrlen += sizeof(dheader) - pos;
                reused_delta++;
        } else if (type == OBJ_REF_DELTA) {
                if (limit && hdrlen + hashsz + datalen + hashsz >= limit) {
                        unuse_pack(&w_curs);
                        return 0;
                }
                hashwrite(f, header, hdrlen);
                /* a ref-delta names its base by object ID */
                hashwrite(f, DELTA(entry)->idx.oid.hash, hashsz);
                hdrlen += hashsz;
                reused_delta++;
        } else {
                if (limit && hdrlen + datalen + hashsz >= limit) {
                        unuse_pack(&w_curs);
                        return 0;
                }
                hashwrite(f, header, hdrlen);
        }
        copy_pack_data(f, p, &w_curs, offset, datalen);
        unuse_pack(&w_curs);
        reused++;
        return hdrlen + datalen;
}
 644
/*
 * Write `entry` to `f`, which is currently at `write_offset`.
 *
 * Works out how many bytes may still be written under
 * pack_size_limit, whether the entry's delta base can be relied upon
 * (`usable_delta`) and whether existing pack data can be copied
 * (`to_reuse`), then delegates to write_reuse_object() or
 * write_no_reuse_object().  On success the `written` /
 * `written_delta` counters are updated and, unless packing to stdout,
 * the CRC of the bytes written is stored in entry->idx.crc32.
 *
 * Return 0 if we will bust the pack-size limit; otherwise the number
 * of bytes written.
 */
static off_t write_object(struct hashfile *f,
                          struct object_entry *entry,
                          off_t write_offset)
{
        unsigned long limit;
        off_t len;
        int usable_delta, to_reuse;

        if (!pack_to_stdout)
                crc32_begin(f);

        /* apply size limit if limited packsize and not first object */
        if (!pack_size_limit || !nr_written)
                limit = 0;
        else if (pack_size_limit <= write_offset)
                /*
                 * the earlier object did not fit the limit; avoid
                 * mistaking this with unlimited (i.e. limit = 0).
                 */
                limit = 1;
        else
                limit = pack_size_limit - write_offset;

        if (!DELTA(entry))
                usable_delta = 0;       /* no delta */
        else if (!pack_size_limit)
               usable_delta = 1;        /* unlimited packfile */
        else if (DELTA(entry)->idx.offset == (off_t)-1)
                usable_delta = 0;       /* base was written to another pack */
        else if (DELTA(entry)->idx.offset)
                usable_delta = 1;       /* base already exists in this pack */
        else
                usable_delta = 0;       /* base could end up in another pack */

        if (!reuse_object)
                to_reuse = 0;   /* explicit */
        else if (!IN_PACK(entry))
                to_reuse = 0;   /* can't reuse what we don't have */
        else if (oe_type(entry) == OBJ_REF_DELTA ||
                 oe_type(entry) == OBJ_OFS_DELTA)
                                /* check_object() decided it for us ... */
                to_reuse = usable_delta;
                                /* ... but pack split may override that */
        else if (oe_type(entry) != entry->in_pack_type)
                to_reuse = 0;   /* pack has delta which is unusable */
        else if (DELTA(entry))
                to_reuse = 0;   /* we want to pack afresh */
        else
                to_reuse = 1;   /* we have it in-pack undeltified,
                                 * and we do not need to deltify it.
                                 */

        if (!to_reuse)
                len = write_no_reuse_object(f, entry, limit, usable_delta);
        else
                len = write_reuse_object(f, entry, limit, usable_delta);
        /* zero length: the object did not fit and nothing was written */
        if (!len)
                return 0;

        if (usable_delta)
                written_delta++;
        written++;
        if (!pack_to_stdout)
                entry->idx.crc32 = crc32_end(f);
        return len;
}
 712
/* Outcome of write_one() for a single object entry. */
enum write_one_status {
        WRITE_ONE_SKIP = -1, /* already written */
        WRITE_ONE_BREAK = 0, /* writing this will bust the limit; not written */
        WRITE_ONE_WRITTEN = 1, /* normal */
        WRITE_ONE_RECURSIVE = 2 /* already scheduled to be written */
};
 719
/*
 * Write entry `e` to `f` at *offset, writing its delta base first when
 * it has one, and advance *offset by the number of bytes written.
 *
 * Entries that already have an offset, or that are preferred bases,
 * are skipped.  A cycle in the delta chain is detected through the
 * temporary offset marker described below; the entry that runs into
 * it loses its delta base and is then written without one.  When the
 * object (or its base) does not fit, the entry's offset is reset and
 * WRITE_ONE_BREAK is returned.
 */
static enum write_one_status write_one(struct hashfile *f,
                                       struct object_entry *e,
                                       off_t *offset)
{
        off_t size;
        int recursing;

        /*
         * we set offset to 1 (which is an impossible value) to mark
         * the fact that this object is involved in "write its base
         * first before writing a deltified object" recursion.
         */
        recursing = (e->idx.offset == 1);
        if (recursing) {
                warning(_("recursive delta detected for object %s"),
                        oid_to_hex(&e->idx.oid));
                return WRITE_ONE_RECURSIVE;
        } else if (e->idx.offset || e->preferred_base) {
                /* offset is non zero if object is written already. */
                return WRITE_ONE_SKIP;
        }

        /* if we are deltified, write out base object first. */
        if (DELTA(e)) {
                e->idx.offset = 1; /* now recurse */
                switch (write_one(f, DELTA(e), offset)) {
                case WRITE_ONE_RECURSIVE:
                        /* we cannot depend on this one */
                        SET_DELTA(e, NULL);
                        break;
                default:
                        break;
                case WRITE_ONE_BREAK:
                        /* `recursing` is 0 here, so this clears the marker */
                        e->idx.offset = recursing;
                        return WRITE_ONE_BREAK;
                }
        }

        e->idx.offset = *offset;
        size = write_object(f, e, *offset);
        if (!size) {
                /* not written: put the offset back to "unwritten" (0) */
                e->idx.offset = recursing;
                return WRITE_ONE_BREAK;
        }
        written_list[nr_written++] = &e->idx;

        /* make sure off_t is sufficiently large not to wrap */
        if (signed_add_overflows(*offset, size))
                die(_("pack too large for current definition of off_t"));
        *offset += size;
        return WRITE_ONE_WRITTEN;
}
 772
 773 static int mark_tagged(const char *path UNUSED, const struct object_id *oid,
 774                        int flag UNUSED, void *cb_data UNUSED)
 775 {
 776         struct object_id peeled;
 777         struct object_entry *entry = packlist_find(&to_pack, oid);
 778
 779         if (entry)
 780                 entry->tagged = 1;
 781         if (!peel_iterated_oid(oid, &peeled)) {
 782                 entry = packlist_find(&to_pack, &peeled);
 783                 if (entry)
 784                         entry->tagged = 1;
 785         }
 786         return 0;
 787 }
 788
 789 static inline unsigned char oe_layer(struct packing_data *pack,
 790                                      struct object_entry *e)
 791 {
 792         if (!pack->layer)
 793                 return 0;
 794         return pack->layer[e - pack->objects];
 795 }
 796
 797 static inline void add_to_write_order(struct object_entry **wo,
 798                                unsigned int *endp,
 799                                struct object_entry *e)
 800 {
 801         if (e->filled || oe_layer(&to_pack, e) != write_layer)
 802                 return;
 803         wo[(*endp)++] = e;
 804         e->filled = 1;
 805 }
 806
/*
 * Walk the delta tree starting at `e` without recursion, handing the
 * nodes it reaches to add_to_write_order() (which appends to `wo` and
 * advances *endp). Each time the walk arrives on a new level, the node
 * it lands on and the rest of that node's delta-sibling chain are added
 * before the walk goes any deeper. The walk ends once climbing back up
 * through DELTA() runs out of ancestors.
 */
static void add_descendants_to_write_order(struct object_entry **wo,
                                           unsigned int *endp,
                                           struct object_entry *e)
{
        /*
         * Set when we have just descended to a child (or are at the
         * start); cleared while moving sideways or back up, because
         * those nodes were already added when their level was entered.
         */
        int add_to_order = 1;
        while (e) {
                if (add_to_order) {
                        struct object_entry *s;
                        /* add this node... */
                        add_to_write_order(wo, endp, e);
                        /* all its siblings... */
                        for (s = DELTA_SIBLING(e); s; s = DELTA_SIBLING(s)) {
                                add_to_write_order(wo, endp, s);
                        }
                }
                /* drop down a level to add left subtree nodes if possible */
                if (DELTA_CHILD(e)) {
                        add_to_order = 1;
                        e = DELTA_CHILD(e);
                } else {
                        add_to_order = 0;
                        /* our sibling might have some children, it is next */
                        if (DELTA_SIBLING(e)) {
                                e = DELTA_SIBLING(e);
                                continue;
                        }
                        /* go back to our parent node */
                        e = DELTA(e);
                        while (e && !DELTA_SIBLING(e)) {
                                /* we're on the right side of a subtree, keep
                                 * going up until we can go right again */
                                e = DELTA(e);
                        }
                        if (!e) {
                                /* done- we hit our original root node */
                                return;
                        }
                        /* pass it off to sibling at this level */
                        e = DELTA_SIBLING(e);
                }
        };
}
 849
 850 static void add_family_to_write_order(struct object_entry **wo,
 851                                       unsigned int *endp,
 852                                       struct object_entry *e)
 853 {
 854         struct object_entry *root;
 855
 856         for (root = e; DELTA(root); root = DELTA(root))
 857                 ; /* nothing */
 858         add_descendants_to_write_order(wo, endp, root);
 859 }
 860
 861 static void compute_layer_order(struct object_entry **wo, unsigned int *wo_end)
 862 {
 863         unsigned int i, last_untagged;
 864         struct object_entry *objects = to_pack.objects;
 865
 866         for (i = 0; i < to_pack.nr_objects; i++) {
 867                 if (objects[i].tagged)
 868                         break;
 869                 add_to_write_order(wo, wo_end, &objects[i]);
 870         }
 871         last_untagged = i;
 872
 873         /*
 874          * Then fill all the tagged tips.
 875          */
 876         for (; i < to_pack.nr_objects; i++) {
 877                 if (objects[i].tagged)
 878                         add_to_write_order(wo, wo_end, &objects[i]);
 879         }
 880
 881         /*
 882          * And then all remaining commits and tags.
 883          */
 884         for (i = last_untagged; i < to_pack.nr_objects; i++) {
 885                 if (oe_type(&objects[i]) != OBJ_COMMIT &&
 886                     oe_type(&objects[i]) != OBJ_TAG)
 887                         continue;
 888                 add_to_write_order(wo, wo_end, &objects[i]);
 889         }
 890
 891         /*
 892          * And then all the trees.
 893          */
 894         for (i = last_untagged; i < to_pack.nr_objects; i++) {
 895                 if (oe_type(&objects[i]) != OBJ_TREE)
 896                         continue;
 897                 add_to_write_order(wo, wo_end, &objects[i]);
 898         }
 899
 900         /*
 901          * Finally all the rest in really tight order
 902          */
 903         for (i = last_untagged; i < to_pack.nr_objects; i++) {
 904                 if (!objects[i].filled && oe_layer(&to_pack, &objects[i]) == write_layer)
 905                         add_family_to_write_order(wo, wo_end, &objects[i]);
 906         }
 907 }
 908
 909 static struct object_entry **compute_write_order(void)
 910 {
 911         uint32_t max_layers = 1;
 912         unsigned int i, wo_end;
 913
 914         struct object_entry **wo;
 915         struct object_entry *objects = to_pack.objects;
 916
 917         for (i = 0; i < to_pack.nr_objects; i++) {
 918                 objects[i].tagged = 0;
 919                 objects[i].filled = 0;
 920                 SET_DELTA_CHILD(&objects[i], NULL);
 921                 SET_DELTA_SIBLING(&objects[i], NULL);
 922         }
 923
 924         /*
 925          * Fully connect delta_child/delta_sibling network.
 926          * Make sure delta_sibling is sorted in the original
 927          * recency order.
 928          */
 929         for (i = to_pack.nr_objects; i > 0;) {
 930                 struct object_entry *e = &objects[--i];
 931                 if (!DELTA(e))
 932                         continue;
 933                 /* Mark me as the first child */
 934                 e->delta_sibling_idx = DELTA(e)->delta_child_idx;
 935                 SET_DELTA_CHILD(DELTA(e), e);
 936         }
 937
 938         /*
 939          * Mark objects that are at the tip of tags.
 940          */
 941         for_each_tag_ref(mark_tagged, NULL);
 942
 943         if (use_delta_islands) {
 944                 max_layers = compute_pack_layers(&to_pack);
 945                 free_island_marks();
 946         }
 947
 948         ALLOC_ARRAY(wo, to_pack.nr_objects);
 949         wo_end = 0;
 950
 951         for (; write_layer < max_layers; ++write_layer)
 952                 compute_layer_order(wo, &wo_end);
 953
 954         if (wo_end != to_pack.nr_objects)
 955                 die(_("ordered %u objects, expected %"PRIu32),
 956                     wo_end, to_pack.nr_objects);
 957
 958         return wo;
 959 }
 960
 961
/*
 * A reused set of objects. All objects in a chunk have the same
 * relative position in the original packfile and the generated
 * packfile.
 *
 * Chunks are appended by record_reused_object() and looked up by
 * find_reused_offset(), which binary-searches on "original".
 */

static struct reused_chunk {
        /*
         * The offset of the first object of this chunk in the original
         * packfile.
         */
        off_t original;
        /*
         * The difference for "original" minus the offset of the first
         * object of this chunk in the generated packfile.
         */
        off_t difference;
} *reused_chunks;
/* Number of entries of reused_chunks currently in use. */
static int reused_chunks_nr;
/* Allocated capacity of reused_chunks, maintained through ALLOC_GROW(). */
static int reused_chunks_alloc;
 978
 979 static void record_reused_object(off_t where, off_t offset)
 980 {
 981         if (reused_chunks_nr && reused_chunks[reused_chunks_nr-1].difference == offset)
 982                 return;
 983
 984         ALLOC_GROW(reused_chunks, reused_chunks_nr + 1,
 985                    reused_chunks_alloc);
 986         reused_chunks[reused_chunks_nr].original = where;
 987         reused_chunks[reused_chunks_nr].difference = offset;
 988         reused_chunks_nr++;
 989 }
 990
 991 /*
 992  * Binary search to find the chunk that "where" is in. Note
 993  * that we're not looking for an exact match, just the first
 994  * chunk that contains it (which implicitly ends at the start
 995  * of the next chunk.
 996  */
 997 static off_t find_reused_offset(off_t where)
 998 {
 999         int lo = 0, hi = reused_chunks_nr;
1000         while (lo < hi) {
1001                 int mi = lo + ((hi - lo) / 2);
1002                 if (where == reused_chunks[mi].original)
1003                         return reused_chunks[mi].difference;
1004                 if (where < reused_chunks[mi].original)
1005                         hi = mi;
1006                 else
1007                         lo = mi + 1;
1008         }
1009
1010         /*
1011          * The first chunk starts at zero, so we can't have gone below
1012          * there.
1013          */
1014         assert(lo);
1015         return reused_chunks[lo-1].difference;
1016 }
1017
1018 static void write_reused_pack_one(size_t pos, struct hashfile *out,
1019                                   struct pack_window **w_curs)
1020 {
1021         off_t offset, next, cur;
1022         enum object_type type;
1023         unsigned long size;
1024
1025         offset = pack_pos_to_offset(reuse_packfile, pos);
1026         next = pack_pos_to_offset(reuse_packfile, pos + 1);
1027
1028         record_reused_object(offset, offset - hashfile_total(out));
1029
1030         cur = offset;
1031         type = unpack_object_header(reuse_packfile, w_curs, &cur, &size);
1032         assert(type >= 0);
1033
1034         if (type == OBJ_OFS_DELTA) {
1035                 off_t base_offset;
1036                 off_t fixup;
1037
1038                 unsigned char header[MAX_PACK_OBJECT_HEADER];
1039                 unsigned len;
1040
1041                 base_offset = get_delta_base(reuse_packfile, w_curs, &cur, type, offset);
1042                 assert(base_offset != 0);
1043
1044                 /* Convert to REF_DELTA if we must... */
1045                 if (!allow_ofs_delta) {
1046                         uint32_t base_pos;
1047                         struct object_id base_oid;
1048
1049                         if (offset_to_pack_pos(reuse_packfile, base_offset, &base_pos) < 0)
1050                                 die(_("expected object at offset %"PRIuMAX" "
1051                                       "in pack %s"),
1052                                     (uintmax_t)base_offset,
1053                                     reuse_packfile->pack_name);
1054
1055                         nth_packed_object_id(&base_oid, reuse_packfile,
1056                                              pack_pos_to_index(reuse_packfile, base_pos));
1057
1058                         len = encode_in_pack_object_header(header, sizeof(header),
1059                                                            OBJ_REF_DELTA, size);
1060                         hashwrite(out, header, len);
1061                         hashwrite(out, base_oid.hash, the_hash_algo->rawsz);
1062                         copy_pack_data(out, reuse_packfile, w_curs, cur, next - cur);
1063                         return;
1064                 }
1065
1066                 /* Otherwise see if we need to rewrite the offset... */
1067                 fixup = find_reused_offset(offset) -
1068                         find_reused_offset(base_offset);
1069                 if (fixup) {
1070                         unsigned char ofs_header[10];
1071                         unsigned i, ofs_len;
1072                         off_t ofs = offset - base_offset - fixup;
1073
1074                         len = encode_in_pack_object_header(header, sizeof(header),
1075                                                            OBJ_OFS_DELTA, size);
1076
1077                         i = sizeof(ofs_header) - 1;
1078                         ofs_header[i] = ofs & 127;
1079                         while (ofs >>= 7)
1080                                 ofs_header[--i] = 128 | (--ofs & 127);
1081
1082                         ofs_len = sizeof(ofs_header) - i;
1083
1084                         hashwrite(out, header, len);
1085                         hashwrite(out, ofs_header + sizeof(ofs_header) - ofs_len, ofs_len);
1086                         copy_pack_data(out, reuse_packfile, w_curs, cur, next - cur);
1087                         return;
1088                 }
1089
1090                 /* ...otherwise we have no fixup, and can write it verbatim */
1091         }
1092
1093         copy_pack_data(out, reuse_packfile, w_curs, offset, next - offset);
1094 }
1095
1096 static size_t write_reused_pack_verbatim(struct hashfile *out,
1097                                          struct pack_window **w_curs)
1098 {
1099         size_t pos = 0;
1100
1101         while (pos < reuse_packfile_bitmap->word_alloc &&
1102                         reuse_packfile_bitmap->words[pos] == (eword_t)~0)
1103                 pos++;
1104
1105         if (pos) {
1106                 off_t to_write;
1107
1108                 written = (pos * BITS_IN_EWORD);
1109                 to_write = pack_pos_to_offset(reuse_packfile, written)
1110                         - sizeof(struct pack_header);
1111
1112                 /* We're recording one chunk, not one object. */
1113                 record_reused_object(sizeof(struct pack_header), 0);
1114                 hashflush(out);
1115                 copy_pack_data(out, reuse_packfile, w_curs,
1116                         sizeof(struct pack_header), to_write);
1117
1118                 display_progress(progress_state, written);
1119         }
1120         return pos;
1121 }
1122
1123 static void write_reused_pack(struct hashfile *f)
1124 {
1125         size_t i = 0;
1126         uint32_t offset;
1127         struct pack_window *w_curs = NULL;
1128
1129         if (allow_ofs_delta)
1130                 i = write_reused_pack_verbatim(f, &w_curs);
1131
1132         for (; i < reuse_packfile_bitmap->word_alloc; ++i) {
1133                 eword_t word = reuse_packfile_bitmap->words[i];
1134                 size_t pos = (i * BITS_IN_EWORD);
1135
1136                 for (offset = 0; offset < BITS_IN_EWORD; ++offset) {
1137                         if ((word >> offset) == 0)
1138                                 break;
1139
1140                         offset += ewah_bit_ctz64(word >> offset);
1141                         /*
1142                          * Can use bit positions directly, even for MIDX
1143                          * bitmaps. See comment in try_partial_reuse()
1144                          * for why.
1145                          */
1146                         write_reused_pack_one(pos + offset, f, &w_curs);
1147                         display_progress(progress_state, ++written);
1148                 }
1149         }
1150
1151         unuse_pack(&w_curs);
1152 }
1153
1154 static void write_excluded_by_configs(void)
1155 {
1156         struct oidset_iter iter;
1157         const struct object_id *oid;
1158
1159         oidset_iter_init(&excluded_by_config, &iter);
1160         while ((oid = oidset_iter_next(&iter))) {
1161                 struct configured_exclusion *ex =
1162                         oidmap_get(&configured_exclusions, oid);
1163
1164                 if (!ex)
1165                         BUG("configured exclusion wasn't configured");
1166                 write_in_full(1, ex->pack_hash_hex, strlen(ex->pack_hash_hex));
1167                 write_in_full(1, " ", 1);
1168                 write_in_full(1, ex->uri, strlen(ex->uri));
1169                 write_in_full(1, "\n", 1);
1170         }
1171 }
1172
/*
 * Warning shown by write_pack_file() when it turns bitmap writing off
 * because the output had to be split into more than one pack.
 */
static const char no_split_warning[] = N_(
"disabling bitmap writing, packs are split due to pack.packSizeLimit"
);
1176
/*
 * Write the objects collected in to_pack out as one or more packs.
 *
 * The objects are visited in the order given by compute_write_order().
 * Each pass of the do/while loop produces one pack: either streamed to
 * stdout, or written to a temporary file that is then staged with its
 * index (and bitmap, while bitmap writing is still enabled), renamed
 * into place, and reported by printing its hash on stdout. A pack ends
 * early when write_one() returns WRITE_ONE_BREAK; the next pass resumes
 * at the same position in the write order.
 *
 * Dies if the number of objects written differs from nr_result.
 */
static void write_pack_file(void)
{
        uint32_t i = 0, j;
        struct hashfile *f;
        off_t offset;
        uint32_t nr_remaining = nr_result;
        time_t last_mtime = 0;
        struct object_entry **write_order;

        if (progress > pack_to_stdout)
                progress_state = start_progress(_("Writing objects"), nr_result);
        ALLOC_ARRAY(written_list, to_pack.nr_objects);
        write_order = compute_write_order();

        do {
                unsigned char hash[GIT_MAX_RAWSZ];
                char *pack_tmp_name = NULL;

                if (pack_to_stdout)
                        f = hashfd_throughput(1, "<stdout>", progress_state);
                else
                        f = create_tmp_packfile(&pack_tmp_name);

                /* The header announces nr_remaining; fixed up below if we write fewer. */
                offset = write_pack_header(f, nr_remaining);

                if (reuse_packfile) {
                        assert(pack_to_stdout);
                        write_reused_pack(f);
                        offset = hashfile_total(f);
                }

                /* nr_written counts the entries of the current pack only. */
                nr_written = 0;
                for (; i < to_pack.nr_objects; i++) {
                        struct object_entry *e = write_order[i];
                        if (write_one(f, e, &offset) == WRITE_ONE_BREAK)
                                break;
                        display_progress(progress_state, written);
                }

                if (pack_to_stdout) {
                        /*
                         * We never fsync when writing to stdout since we may
                         * not be writing to an actual pack file. For instance,
                         * the upload-pack code passes a pipe here. Calling
                         * fsync on a pipe results in unnecessary
                         * synchronization with the reader on some platforms.
                         */
                        finalize_hashfile(f, hash, FSYNC_COMPONENT_NONE,
                                          CSUM_HASH_IN_STREAM | CSUM_CLOSE);
                } else if (nr_written == nr_remaining) {
                        finalize_hashfile(f, hash, FSYNC_COMPONENT_PACK,
                                          CSUM_HASH_IN_STREAM | CSUM_FSYNC | CSUM_CLOSE);
                } else {
                        /*
                         * If we wrote the wrong number of entries in the
                         * header, rewrite it like in fast-import.
                         */

                        int fd = finalize_hashfile(f, hash, FSYNC_COMPONENT_PACK, 0);
                        fixup_pack_header_footer(fd, hash, pack_tmp_name,
                                                 nr_written, hash, offset);
                        close(fd);
                        if (write_bitmap_index) {
                                if (write_bitmap_index != WRITE_BITMAP_QUIET)
                                        warning(_(no_split_warning));
                                write_bitmap_index = 0;
                        }
                }

                if (!pack_to_stdout) {
                        struct stat st;
                        struct strbuf tmpname = STRBUF_INIT;
                        char *idx_tmp_name = NULL;

                        /*
                         * Packs are runtime accessed in their mtime
                         * order since newer packs are more likely to contain
                         * younger objects.  So if we are creating multiple
                         * packs then we should modify the mtime of later ones
                         * to preserve this property.
                         */
                        if (stat(pack_tmp_name, &st) < 0) {
                                warning_errno(_("failed to stat %s"), pack_tmp_name);
                        } else if (!last_mtime) {
                                /* first pack: remember its mtime as the starting point */
                                last_mtime = st.st_mtime;
                        } else {
                                struct utimbuf utb;
                                utb.actime = st.st_atime;
                                /* each later pack is stamped one tick older than the one before */
                                utb.modtime = --last_mtime;
                                if (utime(pack_tmp_name, &utb) < 0)
                                        warning_errno(_("failed utime() on %s"), pack_tmp_name);
                        }

                        /* tmpname becomes "<base_name>-<hash>."; suffixes are appended to it */
                        strbuf_addf(&tmpname, "%s-%s.", base_name,
                                    hash_to_hex(hash));

                        if (write_bitmap_index) {
                                bitmap_writer_set_checksum(hash);
                                bitmap_writer_build_type_index(
                                        &to_pack, written_list, nr_written);
                        }

                        if (cruft)
                                pack_idx_opts.flags |= WRITE_MTIMES;

                        stage_tmp_packfiles(&tmpname, pack_tmp_name,
                                            written_list, nr_written,
                                            &to_pack, &pack_idx_opts, hash,
                                            &idx_tmp_name);

                        if (write_bitmap_index) {
                                size_t tmpname_len = tmpname.len;

                                strbuf_addstr(&tmpname, "bitmap");
                                stop_progress(&progress_state);

                                bitmap_writer_show_progress(progress);
                                bitmap_writer_select_commits(indexed_commits, indexed_commits_nr, -1);
                                if (bitmap_writer_build(&to_pack) < 0)
                                        die(_("failed to write bitmap index"));
                                bitmap_writer_finish(written_list, nr_written,
                                                     tmpname.buf, write_bitmap_options);
                                /* a bitmap has been written; do not write one for any later pack */
                                write_bitmap_index = 0;
                                /* drop the "bitmap" suffix again */
                                strbuf_setlen(&tmpname, tmpname_len);
                        }

                        rename_tmp_packfile_idx(&tmpname, &idx_tmp_name);

                        free(idx_tmp_name);
                        strbuf_release(&tmpname);
                        free(pack_tmp_name);
                        puts(hash_to_hex(hash));
                }

                /* mark written objects as written to previous pack */
                for (j = 0; j < nr_written; j++) {
                        written_list[j]->offset = (off_t)-1;
                }
                nr_remaining -= nr_written;
        } while (nr_remaining && i < to_pack.nr_objects);

        free(written_list);
        free(write_order);
        stop_progress(&progress_state);
        if (written != nr_result)
                die(_("wrote %"PRIu32" objects while expecting %"PRIu32),
                    written, nr_result);
        trace2_data_intmax("pack-objects", the_repository,
                           "write_pack_file/wrote", nr_result);
}
1327
1328 static int no_try_delta(const char *path)
1329 {
1330         static struct attr_check *check;
1331
1332         if (!check)
1333                 check = attr_check_initl("delta", NULL);
1334         git_check_attr(the_repository->index, path, check);
1335         if (ATTR_FALSE(check->items[0].value))
1336                 return 1;
1337         return 0;
1338 }
1339
1340 /*
1341  * When adding an object, check whether we have already added it
1342  * to our packing list. If so, we can skip. However, if we are
1343  * being asked to excludei t, but the previous mention was to include
1344  * it, make sure to adjust its flags and tweak our numbers accordingly.
1345  *
1346  * As an optimization, we pass out the index position where we would have
1347  * found the item, since that saves us from having to look it up again a
1348  * few lines later when we want to add the new entry.
1349  */
1350 static int have_duplicate_entry(const struct object_id *oid,
1351                                 int exclude)
1352 {
1353         struct object_entry *entry;
1354
1355         if (reuse_packfile_bitmap &&
1356             bitmap_walk_contains(bitmap_git, reuse_packfile_bitmap, oid))
1357                 return 1;
1358
1359         entry = packlist_find(&to_pack, oid);
1360         if (!entry)
1361                 return 0;
1362
1363         if (exclude) {
1364                 if (!entry->preferred_base)
1365                         nr_result--;
1366                 entry->preferred_base = 1;
1367         }
1368
1369         return 1;
1370 }
1371
1372 static int want_found_object(const struct object_id *oid, int exclude,
1373                              struct packed_git *p)
1374 {
1375         if (exclude)
1376                 return 1;
1377         if (incremental)
1378                 return 0;
1379
1380         if (!is_pack_valid(p))
1381                 return -1;
1382
1383         /*
1384          * When asked to do --local (do not include an object that appears in a
1385          * pack we borrow from elsewhere) or --honor-pack-keep (do not include
1386          * an object that appears in a pack marked with .keep), finding a pack
1387          * that matches the criteria is sufficient for us to decide to omit it.
1388          * However, even if this pack does not satisfy the criteria, we need to
1389          * make sure no copy of this object appears in _any_ pack that makes us
1390          * to omit the object, so we need to check all the packs.
1391          *
1392          * We can however first check whether these options can possibly matter;
1393          * if they do not matter we know we want the object in generated pack.
1394          * Otherwise, we signal "-1" at the end to tell the caller that we do
1395          * not know either way, and it needs to check more packs.
1396          */
1397
1398         /*
1399          * Objects in packs borrowed from elsewhere are discarded regardless of
1400          * if they appear in other packs that weren't borrowed.
1401          */
1402         if (local && !p->pack_local)
1403                 return 0;
1404
1405         /*
1406          * Then handle .keep first, as we have a fast(er) path there.
1407          */
1408         if (ignore_packed_keep_on_disk || ignore_packed_keep_in_core) {
1409                 /*
1410                  * Set the flags for the kept-pack cache to be the ones we want
1411                  * to ignore.
1412                  *
1413                  * That is, if we are ignoring objects in on-disk keep packs,
1414                  * then we want to search through the on-disk keep and ignore
1415                  * the in-core ones.
1416                  */
1417                 unsigned flags = 0;
1418                 if (ignore_packed_keep_on_disk)
1419                         flags |= ON_DISK_KEEP_PACKS;
1420                 if (ignore_packed_keep_in_core)
1421                         flags |= IN_CORE_KEEP_PACKS;
1422
1423                 if (ignore_packed_keep_on_disk && p->pack_keep)
1424                         return 0;
1425                 if (ignore_packed_keep_in_core && p->pack_keep_in_core)
1426                         return 0;
1427                 if (has_object_kept_pack(oid, flags))
1428                         return 0;
1429         }
1430
1431         /*
1432          * At this point we know definitively that either we don't care about
1433          * keep-packs, or the object is not in one. Keep checking other
1434          * conditions...
1435          */
1436         if (!local || !have_non_local_packs)
1437                 return 1;
1438
1439         /* we don't know yet; keep looking for more packs */
1440         return -1;
1441 }
1442
1443 static int want_object_in_pack_one(struct packed_git *p,
1444                                    const struct object_id *oid,
1445                                    int exclude,
1446                                    struct packed_git **found_pack,
1447                                    off_t *found_offset)
1448 {
1449         off_t offset;
1450
1451         if (p == *found_pack)
1452                 offset = *found_offset;
1453         else
1454                 offset = find_pack_entry_one(oid->hash, p);
1455
1456         if (offset) {
1457                 if (!*found_pack) {
1458                         if (!is_pack_valid(p))
1459                                 return -1;
1460                         *found_offset = offset;
1461                         *found_pack = p;
1462                 }
1463                 return want_found_object(oid, exclude, p);
1464         }
1465         return -1;
1466 }
1467
1468 /*
1469  * Check whether we want the object in the pack (e.g., we do not want
1470  * objects found in non-local stores if the "--local" option was used).
1471  *
1472  * If the caller already knows an existing pack it wants to take the object
1473  * from, that is passed in *found_pack and *found_offset; otherwise this
1474  * function finds if there is any pack that has the object and returns the pack
1475  * and its offset in these variables.
1476  */
1477 static int want_object_in_pack(const struct object_id *oid,
1478                                int exclude,
1479                                struct packed_git **found_pack,
1480                                off_t *found_offset)
1481 {
1482         int want;
1483         struct list_head *pos;
1484         struct multi_pack_index *m;
1485
1486         if (!exclude && local && has_loose_object_nonlocal(oid))
1487                 return 0;
1488
1489         /*
1490          * If we already know the pack object lives in, start checks from that
1491          * pack - in the usual case when neither --local was given nor .keep files
1492          * are present we will determine the answer right now.
1493          */
1494         if (*found_pack) {
1495                 want = want_found_object(oid, exclude, *found_pack);
1496                 if (want != -1)
1497                         return want;
1498
1499                 *found_pack = NULL;
1500                 *found_offset = 0;
1501         }
1502
1503         for (m = get_multi_pack_index(the_repository); m; m = m->next) {
1504                 struct pack_entry e;
1505                 if (fill_midx_entry(the_repository, oid, &e, m)) {
1506                         want = want_object_in_pack_one(e.p, oid, exclude, found_pack, found_offset);
1507                         if (want != -1)
1508                                 return want;
1509                 }
1510         }
1511
1512         list_for_each(pos, get_packed_git_mru(the_repository)) {
1513                 struct packed_git *p = list_entry(pos, struct packed_git, mru);
1514                 want = want_object_in_pack_one(p, oid, exclude, found_pack, found_offset);
1515                 if (!exclude && want > 0)
1516                         list_move(&p->mru,
1517                                   get_packed_git_mru(the_repository));
1518                 if (want != -1)
1519                         return want;
1520         }
1521
1522         if (uri_protocols.nr) {
1523                 struct configured_exclusion *ex =
1524                         oidmap_get(&configured_exclusions, oid);
1525                 int i;
1526                 const char *p;
1527
1528                 if (ex) {
1529                         for (i = 0; i < uri_protocols.nr; i++) {
1530                                 if (skip_prefix(ex->uri,
1531                                                 uri_protocols.items[i].string,
1532                                                 &p) &&
1533                                     *p == ':') {
1534                                         oidset_insert(&excluded_by_config, oid);
1535                                         return 0;
1536                                 }
1537                         }
1538                 }
1539         }
1540
1541         return 1;
1542 }
1543
1544 static struct object_entry *create_object_entry(const struct object_id *oid,
1545                                                 enum object_type type,
1546                                                 uint32_t hash,
1547                                                 int exclude,
1548                                                 int no_try_delta,
1549                                                 struct packed_git *found_pack,
1550                                                 off_t found_offset)
1551 {
1552         struct object_entry *entry;
1553
1554         entry = packlist_alloc(&to_pack, oid);
1555         entry->hash = hash;
1556         oe_set_type(entry, type);
1557         if (exclude)
1558                 entry->preferred_base = 1;
1559         else
1560                 nr_result++;
1561         if (found_pack) {
1562                 oe_set_in_pack(&to_pack, entry, found_pack);
1563                 entry->in_pack_offset = found_offset;
1564         }
1565
1566         entry->no_try_delta = no_try_delta;
1567
1568         return entry;
1569 }
1570
1571 static const char no_closure_warning[] = N_(
1572 "disabling bitmap writing, as some objects are not being packed"
1573 );
1574
1575 static int add_object_entry(const struct object_id *oid, enum object_type type,
1576                             const char *name, int exclude)
1577 {
1578         struct packed_git *found_pack = NULL;
1579         off_t found_offset = 0;
1580
1581         display_progress(progress_state, ++nr_seen);
1582
1583         if (have_duplicate_entry(oid, exclude))
1584                 return 0;
1585
1586         if (!want_object_in_pack(oid, exclude, &found_pack, &found_offset)) {
1587                 /* The pack is missing an object, so it will not have closure */
1588                 if (write_bitmap_index) {
1589                         if (write_bitmap_index != WRITE_BITMAP_QUIET)
1590                                 warning(_(no_closure_warning));
1591                         write_bitmap_index = 0;
1592                 }
1593                 return 0;
1594         }
1595
1596         create_object_entry(oid, type, pack_name_hash(name),
1597                             exclude, name && no_try_delta(name),
1598                             found_pack, found_offset);
1599         return 1;
1600 }
1601
1602 static int add_object_entry_from_bitmap(const struct object_id *oid,
1603                                         enum object_type type,
1604                                         int flags UNUSED, uint32_t name_hash,
1605                                         struct packed_git *pack, off_t offset)
1606 {
1607         display_progress(progress_state, ++nr_seen);
1608
1609         if (have_duplicate_entry(oid, 0))
1610                 return 0;
1611
1612         if (!want_object_in_pack(oid, 0, &pack, &offset))
1613                 return 0;
1614
1615         create_object_entry(oid, type, name_hash, 0, 0, pack, offset);
1616         return 1;
1617 }
1618
/*
 * Contents of one tree object kept around for preferred-base lookups.
 * Entries are handed out by pbase_tree_get() and given back with
 * pbase_tree_put().
 */
1619 struct pbase_tree_cache {
1620         struct object_id oid; /* id of the tree whose data is held below */
1621         int ref; /* raised by pbase_tree_get(), lowered by pbase_tree_put() */
1622         int temporary; /* non-zero: not stored in the cache; pbase_tree_put() frees it */
1623         void *tree_data; /* object buffer; freed on eviction or cleanup */
1624         unsigned long tree_size; /* size that was reported together with tree_data */
1625 };
1626
1627 static struct pbase_tree_cache *(pbase_tree_cache[256]);
1628 static int pbase_tree_cache_ix(const struct object_id *oid)
1629 {
1630         return oid->hash[0] % ARRAY_SIZE(pbase_tree_cache);
1631 }
1632 static int pbase_tree_cache_ix_incr(int ix)
1633 {
1634         return (ix+1) % ARRAY_SIZE(pbase_tree_cache);
1635 }
1636
/*
 * Singly linked list of the top-level trees registered through
 * add_preferred_base(); the file-scope pointer "pbase_tree" is its head.
 */
1637 static struct pbase_tree {
1638         struct pbase_tree *next;
1639         /* This is a phony "cache" entry; we are not
1640          * going to evict it or find it through _get()
1641          * mechanism -- this is for the toplevel node that
1642          * would almost always change with any commit.
1643          */
1644         struct pbase_tree_cache pcache;
1645 } *pbase_tree;
1646
1647 static struct pbase_tree_cache *pbase_tree_get(const struct object_id *oid)
1648 {
1649         struct pbase_tree_cache *ent, *nent;
1650         void *data;
1651         unsigned long size;
1652         enum object_type type;
1653         int neigh;
1654         int my_ix = pbase_tree_cache_ix(oid);
1655         int available_ix = -1;
1656
1657         /* pbase-tree-cache acts as a limited hashtable.
1658          * your object will be found at your index or within a few
1659          * slots after that slot if it is cached.
1660          */
1661         for (neigh = 0; neigh < 8; neigh++) {
1662                 ent = pbase_tree_cache[my_ix];
1663                 if (ent && oideq(&ent->oid, oid)) {
1664                         ent->ref++;
1665                         return ent;
1666                 }
1667                 else if (((available_ix < 0) && (!ent || !ent->ref)) ||
1668                          ((0 <= available_ix) &&
1669                           (!ent && pbase_tree_cache[available_ix])))
1670                         available_ix = my_ix;
1671                 if (!ent)
1672                         break;
1673                 my_ix = pbase_tree_cache_ix_incr(my_ix);
1674         }
1675
1676         /* Did not find one.  Either we got a bogus request or
1677          * we need to read and perhaps cache.
1678          */
1679         data = repo_read_object_file(the_repository, oid, &type, &size);
1680         if (!data)
1681                 return NULL;
1682         if (type != OBJ_TREE) {
1683                 free(data);
1684                 return NULL;
1685         }
1686
1687         /* We need to either cache or return a throwaway copy */
1688
1689         if (available_ix < 0)
1690                 ent = NULL;
1691         else {
1692                 ent = pbase_tree_cache[available_ix];
1693                 my_ix = available_ix;
1694         }
1695
1696         if (!ent) {
1697                 nent = xmalloc(sizeof(*nent));
1698                 nent->temporary = (available_ix < 0);
1699         }
1700         else {
1701                 /* evict and reuse */
1702                 free(ent->tree_data);
1703                 nent = ent;
1704         }
1705         oidcpy(&nent->oid, oid);
1706         nent->tree_data = data;
1707         nent->tree_size = size;
1708         nent->ref = 1;
1709         if (!nent->temporary)
1710                 pbase_tree_cache[my_ix] = nent;
1711         return nent;
1712 }
1713
1714 static void pbase_tree_put(struct pbase_tree_cache *cache)
1715 {
1716         if (!cache->temporary) {
1717                 cache->ref--;
1718                 return;
1719         }
1720         free(cache->tree_data);
1721         free(cache);
1722 }
1723
/*
 * Length of the leading part of "name" that precedes the first '/' or
 * newline, or of the whole string if it contains neither.
 */
static size_t name_cmp_len(const char *name)
{
        size_t len = 0;

        while (name[len] && name[len] != '\n' && name[len] != '/')
                len++;
        return len;
}
1728
1729 static void add_pbase_object(struct tree_desc *tree,
1730                              const char *name,
1731                              size_t cmplen,
1732                              const char *fullname)
1733 {
1734         struct name_entry entry;
1735         int cmp;
1736
1737         while (tree_entry(tree,&entry)) {
1738                 if (S_ISGITLINK(entry.mode))
1739                         continue;
1740                 cmp = tree_entry_len(&entry) != cmplen ? 1 :
1741                       memcmp(name, entry.path, cmplen);
1742                 if (cmp > 0)
1743                         continue;
1744                 if (cmp < 0)
1745                         return;
1746                 if (name[cmplen] != '/') {
1747                         add_object_entry(&entry.oid,
1748                                          object_type(entry.mode),
1749                                          fullname, 1);
1750                         return;
1751                 }
1752                 if (S_ISDIR(entry.mode)) {
1753                         struct tree_desc sub;
1754                         struct pbase_tree_cache *tree;
1755                         const char *down = name+cmplen+1;
1756                         size_t downlen = name_cmp_len(down);
1757
1758                         tree = pbase_tree_get(&entry.oid);
1759                         if (!tree)
1760                                 return;
1761                         init_tree_desc(&sub, tree->tree_data, tree->tree_size);
1762
1763                         add_pbase_object(&sub, down, downlen, fullname);
1764                         pbase_tree_put(tree);
1765                 }
1766         }
1767 }
1768
static unsigned *done_pbase_paths;
static int done_pbase_paths_num;
static int done_pbase_paths_alloc;

/*
 * Binary search for "hash" in done_pbase_paths[], which is kept sorted
 * from the largest value down to the smallest.
 *
 * Returns the index of the match, or (-insertion_index - 1) when the
 * hash is not present.
 */
static int done_pbase_path_pos(unsigned hash)
{
        int first = 0;
        int past = done_pbase_paths_num;

        while (first < past) {
                int mid = first + (past - first) / 2;
                unsigned probe = done_pbase_paths[mid];

                if (probe == hash)
                        return mid;
                if (probe > hash)
                        first = mid + 1;
                else
                        past = mid;
        }
        return -(first + 1);
}
1787
1788 static int check_pbase_path(unsigned hash)
1789 {
1790         int pos = done_pbase_path_pos(hash);
1791         if (0 <= pos)
1792                 return 1;
1793         pos = -pos - 1;
1794         ALLOC_GROW(done_pbase_paths,
1795                    done_pbase_paths_num + 1,
1796                    done_pbase_paths_alloc);
1797         done_pbase_paths_num++;
1798         if (pos < done_pbase_paths_num)
1799                 MOVE_ARRAY(done_pbase_paths + pos + 1, done_pbase_paths + pos,
1800                            done_pbase_paths_num - pos - 1);
1801         done_pbase_paths[pos] = hash;
1802         return 0;
1803 }
1804
1805 static void add_preferred_base_object(const char *name)
1806 {
1807         struct pbase_tree *it;
1808         size_t cmplen;
1809         unsigned hash = pack_name_hash(name);
1810
1811         if (!num_preferred_base || check_pbase_path(hash))
1812                 return;
1813
1814         cmplen = name_cmp_len(name);
1815         for (it = pbase_tree; it; it = it->next) {
1816                 if (cmplen == 0) {
1817                         add_object_entry(&it->pcache.oid, OBJ_TREE, NULL, 1);
1818                 }
1819                 else {
1820                         struct tree_desc tree;
1821                         init_tree_desc(&tree, it->pcache.tree_data, it->pcache.tree_size);
1822                         add_pbase_object(&tree, name, cmplen, name);
1823                 }
1824         }
1825 }
1826
1827 static void add_preferred_base(struct object_id *oid)
1828 {
1829         struct pbase_tree *it;
1830         void *data;
1831         unsigned long size;
1832         struct object_id tree_oid;
1833
1834         if (window <= num_preferred_base++)
1835                 return;
1836
1837         data = read_object_with_reference(the_repository, oid,
1838                                           OBJ_TREE, &size, &tree_oid);
1839         if (!data)
1840                 return;
1841
1842         for (it = pbase_tree; it; it = it->next) {
1843                 if (oideq(&it->pcache.oid, &tree_oid)) {
1844                         free(data);
1845                         return;
1846                 }
1847         }
1848
1849         CALLOC_ARRAY(it, 1);
1850         it->next = pbase_tree;
1851         pbase_tree = it;
1852
1853         oidcpy(&it->pcache.oid, &tree_oid);
1854         it->pcache.tree_data = data;
1855         it->pcache.tree_size = size;
1856 }
1857
1858 static void cleanup_preferred_base(void)
1859 {
1860         struct pbase_tree *it;
1861         unsigned i;
1862
1863         it = pbase_tree;
1864         pbase_tree = NULL;
1865         while (it) {
1866                 struct pbase_tree *tmp = it;
1867                 it = tmp->next;
1868                 free(tmp->pcache.tree_data);
1869                 free(tmp);
1870         }
1871
1872         for (i = 0; i < ARRAY_SIZE(pbase_tree_cache); i++) {
1873                 if (!pbase_tree_cache[i])
1874                         continue;
1875                 free(pbase_tree_cache[i]->tree_data);
1876                 FREE_AND_NULL(pbase_tree_cache[i]);
1877         }
1878
1879         FREE_AND_NULL(done_pbase_paths);
1880         done_pbase_paths_num = done_pbase_paths_alloc = 0;
1881 }
1882
1883 /*
1884  * Return 1 iff the object specified by "delta" can be sent
1885  * literally as a delta against the base in "base_sha1". If
1886  * so, then *base_out will point to the entry in our packing
1887  * list, or NULL if we must use the external-base list.
1888  *
1889  * Depth value does not matter - find_deltas() will
1890  * never consider reused delta as the base object to
1891  * deltify other objects against, in order to avoid
1892  * circular deltas.
1893  */
1894 static int can_reuse_delta(const struct object_id *base_oid,
1895                            struct object_entry *delta,
1896                            struct object_entry **base_out)
1897 {
1898         struct object_entry *base;
1899
1900         /*
1901          * First see if we're already sending the base (or it's explicitly in
1902          * our "excluded" list).
1903          */
1904         base = packlist_find(&to_pack, base_oid);
1905         if (base) {
1906                 if (!in_same_island(&delta->idx.oid, &base->idx.oid))
1907                         return 0;
1908                 *base_out = base;
1909                 return 1;
1910         }
1911
1912         /*
1913          * Otherwise, reachability bitmaps may tell us if the receiver has it,
1914          * even if it was buried too deep in history to make it into the
1915          * packing list.
1916          */
1917         if (thin && bitmap_has_oid_in_uninteresting(bitmap_git, base_oid)) {
1918                 if (use_delta_islands) {
1919                         if (!in_same_island(&delta->idx.oid, base_oid))
1920                                 return 0;
1921                 }
1922                 *base_out = NULL;
1923                 return 1;
1924         }
1925
1926         return 0;
1927 }
1928
1929 static void prefetch_to_pack(uint32_t object_index_start) {
1930         struct oid_array to_fetch = OID_ARRAY_INIT;
1931         uint32_t i;
1932
1933         for (i = object_index_start; i < to_pack.nr_objects; i++) {
1934                 struct object_entry *entry = to_pack.objects + i;
1935
1936                 if (!oid_object_info_extended(the_repository,
1937                                               &entry->idx.oid,
1938                                               NULL,
1939                                               OBJECT_INFO_FOR_PREFETCH))
1940                         continue;
1941                 oid_array_append(&to_fetch, &entry->idx.oid);
1942         }
1943         promisor_remote_get_direct(the_repository,
1944                                    to_fetch.oid, to_fetch.nr);
1945         oid_array_clear(&to_fetch);
1946 }
1947
/*
 * Work out the type and size of "entry", taking them from its on-disk
 * pack representation when that is possible.
 *
 * For an entry that lives in a pack, the object header is parsed straight
 * from the pack data:
 *
 *   - a non-delta object gets its type, size and header size recorded and
 *     we are done;
 *   - for a REF_DELTA or OFS_DELTA the base is identified, and when
 *     can_reuse_delta() agrees the entry is set up to reuse that delta
 *     (linked to its base in the packing list, or to an external base);
 *   - failing that, if the entry's type is already known, only the
 *     object size is read from the delta header.
 *
 * Whenever one of these steps cannot be completed we jump to "give_up"
 * and fall through to the generic oid_object_info_extended() lookup, which
 * is also what entries that are not in a pack use. If that lookup fails
 * and a promisor remote is configured, prefetch_to_pack() is called once
 * and the lookup is retried; a final failure leaves the type as -1.
 *
 * "object_index" is forwarded unchanged to prefetch_to_pack(), which uses
 * it as the first index of to_pack.objects[] to examine.
 */
1948 static void check_object(struct object_entry *entry, uint32_t object_index)
1949 {
1950         unsigned long canonical_size;
1951         enum object_type type;
1952         struct object_info oi = {.typep = &type, .sizep = &canonical_size};
1953
1954         if (IN_PACK(entry)) {
1955                 struct packed_git *p = IN_PACK(entry);
1956                 struct pack_window *w_curs = NULL;
1957                 int have_base = 0;
1958                 struct object_id base_ref;
1959                 struct object_entry *base_entry;
1960                 unsigned long used, used_0;
1961                 unsigned long avail;
1962                 off_t ofs;
1963                 unsigned char *buf, c;
1964                 enum object_type type; /* shadows the outer "type" within this block */
1965                 unsigned long in_pack_size;
1966
1967                 buf = use_pack(p, &w_curs, entry->in_pack_offset, &avail);
1968
1969                 /*
1970                  * We want in_pack_type even if we do not reuse delta
1971                  * since non-delta representations could still be reused.
1972                  */
1973                 used = unpack_object_header_buffer(buf, avail,
1974                                                    &type,
1975                                                    &in_pack_size);
1976                 if (used == 0)
1977                         goto give_up;
1978
1979                 if (type < 0)
1980                         BUG("invalid type %d", type);
1981                 entry->in_pack_type = type;
1982
1983                 /*
1984                  * Determine if this is a delta and if so whether we can
1985                  * reuse it or not.  Otherwise let's find out as cheaply as
1986                  * possible what the actual type and size for this object is.
1987                  */
1988                 switch (entry->in_pack_type) {
1989                 default:
1990                         /* Not a delta hence we've already got all we need. */
1991                         oe_set_type(entry, entry->in_pack_type);
1992                         SET_SIZE(entry, in_pack_size);
1993                         entry->in_pack_header_size = used;
1994                         if (oe_type(entry) < OBJ_COMMIT || oe_type(entry) > OBJ_BLOB)
1995                                 goto give_up;
1996                         unuse_pack(&w_curs);
1997                         return;
1998                 case OBJ_REF_DELTA:
                         /* The base object id is stored right after the header. */
1999                         if (reuse_delta && !entry->preferred_base) {
2000                                 oidread(&base_ref,
2001                                         use_pack(p, &w_curs,
2002                                                  entry->in_pack_offset + used,
2003                                                  NULL));
2004                                 have_base = 1;
2005                         }
2006                         entry->in_pack_header_size = used + the_hash_algo->rawsz;
2007                         break;
2008                 case OBJ_OFS_DELTA:
                         /*
                          * Decode the variable-length distance to the base
                          * that follows the header; used_0 counts the bytes
                          * it occupies.
                          */
2009                         buf = use_pack(p, &w_curs,
2010                                        entry->in_pack_offset + used, NULL);
2011                         used_0 = 0;
2012                         c = buf[used_0++];
2013                         ofs = c & 127;
2014                         while (c & 128) {
2015                                 ofs += 1;
2016                                 if (!ofs || MSB(ofs, 7)) {
2017                                         error(_("delta base offset overflow in pack for %s"),
2018                                               oid_to_hex(&entry->idx.oid));
2019                                         goto give_up;
2020                                 }
2021                                 c = buf[used_0++];
2022                                 ofs = (ofs << 7) + (c & 127);
2023                         }
                         /* Turn the distance into the base's offset in the pack. */
2024                         ofs = entry->in_pack_offset - ofs;
2025                         if (ofs <= 0 || ofs >= entry->in_pack_offset) {
2026                                 error(_("delta base offset out of bound for %s"),
2027                                       oid_to_hex(&entry->idx.oid));
2028                                 goto give_up;
2029                         }
2030                         if (reuse_delta && !entry->preferred_base) {
2031                                 uint32_t pos;
2032                                 if (offset_to_pack_pos(p, ofs, &pos) < 0)
2033                                         goto give_up;
2034                                 if (!nth_packed_object_id(&base_ref, p,
2035                                                           pack_pos_to_index(p, pos)))
2036                                         have_base = 1;
2037                         }
2038                         entry->in_pack_header_size = used + used_0;
2039                         break;
2040                 }
2041
2042                 if (have_base &&
2043                     can_reuse_delta(&base_ref, entry, &base_entry)) {
2044                         oe_set_type(entry, entry->in_pack_type);
2045                         SET_SIZE(entry, in_pack_size); /* delta size */
2046                         SET_DELTA_SIZE(entry, in_pack_size);
2047
2048                         if (base_entry) {
                                 /* Link the entry into its base's list of children. */
2049                                 SET_DELTA(entry, base_entry);
2050                                 entry->delta_sibling_idx = base_entry->delta_child_idx;
2051                                 SET_DELTA_CHILD(base_entry, entry);
2052                         } else {
2053                                 SET_DELTA_EXT(entry, &base_ref);
2054                         }
2055
2056                         unuse_pack(&w_curs);
2057                         return;
2058                 }
2059
2060                 if (oe_type(entry)) {
2061                         off_t delta_pos;
2062
2063                         /*
2064                          * This must be a delta and we already know what the
2065                          * final object type is.  Let's extract the actual
2066                          * object size from the delta header.
2067                          */
2068                         delta_pos = entry->in_pack_offset + entry->in_pack_header_size;
2069                         canonical_size = get_size_from_delta(p, &w_curs, delta_pos);
2070                         if (canonical_size == 0)
2071                                 goto give_up;
2072                         SET_SIZE(entry, canonical_size);
2073                         unuse_pack(&w_curs);
2074                         return;
2075                 }
2076
2077                 /*
2078                  * No choice but to fall back to the recursive delta walk
2079                  * with oid_object_info() to find about the object type
2080                  * at this point...
2081                  */
2082                 give_up:
2083                 unuse_pack(&w_curs);
2084         }
2085
         /*
          * Generic lookup. The first attempt never fetches; on failure the
          * missing objects are prefetched in bulk (if a promisor remote
          * exists) and the lookup is tried once more.
          */
2086         if (oid_object_info_extended(the_repository, &entry->idx.oid, &oi,
2087                                      OBJECT_INFO_SKIP_FETCH_OBJECT | OBJECT_INFO_LOOKUP_REPLACE) < 0) {
2088                 if (repo_has_promisor_remote(the_repository)) {
2089                         prefetch_to_pack(object_index);
2090                         if (oid_object_info_extended(the_repository, &entry->idx.oid, &oi,
2091                                                      OBJECT_INFO_SKIP_FETCH_OBJECT | OBJECT_INFO_LOOKUP_REPLACE) < 0)
2092                                 type = -1;
2093                 } else {
2094                         type = -1;
2095                 }
2096         }
2097         oe_set_type(entry, type);
2098         if (entry->type_valid) {
2099                 SET_SIZE(entry, canonical_size);
2100         } else {
2101                 /*
2102                  * Bad object type is checked in prepare_pack().  This is
2103                  * to permit a missing preferred base object to be ignored
2104                  * as a preferred base.  Doing so can result in a larger
2105                  * pack file, but the transfer will still take place.
2106                  */
2107         }
2108 }
2109
2110 static int pack_offset_sort(const void *_a, const void *_b)
2111 {
2112         const struct object_entry *a = *(struct object_entry **)_a;
2113         const struct object_entry *b = *(struct object_entry **)_b;
2114         const struct packed_git *a_in_pack = IN_PACK(a);
2115         const struct packed_git *b_in_pack = IN_PACK(b);
2116
2117         /* avoid filesystem trashing with loose objects */
2118         if (!a_in_pack && !b_in_pack)
2119                 return oidcmp(&a->idx.oid, &b->idx.oid);
2120
2121         if (a_in_pack < b_in_pack)
2122                 return -1;
2123         if (a_in_pack > b_in_pack)
2124                 return 1;
2125         return a->in_pack_offset < b->in_pack_offset ? -1 :
2126                         (a->in_pack_offset > b->in_pack_offset);
2127 }
2128
2129 /*
2130  * Drop an on-disk delta we were planning to reuse. Naively, this would
2131  * just involve blanking out the "delta" field, but we have to deal
2132  * with some extra book-keeping:
2133  *
2134  *   1. Removing ourselves from the delta_sibling linked list.
2135  *
2136  *   2. Updating our size/type to the non-delta representation. These were
2137  *      either not recorded initially (size) or overwritten with the delta type
2138  *      (type) when check_object() decided to reuse the delta.
2139  *
2140  *   3. Resetting our delta depth, as we are now a base object.
2141  */
2142 static void drop_reused_delta(struct object_entry *entry)
2143 {
2144         unsigned *idx = &to_pack.objects[entry->delta_idx - 1].delta_child_idx;
2145         struct object_info oi = OBJECT_INFO_INIT;
2146         enum object_type type;
2147         unsigned long size;
2148
2149         while (*idx) {
2150                 struct object_entry *oe = &to_pack.objects[*idx - 1];
2151
2152                 if (oe == entry)
2153                         *idx = oe->delta_sibling_idx;
2154                 else
2155                         idx = &oe->delta_sibling_idx;
2156         }
2157         SET_DELTA(entry, NULL);
2158         entry->depth = 0;
2159
2160         oi.sizep = &size;
2161         oi.typep = &type;
2162         if (packed_object_info(the_repository, IN_PACK(entry), entry->in_pack_offset, &oi) < 0) {
2163                 /*
2164                  * We failed to get the info from this pack for some reason;
2165                  * fall back to oid_object_info, which may find another copy.
2166                  * And if that fails, the error will be recorded in oe_type(entry)
2167                  * and dealt with in prepare_pack().
2168                  */
2169                 oe_set_type(entry,
2170                             oid_object_info(the_repository, &entry->idx.oid, &size));
2171         } else {
2172                 oe_set_type(entry, type);
2173         }
2174         SET_SIZE(entry, size);
2175 }
2176
2177 /*
2178  * Follow the chain of deltas from this entry onward, throwing away any links
2179  * that cause us to hit a cycle (as determined by the DFS state flags in
2180  * the entries).
2181  *
2182  * We also detect too-long reused chains that would violate our --depth
2183  * limit.
      *
      * The work is done in two passes over the chain that starts at "entry":
      * the first walks towards the base, marking entries DFS_ACTIVE, cutting
      * a cycle if one is found and counting the chain length; the second
      * walks the same entries again, assigns each its final depth, drops
      * the deltas that would exceed the limit and marks the entries DFS_DONE.
2184  */
2185 static void break_delta_chains(struct object_entry *entry)
2186 {
2187         /*
2188          * The actual depth of each object we will write is stored as an int,
2189          * as it cannot exceed our int "depth" limit. But before we break
2190          * chains based on that limit, we may potentially go as deep as the
2191          * number of objects, which is elsewhere bounded to a uint32_t.
2192          */
2193         uint32_t total_depth;
2194         struct object_entry *cur, *next;
2195
2196         for (cur = entry, total_depth = 0;
2197              cur;
2198              cur = DELTA(cur), total_depth++) {
2199                 if (cur->dfs_state == DFS_DONE) {
2200                         /*
2201                          * We've already seen this object and know it isn't
2202                          * part of a cycle. We do need to append its depth
2203                          * to our count.
2204                          */
2205                         total_depth += cur->depth;
2206                         break;
2207                 }
2208
2209                 /*
2210                  * We break cycles before looping, so an ACTIVE state (or any
2211                  * other cruft which made its way into the state variable)
2212                  * is a bug.
2213                  */
2214                 if (cur->dfs_state != DFS_NONE)
2215                         BUG("confusing delta dfs state in first pass: %d",
2216                             cur->dfs_state);
2217
2218                 /*
2219                  * Now we know this is the first time we've seen the object. If
2220                  * it's not a delta, we're done traversing, but we'll mark it
2221                  * done to save time on future traversals.
2222                  */
2223                 if (!DELTA(cur)) {
2224                         cur->dfs_state = DFS_DONE;
2225                         break;
2226                 }
2227
2228                 /*
2229                  * Mark ourselves as active and see if the next step causes
2230                  * us to cycle to another active object. It's important to do
2231                  * this _before_ we loop, because it impacts where we make the
2232                  * cut, and thus how our total_depth counter works.
2233                  * E.g., We may see a partial loop like:
2234                  *
2235                  *   A -> B -> C -> D -> B
2236                  *
2237                  * Cutting B->C breaks the cycle. But now the depth of A is
2238                  * only 1, and our total_depth counter is at 3. The size of the
2239                  * error is always one less than the size of the cycle we
2240                  * broke. Commits C and D were "lost" from A's chain.
2241                  *
2242                  * If we instead cut D->B, then the depth of A is correct at 3.
2243                  * We keep all commits in the chain that we examined.
2244                  */
2245                 cur->dfs_state = DFS_ACTIVE;
2246                 if (DELTA(cur)->dfs_state == DFS_ACTIVE) {
2247                         drop_reused_delta(cur);
2248                         cur->dfs_state = DFS_DONE;
2249                         break;
2250                 }
2251         }
2252
2253         /*
2254          * And now that we've gone all the way to the bottom of the chain, we
2255          * need to clear the active flags and set the depth fields as
2256          * appropriate. Unlike the loop above, which can quit when it drops a
2257          * delta, we need to keep going to look for more depth cuts. So we need
2258          * an extra "next" pointer to keep going after we reset cur->delta.
2259          */
2260         for (cur = entry; cur; cur = next) {
2261                 next = DELTA(cur);
2262
2263                 /*
2264                  * We should have a chain of zero or more ACTIVE states down to
2265                  * a final DONE. We can quit after the DONE, because either it
2266                  * has no bases, or we've already handled them in a previous
2267                  * call.
2268                  */
2269                 if (cur->dfs_state == DFS_DONE)
2270                         break;
2271                 else if (cur->dfs_state != DFS_ACTIVE)
2272                         BUG("confusing delta dfs state in second pass: %d",
2273                             cur->dfs_state);
2274
2275                 /*
2276                  * If the total_depth is more than depth, then we need to snip
2277                  * the chain into two or more smaller chains that don't exceed
2278                  * the maximum depth. Most of the resulting chains will contain
2279                  * (depth + 1) entries (i.e., depth deltas plus one base), and
2280                  * the last chain (i.e., the one containing entry) will contain
2281                  * whatever entries are left over, namely
2282                  * (total_depth % (depth + 1)) of them.
2283                  *
2284                  * Since we are iterating towards decreasing depth, we need to
2285                  * decrement total_depth as we go, and we need to write to the
2286                  * entry what its final depth will be after all of the
2287                  * snipping. Since we're snipping into chains of length (depth
2288                  * + 1) entries, the final depth of an entry will be its
2289                  * original depth modulo (depth + 1). Any time we encounter an
2290                  * entry whose final depth is supposed to be zero, we snip it
2291                  * from its delta base, thereby making it so.
2292                  */
2293                 cur->depth = (total_depth--) % (depth + 1);
2294                 if (!cur->depth)
2295                         drop_reused_delta(cur);
2296
2297                 cur->dfs_state = DFS_DONE;
2298         }
2299 }
2300
2301 static void get_object_details(void)
2302 {
2303         uint32_t i;
2304         struct object_entry **sorted_by_offset;
2305
2306         if (progress)
2307                 progress_state = start_progress(_("Counting objects"),
2308                                                 to_pack.nr_objects);
2309
2310         CALLOC_ARRAY(sorted_by_offset, to_pack.nr_objects);
2311         for (i = 0; i < to_pack.nr_objects; i++)
2312                 sorted_by_offset[i] = to_pack.objects + i;
2313         QSORT(sorted_by_offset, to_pack.nr_objects, pack_offset_sort);
2314
2315         for (i = 0; i < to_pack.nr_objects; i++) {
2316                 struct object_entry *entry = sorted_by_offset[i];
2317                 check_object(entry, i);
2318                 if (entry->type_valid &&
2319                     oe_size_greater_than(&to_pack, entry, big_file_threshold))
2320                         entry->no_try_delta = 1;
2321                 display_progress(progress_state, i + 1);
2322         }
2323         stop_progress(&progress_state);
2324
2325         /*
2326          * This must happen in a second pass, since we rely on the delta
2327          * information for the whole list being completed.
2328          */
2329         for (i = 0; i < to_pack.nr_objects; i++)
2330                 break_delta_chains(&to_pack.objects[i]);
2331
2332         free(sorted_by_offset);
2333 }
2334
2335 /*
2336  * We search for deltas in a list sorted by type, by filename hash, and then
2337  * by size, so that we see progressively smaller and smaller files.
2338  * That's because we prefer deltas to be from the bigger file
2339  * to the smaller -- deletes are potentially cheaper, but perhaps
2340  * more importantly, the bigger file is likely the more recent
2341  * one.  The deepest deltas are therefore the oldest objects which are
2342  * less susceptible to be accessed often.
2343  */
2344 static int type_size_sort(const void *_a, const void *_b)
2345 {
2346         const struct object_entry *a = *(struct object_entry **)_a;
2347         const struct object_entry *b = *(struct object_entry **)_b;
2348         const enum object_type a_type = oe_type(a);
2349         const enum object_type b_type = oe_type(b);
2350         const unsigned long a_size = SIZE(a);
2351         const unsigned long b_size = SIZE(b);
2352
2353         if (a_type > b_type)
2354                 return -1;
2355         if (a_type < b_type)
2356                 return 1;
2357         if (a->hash > b->hash)
2358                 return -1;
2359         if (a->hash < b->hash)
2360                 return 1;
2361         if (a->preferred_base > b->preferred_base)
2362                 return -1;
2363         if (a->preferred_base < b->preferred_base)
2364                 return 1;
2365         if (use_delta_islands) {
2366                 const int island_cmp = island_delta_cmp(&a->idx.oid, &b->idx.oid);
2367                 if (island_cmp)
2368                         return island_cmp;
2369         }
2370         if (a_size > b_size)
2371                 return -1;
2372         if (a_size < b_size)
2373                 return 1;
2374         return a < b ? -1 : (a > b);  /* newest first */
2375 }
2376
/* One slot of the sliding window used by find_deltas(). */
struct unpacked {
        /* Object occupying the slot; NULL while the slot is unused. */
        struct object_entry *entry;
        /* Object contents, read on demand by try_delta(). */
        void *data;
        /* Delta index over `data`, built on demand by try_delta(). */
        struct delta_index *index;
        /* Delta depth of `entry` as tracked during the search. */
        unsigned int depth;
};
2383
2384 static int delta_cacheable(unsigned long src_size, unsigned long trg_size,
2385                            unsigned long delta_size)
2386 {
2387         if (max_delta_cache_size && delta_cache_size + delta_size > max_delta_cache_size)
2388                 return 0;
2389
2390         if (delta_size < cache_max_small_delta_size)
2391                 return 1;
2392
2393         /* cache delta, if objects are large enough compared to delta size */
2394         if ((src_size >> 20) + (trg_size >> 21) > (delta_size >> 10))
2395                 return 1;
2396
2397         return 0;
2398 }
2399
/* Serializes updates to delta_cache_size. */
static pthread_mutex_t cache_mutex;
#define cache_lock()            pthread_mutex_lock(&cache_mutex)
#define cache_unlock()          pthread_mutex_unlock(&cache_mutex)

/*
 * Serializes the partitioning of the object list among the workers
 * (i.e. the list bookkeeping in struct thread_params) as well as
 * updates to progress_state.
 */
static pthread_mutex_t progress_mutex;
#define progress_lock()         pthread_mutex_lock(&progress_mutex)
#define progress_unlock()       pthread_mutex_unlock(&progress_mutex)
2412
2413 /*
2414  * Access to struct object_entry is unprotected since each thread owns
2415  * a portion of the main object list. Just don't access object entries
2416  * ahead in the list because they can be stolen and would need
2417  * progress_mutex for protection.
2418  */
2419
2420 static inline int oe_size_less_than(struct packing_data *pack,
2421                                     const struct object_entry *lhs,
2422                                     unsigned long rhs)
2423 {
2424         if (lhs->size_valid)
2425                 return lhs->size_ < rhs;
2426         if (rhs < pack->oe_size_limit) /* rhs < 2^x <= lhs ? */
2427                 return 0;
2428         return oe_get_size_slow(pack, lhs) < rhs;
2429 }
2430
2431 static inline void oe_set_tree_depth(struct packing_data *pack,
2432                                      struct object_entry *e,
2433                                      unsigned int tree_depth)
2434 {
2435         if (!pack->tree_depth)
2436                 CALLOC_ARRAY(pack->tree_depth, pack->nr_alloc);
2437         pack->tree_depth[e - pack->objects] = tree_depth;
2438 }
2439
2440 /*
2441  * Return the size of the object without doing any delta
2442  * reconstruction (so non-deltas are true object sizes, but deltas
2443  * return the size of the delta data).
2444  */
2445 unsigned long oe_get_size_slow(struct packing_data *pack,
2446                                const struct object_entry *e)
2447 {
2448         struct packed_git *p;
2449         struct pack_window *w_curs;
2450         unsigned char *buf;
2451         enum object_type type;
2452         unsigned long used, avail, size;
2453
2454         if (e->type_ != OBJ_OFS_DELTA && e->type_ != OBJ_REF_DELTA) {
2455                 packing_data_lock(&to_pack);
2456                 if (oid_object_info(the_repository, &e->idx.oid, &size) < 0)
2457                         die(_("unable to get size of %s"),
2458                             oid_to_hex(&e->idx.oid));
2459                 packing_data_unlock(&to_pack);
2460                 return size;
2461         }
2462
2463         p = oe_in_pack(pack, e);
2464         if (!p)
2465                 BUG("when e->type is a delta, it must belong to a pack");
2466
2467         packing_data_lock(&to_pack);
2468         w_curs = NULL;
2469         buf = use_pack(p, &w_curs, e->in_pack_offset, &avail);
2470         used = unpack_object_header_buffer(buf, avail, &type, &size);
2471         if (used == 0)
2472                 die(_("unable to parse object header of %s"),
2473                     oid_to_hex(&e->idx.oid));
2474
2475         unuse_pack(&w_curs);
2476         packing_data_unlock(&to_pack);
2477         return size;
2478 }
2479
2480 static int try_delta(struct unpacked *trg, struct unpacked *src,
2481                      unsigned max_depth, unsigned long *mem_usage)
2482 {
2483         struct object_entry *trg_entry = trg->entry;
2484         struct object_entry *src_entry = src->entry;
2485         unsigned long trg_size, src_size, delta_size, sizediff, max_size, sz;
2486         unsigned ref_depth;
2487         enum object_type type;
2488         void *delta_buf;
2489
2490         /* Don't bother doing diffs between different types */
2491         if (oe_type(trg_entry) != oe_type(src_entry))
2492                 return -1;
2493
2494         /*
2495          * We do not bother to try a delta that we discarded on an
2496          * earlier try, but only when reusing delta data.  Note that
2497          * src_entry that is marked as the preferred_base should always
2498          * be considered, as even if we produce a suboptimal delta against
2499          * it, we will still save the transfer cost, as we already know
2500          * the other side has it and we won't send src_entry at all.
2501          */
2502         if (reuse_delta && IN_PACK(trg_entry) &&
2503             IN_PACK(trg_entry) == IN_PACK(src_entry) &&
2504             !src_entry->preferred_base &&
2505             trg_entry->in_pack_type != OBJ_REF_DELTA &&
2506             trg_entry->in_pack_type != OBJ_OFS_DELTA)
2507                 return 0;
2508
2509         /* Let's not bust the allowed depth. */
2510         if (src->depth >= max_depth)
2511                 return 0;
2512
2513         /* Now some size filtering heuristics. */
2514         trg_size = SIZE(trg_entry);
2515         if (!DELTA(trg_entry)) {
2516                 max_size = trg_size/2 - the_hash_algo->rawsz;
2517                 ref_depth = 1;
2518         } else {
2519                 max_size = DELTA_SIZE(trg_entry);
2520                 ref_depth = trg->depth;
2521         }
2522         max_size = (uint64_t)max_size * (max_depth - src->depth) /
2523                                                 (max_depth - ref_depth + 1);
2524         if (max_size == 0)
2525                 return 0;
2526         src_size = SIZE(src_entry);
2527         sizediff = src_size < trg_size ? trg_size - src_size : 0;
2528         if (sizediff >= max_size)
2529                 return 0;
2530         if (trg_size < src_size / 32)
2531                 return 0;
2532
2533         if (!in_same_island(&trg->entry->idx.oid, &src->entry->idx.oid))
2534                 return 0;
2535
2536         /* Load data if not already done */
2537         if (!trg->data) {
2538                 packing_data_lock(&to_pack);
2539                 trg->data = repo_read_object_file(the_repository,
2540                                                   &trg_entry->idx.oid, &type,
2541                                                   &sz);
2542                 packing_data_unlock(&to_pack);
2543                 if (!trg->data)
2544                         die(_("object %s cannot be read"),
2545                             oid_to_hex(&trg_entry->idx.oid));
2546                 if (sz != trg_size)
2547                         die(_("object %s inconsistent object length (%"PRIuMAX" vs %"PRIuMAX")"),
2548                             oid_to_hex(&trg_entry->idx.oid), (uintmax_t)sz,
2549                             (uintmax_t)trg_size);
2550                 *mem_usage += sz;
2551         }
2552         if (!src->data) {
2553                 packing_data_lock(&to_pack);
2554                 src->data = repo_read_object_file(the_repository,
2555                                                   &src_entry->idx.oid, &type,
2556                                                   &sz);
2557                 packing_data_unlock(&to_pack);
2558                 if (!src->data) {
2559                         if (src_entry->preferred_base) {
2560                                 static int warned = 0;
2561                                 if (!warned++)
2562                                         warning(_("object %s cannot be read"),
2563                                                 oid_to_hex(&src_entry->idx.oid));
2564                                 /*
2565                                  * Those objects are not included in the
2566                                  * resulting pack.  Be resilient and ignore
2567                                  * them if they can't be read, in case the
2568                                  * pack could be created nevertheless.
2569                                  */
2570                                 return 0;
2571                         }
2572                         die(_("object %s cannot be read"),
2573                             oid_to_hex(&src_entry->idx.oid));
2574                 }
2575                 if (sz != src_size)
2576                         die(_("object %s inconsistent object length (%"PRIuMAX" vs %"PRIuMAX")"),
2577                             oid_to_hex(&src_entry->idx.oid), (uintmax_t)sz,
2578                             (uintmax_t)src_size);
2579                 *mem_usage += sz;
2580         }
2581         if (!src->index) {
2582                 src->index = create_delta_index(src->data, src_size);
2583                 if (!src->index) {
2584                         static int warned = 0;
2585                         if (!warned++)
2586                                 warning(_("suboptimal pack - out of memory"));
2587                         return 0;
2588                 }
2589                 *mem_usage += sizeof_delta_index(src->index);
2590         }
2591
2592         delta_buf = create_delta(src->index, trg->data, trg_size, &delta_size, max_size);
2593         if (!delta_buf)
2594                 return 0;
2595
2596         if (DELTA(trg_entry)) {
2597                 /* Prefer only shallower same-sized deltas. */
2598                 if (delta_size == DELTA_SIZE(trg_entry) &&
2599                     src->depth + 1 >= trg->depth) {
2600                         free(delta_buf);
2601                         return 0;
2602                 }
2603         }
2604
2605         /*
2606          * Handle memory allocation outside of the cache
2607          * accounting lock.  Compiler will optimize the strangeness
2608          * away when NO_PTHREADS is defined.
2609          */
2610         free(trg_entry->delta_data);
2611         cache_lock();
2612         if (trg_entry->delta_data) {
2613                 delta_cache_size -= DELTA_SIZE(trg_entry);
2614                 trg_entry->delta_data = NULL;
2615         }
2616         if (delta_cacheable(src_size, trg_size, delta_size)) {
2617                 delta_cache_size += delta_size;
2618                 cache_unlock();
2619                 trg_entry->delta_data = xrealloc(delta_buf, delta_size);
2620         } else {
2621                 cache_unlock();
2622                 free(delta_buf);
2623         }
2624
2625         SET_DELTA(trg_entry, src_entry);
2626         SET_DELTA_SIZE(trg_entry, delta_size);
2627         trg->depth = src->depth + 1;
2628
2629         return 1;
2630 }
2631
/*
 * Walk the delta children of `me` recursively and return the depth of
 * the deepest descendant, where `me` itself sits at depth `n`.  Returns
 * `n` when `me` has no children.
 */
static unsigned int check_delta_limit(struct object_entry *me, unsigned int n)
{
        unsigned int deepest = n;
        struct object_entry *kid;

        for (kid = DELTA_CHILD(me); kid; kid = DELTA_SIBLING(kid)) {
                const unsigned int below = check_delta_limit(kid, n + 1);

                if (below > deepest)
                        deepest = below;
        }

        return deepest;
}
2644
2645 static unsigned long free_unpacked(struct unpacked *n)
2646 {
2647         unsigned long freed_mem = sizeof_delta_index(n->index);
2648         free_delta_index(n->index);
2649         n->index = NULL;
2650         if (n->data) {
2651                 freed_mem += SIZE(n->entry);
2652                 FREE_AND_NULL(n->data);
2653         }
2654         n->entry = NULL;
2655         n->depth = 0;
2656         return freed_mem;
2657 }
2658
2659 static void find_deltas(struct object_entry **list, unsigned *list_size,
2660                         int window, int depth, unsigned *processed)
2661 {
2662         uint32_t i, idx = 0, count = 0;
2663         struct unpacked *array;
2664         unsigned long mem_usage = 0;
2665
2666         CALLOC_ARRAY(array, window);
2667
2668         for (;;) {
2669                 struct object_entry *entry;
2670                 struct unpacked *n = array + idx;
2671                 int j, max_depth, best_base = -1;
2672
2673                 progress_lock();
2674                 if (!*list_size) {
2675                         progress_unlock();
2676                         break;
2677                 }
2678                 entry = *list++;
2679                 (*list_size)--;
2680                 if (!entry->preferred_base) {
2681                         (*processed)++;
2682                         display_progress(progress_state, *processed);
2683                 }
2684                 progress_unlock();
2685
2686                 mem_usage -= free_unpacked(n);
2687                 n->entry = entry;
2688
2689                 while (window_memory_limit &&
2690                        mem_usage > window_memory_limit &&
2691                        count > 1) {
2692                         const uint32_t tail = (idx + window - count) % window;
2693                         mem_usage -= free_unpacked(array + tail);
2694                         count--;
2695                 }
2696
2697                 /* We do not compute delta to *create* objects we are not
2698                  * going to pack.
2699                  */
2700                 if (entry->preferred_base)
2701                         goto next;
2702
2703                 /*
2704                  * If the current object is at pack edge, take the depth the
2705                  * objects that depend on the current object into account
2706                  * otherwise they would become too deep.
2707                  */
2708                 max_depth = depth;
2709                 if (DELTA_CHILD(entry)) {
2710                         max_depth -= check_delta_limit(entry, 0);
2711                         if (max_depth <= 0)
2712                                 goto next;
2713                 }
2714
2715                 j = window;
2716                 while (--j > 0) {
2717                         int ret;
2718                         uint32_t other_idx = idx + j;
2719                         struct unpacked *m;
2720                         if (other_idx >= window)
2721                                 other_idx -= window;
2722                         m = array + other_idx;
2723                         if (!m->entry)
2724                                 break;
2725                         ret = try_delta(n, m, max_depth, &mem_usage);
2726                         if (ret < 0)
2727                                 break;
2728                         else if (ret > 0)
2729                                 best_base = other_idx;
2730                 }
2731
2732                 /*
2733                  * If we decided to cache the delta data, then it is best
2734                  * to compress it right away.  First because we have to do
2735                  * it anyway, and doing it here while we're threaded will
2736                  * save a lot of time in the non threaded write phase,
2737                  * as well as allow for caching more deltas within
2738                  * the same cache size limit.
2739                  * ...
2740                  * But only if not writing to stdout, since in that case
2741                  * the network is most likely throttling writes anyway,
2742                  * and therefore it is best to go to the write phase ASAP
2743                  * instead, as we can afford spending more time compressing
2744                  * between writes at that moment.
2745                  */
2746                 if (entry->delta_data && !pack_to_stdout) {
2747                         unsigned long size;
2748
2749                         size = do_compress(&entry->delta_data, DELTA_SIZE(entry));
2750                         if (size < (1U << OE_Z_DELTA_BITS)) {
2751                                 entry->z_delta_size = size;
2752                                 cache_lock();
2753                                 delta_cache_size -= DELTA_SIZE(entry);
2754                                 delta_cache_size += entry->z_delta_size;
2755                                 cache_unlock();
2756                         } else {
2757                                 FREE_AND_NULL(entry->delta_data);
2758                                 entry->z_delta_size = 0;
2759                         }
2760                 }
2761
2762                 /* if we made n a delta, and if n is already at max
2763                  * depth, leaving it in the window is pointless.  we
2764                  * should evict it first.
2765                  */
2766                 if (DELTA(entry) && max_depth <= n->depth)
2767                         continue;
2768
2769                 /*
2770                  * Move the best delta base up in the window, after the
2771                  * currently deltified object, to keep it longer.  It will
2772                  * be the first base object to be attempted next.
2773                  */
2774                 if (DELTA(entry)) {
2775                         struct unpacked swap = array[best_base];
2776                         int dist = (window + idx - best_base) % window;
2777                         int dst = best_base;
2778                         while (dist--) {
2779                                 int src = (dst + 1) % window;
2780                                 array[dst] = array[src];
2781                                 dst = src;
2782                         }
2783                         array[dst] = swap;
2784                 }
2785
2786                 next:
2787                 idx++;
2788                 if (count + 1 < window)
2789                         count++;
2790                 if (idx >= window)
2791                         idx = 0;
2792         }
2793
2794         for (i = 0; i < window; ++i) {
2795                 free_delta_index(array[i].index);
2796                 free(array[i].data);
2797         }
2798         free(array);
2799 }
2800
2801 /*
2802  * The main object list is split into smaller lists, each is handed to
2803  * one worker.
2804  *
2805  * The main thread waits on the condition that (at least) one of the workers
2806  * has stopped working (which is indicated in the .working member of
2807  * struct thread_params).
2808  *
2809  * When a work thread has completed its work, it sets .working to 0 and
2810  * signals the main thread and waits on the condition that .data_ready
2811  * becomes 1.
2812  *
2813  * The main thread steals half of the work from the worker that has
2814  * most work left to hand it to the idle worker.
2815  */
2816
/* Per-worker state of the threaded delta search. */
struct thread_params {
        pthread_t thread;
        /* Start of the list segment currently assigned to this worker. */
        struct object_entry **list;
        /* Length of the assigned segment, counted from `list`. */
        unsigned int list_size;
        /* Entries of the segment not consumed by find_deltas() yet. */
        unsigned int remaining;
        /* Window size and maximum depth handed to find_deltas(). */
        int window;
        int depth;
        /* Cleared by the worker once it has run out of work. */
        int working;
        /* Set by the main thread once new work has been assigned. */
        int data_ready;
        /* Protect and signal `data_ready`. */
        pthread_mutex_t mutex;
        pthread_cond_t cond;
        /* Counter of processed objects, passed on to find_deltas(). */
        unsigned int *processed;
};
2830
2831 static pthread_cond_t progress_cond;
2832
2833 /*
2834  * Mutex and conditional variable can't be statically-initialized on Windows.
2835  */
2836 static void init_threaded_search(void)
2837 {
2838         pthread_mutex_init(&cache_mutex, NULL);
2839         pthread_mutex_init(&progress_mutex, NULL);
2840         pthread_cond_init(&progress_cond, NULL);
2841 }
2842
2843 static void cleanup_threaded_search(void)
2844 {
2845         pthread_cond_destroy(&progress_cond);
2846         pthread_mutex_destroy(&cache_mutex);
2847         pthread_mutex_destroy(&progress_mutex);
2848 }
2849
2850 static void *threaded_find_deltas(void *arg)
2851 {
2852         struct thread_params *me = arg;
2853
2854         progress_lock();
2855         while (me->remaining) {
2856                 progress_unlock();
2857
2858                 find_deltas(me->list, &me->remaining,
2859                             me->window, me->depth, me->processed);
2860
2861                 progress_lock();
2862                 me->working = 0;
2863                 pthread_cond_signal(&progress_cond);
2864                 progress_unlock();
2865
2866                 /*
2867                  * We must not set ->data_ready before we wait on the
2868                  * condition because the main thread may have set it to 1
2869                  * before we get here. In order to be sure that new
2870                  * work is available if we see 1 in ->data_ready, it
2871                  * was initialized to 0 before this thread was spawned
2872                  * and we reset it to 0 right away.
2873                  */
2874                 pthread_mutex_lock(&me->mutex);
2875                 while (!me->data_ready)
2876                         pthread_cond_wait(&me->cond, &me->mutex);
2877                 me->data_ready = 0;
2878                 pthread_mutex_unlock(&me->mutex);
2879
2880                 progress_lock();
2881         }
2882         progress_unlock();
2883         /* leave ->working 1 so that this doesn't get more work assigned */
2884         return NULL;
2885 }
2886
2887 static void ll_find_deltas(struct object_entry **list, unsigned list_size,
2888                            int window, int depth, unsigned *processed)
2889 {
2890         struct thread_params *p;
2891         int i, ret, active_threads = 0;
2892
2893         init_threaded_search();
2894
2895         if (delta_search_threads <= 1) {
2896                 find_deltas(list, &list_size, window, depth, processed);
2897                 cleanup_threaded_search();
2898                 return;
2899         }
2900         if (progress > pack_to_stdout)
2901                 fprintf_ln(stderr, _("Delta compression using up to %d threads"),
2902                            delta_search_threads);
2903         CALLOC_ARRAY(p, delta_search_threads);
2904
2905         /* Partition the work amongst work threads. */
2906         for (i = 0; i < delta_search_threads; i++) {
2907                 unsigned sub_size = list_size / (delta_search_threads - i);
2908
2909                 /* don't use too small segments or no deltas will be found */
2910                 if (sub_size < 2*window && i+1 < delta_search_threads)
2911                         sub_size = 0;
2912
2913                 p[i].window = window;
2914                 p[i].depth = depth;
2915                 p[i].processed = processed;
2916                 p[i].working = 1;
2917                 p[i].data_ready = 0;
2918
2919                 /* try to split chunks on "path" boundaries */
2920                 while (sub_size && sub_size < list_size &&
2921                        list[sub_size]->hash &&
2922                        list[sub_size]->hash == list[sub_size-1]->hash)
2923                         sub_size++;
2924
2925                 p[i].list = list;
2926                 p[i].list_size = sub_size;
2927                 p[i].remaining = sub_size;
2928
2929                 list += sub_size;
2930                 list_size -= sub_size;
2931         }
2932
2933         /* Start work threads. */
2934         for (i = 0; i < delta_search_threads; i++) {
2935                 if (!p[i].list_size)
2936                         continue;
2937                 pthread_mutex_init(&p[i].mutex, NULL);
2938                 pthread_cond_init(&p[i].cond, NULL);
2939                 ret = pthread_create(&p[i].thread, NULL,
2940                                      threaded_find_deltas, &p[i]);
2941                 if (ret)
2942                         die(_("unable to create thread: %s"), strerror(ret));
2943                 active_threads++;
2944         }
2945
2946         /*
2947          * Now let's wait for work completion.  Each time a thread is done
2948          * with its work, we steal half of the remaining work from the
2949          * thread with the largest number of unprocessed objects and give
2950          * it to that newly idle thread.  This ensure good load balancing
2951          * until the remaining object list segments are simply too short
2952          * to be worth splitting anymore.
2953          */
2954         while (active_threads) {
2955                 struct thread_params *target = NULL;
2956                 struct thread_params *victim = NULL;
2957                 unsigned sub_size = 0;
2958
2959                 progress_lock();
2960                 for (;;) {
2961                         for (i = 0; !target && i < delta_search_threads; i++)
2962                                 if (!p[i].working)
2963                                         target = &p[i];
2964                         if (target)
2965                                 break;
2966                         pthread_cond_wait(&progress_cond, &progress_mutex);
2967                 }
2968
2969                 for (i = 0; i < delta_search_threads; i++)
2970                         if (p[i].remaining > 2*window &&
2971                             (!victim || victim->remaining < p[i].remaining))
2972                                 victim = &p[i];
2973                 if (victim) {
2974                         sub_size = victim->remaining / 2;
2975                         list = victim->list + victim->list_size - sub_size;
2976                         while (sub_size && list[0]->hash &&
2977                                list[0]->hash == list[-1]->hash) {
2978                                 list++;
2979                                 sub_size--;
2980                         }
2981                         if (!sub_size) {
2982                                 /*
2983                                  * It is possible for some "paths" to have
2984                                  * so many objects that no hash boundary
2985                                  * might be found.  Let's just steal the
2986                                  * exact half in that case.
2987                                  */
2988                                 sub_size = victim->remaining / 2;
2989                                 list -= sub_size;
2990                         }
2991                         target->list = list;
2992                         victim->list_size -= sub_size;
2993                         victim->remaining -= sub_size;
2994                 }
2995                 target->list_size = sub_size;
2996                 target->remaining = sub_size;
2997                 target->working = 1;
2998                 progress_unlock();
2999
3000                 pthread_mutex_lock(&target->mutex);
3001                 target->data_ready = 1;
3002                 pthread_cond_signal(&target->cond);
3003                 pthread_mutex_unlock(&target->mutex);
3004
3005                 if (!sub_size) {
3006                         pthread_join(target->thread, NULL);
3007                         pthread_cond_destroy(&target->cond);
3008                         pthread_mutex_destroy(&target->mutex);
3009                         active_threads--;
3010                 }
3011         }
3012         cleanup_threaded_search();
3013         free(p);
3014 }
3015
3016 static int obj_is_packed(const struct object_id *oid)
3017 {
3018         return packlist_find(&to_pack, oid) ||
3019                 (reuse_packfile_bitmap &&
3020                  bitmap_walk_contains(bitmap_git, reuse_packfile_bitmap, oid));
3021 }
3022
3023 static void add_tag_chain(const struct object_id *oid)
3024 {
3025         struct tag *tag;
3026
3027         /*
3028          * We catch duplicates already in add_object_entry(), but we'd
3029          * prefer to do this extra check to avoid having to parse the
3030          * tag at all if we already know that it's being packed (e.g., if
3031          * it was included via bitmaps, we would not have parsed it
3032          * previously).
3033          */
3034         if (obj_is_packed(oid))
3035                 return;
3036
3037         tag = lookup_tag(the_repository, oid);
3038         while (1) {
3039                 if (!tag || parse_tag(tag) || !tag->tagged)
3040                         die(_("unable to pack objects reachable from tag %s"),
3041                             oid_to_hex(oid));
3042
3043                 add_object_entry(&tag->object.oid, OBJ_TAG, NULL, 0);
3044
3045                 if (tag->tagged->type != OBJ_TAG)
3046                         return;
3047
3048                 tag = (struct tag *)tag->tagged;
3049         }
3050 }
3051
3052 static int add_ref_tag(const char *tag UNUSED, const struct object_id *oid,
3053                        int flag UNUSED, void *cb_data UNUSED)
3054 {
3055         struct object_id peeled;
3056
3057         if (!peel_iterated_oid(oid, &peeled) && obj_is_packed(&peeled))
3058                 add_tag_chain(oid);
3059         return 0;
3060 }
3061
3062 static void prepare_pack(int window, int depth)
3063 {
3064         struct object_entry **delta_list;
3065         uint32_t i, nr_deltas;
3066         unsigned n;
3067
3068         if (use_delta_islands)
3069                 resolve_tree_islands(the_repository, progress, &to_pack);
3070
3071         get_object_details();
3072
3073         /*
3074          * If we're locally repacking then we need to be doubly careful
3075          * from now on in order to make sure no stealth corruption gets
3076          * propagated to the new pack.  Clients receiving streamed packs
3077          * should validate everything they get anyway so no need to incur
3078          * the additional cost here in that case.
3079          */
3080         if (!pack_to_stdout)
3081                 do_check_packed_object_crc = 1;
3082
3083         if (!to_pack.nr_objects || !window || !depth)
3084                 return;
3085
3086         ALLOC_ARRAY(delta_list, to_pack.nr_objects);
3087         nr_deltas = n = 0;
3088
3089         for (i = 0; i < to_pack.nr_objects; i++) {
3090                 struct object_entry *entry = to_pack.objects + i;
3091
3092                 if (DELTA(entry))
3093                         /* This happens if we decided to reuse existing
3094                          * delta from a pack.  "reuse_delta &&" is implied.
3095                          */
3096                         continue;
3097
3098                 if (!entry->type_valid ||
3099                     oe_size_less_than(&to_pack, entry, 50))
3100                         continue;
3101
3102                 if (entry->no_try_delta)
3103                         continue;
3104
3105                 if (!entry->preferred_base) {
3106                         nr_deltas++;
3107                         if (oe_type(entry) < 0)
3108                                 die(_("unable to get type of object %s"),
3109                                     oid_to_hex(&entry->idx.oid));
3110                 } else {
3111                         if (oe_type(entry) < 0) {
3112                                 /*
3113                                  * This object is not found, but we
3114                                  * don't have to include it anyway.
3115                                  */
3116                                 continue;
3117                         }
3118                 }
3119
3120                 delta_list[n++] = entry;
3121         }
3122
3123         if (nr_deltas && n > 1) {
3124                 unsigned nr_done = 0;
3125
3126                 if (progress)
3127                         progress_state = start_progress(_("Compressing objects"),
3128                                                         nr_deltas);
3129                 QSORT(delta_list, n, type_size_sort);
3130                 ll_find_deltas(delta_list, n, window+1, depth, &nr_done);
3131                 stop_progress(&progress_state);
3132                 if (nr_done != nr_deltas)
3133                         die(_("inconsistency with delta count"));
3134         }
3135         free(delta_list);
3136 }
3137
3138 static int git_pack_config(const char *k, const char *v,
3139                            const struct config_context *ctx, void *cb)
3140 {
3141         if (!strcmp(k, "pack.window")) {
3142                 window = git_config_int(k, v, ctx->kvi);
3143                 return 0;
3144         }
3145         if (!strcmp(k, "pack.windowmemory")) {
3146                 window_memory_limit = git_config_ulong(k, v, ctx->kvi);
3147                 return 0;
3148         }
3149         if (!strcmp(k, "pack.depth")) {
3150                 depth = git_config_int(k, v, ctx->kvi);
3151                 return 0;
3152         }
3153         if (!strcmp(k, "pack.deltacachesize")) {
3154                 max_delta_cache_size = git_config_int(k, v, ctx->kvi);
3155                 return 0;
3156         }
3157         if (!strcmp(k, "pack.deltacachelimit")) {
3158                 cache_max_small_delta_size = git_config_int(k, v, ctx->kvi);
3159                 return 0;
3160         }
3161         if (!strcmp(k, "pack.writebitmaphashcache")) {
3162                 if (git_config_bool(k, v))
3163                         write_bitmap_options |= BITMAP_OPT_HASH_CACHE;
3164                 else
3165                         write_bitmap_options &= ~BITMAP_OPT_HASH_CACHE;
3166         }
3167
3168         if (!strcmp(k, "pack.writebitmaplookuptable")) {
3169                 if (git_config_bool(k, v))
3170                         write_bitmap_options |= BITMAP_OPT_LOOKUP_TABLE;
3171                 else
3172                         write_bitmap_options &= ~BITMAP_OPT_LOOKUP_TABLE;
3173         }
3174
3175         if (!strcmp(k, "pack.usebitmaps")) {
3176                 use_bitmap_index_default = git_config_bool(k, v);
3177                 return 0;
3178         }
3179         if (!strcmp(k, "pack.allowpackreuse")) {
3180                 allow_pack_reuse = git_config_bool(k, v);
3181                 return 0;
3182         }
3183         if (!strcmp(k, "pack.threads")) {
3184                 delta_search_threads = git_config_int(k, v, ctx->kvi);
3185                 if (delta_search_threads < 0)
3186                         die(_("invalid number of threads specified (%d)"),
3187                             delta_search_threads);
3188                 if (!HAVE_THREADS && delta_search_threads != 1) {
3189                         warning(_("no threads support, ignoring %s"), k);
3190                         delta_search_threads = 0;
3191                 }
3192                 return 0;
3193         }
3194         if (!strcmp(k, "pack.indexversion")) {
3195                 pack_idx_opts.version = git_config_int(k, v, ctx->kvi);
3196                 if (pack_idx_opts.version > 2)
3197                         die(_("bad pack.indexVersion=%"PRIu32),
3198                             pack_idx_opts.version);
3199                 return 0;
3200         }
3201         if (!strcmp(k, "pack.writereverseindex")) {
3202                 if (git_config_bool(k, v))
3203                         pack_idx_opts.flags |= WRITE_REV;
3204                 else
3205                         pack_idx_opts.flags &= ~WRITE_REV;
3206                 return 0;
3207         }
3208         if (!strcmp(k, "uploadpack.blobpackfileuri")) {
3209                 struct configured_exclusion *ex = xmalloc(sizeof(*ex));
3210                 const char *oid_end, *pack_end;
3211                 /*
3212                  * Stores the pack hash. This is not a true object ID, but is
3213                  * of the same form.
3214                  */
3215                 struct object_id pack_hash;
3216
3217                 if (parse_oid_hex(v, &ex->e.oid, &oid_end) ||
3218                     *oid_end != ' ' ||
3219                     parse_oid_hex(oid_end + 1, &pack_hash, &pack_end) ||
3220                     *pack_end != ' ')
3221                         die(_("value of uploadpack.blobpackfileuri must be "
3222                               "of the form '<object-hash> <pack-hash> <uri>' (got '%s')"), v);
3223                 if (oidmap_get(&configured_exclusions, &ex->e.oid))
3224                         die(_("object already configured in another "
3225                               "uploadpack.blobpackfileuri (got '%s')"), v);
3226                 ex->pack_hash_hex = xcalloc(1, pack_end - oid_end);
3227                 memcpy(ex->pack_hash_hex, oid_end + 1, pack_end - oid_end - 1);
3228                 ex->uri = xstrdup(pack_end + 1);
3229                 oidmap_put(&configured_exclusions, ex);
3230         }
3231         return git_default_config(k, v, ctx, cb);
3232 }
3233
/*
 * Statistics reported through trace2 in --stdin-packs mode: how many
 * objects were found in the included packs, and how many received a
 * name-hash hint from the follow-up traversal.
 */
static int stdin_packs_found_nr, stdin_packs_hints_nr;
3237
3238 static int add_object_entry_from_pack(const struct object_id *oid,
3239                                       struct packed_git *p,
3240                                       uint32_t pos,
3241                                       void *_data)
3242 {
3243         off_t ofs;
3244         enum object_type type = OBJ_NONE;
3245
3246         display_progress(progress_state, ++nr_seen);
3247
3248         if (have_duplicate_entry(oid, 0))
3249                 return 0;
3250
3251         ofs = nth_packed_object_offset(p, pos);
3252         if (!want_object_in_pack(oid, 0, &p, &ofs))
3253                 return 0;
3254
3255         if (p) {
3256                 struct rev_info *revs = _data;
3257                 struct object_info oi = OBJECT_INFO_INIT;
3258
3259                 oi.typep = &type;
3260                 if (packed_object_info(the_repository, p, ofs, &oi) < 0) {
3261                         die(_("could not get type of object %s in pack %s"),
3262                             oid_to_hex(oid), p->pack_name);
3263                 } else if (type == OBJ_COMMIT) {
3264                         /*
3265                          * commits in included packs are used as starting points for the
3266                          * subsequent revision walk
3267                          */
3268                         add_pending_oid(revs, NULL, oid, 0);
3269                 }
3270
3271                 stdin_packs_found_nr++;
3272         }
3273
3274         create_object_entry(oid, type, 0, 0, 0, p, ofs);
3275
3276         return 0;
3277 }
3278
3279 static void show_commit_pack_hint(struct commit *commit UNUSED,
3280                                   void *data UNUSED)
3281 {
3282         /* nothing to do; commits don't have a namehash */
3283 }
3284
3285 static void show_object_pack_hint(struct object *object, const char *name,
3286                                   void *data UNUSED)
3287 {
3288         struct object_entry *oe = packlist_find(&to_pack, &object->oid);
3289         if (!oe)
3290                 return;
3291
3292         /*
3293          * Our 'to_pack' list was constructed by iterating all objects packed in
3294          * included packs, and so doesn't have a non-zero hash field that you
3295          * would typically pick up during a reachability traversal.
3296          *
3297          * Make a best-effort attempt to fill in the ->hash and ->no_try_delta
3298          * here using a now in order to perhaps improve the delta selection
3299          * process.
3300          */
3301         oe->hash = pack_name_hash(name);
3302         oe->no_try_delta = name && no_try_delta(name);
3303
3304         stdin_packs_hints_nr++;
3305 }
3306
3307 static int pack_mtime_cmp(const void *_a, const void *_b)
3308 {
3309         struct packed_git *a = ((const struct string_list_item*)_a)->util;
3310         struct packed_git *b = ((const struct string_list_item*)_b)->util;
3311
3312         /*
3313          * order packs by descending mtime so that objects are laid out
3314          * roughly as newest-to-oldest
3315          */
3316         if (a->mtime < b->mtime)
3317                 return 1;
3318         else if (b->mtime < a->mtime)
3319                 return -1;
3320         else
3321                 return 0;
3322 }
3323
3324 static void read_packs_list_from_stdin(void)
3325 {
3326         struct strbuf buf = STRBUF_INIT;
3327         struct string_list include_packs = STRING_LIST_INIT_DUP;
3328         struct string_list exclude_packs = STRING_LIST_INIT_DUP;
3329         struct string_list_item *item = NULL;
3330
3331         struct packed_git *p;
3332         struct rev_info revs;
3333
3334         repo_init_revisions(the_repository, &revs, NULL);
3335         /*
3336          * Use a revision walk to fill in the namehash of objects in the include
3337          * packs. To save time, we'll avoid traversing through objects that are
3338          * in excluded packs.
3339          *
3340          * That may cause us to avoid populating all of the namehash fields of
3341          * all included objects, but our goal is best-effort, since this is only
3342          * an optimization during delta selection.
3343          */
3344         revs.no_kept_objects = 1;
3345         revs.keep_pack_cache_flags |= IN_CORE_KEEP_PACKS;
3346         revs.blob_objects = 1;
3347         revs.tree_objects = 1;
3348         revs.tag_objects = 1;
3349         revs.ignore_missing_links = 1;
3350
3351         while (strbuf_getline(&buf, stdin) != EOF) {
3352                 if (!buf.len)
3353                         continue;
3354
3355                 if (*buf.buf == '^')
3356                         string_list_append(&exclude_packs, buf.buf + 1);
3357                 else
3358                         string_list_append(&include_packs, buf.buf);
3359
3360                 strbuf_reset(&buf);
3361         }
3362
3363         string_list_sort(&include_packs);
3364         string_list_remove_duplicates(&include_packs, 0);
3365         string_list_sort(&exclude_packs);
3366         string_list_remove_duplicates(&exclude_packs, 0);
3367
3368         for (p = get_all_packs(the_repository); p; p = p->next) {
3369                 const char *pack_name = pack_basename(p);
3370
3371                 if ((item = string_list_lookup(&include_packs, pack_name)))
3372                         item->util = p;
3373                 if ((item = string_list_lookup(&exclude_packs, pack_name)))
3374                         item->util = p;
3375         }
3376
3377         /*
3378          * Arguments we got on stdin may not even be packs. First
3379          * check that to avoid segfaulting later on in
3380          * e.g. pack_mtime_cmp(), excluded packs are handled below.
3381          *
3382          * Since we first parsed our STDIN and then sorted the input
3383          * lines the pack we error on will be whatever line happens to
3384          * sort first. This is lazy, it's enough that we report one
3385          * bad case here, we don't need to report the first/last one,
3386          * or all of them.
3387          */
3388         for_each_string_list_item(item, &include_packs) {
3389                 struct packed_git *p = item->util;
3390                 if (!p)
3391                         die(_("could not find pack '%s'"), item->string);
3392                 if (!is_pack_valid(p))
3393                         die(_("packfile %s cannot be accessed"), p->pack_name);
3394         }
3395
3396         /*
3397          * Then, handle all of the excluded packs, marking them as
3398          * kept in-core so that later calls to add_object_entry()
3399          * discards any objects that are also found in excluded packs.
3400          */
3401         for_each_string_list_item(item, &exclude_packs) {
3402                 struct packed_git *p = item->util;
3403                 if (!p)
3404                         die(_("could not find pack '%s'"), item->string);
3405                 p->pack_keep_in_core = 1;
3406         }
3407
3408         /*
3409          * Order packs by ascending mtime; use QSORT directly to access the
3410          * string_list_item's ->util pointer, which string_list_sort() does not
3411          * provide.
3412          */
3413         QSORT(include_packs.items, include_packs.nr, pack_mtime_cmp);
3414
3415         for_each_string_list_item(item, &include_packs) {
3416                 struct packed_git *p = item->util;
3417                 for_each_object_in_pack(p,
3418                                         add_object_entry_from_pack,
3419                                         &revs,
3420                                         FOR_EACH_OBJECT_PACK_ORDER);
3421         }
3422
3423         if (prepare_revision_walk(&revs))
3424                 die(_("revision walk setup failed"));
3425         traverse_commit_list(&revs,
3426                              show_commit_pack_hint,
3427                              show_object_pack_hint,
3428                              NULL);
3429
3430         trace2_data_intmax("pack-objects", the_repository, "stdin_packs_found",
3431                            stdin_packs_found_nr);
3432         trace2_data_intmax("pack-objects", the_repository, "stdin_packs_hints",
3433                            stdin_packs_hints_nr);
3434
3435         strbuf_release(&buf);
3436         string_list_clear(&include_packs, 0);
3437         string_list_clear(&exclude_packs, 0);
3438 }
3439
3440 static void add_cruft_object_entry(const struct object_id *oid, enum object_type type,
3441                                    struct packed_git *pack, off_t offset,
3442                                    const char *name, uint32_t mtime)
3443 {
3444         struct object_entry *entry;
3445
3446         display_progress(progress_state, ++nr_seen);
3447
3448         entry = packlist_find(&to_pack, oid);
3449         if (entry) {
3450                 if (name) {
3451                         entry->hash = pack_name_hash(name);
3452                         entry->no_try_delta = no_try_delta(name);
3453                 }
3454         } else {
3455                 if (!want_object_in_pack(oid, 0, &pack, &offset))
3456                         return;
3457                 if (!pack && type == OBJ_BLOB && !has_loose_object(oid)) {
3458                         /*
3459                          * If a traversed tree has a missing blob then we want
3460                          * to avoid adding that missing object to our pack.
3461                          *
3462                          * This only applies to missing blobs, not trees,
3463                          * because the traversal needs to parse sub-trees but
3464                          * not blobs.
3465                          *
3466                          * Note we only perform this check when we couldn't
3467                          * already find the object in a pack, so we're really
3468                          * limited to "ensure non-tip blobs which don't exist in
3469                          * packs do exist via loose objects". Confused?
3470                          */
3471                         return;
3472                 }
3473
3474                 entry = create_object_entry(oid, type, pack_name_hash(name),
3475                                             0, name && no_try_delta(name),
3476                                             pack, offset);
3477         }
3478
3479         if (mtime > oe_cruft_mtime(&to_pack, entry))
3480                 oe_set_cruft_mtime(&to_pack, entry, mtime);
3481         return;
3482 }
3483
3484 static void show_cruft_object(struct object *obj, const char *name, void *data UNUSED)
3485 {
3486         /*
3487          * if we did not record it earlier, it's at least as old as our
3488          * expiration value. Rather than find it exactly, just use that
3489          * value.  This may bump it forward from its real mtime, but it
3490          * will still be "too old" next time we run with the same
3491          * expiration.
3492          *
3493          * if obj does appear in the packing list, this call is a noop (or may
3494          * set the namehash).
3495          */
3496         add_cruft_object_entry(&obj->oid, obj->type, NULL, 0, name, cruft_expiration);
3497 }
3498
3499 static void show_cruft_commit(struct commit *commit, void *data)
3500 {
3501         show_cruft_object((struct object*)commit, NULL, data);
3502 }
3503
3504 static int cruft_include_check_obj(struct object *obj, void *data UNUSED)
3505 {
3506         return !has_object_kept_pack(&obj->oid, IN_CORE_KEEP_PACKS);
3507 }
3508
/* Commit flavour of cruft_include_check_obj(); same return value. */
static int cruft_include_check(struct commit *commit, void *data)
{
        struct object *obj = (struct object *)commit;

        return cruft_include_check_obj(obj, data);
}
3513
3514 static void set_cruft_mtime(const struct object *object,
3515                             struct packed_git *pack,
3516                             off_t offset, time_t mtime)
3517 {
3518         add_cruft_object_entry(&object->oid, object->type, pack, offset, NULL,
3519                                mtime);
3520 }
3521
3522 static void mark_pack_kept_in_core(struct string_list *packs, unsigned keep)
3523 {
3524         struct string_list_item *item = NULL;
3525         for_each_string_list_item(item, packs) {
3526                 struct packed_git *p = item->util;
3527                 if (!p)
3528                         die(_("could not find pack '%s'"), item->string);
3529                 p->pack_keep_in_core = keep;
3530         }
3531 }
3532
/* Defined further down; declared here for enumerate_cruft_objects(). */
static void add_objects_in_unpacked_packs(void);
static void add_unreachable_loose_objects(void);
3535
3536 static void enumerate_cruft_objects(void)
3537 {
3538         if (progress)
3539                 progress_state = start_progress(_("Enumerating cruft objects"), 0);
3540
3541         add_objects_in_unpacked_packs();
3542         add_unreachable_loose_objects();
3543
3544         stop_progress(&progress_state);
3545 }
3546
3547 static void enumerate_and_traverse_cruft_objects(struct string_list *fresh_packs)
3548 {
3549         struct packed_git *p;
3550         struct rev_info revs;
3551         int ret;
3552
3553         repo_init_revisions(the_repository, &revs, NULL);
3554
3555         revs.tag_objects = 1;
3556         revs.tree_objects = 1;
3557         revs.blob_objects = 1;
3558
3559         revs.include_check = cruft_include_check;
3560         revs.include_check_obj = cruft_include_check_obj;
3561
3562         revs.ignore_missing_links = 1;
3563
3564         if (progress)
3565                 progress_state = start_progress(_("Enumerating cruft objects"), 0);
3566         ret = add_unseen_recent_objects_to_traversal(&revs, cruft_expiration,
3567                                                      set_cruft_mtime, 1);
3568         stop_progress(&progress_state);
3569
3570         if (ret)
3571                 die(_("unable to add cruft objects"));
3572
3573         /*
3574          * Re-mark only the fresh packs as kept so that objects in
3575          * unknown packs do not halt the reachability traversal early.
3576          */
3577         for (p = get_all_packs(the_repository); p; p = p->next)
3578                 p->pack_keep_in_core = 0;
3579         mark_pack_kept_in_core(fresh_packs, 1);
3580
3581         if (prepare_revision_walk(&revs))
3582                 die(_("revision walk setup failed"));
3583         if (progress)
3584                 progress_state = start_progress(_("Traversing cruft objects"), 0);
3585         nr_seen = 0;
3586         traverse_commit_list(&revs, show_cruft_commit, show_cruft_object, NULL);
3587
3588         stop_progress(&progress_state);
3589 }
3590
3591 static void read_cruft_objects(void)
3592 {
3593         struct strbuf buf = STRBUF_INIT;
3594         struct string_list discard_packs = STRING_LIST_INIT_DUP;
3595         struct string_list fresh_packs = STRING_LIST_INIT_DUP;
3596         struct packed_git *p;
3597
3598         ignore_packed_keep_in_core = 1;
3599
3600         while (strbuf_getline(&buf, stdin) != EOF) {
3601                 if (!buf.len)
3602                         continue;
3603
3604                 if (*buf.buf == '-')
3605                         string_list_append(&discard_packs, buf.buf + 1);
3606                 else
3607                         string_list_append(&fresh_packs, buf.buf);
3608                 strbuf_reset(&buf);
3609         }
3610
3611         string_list_sort(&discard_packs);
3612         string_list_sort(&fresh_packs);
3613
3614         for (p = get_all_packs(the_repository); p; p = p->next) {
3615                 const char *pack_name = pack_basename(p);
3616                 struct string_list_item *item;
3617
3618                 item = string_list_lookup(&fresh_packs, pack_name);
3619                 if (!item)
3620                         item = string_list_lookup(&discard_packs, pack_name);
3621
3622                 if (item) {
3623                         item->util = p;
3624                 } else {
3625                         /*
3626                          * This pack wasn't mentioned in either the "fresh" or
3627                          * "discard" list, so the caller didn't know about it.
3628                          *
3629                          * Mark it as kept so that its objects are ignored by
3630                          * add_unseen_recent_objects_to_traversal(). We'll
3631                          * unmark it before starting the traversal so it doesn't
3632                          * halt the traversal early.
3633                          */
3634                         p->pack_keep_in_core = 1;
3635                 }
3636         }
3637
3638         mark_pack_kept_in_core(&fresh_packs, 1);
3639         mark_pack_kept_in_core(&discard_packs, 0);
3640
3641         if (cruft_expiration)
3642                 enumerate_and_traverse_cruft_objects(&fresh_packs);
3643         else
3644                 enumerate_cruft_objects();
3645
3646         strbuf_release(&buf);
3647         string_list_clear(&discard_packs, 0);
3648         string_list_clear(&fresh_packs, 0);
3649 }
3650
3651 static void read_object_list_from_stdin(void)
3652 {
3653         char line[GIT_MAX_HEXSZ + 1 + PATH_MAX + 2];
3654         struct object_id oid;
3655         const char *p;
3656
3657         for (;;) {
3658                 if (!fgets(line, sizeof(line), stdin)) {
3659                         if (feof(stdin))
3660                                 break;
3661                         if (!ferror(stdin))
3662                                 BUG("fgets returned NULL, not EOF, not error!");
3663                         if (errno != EINTR)
3664                                 die_errno("fgets");
3665                         clearerr(stdin);
3666                         continue;
3667                 }
3668                 if (line[0] == '-') {
3669                         if (get_oid_hex(line+1, &oid))
3670                                 die(_("expected edge object ID, got garbage:\n %s"),
3671                                     line);
3672                         add_preferred_base(&oid);
3673                         continue;
3674                 }
3675                 if (parse_oid_hex(line, &oid, &p))
3676                         die(_("expected object ID, got garbage:\n %s"), line);
3677
3678                 add_preferred_base_object(p + 1);
3679                 add_object_entry(&oid, OBJ_NONE, p + 1, 0);
3680         }
3681 }
3682
3683 static void show_commit(struct commit *commit, void *data UNUSED)
3684 {
3685         add_object_entry(&commit->object.oid, OBJ_COMMIT, NULL, 0);
3686
3687         if (write_bitmap_index)
3688                 index_commit_for_bitmap(commit);
3689
3690         if (use_delta_islands)
3691                 propagate_island_marks(commit);
3692 }
3693
3694 static void show_object(struct object *obj, const char *name,
3695                         void *data UNUSED)
3696 {
3697         add_preferred_base_object(name);
3698         add_object_entry(&obj->oid, obj->type, name, 0);
3699
3700         if (use_delta_islands) {
3701                 const char *p;
3702                 unsigned depth;
3703                 struct object_entry *ent;
3704
3705                 /* the empty string is a root tree, which is depth 0 */
3706                 depth = *name ? 1 : 0;
3707                 for (p = strchr(name, '/'); p; p = strchr(p + 1, '/'))
3708                         depth++;
3709
3710                 ent = packlist_find(&to_pack, &obj->oid);
3711                 if (ent && depth > oe_tree_depth(&to_pack, ent))
3712                         oe_set_tree_depth(&to_pack, ent, depth);
3713         }
3714 }
3715
3716 static void show_object__ma_allow_any(struct object *obj, const char *name, void *data)
3717 {
3718         assert(arg_missing_action == MA_ALLOW_ANY);
3719
3720         /*
3721          * Quietly ignore ALL missing objects.  This avoids problems with
3722          * staging them now and getting an odd error later.
3723          */
3724         if (!has_object(the_repository, &obj->oid, 0))
3725                 return;
3726
3727         show_object(obj, name, data);
3728 }
3729
3730 static void show_object__ma_allow_promisor(struct object *obj, const char *name, void *data)
3731 {
3732         assert(arg_missing_action == MA_ALLOW_PROMISOR);
3733
3734         /*
3735          * Quietly ignore EXPECTED missing objects.  This avoids problems with
3736          * staging them now and getting an odd error later.
3737          */
3738         if (!has_object(the_repository, &obj->oid, 0) && is_promisor_object(&obj->oid))
3739                 return;
3740
3741         show_object(obj, name, data);
3742 }
3743
3744 static int option_parse_missing_action(const struct option *opt,
3745                                        const char *arg, int unset)
3746 {
3747         assert(arg);
3748         assert(!unset);
3749
3750         if (!strcmp(arg, "error")) {
3751                 arg_missing_action = MA_ERROR;
3752                 fn_show_object = show_object;
3753                 return 0;
3754         }
3755
3756         if (!strcmp(arg, "allow-any")) {
3757                 arg_missing_action = MA_ALLOW_ANY;
3758                 fetch_if_missing = 0;
3759                 fn_show_object = show_object__ma_allow_any;
3760                 return 0;
3761         }
3762
3763         if (!strcmp(arg, "allow-promisor")) {
3764                 arg_missing_action = MA_ALLOW_PROMISOR;
3765                 fetch_if_missing = 0;
3766                 fn_show_object = show_object__ma_allow_promisor;
3767                 return 0;
3768         }
3769
3770         die(_("invalid value for '%s': '%s'"), "--missing", arg);
3771         return 0;
3772 }
3773
3774 static void show_edge(struct commit *commit)
3775 {
3776         add_preferred_base(&commit->object.oid);
3777 }
3778
3779 static int add_object_in_unpacked_pack(const struct object_id *oid,
3780                                        struct packed_git *pack,
3781                                        uint32_t pos,
3782                                        void *data UNUSED)
3783 {
3784         if (cruft) {
3785                 off_t offset;
3786                 time_t mtime;
3787
3788                 if (pack->is_cruft) {
3789                         if (load_pack_mtimes(pack) < 0)
3790                                 die(_("could not load cruft pack .mtimes"));
3791                         mtime = nth_packed_mtime(pack, pos);
3792                 } else {
3793                         mtime = pack->mtime;
3794                 }
3795                 offset = nth_packed_object_offset(pack, pos);
3796
3797                 add_cruft_object_entry(oid, OBJ_NONE, pack, offset,
3798                                        NULL, mtime);
3799         } else {
3800                 add_object_entry(oid, OBJ_NONE, "", 0);
3801         }
3802         return 0;
3803 }
3804
3805 static void add_objects_in_unpacked_packs(void)
3806 {
3807         if (for_each_packed_object(add_object_in_unpacked_pack, NULL,
3808                                    FOR_EACH_OBJECT_PACK_ORDER |
3809                                    FOR_EACH_OBJECT_LOCAL_ONLY |
3810                                    FOR_EACH_OBJECT_SKIP_IN_CORE_KEPT_PACKS |
3811                                    FOR_EACH_OBJECT_SKIP_ON_DISK_KEPT_PACKS))
3812                 die(_("cannot open pack index"));
3813 }
3814
3815 static int add_loose_object(const struct object_id *oid, const char *path,
3816                             void *data UNUSED)
3817 {
3818         enum object_type type = oid_object_info(the_repository, oid, NULL);
3819
3820         if (type < 0) {
3821                 warning(_("loose object at %s could not be examined"), path);
3822                 return 0;
3823         }
3824
3825         if (cruft) {
3826                 struct stat st;
3827                 if (stat(path, &st) < 0) {
3828                         if (errno == ENOENT)
3829                                 return 0;
3830                         return error_errno("unable to stat %s", oid_to_hex(oid));
3831                 }
3832
3833                 add_cruft_object_entry(oid, type, NULL, 0, NULL,
3834                                        st.st_mtime);
3835         } else {
3836                 add_object_entry(oid, type, "", 0);
3837         }
3838         return 0;
3839 }
3840
3841 /*
3842  * We actually don't even have to worry about reachability here.
3843  * add_object_entry will weed out duplicates, so we just add every
3844  * loose object we find.
3845  */
3846 static void add_unreachable_loose_objects(void)
3847 {
3848         for_each_loose_file_in_objdir(get_object_directory(),
3849                                       add_loose_object,
3850                                       NULL, NULL, NULL);
3851 }
3852
3853 static int has_sha1_pack_kept_or_nonlocal(const struct object_id *oid)
3854 {
3855         static struct packed_git *last_found = (void *)1;
3856         struct packed_git *p;
3857
3858         p = (last_found != (void *)1) ? last_found :
3859                                         get_all_packs(the_repository);
3860
3861         while (p) {
3862                 if ((!p->pack_local || p->pack_keep ||
3863                                 p->pack_keep_in_core) &&
3864                         find_pack_entry_one(oid->hash, p)) {
3865                         last_found = p;
3866                         return 1;
3867                 }
3868                 if (p == last_found)
3869                         p = get_all_packs(the_repository);
3870                 else
3871                         p = p->next;
3872                 if (p == last_found)
3873                         p = p->next;
3874         }
3875         return 0;
3876 }
3877
3878 /*
3879  * Store a list of object IDs that should not be discarded
3880  * because they were either written too recently, or are
3881  * reachable from another object that was.
3882  *
3883  * This is filled by get_object_list.
3884  */
3885 static struct oid_array recent_objects;
3886
3887 static int loosened_object_can_be_discarded(const struct object_id *oid,
3888                                             timestamp_t mtime)
3889 {
3890         if (!unpack_unreachable_expiration)
3891                 return 0;
3892         if (mtime > unpack_unreachable_expiration)
3893                 return 0;
3894         if (oid_array_lookup(&recent_objects, oid) >= 0)
3895                 return 0;
3896         return 1;
3897 }
3898
3899 static void loosen_unused_packed_objects(void)
3900 {
3901         struct packed_git *p;
3902         uint32_t i;
3903         uint32_t loosened_objects_nr = 0;
3904         struct object_id oid;
3905
3906         for (p = get_all_packs(the_repository); p; p = p->next) {
3907                 if (!p->pack_local || p->pack_keep || p->pack_keep_in_core)
3908                         continue;
3909
3910                 if (open_pack_index(p))
3911                         die(_("cannot open pack index"));
3912
3913                 for (i = 0; i < p->num_objects; i++) {
3914                         nth_packed_object_id(&oid, p, i);
3915                         if (!packlist_find(&to_pack, &oid) &&
3916                             !has_sha1_pack_kept_or_nonlocal(&oid) &&
3917                             !loosened_object_can_be_discarded(&oid, p->mtime)) {
3918                                 if (force_object_loose(&oid, p->mtime))
3919                                         die(_("unable to force loose object"));
3920                                 loosened_objects_nr++;
3921                         }
3922                 }
3923         }
3924
3925         trace2_data_intmax("pack-objects", the_repository,
3926                            "loosen_unused_packed_objects/loosened", loosened_objects_nr);
3927 }
3928
3929 /*
3930  * This tracks any options which pack-reuse code expects to be on, or which a
3931  * reader of the pack might not understand, and which would therefore prevent
3932  * blind reuse of what we have on disk.
3933  */
3934 static int pack_options_allow_reuse(void)
3935 {
3936         return allow_pack_reuse &&
3937                pack_to_stdout &&
3938                !ignore_packed_keep_on_disk &&
3939                !ignore_packed_keep_in_core &&
3940                (!local || !have_non_local_packs) &&
3941                !incremental;
3942 }
3943
3944 static int get_object_list_from_bitmap(struct rev_info *revs)
3945 {
3946         if (!(bitmap_git = prepare_bitmap_walk(revs, 0)))
3947                 return -1;
3948
3949         if (pack_options_allow_reuse() &&
3950             !reuse_partial_packfile_from_bitmap(
3951                         bitmap_git,
3952                         &reuse_packfile,
3953                         &reuse_packfile_objects,
3954                         &reuse_packfile_bitmap)) {
3955                 assert(reuse_packfile_objects);
3956                 nr_result += reuse_packfile_objects;
3957                 nr_seen += reuse_packfile_objects;
3958                 display_progress(progress_state, nr_seen);
3959         }
3960
3961         traverse_bitmap_commit_list(bitmap_git, revs,
3962                                     &add_object_entry_from_bitmap);
3963         return 0;
3964 }
3965
3966 static void record_recent_object(struct object *obj,
3967                                  const char *name UNUSED,
3968                                  void *data UNUSED)
3969 {
3970         oid_array_append(&recent_objects, &obj->oid);
3971 }
3972
3973 static void record_recent_commit(struct commit *commit, void *data UNUSED)
3974 {
3975         oid_array_append(&recent_objects, &commit->object.oid);
3976 }
3977
3978 static int mark_bitmap_preferred_tip(const char *refname,
3979                                      const struct object_id *oid,
3980                                      int flags UNUSED,
3981                                      void *data UNUSED)
3982 {
3983         struct object_id peeled;
3984         struct object *object;
3985
3986         if (!peel_iterated_oid(oid, &peeled))
3987                 oid = &peeled;
3988
3989         object = parse_object_or_die(oid, refname);
3990         if (object->type == OBJ_COMMIT)
3991                 object->flags |= NEEDS_BITMAP;
3992
3993         return 0;
3994 }
3995
3996 static void mark_bitmap_preferred_tips(void)
3997 {
3998         struct string_list_item *item;
3999         const struct string_list *preferred_tips;
4000
4001         preferred_tips = bitmap_preferred_tips(the_repository);
4002         if (!preferred_tips)
4003                 return;
4004
4005         for_each_string_list_item(item, preferred_tips) {
4006                 for_each_ref_in(item->string, mark_bitmap_preferred_tip, NULL);
4007         }
4008 }
4009
4010 static void get_object_list(struct rev_info *revs, int ac, const char **av)
4011 {
4012         struct setup_revision_opt s_r_opt = {
4013                 .allow_exclude_promisor_objects = 1,
4014         };
4015         char line[1000];
4016         int flags = 0;
4017         int save_warning;
4018
4019         save_commit_buffer = 0;
4020         setup_revisions(ac, av, revs, &s_r_opt);
4021
4022         /* make sure shallows are read */
4023         is_repository_shallow(the_repository);
4024
4025         save_warning = warn_on_object_refname_ambiguity;
4026         warn_on_object_refname_ambiguity = 0;
4027
4028         while (fgets(line, sizeof(line), stdin) != NULL) {
4029                 int len = strlen(line);
4030                 if (len && line[len - 1] == '\n')
4031                         line[--len] = 0;
4032                 if (!len)
4033                         break;
4034                 if (*line == '-') {
4035                         if (!strcmp(line, "--not")) {
4036                                 flags ^= UNINTERESTING;
4037                                 write_bitmap_index = 0;
4038                                 continue;
4039                         }
4040                         if (starts_with(line, "--shallow ")) {
4041                                 struct object_id oid;
4042                                 if (get_oid_hex(line + 10, &oid))
4043                                         die("not an object name '%s'", line + 10);
4044                                 register_shallow(the_repository, &oid);
4045                                 use_bitmap_index = 0;
4046                                 continue;
4047                         }
4048                         die(_("not a rev '%s'"), line);
4049                 }
4050                 if (handle_revision_arg(line, revs, flags, REVARG_CANNOT_BE_FILENAME))
4051                         die(_("bad revision '%s'"), line);
4052         }
4053
4054         warn_on_object_refname_ambiguity = save_warning;
4055
4056         if (use_bitmap_index && !get_object_list_from_bitmap(revs))
4057                 return;
4058
4059         if (use_delta_islands)
4060                 load_delta_islands(the_repository, progress);
4061
4062         if (write_bitmap_index)
4063                 mark_bitmap_preferred_tips();
4064
4065         if (prepare_revision_walk(revs))
4066                 die(_("revision walk setup failed"));
4067         mark_edges_uninteresting(revs, show_edge, sparse);
4068
4069         if (!fn_show_object)
4070                 fn_show_object = show_object;
4071         traverse_commit_list(revs,
4072                              show_commit, fn_show_object,
4073                              NULL);
4074
4075         if (unpack_unreachable_expiration) {
4076                 revs->ignore_missing_links = 1;
4077                 if (add_unseen_recent_objects_to_traversal(revs,
4078                                 unpack_unreachable_expiration, NULL, 0))
4079                         die(_("unable to add recent objects"));
4080                 if (prepare_revision_walk(revs))
4081                         die(_("revision walk setup failed"));
4082                 traverse_commit_list(revs, record_recent_commit,
4083                                      record_recent_object, NULL);
4084         }
4085
4086         if (keep_unreachable)
4087                 add_objects_in_unpacked_packs();
4088         if (pack_loose_unreachable)
4089                 add_unreachable_loose_objects();
4090         if (unpack_unreachable)
4091                 loosen_unused_packed_objects();
4092
4093         oid_array_clear(&recent_objects);
4094 }
4095
4096 static void add_extra_kept_packs(const struct string_list *names)
4097 {
4098         struct packed_git *p;
4099
4100         if (!names->nr)
4101                 return;
4102
4103         for (p = get_all_packs(the_repository); p; p = p->next) {
4104                 const char *name = basename(p->pack_name);
4105                 int i;
4106
4107                 if (!p->pack_local)
4108                         continue;
4109
4110                 for (i = 0; i < names->nr; i++)
4111                         if (!fspathcmp(name, names->items[i].string))
4112                                 break;
4113
4114                 if (i < names->nr) {
4115                         p->pack_keep_in_core = 1;
4116                         ignore_packed_keep_in_core = 1;
4117                         continue;
4118                 }
4119         }
4120 }
4121
4122 static int option_parse_index_version(const struct option *opt,
4123                                       const char *arg, int unset)
4124 {
4125         char *c;
4126         const char *val = arg;
4127
4128         BUG_ON_OPT_NEG(unset);
4129
4130         pack_idx_opts.version = strtoul(val, &c, 10);
4131         if (pack_idx_opts.version > 2)
4132                 die(_("unsupported index version %s"), val);
4133         if (*c == ',' && c[1])
4134                 pack_idx_opts.off32_limit = strtoul(c+1, &c, 0);
4135         if (*c || pack_idx_opts.off32_limit & 0x80000000)
4136                 die(_("bad index version '%s'"), val);
4137         return 0;
4138 }
4139
4140 static int option_parse_unpack_unreachable(const struct option *opt,
4141                                            const char *arg, int unset)
4142 {
4143         if (unset) {
4144                 unpack_unreachable = 0;
4145                 unpack_unreachable_expiration = 0;
4146         }
4147         else {
4148                 unpack_unreachable = 1;
4149                 if (arg)
4150                         unpack_unreachable_expiration = approxidate(arg);
4151         }
4152         return 0;
4153 }
4154
4155 static int option_parse_cruft_expiration(const struct option *opt,
4156                                          const char *arg, int unset)
4157 {
4158         if (unset) {
4159                 cruft = 0;
4160                 cruft_expiration = 0;
4161         } else {
4162                 cruft = 1;
4163                 if (arg)
4164                         cruft_expiration = approxidate(arg);
4165         }
4166         return 0;
4167 }
4168
4169 int cmd_pack_objects(int argc, const char **argv, const char *prefix)
4170 {
4171         int use_internal_rev_list = 0;
4172         int shallow = 0;
4173         int all_progress_implied = 0;
4174         struct strvec rp = STRVEC_INIT;
4175         int rev_list_unpacked = 0, rev_list_all = 0, rev_list_reflog = 0;
4176         int rev_list_index = 0;
4177         int stdin_packs = 0;
4178         struct string_list keep_pack_list = STRING_LIST_INIT_NODUP;
4179         struct list_objects_filter_options filter_options =
4180                 LIST_OBJECTS_FILTER_INIT;
4181
4182         struct option pack_objects_options[] = {
4183                 OPT_SET_INT('q', "quiet", &progress,
4184                             N_("do not show progress meter"), 0),
4185                 OPT_SET_INT(0, "progress", &progress,
4186                             N_("show progress meter"), 1),
4187                 OPT_SET_INT(0, "all-progress", &progress,
4188                             N_("show progress meter during object writing phase"), 2),
4189                 OPT_BOOL(0, "all-progress-implied",
4190                          &all_progress_implied,
4191                          N_("similar to --all-progress when progress meter is shown")),
4192                 OPT_CALLBACK_F(0, "index-version", NULL, N_("<version>[,<offset>]"),
4193                   N_("write the pack index file in the specified idx format version"),
4194                   PARSE_OPT_NONEG, option_parse_index_version),
4195                 OPT_MAGNITUDE(0, "max-pack-size", &pack_size_limit,
4196                               N_("maximum size of each output pack file")),
4197                 OPT_BOOL(0, "local", &local,
4198                          N_("ignore borrowed objects from alternate object store")),
4199                 OPT_BOOL(0, "incremental", &incremental,
4200                          N_("ignore packed objects")),
4201                 OPT_INTEGER(0, "window", &window,
4202                             N_("limit pack window by objects")),
4203                 OPT_MAGNITUDE(0, "window-memory", &window_memory_limit,
4204                               N_("limit pack window by memory in addition to object limit")),
4205                 OPT_INTEGER(0, "depth", &depth,
4206                             N_("maximum length of delta chain allowed in the resulting pack")),
4207                 OPT_BOOL(0, "reuse-delta", &reuse_delta,
4208                          N_("reuse existing deltas")),
4209                 OPT_BOOL(0, "reuse-object", &reuse_object,
4210                          N_("reuse existing objects")),
4211                 OPT_BOOL(0, "delta-base-offset", &allow_ofs_delta,
4212                          N_("use OFS_DELTA objects")),
4213                 OPT_INTEGER(0, "threads", &delta_search_threads,
4214                             N_("use threads when searching for best delta matches")),
4215                 OPT_BOOL(0, "non-empty", &non_empty,
4216                          N_("do not create an empty pack output")),
4217                 OPT_BOOL(0, "revs", &use_internal_rev_list,
4218                          N_("read revision arguments from standard input")),
4219                 OPT_SET_INT_F(0, "unpacked", &rev_list_unpacked,
4220                               N_("limit the objects to those that are not yet packed"),
4221                               1, PARSE_OPT_NONEG),
4222                 OPT_SET_INT_F(0, "all", &rev_list_all,
4223                               N_("include objects reachable from any reference"),
4224                               1, PARSE_OPT_NONEG),
4225                 OPT_SET_INT_F(0, "reflog", &rev_list_reflog,
4226                               N_("include objects referred by reflog entries"),
4227                               1, PARSE_OPT_NONEG),
4228                 OPT_SET_INT_F(0, "indexed-objects", &rev_list_index,
4229                               N_("include objects referred to by the index"),
4230                               1, PARSE_OPT_NONEG),
4231                 OPT_BOOL(0, "stdin-packs", &stdin_packs,
4232                          N_("read packs from stdin")),
4233                 OPT_BOOL(0, "stdout", &pack_to_stdout,
4234                          N_("output pack to stdout")),
4235                 OPT_BOOL(0, "include-tag", &include_tag,
4236                          N_("include tag objects that refer to objects to be packed")),
4237                 OPT_BOOL(0, "keep-unreachable", &keep_unreachable,
4238                          N_("keep unreachable objects")),
4239                 OPT_BOOL(0, "pack-loose-unreachable", &pack_loose_unreachable,
4240                          N_("pack loose unreachable objects")),
4241                 OPT_CALLBACK_F(0, "unpack-unreachable", NULL, N_("time"),
4242                   N_("unpack unreachable objects newer than <time>"),
4243                   PARSE_OPT_OPTARG, option_parse_unpack_unreachable),
4244                 OPT_BOOL(0, "cruft", &cruft, N_("create a cruft pack")),
4245                 OPT_CALLBACK_F(0, "cruft-expiration", NULL, N_("time"),
4246                   N_("expire cruft objects older than <time>"),
4247                   PARSE_OPT_OPTARG, option_parse_cruft_expiration),
4248                 OPT_BOOL(0, "sparse", &sparse,
4249                          N_("use the sparse reachability algorithm")),
4250                 OPT_BOOL(0, "thin", &thin,
4251                          N_("create thin packs")),
4252                 OPT_BOOL(0, "shallow", &shallow,
4253                          N_("create packs suitable for shallow fetches")),
4254                 OPT_BOOL(0, "honor-pack-keep", &ignore_packed_keep_on_disk,
4255                          N_("ignore packs that have companion .keep file")),
4256                 OPT_STRING_LIST(0, "keep-pack", &keep_pack_list, N_("name"),
4257                                 N_("ignore this pack")),
4258                 OPT_INTEGER(0, "compression", &pack_compression_level,
4259                             N_("pack compression level")),
4260                 OPT_SET_INT(0, "keep-true-parents", &grafts_replace_parents,
4261                             N_("do not hide commits by grafts"), 0),
4262                 OPT_BOOL(0, "use-bitmap-index", &use_bitmap_index,
4263                          N_("use a bitmap index if available to speed up counting objects")),
4264                 OPT_SET_INT(0, "write-bitmap-index", &write_bitmap_index,
4265                             N_("write a bitmap index together with the pack index"),
4266                             WRITE_BITMAP_TRUE),
4267                 OPT_SET_INT_F(0, "write-bitmap-index-quiet",
4268                               &write_bitmap_index,
4269                               N_("write a bitmap index if possible"),
4270                               WRITE_BITMAP_QUIET, PARSE_OPT_HIDDEN),
4271                 OPT_PARSE_LIST_OBJECTS_FILTER(&filter_options),
4272                 OPT_CALLBACK_F(0, "missing", NULL, N_("action"),
4273                   N_("handling for missing objects"), PARSE_OPT_NONEG,
4274                   option_parse_missing_action),
4275                 OPT_BOOL(0, "exclude-promisor-objects", &exclude_promisor_objects,
4276                          N_("do not pack objects in promisor packfiles")),
4277                 OPT_BOOL(0, "delta-islands", &use_delta_islands,
4278                          N_("respect islands during delta compression")),
4279                 OPT_STRING_LIST(0, "uri-protocol", &uri_protocols,
4280                                 N_("protocol"),
4281                                 N_("exclude any configured uploadpack.blobpackfileuri with this protocol")),
4282                 OPT_END(),
4283         };
4284
4285         if (DFS_NUM_STATES > (1 << OE_DFS_STATE_BITS))
4286                 BUG("too many dfs states, increase OE_DFS_STATE_BITS");
4287
4288         disable_replace_refs();
4289
4290         sparse = git_env_bool("GIT_TEST_PACK_SPARSE", -1);
4291         if (the_repository->gitdir) {
4292                 prepare_repo_settings(the_repository);
4293                 if (sparse < 0)
4294                         sparse = the_repository->settings.pack_use_sparse;
4295         }
4296
4297         reset_pack_idx_option(&pack_idx_opts);
4298         pack_idx_opts.flags |= WRITE_REV;
4299         git_config(git_pack_config, NULL);
4300         if (git_env_bool(GIT_TEST_NO_WRITE_REV_INDEX, 0))
4301                 pack_idx_opts.flags &= ~WRITE_REV;
4302
4303         progress = isatty(2);
4304         argc = parse_options(argc, argv, prefix, pack_objects_options,
4305                              pack_usage, 0);
4306
4307         if (argc) {
4308                 base_name = argv[0];
4309                 argc--;
4310         }
4311         if (pack_to_stdout != !base_name || argc)
4312                 usage_with_options(pack_usage, pack_objects_options);
4313
4314         if (depth < 0)
4315                 depth = 0;
4316         if (depth >= (1 << OE_DEPTH_BITS)) {
4317                 warning(_("delta chain depth %d is too deep, forcing %d"),
4318                         depth, (1 << OE_DEPTH_BITS) - 1);
4319                 depth = (1 << OE_DEPTH_BITS) - 1;
4320         }
4321         if (cache_max_small_delta_size >= (1U << OE_Z_DELTA_BITS)) {
4322                 warning(_("pack.deltaCacheLimit is too high, forcing %d"),
4323                         (1U << OE_Z_DELTA_BITS) - 1);
4324                 cache_max_small_delta_size = (1U << OE_Z_DELTA_BITS) - 1;
4325         }
4326         if (window < 0)
4327                 window = 0;
4328
4329         strvec_push(&rp, "pack-objects");
4330         if (thin) {
4331                 use_internal_rev_list = 1;
4332                 strvec_push(&rp, shallow
4333                                 ? "--objects-edge-aggressive"
4334                                 : "--objects-edge");
4335         } else
4336                 strvec_push(&rp, "--objects");
4337
4338         if (rev_list_all) {
4339                 use_internal_rev_list = 1;
4340                 strvec_push(&rp, "--all");
4341         }
4342         if (rev_list_reflog) {
4343                 use_internal_rev_list = 1;
4344                 strvec_push(&rp, "--reflog");
4345         }
4346         if (rev_list_index) {
4347                 use_internal_rev_list = 1;
4348                 strvec_push(&rp, "--indexed-objects");
4349         }
4350         if (rev_list_unpacked && !stdin_packs) {
4351                 use_internal_rev_list = 1;
4352                 strvec_push(&rp, "--unpacked");
4353         }
4354
4355         if (exclude_promisor_objects) {
4356                 use_internal_rev_list = 1;
4357                 fetch_if_missing = 0;
4358                 strvec_push(&rp, "--exclude-promisor-objects");
4359         }
4360         if (unpack_unreachable || keep_unreachable || pack_loose_unreachable)
4361                 use_internal_rev_list = 1;
4362
4363         if (!reuse_object)
4364                 reuse_delta = 0;
4365         if (pack_compression_level == -1)
4366                 pack_compression_level = Z_DEFAULT_COMPRESSION;
4367         else if (pack_compression_level < 0 || pack_compression_level > Z_BEST_COMPRESSION)
4368                 die(_("bad pack compression level %d"), pack_compression_level);
4369
4370         if (!delta_search_threads)      /* --threads=0 means autodetect */
4371                 delta_search_threads = online_cpus();
4372
4373         if (!HAVE_THREADS && delta_search_threads != 1)
4374                 warning(_("no threads support, ignoring --threads"));
4375         if (!pack_to_stdout && !pack_size_limit && !cruft)
4376                 pack_size_limit = pack_size_limit_cfg;
4377         if (pack_to_stdout && pack_size_limit)
4378                 die(_("--max-pack-size cannot be used to build a pack for transfer"));
4379         if (pack_size_limit && pack_size_limit < 1024*1024) {
4380                 warning(_("minimum pack size limit is 1 MiB"));
4381                 pack_size_limit = 1024*1024;
4382         }
4383
4384         if (!pack_to_stdout && thin)
4385                 die(_("--thin cannot be used to build an indexable pack"));
4386
4387         if (keep_unreachable && unpack_unreachable)
4388                 die(_("options '%s' and '%s' cannot be used together"), "--keep-unreachable", "--unpack-unreachable");
4389         if (!rev_list_all || !rev_list_reflog || !rev_list_index)
4390                 unpack_unreachable_expiration = 0;
4391
4392         if (filter_options.choice) {
4393                 if (!pack_to_stdout)
4394                         die(_("cannot use --filter without --stdout"));
4395                 if (stdin_packs)
4396                         die(_("cannot use --filter with --stdin-packs"));
4397         }
4398
4399         if (stdin_packs && use_internal_rev_list)
4400                 die(_("cannot use internal rev list with --stdin-packs"));
4401
4402         if (cruft) {
4403                 if (use_internal_rev_list)
4404                         die(_("cannot use internal rev list with --cruft"));
4405                 if (stdin_packs)
4406                         die(_("cannot use --stdin-packs with --cruft"));
4407                 if (pack_size_limit)
4408                         die(_("cannot use --max-pack-size with --cruft"));
4409         }
4410
4411         /*
4412          * "soft" reasons not to use bitmaps - for on-disk repack by default we want
4413          *
4414          * - to produce good pack (with bitmap index not-yet-packed objects are
4415          *   packed in suboptimal order).
4416          *
4417          * - to use more robust pack-generation codepath (avoiding possible
4418          *   bugs in bitmap code and possible bitmap index corruption).
4419          */
4420         if (!pack_to_stdout)
4421                 use_bitmap_index_default = 0;
4422
4423         if (use_bitmap_index < 0)
4424                 use_bitmap_index = use_bitmap_index_default;
4425
4426         /* "hard" reasons not to use bitmaps; these just won't work at all */
4427         if (!use_internal_rev_list || (!pack_to_stdout && write_bitmap_index) || is_repository_shallow(the_repository))
4428                 use_bitmap_index = 0;
4429
4430         if (pack_to_stdout || !rev_list_all)
4431                 write_bitmap_index = 0;
4432
4433         if (use_delta_islands)
4434                 strvec_push(&rp, "--topo-order");
4435
4436         if (progress && all_progress_implied)
4437                 progress = 2;
4438
4439         add_extra_kept_packs(&keep_pack_list);
4440         if (ignore_packed_keep_on_disk) {
4441                 struct packed_git *p;
4442                 for (p = get_all_packs(the_repository); p; p = p->next)
4443                         if (p->pack_local && p->pack_keep)
4444                                 break;
4445                 if (!p) /* no keep-able packs found */
4446                         ignore_packed_keep_on_disk = 0;
4447         }
4448         if (local) {
4449                 /*
4450                  * unlike ignore_packed_keep_on_disk above, we do not
4451                  * want to unset "local" based on looking at packs, as
4452                  * it also covers non-local objects
4453                  */
4454                 struct packed_git *p;
4455                 for (p = get_all_packs(the_repository); p; p = p->next) {
4456                         if (!p->pack_local) {
4457                                 have_non_local_packs = 1;
4458                                 break;
4459                         }
4460                 }
4461         }
4462
4463         trace2_region_enter("pack-objects", "enumerate-objects",
4464                             the_repository);
4465         prepare_packing_data(the_repository, &to_pack);
4466
4467         if (progress && !cruft)
4468                 progress_state = start_progress(_("Enumerating objects"), 0);
4469         if (stdin_packs) {
4470                 /* avoids adding objects in excluded packs */
4471                 ignore_packed_keep_in_core = 1;
4472                 read_packs_list_from_stdin();
4473                 if (rev_list_unpacked)
4474                         add_unreachable_loose_objects();
4475         } else if (cruft) {
4476                 read_cruft_objects();
4477         } else if (!use_internal_rev_list) {
4478                 read_object_list_from_stdin();
4479         } else {
4480                 struct rev_info revs;
4481
4482                 repo_init_revisions(the_repository, &revs, NULL);
4483                 list_objects_filter_copy(&revs.filter, &filter_options);
4484                 get_object_list(&revs, rp.nr, rp.v);
4485                 release_revisions(&revs);
4486         }
4487         cleanup_preferred_base();
4488         if (include_tag && nr_result)
4489                 for_each_tag_ref(add_ref_tag, NULL);
4490         stop_progress(&progress_state);
4491         trace2_region_leave("pack-objects", "enumerate-objects",
4492                             the_repository);
4493
4494         if (non_empty && !nr_result)
4495                 goto cleanup;
4496         if (nr_result) {
4497                 trace2_region_enter("pack-objects", "prepare-pack",
4498                                     the_repository);
4499                 prepare_pack(window, depth);
4500                 trace2_region_leave("pack-objects", "prepare-pack",
4501                                     the_repository);
4502         }
4503
4504         trace2_region_enter("pack-objects", "write-pack-file", the_repository);
4505         write_excluded_by_configs();
4506         write_pack_file();
4507         trace2_region_leave("pack-objects", "write-pack-file", the_repository);
4508
4509         if (progress)
4510                 fprintf_ln(stderr,
4511                            _("Total %"PRIu32" (delta %"PRIu32"),"
4512                              " reused %"PRIu32" (delta %"PRIu32"),"
4513                              " pack-reused %"PRIu32),
4514                            written, written_delta, reused, reused_delta,
4515                            reuse_packfile_objects);
4516
4517 cleanup:
4518         list_objects_filter_release(&filter_options);
4519         strvec_clear(&rp);
4520
4521         return 0;
4522 }