builtin/pack-objects.c

   1 #include "builtin.h"
   2 #include "alloc.h"
   3 #include "environment.h"
   4 #include "gettext.h"
   5 #include "hex.h"
   6 #include "repository.h"
   7 #include "config.h"
   8 #include "attr.h"
   9 #include "object.h"
  10 #include "blob.h"
  11 #include "commit.h"
  12 #include "tag.h"
  13 #include "tree.h"
  14 #include "delta.h"
  15 #include "pack.h"
  16 #include "pack-revindex.h"
  17 #include "csum-file.h"
  18 #include "tree-walk.h"
  19 #include "diff.h"
  20 #include "revision.h"
  21 #include "list-objects.h"
  22 #include "list-objects-filter.h"
  23 #include "list-objects-filter-options.h"
  24 #include "pack-objects.h"
  25 #include "progress.h"
  26 #include "refs.h"
  27 #include "streaming.h"
  28 #include "thread-utils.h"
  29 #include "pack-bitmap.h"
  30 #include "delta-islands.h"
  31 #include "reachable.h"
  32 #include "oid-array.h"
  33 #include "strvec.h"
  34 #include "list.h"
  35 #include "packfile.h"
  36 #include "object-file.h"
  37 #include "object-store-ll.h"
  38 #include "replace-object.h"
  39 #include "dir.h"
  40 #include "midx.h"
  41 #include "trace2.h"
  42 #include "shallow.h"
  43 #include "promisor-remote.h"
  44 #include "pack-mtimes.h"
  45 #include "parse-options.h"
  46
/*
 * Objects we are going to pack are collected in the `to_pack` structure.
 * It contains an array (dynamically expanded) of the object data, and a map
 * that can resolve object IDs to their position in the array.
 *
 * The SIZE()/DELTA()/SET_*() shorthand macros in this file all operate on
 * this single instance.
 */
static struct packing_data to_pack;
  53
  54 static inline struct object_entry *oe_delta(
  55                 const struct packing_data *pack,
  56                 const struct object_entry *e)
  57 {
  58         if (!e->delta_idx)
  59                 return NULL;
  60         if (e->ext_base)
  61                 return &pack->ext_bases[e->delta_idx - 1];
  62         else
  63                 return &pack->objects[e->delta_idx - 1];
  64 }
  65
  66 static inline unsigned long oe_delta_size(struct packing_data *pack,
  67                                           const struct object_entry *e)
  68 {
  69         if (e->delta_size_valid)
  70                 return e->delta_size_;
  71
  72         /*
  73          * pack->delta_size[] can't be NULL because oe_set_delta_size()
  74          * must have been called when a new delta is saved with
  75          * oe_set_delta().
  76          * If oe_delta() returns NULL (i.e. default state, which means
  77          * delta_size_valid is also false), then the caller must never
  78          * call oe_delta_size().
  79          */
  80         return pack->delta_size[e - pack->objects];
  81 }
  82
  83 unsigned long oe_get_size_slow(struct packing_data *pack,
  84                                const struct object_entry *e);
  85
  86 static inline unsigned long oe_size(struct packing_data *pack,
  87                                     const struct object_entry *e)
  88 {
  89         if (e->size_valid)
  90                 return e->size_;
  91
  92         return oe_get_size_slow(pack, e);
  93 }
  94
  95 static inline void oe_set_delta(struct packing_data *pack,
  96                                 struct object_entry *e,
  97                                 struct object_entry *delta)
  98 {
  99         if (delta)
 100                 e->delta_idx = (delta - pack->objects) + 1;
 101         else
 102                 e->delta_idx = 0;
 103 }
 104
 105 static inline struct object_entry *oe_delta_sibling(
 106                 const struct packing_data *pack,
 107                 const struct object_entry *e)
 108 {
 109         if (e->delta_sibling_idx)
 110                 return &pack->objects[e->delta_sibling_idx - 1];
 111         return NULL;
 112 }
 113
 114 static inline struct object_entry *oe_delta_child(
 115                 const struct packing_data *pack,
 116                 const struct object_entry *e)
 117 {
 118         if (e->delta_child_idx)
 119                 return &pack->objects[e->delta_child_idx - 1];
 120         return NULL;
 121 }
 122
 123 static inline void oe_set_delta_child(struct packing_data *pack,
 124                                       struct object_entry *e,
 125                                       struct object_entry *delta)
 126 {
 127         if (delta)
 128                 e->delta_child_idx = (delta - pack->objects) + 1;
 129         else
 130                 e->delta_child_idx = 0;
 131 }
 132
 133 static inline void oe_set_delta_sibling(struct packing_data *pack,
 134                                         struct object_entry *e,
 135                                         struct object_entry *delta)
 136 {
 137         if (delta)
 138                 e->delta_sibling_idx = (delta - pack->objects) + 1;
 139         else
 140                 e->delta_sibling_idx = 0;
 141 }
 142
 143 static inline void oe_set_size(struct packing_data *pack,
 144                                struct object_entry *e,
 145                                unsigned long size)
 146 {
 147         if (size < pack->oe_size_limit) {
 148                 e->size_ = size;
 149                 e->size_valid = 1;
 150         } else {
 151                 e->size_valid = 0;
 152                 if (oe_get_size_slow(pack, e) != size)
 153                         BUG("'size' is supposed to be the object size!");
 154         }
 155 }
 156
 157 static inline void oe_set_delta_size(struct packing_data *pack,
 158                                      struct object_entry *e,
 159                                      unsigned long size)
 160 {
 161         if (size < pack->oe_delta_size_limit) {
 162                 e->delta_size_ = size;
 163                 e->delta_size_valid = 1;
 164         } else {
 165                 packing_data_lock(pack);
 166                 if (!pack->delta_size)
 167                         ALLOC_ARRAY(pack->delta_size, pack->nr_alloc);
 168                 packing_data_unlock(pack);
 169
 170                 pack->delta_size[e - pack->objects] = size;
 171                 e->delta_size_valid = 0;
 172         }
 173 }
 174
/*
 * Shorthand accessors: each macro forwards to the matching oe_*() helper
 * with the file-global `to_pack` as the packing_data argument.
 * oe_in_pack() and oe_set_delta_ext() are not defined in this part of the
 * file; they are expected to be declared elsewhere.
 */
#define IN_PACK(obj) oe_in_pack(&to_pack, obj)
#define SIZE(obj) oe_size(&to_pack, obj)
#define SET_SIZE(obj,size) oe_set_size(&to_pack, obj, size)
#define DELTA_SIZE(obj) oe_delta_size(&to_pack, obj)
#define DELTA(obj) oe_delta(&to_pack, obj)
#define DELTA_CHILD(obj) oe_delta_child(&to_pack, obj)
#define DELTA_SIBLING(obj) oe_delta_sibling(&to_pack, obj)
#define SET_DELTA(obj, val) oe_set_delta(&to_pack, obj, val)
#define SET_DELTA_EXT(obj, oid) oe_set_delta_ext(&to_pack, obj, oid)
#define SET_DELTA_SIZE(obj, val) oe_set_delta_size(&to_pack, obj, val)
#define SET_DELTA_CHILD(obj, val) oe_set_delta_child(&to_pack, obj, val)
#define SET_DELTA_SIBLING(obj, val) oe_set_delta_sibling(&to_pack, obj, val)
 187
/*
 * NULL-terminated list of usage strings for the command, one per
 * invocation form (--stdout vs. <base-name>).
 * NOTE(review): N_() presumably only marks the strings for later
 * translation -- confirm against gettext.h.
 */
static const char *pack_usage[] = {
        N_("git pack-objects --stdout [<options>] [< <ref-list> | < <object-list>]"),
        N_("git pack-objects [<options>] <base-name> [< <ref-list> | < <object-list>]"),
        NULL
};
 193
/* Index entries of written objects; write_one() appends at nr_written. */
static struct pack_idx_entry **written_list;
/*
 * nr_written counts the entries stored in written_list; write_object()
 * applies no size limit while it is still zero (first object).
 */
static uint32_t nr_result, nr_written, nr_seen;
static struct bitmap_index *bitmap_git;
/* add_to_write_order() only accepts entries whose oe_layer() equals this. */
static uint32_t write_layer;

static int non_empty;
/* write_object() never copies existing pack data when reuse_object is 0. */
static int reuse_delta = 1, reuse_object = 1;
static int keep_unreachable, unpack_unreachable, include_tag;
static timestamp_t unpack_unreachable_expiration;
static int pack_loose_unreachable;
static int cruft;
static timestamp_t cruft_expiration;
static int local;
static int have_non_local_packs;
static int incremental;
static int ignore_packed_keep_on_disk;
static int ignore_packed_keep_in_core;
/*
 * When set, a delta whose base already has a pack offset is written as
 * OBJ_OFS_DELTA; otherwise deltas are written as OBJ_REF_DELTA.
 */
static int allow_ofs_delta;
static struct pack_idx_option pack_idx_opts;
static const char *base_name;
static int progress = 1;
static int window = 10;
/* Zero means no limit (see write_object()). */
static unsigned long pack_size_limit;
static int depth = 50;
static int delta_search_threads;
/*
 * When set, write_object() skips the per-object CRC32 bookkeeping and
 * write_reuse_object() skips its CRC/inflate validation of reused data.
 */
static int pack_to_stdout;
static int sparse;
static int thin;
static int num_preferred_base;
static struct progress *progress_state;

static struct packed_git *reuse_packfile;
static uint32_t reuse_packfile_objects;
static struct bitmap *reuse_packfile_bitmap;

static int use_bitmap_index_default = 1;
static int use_bitmap_index = -1;
static int allow_pack_reuse = 1;
static enum {
        WRITE_BITMAP_FALSE = 0,
        WRITE_BITMAP_QUIET,
        WRITE_BITMAP_TRUE,
} write_bitmap_index;
static uint16_t write_bitmap_options = BITMAP_OPT_HASH_CACHE;

static int exclude_promisor_objects;

static int use_delta_islands;

static unsigned long delta_cache_size = 0;
static unsigned long max_delta_cache_size = DEFAULT_DELTA_CACHE_SIZE;
static unsigned long cache_max_small_delta_size = 1000;

static unsigned long window_memory_limit = 0;

static struct string_list uri_protocols = STRING_LIST_INIT_NODUP;
 250
/* Policy for objects that turn out to be missing. */
enum missing_action {
        MA_ERROR = 0,      /* fail if any missing objects are encountered */
        MA_ALLOW_ANY,      /* silently allow ALL missing objects */
        MA_ALLOW_PROMISOR, /* silently allow all missing PROMISOR objects */
};
/* Zero-initialized, i.e. MA_ERROR unless changed elsewhere in the file. */
static enum missing_action arg_missing_action;
/* NOTE(review): presumably chosen to match arg_missing_action -- confirm. */
static show_object_fn fn_show_object;
 258
/*
 * An oidmap entry (note the embedded `struct oidmap_entry`) carrying two
 * strings: a pack hash in hex and a URI.
 * NOTE(review): presumably these are the entries stored in
 * `configured_exclusions` below -- confirm against the code that fills it.
 */
struct configured_exclusion {
        struct oidmap_entry e;
        char *pack_hash_hex;
        char *uri;
};
static struct oidmap configured_exclusions;

static struct oidset excluded_by_config;
 267
/*
 * stats
 *
 * write_object() bumps `written` for every object it writes and
 * `written_delta` when that object was written with a usable delta.
 * write_reuse_object() bumps `reused` for every object copied from an
 * existing pack and `reused_delta` when the copy was a delta.
 */
static uint32_t written, written_delta;
static uint32_t reused, reused_delta;
 273
/*
 * Indexed commits
 *
 * Growable array filled by index_commit_for_bitmap():
 * indexed_commits_nr is the number of slots in use and
 * indexed_commits_alloc the number of slots allocated.
 */
static struct commit **indexed_commits;
static unsigned int indexed_commits_nr;
static unsigned int indexed_commits_alloc;
 280
 281 static void index_commit_for_bitmap(struct commit *commit)
 282 {
 283         if (indexed_commits_nr >= indexed_commits_alloc) {
 284                 indexed_commits_alloc = (indexed_commits_alloc + 32) * 2;
 285                 REALLOC_ARRAY(indexed_commits, indexed_commits_alloc);
 286         }
 287
 288         indexed_commits[indexed_commits_nr++] = commit;
 289 }
 290
 291 static void *get_delta(struct object_entry *entry)
 292 {
 293         unsigned long size, base_size, delta_size;
 294         void *buf, *base_buf, *delta_buf;
 295         enum object_type type;
 296
 297         buf = repo_read_object_file(the_repository, &entry->idx.oid, &type,
 298                                     &size);
 299         if (!buf)
 300                 die(_("unable to read %s"), oid_to_hex(&entry->idx.oid));
 301         base_buf = repo_read_object_file(the_repository,
 302                                          &DELTA(entry)->idx.oid, &type,
 303                                          &base_size);
 304         if (!base_buf)
 305                 die("unable to read %s",
 306                     oid_to_hex(&DELTA(entry)->idx.oid));
 307         delta_buf = diff_delta(base_buf, base_size,
 308                                buf, size, &delta_size, 0);
 309         /*
 310          * We successfully computed this delta once but dropped it for
 311          * memory reasons. Something is very wrong if this time we
 312          * recompute and create a different delta.
 313          */
 314         if (!delta_buf || delta_size != DELTA_SIZE(entry))
 315                 BUG("delta size changed");
 316         free(buf);
 317         free(base_buf);
 318         return delta_buf;
 319 }
 320
 321 static unsigned long do_compress(void **pptr, unsigned long size)
 322 {
 323         git_zstream stream;
 324         void *in, *out;
 325         unsigned long maxsize;
 326
 327         git_deflate_init(&stream, pack_compression_level);
 328         maxsize = git_deflate_bound(&stream, size);
 329
 330         in = *pptr;
 331         out = xmalloc(maxsize);
 332         *pptr = out;
 333
 334         stream.next_in = in;
 335         stream.avail_in = size;
 336         stream.next_out = out;
 337         stream.avail_out = maxsize;
 338         while (git_deflate(&stream, Z_FINISH) == Z_OK)
 339                 ; /* nothing */
 340         git_deflate_end(&stream);
 341
 342         free(in);
 343         return stream.total_out;
 344 }
 345
 346 static unsigned long write_large_blob_data(struct git_istream *st, struct hashfile *f,
 347                                            const struct object_id *oid)
 348 {
 349         git_zstream stream;
 350         unsigned char ibuf[1024 * 16];
 351         unsigned char obuf[1024 * 16];
 352         unsigned long olen = 0;
 353
 354         git_deflate_init(&stream, pack_compression_level);
 355
 356         for (;;) {
 357                 ssize_t readlen;
 358                 int zret = Z_OK;
 359                 readlen = read_istream(st, ibuf, sizeof(ibuf));
 360                 if (readlen == -1)
 361                         die(_("unable to read %s"), oid_to_hex(oid));
 362
 363                 stream.next_in = ibuf;
 364                 stream.avail_in = readlen;
 365                 while ((stream.avail_in || readlen == 0) &&
 366                        (zret == Z_OK || zret == Z_BUF_ERROR)) {
 367                         stream.next_out = obuf;
 368                         stream.avail_out = sizeof(obuf);
 369                         zret = git_deflate(&stream, readlen ? 0 : Z_FINISH);
 370                         hashwrite(f, obuf, stream.next_out - obuf);
 371                         olen += stream.next_out - obuf;
 372                 }
 373                 if (stream.avail_in)
 374                         die(_("deflate error (%d)"), zret);
 375                 if (readlen == 0) {
 376                         if (zret != Z_STREAM_END)
 377                                 die(_("deflate error (%d)"), zret);
 378                         break;
 379                 }
 380         }
 381         git_deflate_end(&stream);
 382         return olen;
 383 }
 384
 385 /*
 386  * we are going to reuse the existing object data as is.  make
 387  * sure it is not corrupt.
 388  */
 389 static int check_pack_inflate(struct packed_git *p,
 390                 struct pack_window **w_curs,
 391                 off_t offset,
 392                 off_t len,
 393                 unsigned long expect)
 394 {
 395         git_zstream stream;
 396         unsigned char fakebuf[4096], *in;
 397         int st;
 398
 399         memset(&stream, 0, sizeof(stream));
 400         git_inflate_init(&stream);
 401         do {
 402                 in = use_pack(p, w_curs, offset, &stream.avail_in);
 403                 stream.next_in = in;
 404                 stream.next_out = fakebuf;
 405                 stream.avail_out = sizeof(fakebuf);
 406                 st = git_inflate(&stream, Z_FINISH);
 407                 offset += stream.next_in - in;
 408         } while (st == Z_OK || st == Z_BUF_ERROR);
 409         git_inflate_end(&stream);
 410         return (st == Z_STREAM_END &&
 411                 stream.total_out == expect &&
 412                 stream.total_in == len) ? 0 : -1;
 413 }
 414
 415 static void copy_pack_data(struct hashfile *f,
 416                 struct packed_git *p,
 417                 struct pack_window **w_curs,
 418                 off_t offset,
 419                 off_t len)
 420 {
 421         unsigned char *in;
 422         unsigned long avail;
 423
 424         while (len) {
 425                 in = use_pack(p, w_curs, offset, &avail);
 426                 if (avail > len)
 427                         avail = (unsigned long)len;
 428                 hashwrite(f, in, avail);
 429                 offset += avail;
 430                 len -= avail;
 431         }
 432 }
 433
 434 static inline int oe_size_greater_than(struct packing_data *pack,
 435                                        const struct object_entry *lhs,
 436                                        unsigned long rhs)
 437 {
 438         if (lhs->size_valid)
 439                 return lhs->size_ > rhs;
 440         if (rhs < pack->oe_size_limit) /* rhs < 2^x <= lhs ? */
 441                 return 1;
 442         return oe_get_size_slow(pack, lhs) > rhs;
 443 }
 444
 445 /* Return 0 if we will bust the pack-size limit */
 446 static unsigned long write_no_reuse_object(struct hashfile *f, struct object_entry *entry,
 447                                            unsigned long limit, int usable_delta)
 448 {
 449         unsigned long size, datalen;
 450         unsigned char header[MAX_PACK_OBJECT_HEADER],
 451                       dheader[MAX_PACK_OBJECT_HEADER];
 452         unsigned hdrlen;
 453         enum object_type type;
 454         void *buf;
 455         struct git_istream *st = NULL;
 456         const unsigned hashsz = the_hash_algo->rawsz;
 457
 458         if (!usable_delta) {
 459                 if (oe_type(entry) == OBJ_BLOB &&
 460                     oe_size_greater_than(&to_pack, entry, big_file_threshold) &&
 461                     (st = open_istream(the_repository, &entry->idx.oid, &type,
 462                                        &size, NULL)) != NULL)
 463                         buf = NULL;
 464                 else {
 465                         buf = repo_read_object_file(the_repository,
 466                                                     &entry->idx.oid, &type,
 467                                                     &size);
 468                         if (!buf)
 469                                 die(_("unable to read %s"),
 470                                     oid_to_hex(&entry->idx.oid));
 471                 }
 472                 /*
 473                  * make sure no cached delta data remains from a
 474                  * previous attempt before a pack split occurred.
 475                  */
 476                 FREE_AND_NULL(entry->delta_data);
 477                 entry->z_delta_size = 0;
 478         } else if (entry->delta_data) {
 479                 size = DELTA_SIZE(entry);
 480                 buf = entry->delta_data;
 481                 entry->delta_data = NULL;
 482                 type = (allow_ofs_delta && DELTA(entry)->idx.offset) ?
 483                         OBJ_OFS_DELTA : OBJ_REF_DELTA;
 484         } else {
 485                 buf = get_delta(entry);
 486                 size = DELTA_SIZE(entry);
 487                 type = (allow_ofs_delta && DELTA(entry)->idx.offset) ?
 488                         OBJ_OFS_DELTA : OBJ_REF_DELTA;
 489         }
 490
 491         if (st) /* large blob case, just assume we don't compress well */
 492                 datalen = size;
 493         else if (entry->z_delta_size)
 494                 datalen = entry->z_delta_size;
 495         else
 496                 datalen = do_compress(&buf, size);
 497
 498         /*
 499          * The object header is a byte of 'type' followed by zero or
 500          * more bytes of length.
 501          */
 502         hdrlen = encode_in_pack_object_header(header, sizeof(header),
 503                                               type, size);
 504
 505         if (type == OBJ_OFS_DELTA) {
 506                 /*
 507                  * Deltas with relative base contain an additional
 508                  * encoding of the relative offset for the delta
 509                  * base from this object's position in the pack.
 510                  */
 511                 off_t ofs = entry->idx.offset - DELTA(entry)->idx.offset;
 512                 unsigned pos = sizeof(dheader) - 1;
 513                 dheader[pos] = ofs & 127;
 514                 while (ofs >>= 7)
 515                         dheader[--pos] = 128 | (--ofs & 127);
 516                 if (limit && hdrlen + sizeof(dheader) - pos + datalen + hashsz >= limit) {
 517                         if (st)
 518                                 close_istream(st);
 519                         free(buf);
 520                         return 0;
 521                 }
 522                 hashwrite(f, header, hdrlen);
 523                 hashwrite(f, dheader + pos, sizeof(dheader) - pos);
 524                 hdrlen += sizeof(dheader) - pos;
 525         } else if (type == OBJ_REF_DELTA) {
 526                 /*
 527                  * Deltas with a base reference contain
 528                  * additional bytes for the base object ID.
 529                  */
 530                 if (limit && hdrlen + hashsz + datalen + hashsz >= limit) {
 531                         if (st)
 532                                 close_istream(st);
 533                         free(buf);
 534                         return 0;
 535                 }
 536                 hashwrite(f, header, hdrlen);
 537                 hashwrite(f, DELTA(entry)->idx.oid.hash, hashsz);
 538                 hdrlen += hashsz;
 539         } else {
 540                 if (limit && hdrlen + datalen + hashsz >= limit) {
 541                         if (st)
 542                                 close_istream(st);
 543                         free(buf);
 544                         return 0;
 545                 }
 546                 hashwrite(f, header, hdrlen);
 547         }
 548         if (st) {
 549                 datalen = write_large_blob_data(st, f, &entry->idx.oid);
 550                 close_istream(st);
 551         } else {
 552                 hashwrite(f, buf, datalen);
 553                 free(buf);
 554         }
 555
 556         return hdrlen + datalen;
 557 }
 558
 559 /* Return 0 if we will bust the pack-size limit */
 560 static off_t write_reuse_object(struct hashfile *f, struct object_entry *entry,
 561                                 unsigned long limit, int usable_delta)
 562 {
 563         struct packed_git *p = IN_PACK(entry);
 564         struct pack_window *w_curs = NULL;
 565         uint32_t pos;
 566         off_t offset;
 567         enum object_type type = oe_type(entry);
 568         off_t datalen;
 569         unsigned char header[MAX_PACK_OBJECT_HEADER],
 570                       dheader[MAX_PACK_OBJECT_HEADER];
 571         unsigned hdrlen;
 572         const unsigned hashsz = the_hash_algo->rawsz;
 573         unsigned long entry_size = SIZE(entry);
 574
 575         if (DELTA(entry))
 576                 type = (allow_ofs_delta && DELTA(entry)->idx.offset) ?
 577                         OBJ_OFS_DELTA : OBJ_REF_DELTA;
 578         hdrlen = encode_in_pack_object_header(header, sizeof(header),
 579                                               type, entry_size);
 580
 581         offset = entry->in_pack_offset;
 582         if (offset_to_pack_pos(p, offset, &pos) < 0)
 583                 die(_("write_reuse_object: could not locate %s, expected at "
 584                       "offset %"PRIuMAX" in pack %s"),
 585                     oid_to_hex(&entry->idx.oid), (uintmax_t)offset,
 586                     p->pack_name);
 587         datalen = pack_pos_to_offset(p, pos + 1) - offset;
 588         if (!pack_to_stdout && p->index_version > 1 &&
 589             check_pack_crc(p, &w_curs, offset, datalen,
 590                            pack_pos_to_index(p, pos))) {
 591                 error(_("bad packed object CRC for %s"),
 592                       oid_to_hex(&entry->idx.oid));
 593                 unuse_pack(&w_curs);
 594                 return write_no_reuse_object(f, entry, limit, usable_delta);
 595         }
 596
 597         offset += entry->in_pack_header_size;
 598         datalen -= entry->in_pack_header_size;
 599
 600         if (!pack_to_stdout && p->index_version == 1 &&
 601             check_pack_inflate(p, &w_curs, offset, datalen, entry_size)) {
 602                 error(_("corrupt packed object for %s"),
 603                       oid_to_hex(&entry->idx.oid));
 604                 unuse_pack(&w_curs);
 605                 return write_no_reuse_object(f, entry, limit, usable_delta);
 606         }
 607
 608         if (type == OBJ_OFS_DELTA) {
 609                 off_t ofs = entry->idx.offset - DELTA(entry)->idx.offset;
 610                 unsigned pos = sizeof(dheader) - 1;
 611                 dheader[pos] = ofs & 127;
 612                 while (ofs >>= 7)
 613                         dheader[--pos] = 128 | (--ofs & 127);
 614                 if (limit && hdrlen + sizeof(dheader) - pos + datalen + hashsz >= limit) {
 615                         unuse_pack(&w_curs);
 616                         return 0;
 617                 }
 618                 hashwrite(f, header, hdrlen);
 619                 hashwrite(f, dheader + pos, sizeof(dheader) - pos);
 620                 hdrlen += sizeof(dheader) - pos;
 621                 reused_delta++;
 622         } else if (type == OBJ_REF_DELTA) {
 623                 if (limit && hdrlen + hashsz + datalen + hashsz >= limit) {
 624                         unuse_pack(&w_curs);
 625                         return 0;
 626                 }
 627                 hashwrite(f, header, hdrlen);
 628                 hashwrite(f, DELTA(entry)->idx.oid.hash, hashsz);
 629                 hdrlen += hashsz;
 630                 reused_delta++;
 631         } else {
 632                 if (limit && hdrlen + datalen + hashsz >= limit) {
 633                         unuse_pack(&w_curs);
 634                         return 0;
 635                 }
 636                 hashwrite(f, header, hdrlen);
 637         }
 638         copy_pack_data(f, p, &w_curs, offset, datalen);
 639         unuse_pack(&w_curs);
 640         reused++;
 641         return hdrlen + datalen;
 642 }
 643
 644 /* Return 0 if we will bust the pack-size limit */
 645 static off_t write_object(struct hashfile *f,
 646                           struct object_entry *entry,
 647                           off_t write_offset)
 648 {
 649         unsigned long limit;
 650         off_t len;
 651         int usable_delta, to_reuse;
 652
 653         if (!pack_to_stdout)
 654                 crc32_begin(f);
 655
 656         /* apply size limit if limited packsize and not first object */
 657         if (!pack_size_limit || !nr_written)
 658                 limit = 0;
 659         else if (pack_size_limit <= write_offset)
 660                 /*
 661                  * the earlier object did not fit the limit; avoid
 662                  * mistaking this with unlimited (i.e. limit = 0).
 663                  */
 664                 limit = 1;
 665         else
 666                 limit = pack_size_limit - write_offset;
 667
 668         if (!DELTA(entry))
 669                 usable_delta = 0;       /* no delta */
 670         else if (!pack_size_limit)
 671                usable_delta = 1;        /* unlimited packfile */
 672         else if (DELTA(entry)->idx.offset == (off_t)-1)
 673                 usable_delta = 0;       /* base was written to another pack */
 674         else if (DELTA(entry)->idx.offset)
 675                 usable_delta = 1;       /* base already exists in this pack */
 676         else
 677                 usable_delta = 0;       /* base could end up in another pack */
 678
 679         if (!reuse_object)
 680                 to_reuse = 0;   /* explicit */
 681         else if (!IN_PACK(entry))
 682                 to_reuse = 0;   /* can't reuse what we don't have */
 683         else if (oe_type(entry) == OBJ_REF_DELTA ||
 684                  oe_type(entry) == OBJ_OFS_DELTA)
 685                                 /* check_object() decided it for us ... */
 686                 to_reuse = usable_delta;
 687                                 /* ... but pack split may override that */
 688         else if (oe_type(entry) != entry->in_pack_type)
 689                 to_reuse = 0;   /* pack has delta which is unusable */
 690         else if (DELTA(entry))
 691                 to_reuse = 0;   /* we want to pack afresh */
 692         else
 693                 to_reuse = 1;   /* we have it in-pack undeltified,
 694                                  * and we do not need to deltify it.
 695                                  */
 696
 697         if (!to_reuse)
 698                 len = write_no_reuse_object(f, entry, limit, usable_delta);
 699         else
 700                 len = write_reuse_object(f, entry, limit, usable_delta);
 701         if (!len)
 702                 return 0;
 703
 704         if (usable_delta)
 705                 written_delta++;
 706         written++;
 707         if (!pack_to_stdout)
 708                 entry->idx.crc32 = crc32_end(f);
 709         return len;
 710 }
 711
/* Result of write_one() for a single object entry. */
enum write_one_status {
        WRITE_ONE_SKIP = -1, /* already written (or a preferred base) */
        WRITE_ONE_BREAK = 0, /* writing this will bust the limit; not written */
        WRITE_ONE_WRITTEN = 1, /* normal */
        WRITE_ONE_RECURSIVE = 2 /* already scheduled to be written */
};
 718
 719 static enum write_one_status write_one(struct hashfile *f,
 720                                        struct object_entry *e,
 721                                        off_t *offset)
 722 {
 723         off_t size;
 724         int recursing;
 725
 726         /*
 727          * we set offset to 1 (which is an impossible value) to mark
 728          * the fact that this object is involved in "write its base
 729          * first before writing a deltified object" recursion.
 730          */
 731         recursing = (e->idx.offset == 1);
 732         if (recursing) {
 733                 warning(_("recursive delta detected for object %s"),
 734                         oid_to_hex(&e->idx.oid));
 735                 return WRITE_ONE_RECURSIVE;
 736         } else if (e->idx.offset || e->preferred_base) {
 737                 /* offset is non zero if object is written already. */
 738                 return WRITE_ONE_SKIP;
 739         }
 740
 741         /* if we are deltified, write out base object first. */
 742         if (DELTA(e)) {
 743                 e->idx.offset = 1; /* now recurse */
 744                 switch (write_one(f, DELTA(e), offset)) {
 745                 case WRITE_ONE_RECURSIVE:
 746                         /* we cannot depend on this one */
 747                         SET_DELTA(e, NULL);
 748                         break;
 749                 default:
 750                         break;
 751                 case WRITE_ONE_BREAK:
 752                         e->idx.offset = recursing;
 753                         return WRITE_ONE_BREAK;
 754                 }
 755         }
 756
 757         e->idx.offset = *offset;
 758         size = write_object(f, e, *offset);
 759         if (!size) {
 760                 e->idx.offset = recursing;
 761                 return WRITE_ONE_BREAK;
 762         }
 763         written_list[nr_written++] = &e->idx;
 764
 765         /* make sure off_t is sufficiently large not to wrap */
 766         if (signed_add_overflows(*offset, size))
 767                 die(_("pack too large for current definition of off_t"));
 768         *offset += size;
 769         return WRITE_ONE_WRITTEN;
 770 }
 771
 772 static int mark_tagged(const char *path UNUSED, const struct object_id *oid,
 773                        int flag UNUSED, void *cb_data UNUSED)
 774 {
 775         struct object_id peeled;
 776         struct object_entry *entry = packlist_find(&to_pack, oid);
 777
 778         if (entry)
 779                 entry->tagged = 1;
 780         if (!peel_iterated_oid(oid, &peeled)) {
 781                 entry = packlist_find(&to_pack, &peeled);
 782                 if (entry)
 783                         entry->tagged = 1;
 784         }
 785         return 0;
 786 }
 787
 788 static inline unsigned char oe_layer(struct packing_data *pack,
 789                                      struct object_entry *e)
 790 {
 791         if (!pack->layer)
 792                 return 0;
 793         return pack->layer[e - pack->objects];
 794 }
 795
 796 static inline void add_to_write_order(struct object_entry **wo,
 797                                unsigned int *endp,
 798                                struct object_entry *e)
 799 {
 800         if (e->filled || oe_layer(&to_pack, e) != write_layer)
 801                 return;
 802         wo[(*endp)++] = e;
 803         e->filled = 1;
 804 }
 805
 806 static void add_descendants_to_write_order(struct object_entry **wo,
 807                                            unsigned int *endp,
 808                                            struct object_entry *e)
 809 {
 810         int add_to_order = 1;
 811         while (e) {
 812                 if (add_to_order) {
 813                         struct object_entry *s;
 814                         /* add this node... */
 815                         add_to_write_order(wo, endp, e);
 816                         /* all its siblings... */
 817                         for (s = DELTA_SIBLING(e); s; s = DELTA_SIBLING(s)) {
 818                                 add_to_write_order(wo, endp, s);
 819                         }
 820                 }
 821                 /* drop down a level to add left subtree nodes if possible */
 822                 if (DELTA_CHILD(e)) {
 823                         add_to_order = 1;
 824                         e = DELTA_CHILD(e);
 825                 } else {
 826                         add_to_order = 0;
 827                         /* our sibling might have some children, it is next */
 828                         if (DELTA_SIBLING(e)) {
 829                                 e = DELTA_SIBLING(e);
 830                                 continue;
 831                         }
 832                         /* go back to our parent node */
 833                         e = DELTA(e);
 834                         while (e && !DELTA_SIBLING(e)) {
 835                                 /* we're on the right side of a subtree, keep
 836                                  * going up until we can go right again */
 837                                 e = DELTA(e);
 838                         }
 839                         if (!e) {
 840                                 /* done- we hit our original root node */
 841                                 return;
 842                         }
 843                         /* pass it off to sibling at this level */
 844                         e = DELTA_SIBLING(e);
 845                 }
 846         };
 847 }
 848
 849 static void add_family_to_write_order(struct object_entry **wo,
 850                                       unsigned int *endp,
 851                                       struct object_entry *e)
 852 {
 853         struct object_entry *root;
 854
 855         for (root = e; DELTA(root); root = DELTA(root))
 856                 ; /* nothing */
 857         add_descendants_to_write_order(wo, endp, root);
 858 }
 859
 860 static void compute_layer_order(struct object_entry **wo, unsigned int *wo_end)
 861 {
 862         unsigned int i, last_untagged;
 863         struct object_entry *objects = to_pack.objects;
 864
 865         for (i = 0; i < to_pack.nr_objects; i++) {
 866                 if (objects[i].tagged)
 867                         break;
 868                 add_to_write_order(wo, wo_end, &objects[i]);
 869         }
 870         last_untagged = i;
 871
 872         /*
 873          * Then fill all the tagged tips.
 874          */
 875         for (; i < to_pack.nr_objects; i++) {
 876                 if (objects[i].tagged)
 877                         add_to_write_order(wo, wo_end, &objects[i]);
 878         }
 879
 880         /*
 881          * And then all remaining commits and tags.
 882          */
 883         for (i = last_untagged; i < to_pack.nr_objects; i++) {
 884                 if (oe_type(&objects[i]) != OBJ_COMMIT &&
 885                     oe_type(&objects[i]) != OBJ_TAG)
 886                         continue;
 887                 add_to_write_order(wo, wo_end, &objects[i]);
 888         }
 889
 890         /*
 891          * And then all the trees.
 892          */
 893         for (i = last_untagged; i < to_pack.nr_objects; i++) {
 894                 if (oe_type(&objects[i]) != OBJ_TREE)
 895                         continue;
 896                 add_to_write_order(wo, wo_end, &objects[i]);
 897         }
 898
 899         /*
 900          * Finally all the rest in really tight order
 901          */
 902         for (i = last_untagged; i < to_pack.nr_objects; i++) {
 903                 if (!objects[i].filled && oe_layer(&to_pack, &objects[i]) == write_layer)
 904                         add_family_to_write_order(wo, wo_end, &objects[i]);
 905         }
 906 }
 907
 908 static struct object_entry **compute_write_order(void)
 909 {
 910         uint32_t max_layers = 1;
 911         unsigned int i, wo_end;
 912
 913         struct object_entry **wo;
 914         struct object_entry *objects = to_pack.objects;
 915
 916         for (i = 0; i < to_pack.nr_objects; i++) {
 917                 objects[i].tagged = 0;
 918                 objects[i].filled = 0;
 919                 SET_DELTA_CHILD(&objects[i], NULL);
 920                 SET_DELTA_SIBLING(&objects[i], NULL);
 921         }
 922
 923         /*
 924          * Fully connect delta_child/delta_sibling network.
 925          * Make sure delta_sibling is sorted in the original
 926          * recency order.
 927          */
 928         for (i = to_pack.nr_objects; i > 0;) {
 929                 struct object_entry *e = &objects[--i];
 930                 if (!DELTA(e))
 931                         continue;
 932                 /* Mark me as the first child */
 933                 e->delta_sibling_idx = DELTA(e)->delta_child_idx;
 934                 SET_DELTA_CHILD(DELTA(e), e);
 935         }
 936
 937         /*
 938          * Mark objects that are at the tip of tags.
 939          */
 940         for_each_tag_ref(mark_tagged, NULL);
 941
 942         if (use_delta_islands) {
 943                 max_layers = compute_pack_layers(&to_pack);
 944                 free_island_marks();
 945         }
 946
 947         ALLOC_ARRAY(wo, to_pack.nr_objects);
 948         wo_end = 0;
 949
 950         for (; write_layer < max_layers; ++write_layer)
 951                 compute_layer_order(wo, &wo_end);
 952
 953         if (wo_end != to_pack.nr_objects)
 954                 die(_("ordered %u objects, expected %"PRIu32),
 955                     wo_end, to_pack.nr_objects);
 956
 957         return wo;
 958 }
 959
 960
 961 /*
 962  * A reused set of objects. All objects in a chunk have the same
 963  * relative position in the original packfile and the generated
 964  * packfile.
 965  */
 966
 967 static struct reused_chunk {
 968         /* The offset of the first object of this chunk in the original
 969          * packfile. */
 970         off_t original;
 971         /* The difference for "original" minus the offset of the first object of
 972          * this chunk in the generated packfile. */
 973         off_t difference;
 974 } *reused_chunks;
 975 static int reused_chunks_nr;
 976 static int reused_chunks_alloc;
 977
 978 static void record_reused_object(off_t where, off_t offset)
 979 {
 980         if (reused_chunks_nr && reused_chunks[reused_chunks_nr-1].difference == offset)
 981                 return;
 982
 983         ALLOC_GROW(reused_chunks, reused_chunks_nr + 1,
 984                    reused_chunks_alloc);
 985         reused_chunks[reused_chunks_nr].original = where;
 986         reused_chunks[reused_chunks_nr].difference = offset;
 987         reused_chunks_nr++;
 988 }
 989
 990 /*
 991  * Binary search to find the chunk that "where" is in. Note
 992  * that we're not looking for an exact match, just the first
 993  * chunk that contains it (which implicitly ends at the start
 994  * of the next chunk.
 995  */
 996 static off_t find_reused_offset(off_t where)
 997 {
 998         int lo = 0, hi = reused_chunks_nr;
 999         while (lo < hi) {
1000                 int mi = lo + ((hi - lo) / 2);
1001                 if (where == reused_chunks[mi].original)
1002                         return reused_chunks[mi].difference;
1003                 if (where < reused_chunks[mi].original)
1004                         hi = mi;
1005                 else
1006                         lo = mi + 1;
1007         }
1008
1009         /*
1010          * The first chunk starts at zero, so we can't have gone below
1011          * there.
1012          */
1013         assert(lo);
1014         return reused_chunks[lo-1].difference;
1015 }
1016
1017 static void write_reused_pack_one(size_t pos, struct hashfile *out,
1018                                   struct pack_window **w_curs)
1019 {
1020         off_t offset, next, cur;
1021         enum object_type type;
1022         unsigned long size;
1023
1024         offset = pack_pos_to_offset(reuse_packfile, pos);
1025         next = pack_pos_to_offset(reuse_packfile, pos + 1);
1026
1027         record_reused_object(offset, offset - hashfile_total(out));
1028
1029         cur = offset;
1030         type = unpack_object_header(reuse_packfile, w_curs, &cur, &size);
1031         assert(type >= 0);
1032
1033         if (type == OBJ_OFS_DELTA) {
1034                 off_t base_offset;
1035                 off_t fixup;
1036
1037                 unsigned char header[MAX_PACK_OBJECT_HEADER];
1038                 unsigned len;
1039
1040                 base_offset = get_delta_base(reuse_packfile, w_curs, &cur, type, offset);
1041                 assert(base_offset != 0);
1042
1043                 /* Convert to REF_DELTA if we must... */
1044                 if (!allow_ofs_delta) {
1045                         uint32_t base_pos;
1046                         struct object_id base_oid;
1047
1048                         if (offset_to_pack_pos(reuse_packfile, base_offset, &base_pos) < 0)
1049                                 die(_("expected object at offset %"PRIuMAX" "
1050                                       "in pack %s"),
1051                                     (uintmax_t)base_offset,
1052                                     reuse_packfile->pack_name);
1053
1054                         nth_packed_object_id(&base_oid, reuse_packfile,
1055                                              pack_pos_to_index(reuse_packfile, base_pos));
1056
1057                         len = encode_in_pack_object_header(header, sizeof(header),
1058                                                            OBJ_REF_DELTA, size);
1059                         hashwrite(out, header, len);
1060                         hashwrite(out, base_oid.hash, the_hash_algo->rawsz);
1061                         copy_pack_data(out, reuse_packfile, w_curs, cur, next - cur);
1062                         return;
1063                 }
1064
1065                 /* Otherwise see if we need to rewrite the offset... */
1066                 fixup = find_reused_offset(offset) -
1067                         find_reused_offset(base_offset);
1068                 if (fixup) {
1069                         unsigned char ofs_header[10];
1070                         unsigned i, ofs_len;
1071                         off_t ofs = offset - base_offset - fixup;
1072
1073                         len = encode_in_pack_object_header(header, sizeof(header),
1074                                                            OBJ_OFS_DELTA, size);
1075
1076                         i = sizeof(ofs_header) - 1;
1077                         ofs_header[i] = ofs & 127;
1078                         while (ofs >>= 7)
1079                                 ofs_header[--i] = 128 | (--ofs & 127);
1080
1081                         ofs_len = sizeof(ofs_header) - i;
1082
1083                         hashwrite(out, header, len);
1084                         hashwrite(out, ofs_header + sizeof(ofs_header) - ofs_len, ofs_len);
1085                         copy_pack_data(out, reuse_packfile, w_curs, cur, next - cur);
1086                         return;
1087                 }
1088
1089                 /* ...otherwise we have no fixup, and can write it verbatim */
1090         }
1091
1092         copy_pack_data(out, reuse_packfile, w_curs, offset, next - offset);
1093 }
1094
1095 static size_t write_reused_pack_verbatim(struct hashfile *out,
1096                                          struct pack_window **w_curs)
1097 {
1098         size_t pos = 0;
1099
1100         while (pos < reuse_packfile_bitmap->word_alloc &&
1101                         reuse_packfile_bitmap->words[pos] == (eword_t)~0)
1102                 pos++;
1103
1104         if (pos) {
1105                 off_t to_write;
1106
1107                 written = (pos * BITS_IN_EWORD);
1108                 to_write = pack_pos_to_offset(reuse_packfile, written)
1109                         - sizeof(struct pack_header);
1110
1111                 /* We're recording one chunk, not one object. */
1112                 record_reused_object(sizeof(struct pack_header), 0);
1113                 hashflush(out);
1114                 copy_pack_data(out, reuse_packfile, w_curs,
1115                         sizeof(struct pack_header), to_write);
1116
1117                 display_progress(progress_state, written);
1118         }
1119         return pos;
1120 }
1121
1122 static void write_reused_pack(struct hashfile *f)
1123 {
1124         size_t i = 0;
1125         uint32_t offset;
1126         struct pack_window *w_curs = NULL;
1127
1128         if (allow_ofs_delta)
1129                 i = write_reused_pack_verbatim(f, &w_curs);
1130
1131         for (; i < reuse_packfile_bitmap->word_alloc; ++i) {
1132                 eword_t word = reuse_packfile_bitmap->words[i];
1133                 size_t pos = (i * BITS_IN_EWORD);
1134
1135                 for (offset = 0; offset < BITS_IN_EWORD; ++offset) {
1136                         if ((word >> offset) == 0)
1137                                 break;
1138
1139                         offset += ewah_bit_ctz64(word >> offset);
1140                         /*
1141                          * Can use bit positions directly, even for MIDX
1142                          * bitmaps. See comment in try_partial_reuse()
1143                          * for why.
1144                          */
1145                         write_reused_pack_one(pos + offset, f, &w_curs);
1146                         display_progress(progress_state, ++written);
1147                 }
1148         }
1149
1150         unuse_pack(&w_curs);
1151 }
1152
1153 static void write_excluded_by_configs(void)
1154 {
1155         struct oidset_iter iter;
1156         const struct object_id *oid;
1157
1158         oidset_iter_init(&excluded_by_config, &iter);
1159         while ((oid = oidset_iter_next(&iter))) {
1160                 struct configured_exclusion *ex =
1161                         oidmap_get(&configured_exclusions, oid);
1162
1163                 if (!ex)
1164                         BUG("configured exclusion wasn't configured");
1165                 write_in_full(1, ex->pack_hash_hex, strlen(ex->pack_hash_hex));
1166                 write_in_full(1, " ", 1);
1167                 write_in_full(1, ex->uri, strlen(ex->uri));
1168                 write_in_full(1, "\n", 1);
1169         }
1170 }
1171
1172 static const char no_split_warning[] = N_(
1173 "disabling bitmap writing, packs are split due to pack.packSizeLimit"
1174 );
1175
1176 static void write_pack_file(void)
1177 {
1178         uint32_t i = 0, j;
1179         struct hashfile *f;
1180         off_t offset;
1181         uint32_t nr_remaining = nr_result;
1182         time_t last_mtime = 0;
1183         struct object_entry **write_order;
1184
1185         if (progress > pack_to_stdout)
1186                 progress_state = start_progress(_("Writing objects"), nr_result);
1187         ALLOC_ARRAY(written_list, to_pack.nr_objects);
1188         write_order = compute_write_order();
1189
1190         do {
1191                 unsigned char hash[GIT_MAX_RAWSZ];
1192                 char *pack_tmp_name = NULL;
1193
1194                 if (pack_to_stdout)
1195                         f = hashfd_throughput(1, "<stdout>", progress_state);
1196                 else
1197                         f = create_tmp_packfile(&pack_tmp_name);
1198
1199                 offset = write_pack_header(f, nr_remaining);
1200
1201                 if (reuse_packfile) {
1202                         assert(pack_to_stdout);
1203                         write_reused_pack(f);
1204                         offset = hashfile_total(f);
1205                 }
1206
1207                 nr_written = 0;
1208                 for (; i < to_pack.nr_objects; i++) {
1209                         struct object_entry *e = write_order[i];
1210                         if (write_one(f, e, &offset) == WRITE_ONE_BREAK)
1211                                 break;
1212                         display_progress(progress_state, written);
1213                 }
1214
1215                 if (pack_to_stdout) {
1216                         /*
1217                          * We never fsync when writing to stdout since we may
1218                          * not be writing to an actual pack file. For instance,
1219                          * the upload-pack code passes a pipe here. Calling
1220                          * fsync on a pipe results in unnecessary
1221                          * synchronization with the reader on some platforms.
1222                          */
1223                         finalize_hashfile(f, hash, FSYNC_COMPONENT_NONE,
1224                                           CSUM_HASH_IN_STREAM | CSUM_CLOSE);
1225                 } else if (nr_written == nr_remaining) {
1226                         finalize_hashfile(f, hash, FSYNC_COMPONENT_PACK,
1227                                           CSUM_HASH_IN_STREAM | CSUM_FSYNC | CSUM_CLOSE);
1228                 } else {
1229                         /*
1230                          * If we wrote the wrong number of entries in the
1231                          * header, rewrite it like in fast-import.
1232                          */
1233
1234                         int fd = finalize_hashfile(f, hash, FSYNC_COMPONENT_PACK, 0);
1235                         fixup_pack_header_footer(fd, hash, pack_tmp_name,
1236                                                  nr_written, hash, offset);
1237                         close(fd);
1238                         if (write_bitmap_index) {
1239                                 if (write_bitmap_index != WRITE_BITMAP_QUIET)
1240                                         warning(_(no_split_warning));
1241                                 write_bitmap_index = 0;
1242                         }
1243                 }
1244
1245                 if (!pack_to_stdout) {
1246                         struct stat st;
1247                         struct strbuf tmpname = STRBUF_INIT;
1248                         char *idx_tmp_name = NULL;
1249
1250                         /*
1251                          * Packs are runtime accessed in their mtime
1252                          * order since newer packs are more likely to contain
1253                          * younger objects.  So if we are creating multiple
1254                          * packs then we should modify the mtime of later ones
1255                          * to preserve this property.
1256                          */
1257                         if (stat(pack_tmp_name, &st) < 0) {
1258                                 warning_errno(_("failed to stat %s"), pack_tmp_name);
1259                         } else if (!last_mtime) {
1260                                 last_mtime = st.st_mtime;
1261                         } else {
1262                                 struct utimbuf utb;
1263                                 utb.actime = st.st_atime;
1264                                 utb.modtime = --last_mtime;
1265                                 if (utime(pack_tmp_name, &utb) < 0)
1266                                         warning_errno(_("failed utime() on %s"), pack_tmp_name);
1267                         }
1268
1269                         strbuf_addf(&tmpname, "%s-%s.", base_name,
1270                                     hash_to_hex(hash));
1271
1272                         if (write_bitmap_index) {
1273                                 bitmap_writer_set_checksum(hash);
1274                                 bitmap_writer_build_type_index(
1275                                         &to_pack, written_list, nr_written);
1276                         }
1277
1278                         if (cruft)
1279                                 pack_idx_opts.flags |= WRITE_MTIMES;
1280
1281                         stage_tmp_packfiles(&tmpname, pack_tmp_name,
1282                                             written_list, nr_written,
1283                                             &to_pack, &pack_idx_opts, hash,
1284                                             &idx_tmp_name);
1285
1286                         if (write_bitmap_index) {
1287                                 size_t tmpname_len = tmpname.len;
1288
1289                                 strbuf_addstr(&tmpname, "bitmap");
1290                                 stop_progress(&progress_state);
1291
1292                                 bitmap_writer_show_progress(progress);
1293                                 bitmap_writer_select_commits(indexed_commits, indexed_commits_nr, -1);
1294                                 if (bitmap_writer_build(&to_pack) < 0)
1295                                         die(_("failed to write bitmap index"));
1296                                 bitmap_writer_finish(written_list, nr_written,
1297                                                      tmpname.buf, write_bitmap_options);
1298                                 write_bitmap_index = 0;
1299                                 strbuf_setlen(&tmpname, tmpname_len);
1300                         }
1301
1302                         rename_tmp_packfile_idx(&tmpname, &idx_tmp_name);
1303
1304                         free(idx_tmp_name);
1305                         strbuf_release(&tmpname);
1306                         free(pack_tmp_name);
1307                         puts(hash_to_hex(hash));
1308                 }
1309
1310                 /* mark written objects as written to previous pack */
1311                 for (j = 0; j < nr_written; j++) {
1312                         written_list[j]->offset = (off_t)-1;
1313                 }
1314                 nr_remaining -= nr_written;
1315         } while (nr_remaining && i < to_pack.nr_objects);
1316
1317         free(written_list);
1318         free(write_order);
1319         stop_progress(&progress_state);
1320         if (written != nr_result)
1321                 die(_("wrote %"PRIu32" objects while expecting %"PRIu32),
1322                     written, nr_result);
1323         trace2_data_intmax("pack-objects", the_repository,
1324                            "write_pack_file/wrote", nr_result);
1325 }
1326
1327 static int no_try_delta(const char *path)
1328 {
1329         static struct attr_check *check;
1330
1331         if (!check)
1332                 check = attr_check_initl("delta", NULL);
1333         git_check_attr(the_repository->index, path, check);
1334         if (ATTR_FALSE(check->items[0].value))
1335                 return 1;
1336         return 0;
1337 }
1338
1339 /*
1340  * When adding an object, check whether we have already added it
1341  * to our packing list. If so, we can skip. However, if we are
1342  * being asked to excludei t, but the previous mention was to include
1343  * it, make sure to adjust its flags and tweak our numbers accordingly.
1344  *
1345  * As an optimization, we pass out the index position where we would have
1346  * found the item, since that saves us from having to look it up again a
1347  * few lines later when we want to add the new entry.
1348  */
1349 static int have_duplicate_entry(const struct object_id *oid,
1350                                 int exclude)
1351 {
1352         struct object_entry *entry;
1353
1354         if (reuse_packfile_bitmap &&
1355             bitmap_walk_contains(bitmap_git, reuse_packfile_bitmap, oid))
1356                 return 1;
1357
1358         entry = packlist_find(&to_pack, oid);
1359         if (!entry)
1360                 return 0;
1361
1362         if (exclude) {
1363                 if (!entry->preferred_base)
1364                         nr_result--;
1365                 entry->preferred_base = 1;
1366         }
1367
1368         return 1;
1369 }
1370
1371 static int want_found_object(const struct object_id *oid, int exclude,
1372                              struct packed_git *p)
1373 {
1374         if (exclude)
1375                 return 1;
1376         if (incremental)
1377                 return 0;
1378
1379         if (!is_pack_valid(p))
1380                 return -1;
1381
1382         /*
1383          * When asked to do --local (do not include an object that appears in a
1384          * pack we borrow from elsewhere) or --honor-pack-keep (do not include
1385          * an object that appears in a pack marked with .keep), finding a pack
1386          * that matches the criteria is sufficient for us to decide to omit it.
1387          * However, even if this pack does not satisfy the criteria, we need to
1388          * make sure no copy of this object appears in _any_ pack that makes us
1389          * to omit the object, so we need to check all the packs.
1390          *
1391          * We can however first check whether these options can possibly matter;
1392          * if they do not matter we know we want the object in generated pack.
1393          * Otherwise, we signal "-1" at the end to tell the caller that we do
1394          * not know either way, and it needs to check more packs.
1395          */
1396
1397         /*
1398          * Objects in packs borrowed from elsewhere are discarded regardless of
1399          * if they appear in other packs that weren't borrowed.
1400          */
1401         if (local && !p->pack_local)
1402                 return 0;
1403
1404         /*
1405          * Then handle .keep first, as we have a fast(er) path there.
1406          */
1407         if (ignore_packed_keep_on_disk || ignore_packed_keep_in_core) {
1408                 /*
1409                  * Set the flags for the kept-pack cache to be the ones we want
1410                  * to ignore.
1411                  *
1412                  * That is, if we are ignoring objects in on-disk keep packs,
1413                  * then we want to search through the on-disk keep and ignore
1414                  * the in-core ones.
1415                  */
1416                 unsigned flags = 0;
1417                 if (ignore_packed_keep_on_disk)
1418                         flags |= ON_DISK_KEEP_PACKS;
1419                 if (ignore_packed_keep_in_core)
1420                         flags |= IN_CORE_KEEP_PACKS;
1421
1422                 if (ignore_packed_keep_on_disk && p->pack_keep)
1423                         return 0;
1424                 if (ignore_packed_keep_in_core && p->pack_keep_in_core)
1425                         return 0;
1426                 if (has_object_kept_pack(oid, flags))
1427                         return 0;
1428         }
1429
1430         /*
1431          * At this point we know definitively that either we don't care about
1432          * keep-packs, or the object is not in one. Keep checking other
1433          * conditions...
1434          */
1435         if (!local || !have_non_local_packs)
1436                 return 1;
1437
1438         /* we don't know yet; keep looking for more packs */
1439         return -1;
1440 }
1441
1442 static int want_object_in_pack_one(struct packed_git *p,
1443                                    const struct object_id *oid,
1444                                    int exclude,
1445                                    struct packed_git **found_pack,
1446                                    off_t *found_offset)
1447 {
1448         off_t offset;
1449
1450         if (p == *found_pack)
1451                 offset = *found_offset;
1452         else
1453                 offset = find_pack_entry_one(oid->hash, p);
1454
1455         if (offset) {
1456                 if (!*found_pack) {
1457                         if (!is_pack_valid(p))
1458                                 return -1;
1459                         *found_offset = offset;
1460                         *found_pack = p;
1461                 }
1462                 return want_found_object(oid, exclude, p);
1463         }
1464         return -1;
1465 }
1466
1467 /*
1468  * Check whether we want the object in the pack (e.g., we do not want
1469  * objects found in non-local stores if the "--local" option was used).
1470  *
1471  * If the caller already knows an existing pack it wants to take the object
1472  * from, that is passed in *found_pack and *found_offset; otherwise this
1473  * function finds if there is any pack that has the object and returns the pack
1474  * and its offset in these variables.
1475  */
1476 static int want_object_in_pack(const struct object_id *oid,
1477                                int exclude,
1478                                struct packed_git **found_pack,
1479                                off_t *found_offset)
1480 {
1481         int want;
1482         struct list_head *pos;
1483         struct multi_pack_index *m;
1484
1485         if (!exclude && local && has_loose_object_nonlocal(oid))
1486                 return 0;
1487
1488         /*
1489          * If we already know the pack object lives in, start checks from that
1490          * pack - in the usual case when neither --local was given nor .keep files
1491          * are present we will determine the answer right now.
1492          */
1493         if (*found_pack) {
1494                 want = want_found_object(oid, exclude, *found_pack);
1495                 if (want != -1)
1496                         return want;
1497
1498                 *found_pack = NULL;
1499                 *found_offset = 0;
1500         }
1501
1502         for (m = get_multi_pack_index(the_repository); m; m = m->next) {
1503                 struct pack_entry e;
1504                 if (fill_midx_entry(the_repository, oid, &e, m)) {
1505                         want = want_object_in_pack_one(e.p, oid, exclude, found_pack, found_offset);
1506                         if (want != -1)
1507                                 return want;
1508                 }
1509         }
1510
1511         list_for_each(pos, get_packed_git_mru(the_repository)) {
1512                 struct packed_git *p = list_entry(pos, struct packed_git, mru);
1513                 want = want_object_in_pack_one(p, oid, exclude, found_pack, found_offset);
1514                 if (!exclude && want > 0)
1515                         list_move(&p->mru,
1516                                   get_packed_git_mru(the_repository));
1517                 if (want != -1)
1518                         return want;
1519         }
1520
1521         if (uri_protocols.nr) {
1522                 struct configured_exclusion *ex =
1523                         oidmap_get(&configured_exclusions, oid);
1524                 int i;
1525                 const char *p;
1526
1527                 if (ex) {
1528                         for (i = 0; i < uri_protocols.nr; i++) {
1529                                 if (skip_prefix(ex->uri,
1530                                                 uri_protocols.items[i].string,
1531                                                 &p) &&
1532                                     *p == ':') {
1533                                         oidset_insert(&excluded_by_config, oid);
1534                                         return 0;
1535                                 }
1536                         }
1537                 }
1538         }
1539
1540         return 1;
1541 }
1542
1543 static struct object_entry *create_object_entry(const struct object_id *oid,
1544                                                 enum object_type type,
1545                                                 uint32_t hash,
1546                                                 int exclude,
1547                                                 int no_try_delta,
1548                                                 struct packed_git *found_pack,
1549                                                 off_t found_offset)
1550 {
1551         struct object_entry *entry;
1552
1553         entry = packlist_alloc(&to_pack, oid);
1554         entry->hash = hash;
1555         oe_set_type(entry, type);
1556         if (exclude)
1557                 entry->preferred_base = 1;
1558         else
1559                 nr_result++;
1560         if (found_pack) {
1561                 oe_set_in_pack(&to_pack, entry, found_pack);
1562                 entry->in_pack_offset = found_offset;
1563         }
1564
1565         entry->no_try_delta = no_try_delta;
1566
1567         return entry;
1568 }
1569
1570 static const char no_closure_warning[] = N_(
1571 "disabling bitmap writing, as some objects are not being packed"
1572 );
1573
1574 static int add_object_entry(const struct object_id *oid, enum object_type type,
1575                             const char *name, int exclude)
1576 {
1577         struct packed_git *found_pack = NULL;
1578         off_t found_offset = 0;
1579
1580         display_progress(progress_state, ++nr_seen);
1581
1582         if (have_duplicate_entry(oid, exclude))
1583                 return 0;
1584
1585         if (!want_object_in_pack(oid, exclude, &found_pack, &found_offset)) {
1586                 /* The pack is missing an object, so it will not have closure */
1587                 if (write_bitmap_index) {
1588                         if (write_bitmap_index != WRITE_BITMAP_QUIET)
1589                                 warning(_(no_closure_warning));
1590                         write_bitmap_index = 0;
1591                 }
1592                 return 0;
1593         }
1594
1595         create_object_entry(oid, type, pack_name_hash(name),
1596                             exclude, name && no_try_delta(name),
1597                             found_pack, found_offset);
1598         return 1;
1599 }
1600
1601 static int add_object_entry_from_bitmap(const struct object_id *oid,
1602                                         enum object_type type,
1603                                         int flags UNUSED, uint32_t name_hash,
1604                                         struct packed_git *pack, off_t offset)
1605 {
1606         display_progress(progress_state, ++nr_seen);
1607
1608         if (have_duplicate_entry(oid, 0))
1609                 return 0;
1610
1611         if (!want_object_in_pack(oid, 0, &pack, &offset))
1612                 return 0;
1613
1614         create_object_entry(oid, type, name_hash, 0, 0, pack, offset);
1615         return 1;
1616 }
1617
1618 struct pbase_tree_cache {
1619         struct object_id oid;
1620         int ref;
1621         int temporary;
1622         void *tree_data;
1623         unsigned long tree_size;
1624 };
1625
1626 static struct pbase_tree_cache *(pbase_tree_cache[256]);
1627 static int pbase_tree_cache_ix(const struct object_id *oid)
1628 {
1629         return oid->hash[0] % ARRAY_SIZE(pbase_tree_cache);
1630 }
1631 static int pbase_tree_cache_ix_incr(int ix)
1632 {
1633         return (ix+1) % ARRAY_SIZE(pbase_tree_cache);
1634 }
1635
/*
 * Singly linked list of the toplevel trees registered as preferred bases;
 * add_preferred_base() pushes new nodes onto the front.
 */
static struct pbase_tree {
        struct pbase_tree *next;
        /* This is a phony "cache" entry; we are not
         * going to evict it or find it through _get()
         * mechanism -- this is for the toplevel node that
         * would almost always change with any commit.
         */
        struct pbase_tree_cache pcache;
} *pbase_tree;
1645
1646 static struct pbase_tree_cache *pbase_tree_get(const struct object_id *oid)
1647 {
1648         struct pbase_tree_cache *ent, *nent;
1649         void *data;
1650         unsigned long size;
1651         enum object_type type;
1652         int neigh;
1653         int my_ix = pbase_tree_cache_ix(oid);
1654         int available_ix = -1;
1655
1656         /* pbase-tree-cache acts as a limited hashtable.
1657          * your object will be found at your index or within a few
1658          * slots after that slot if it is cached.
1659          */
1660         for (neigh = 0; neigh < 8; neigh++) {
1661                 ent = pbase_tree_cache[my_ix];
1662                 if (ent && oideq(&ent->oid, oid)) {
1663                         ent->ref++;
1664                         return ent;
1665                 }
1666                 else if (((available_ix < 0) && (!ent || !ent->ref)) ||
1667                          ((0 <= available_ix) &&
1668                           (!ent && pbase_tree_cache[available_ix])))
1669                         available_ix = my_ix;
1670                 if (!ent)
1671                         break;
1672                 my_ix = pbase_tree_cache_ix_incr(my_ix);
1673         }
1674
1675         /* Did not find one.  Either we got a bogus request or
1676          * we need to read and perhaps cache.
1677          */
1678         data = repo_read_object_file(the_repository, oid, &type, &size);
1679         if (!data)
1680                 return NULL;
1681         if (type != OBJ_TREE) {
1682                 free(data);
1683                 return NULL;
1684         }
1685
1686         /* We need to either cache or return a throwaway copy */
1687
1688         if (available_ix < 0)
1689                 ent = NULL;
1690         else {
1691                 ent = pbase_tree_cache[available_ix];
1692                 my_ix = available_ix;
1693         }
1694
1695         if (!ent) {
1696                 nent = xmalloc(sizeof(*nent));
1697                 nent->temporary = (available_ix < 0);
1698         }
1699         else {
1700                 /* evict and reuse */
1701                 free(ent->tree_data);
1702                 nent = ent;
1703         }
1704         oidcpy(&nent->oid, oid);
1705         nent->tree_data = data;
1706         nent->tree_size = size;
1707         nent->ref = 1;
1708         if (!nent->temporary)
1709                 pbase_tree_cache[my_ix] = nent;
1710         return nent;
1711 }
1712
1713 static void pbase_tree_put(struct pbase_tree_cache *cache)
1714 {
1715         if (!cache->temporary) {
1716                 cache->ref--;
1717                 return;
1718         }
1719         free(cache->tree_data);
1720         free(cache);
1721 }
1722
/*
 * Length of the leading path component of "name": the number of bytes
 * before the first '/' or newline, or the whole string if neither occurs.
 */
static size_t name_cmp_len(const char *name)
{
        static const char terminators[] = "\n/";

        return strcspn(name, terminators);
}
1727
1728 static void add_pbase_object(struct tree_desc *tree,
1729                              const char *name,
1730                              size_t cmplen,
1731                              const char *fullname)
1732 {
1733         struct name_entry entry;
1734         int cmp;
1735
1736         while (tree_entry(tree,&entry)) {
1737                 if (S_ISGITLINK(entry.mode))
1738                         continue;
1739                 cmp = tree_entry_len(&entry) != cmplen ? 1 :
1740                       memcmp(name, entry.path, cmplen);
1741                 if (cmp > 0)
1742                         continue;
1743                 if (cmp < 0)
1744                         return;
1745                 if (name[cmplen] != '/') {
1746                         add_object_entry(&entry.oid,
1747                                          object_type(entry.mode),
1748                                          fullname, 1);
1749                         return;
1750                 }
1751                 if (S_ISDIR(entry.mode)) {
1752                         struct tree_desc sub;
1753                         struct pbase_tree_cache *tree;
1754                         const char *down = name+cmplen+1;
1755                         size_t downlen = name_cmp_len(down);
1756
1757                         tree = pbase_tree_get(&entry.oid);
1758                         if (!tree)
1759                                 return;
1760                         init_tree_desc(&sub, tree->tree_data, tree->tree_size);
1761
1762                         add_pbase_object(&sub, down, downlen, fullname);
1763                         pbase_tree_put(tree);
1764                 }
1765         }
1766 }
1767
/*
 * Name hashes of the paths already handled by add_preferred_base_object().
 * The binary search below expects larger values at lower indices, i.e.
 * the array is kept in descending order.
 */
static unsigned *done_pbase_paths;
static int done_pbase_paths_num;
static int done_pbase_paths_alloc;

/*
 * Binary-search done_pbase_paths for "hash".
 *
 * Returns the index of the match, or (-pos - 1) when the hash is absent,
 * where "pos" is the index at which it would have to be inserted.
 */
static int done_pbase_path_pos(unsigned hash)
{
        int lo = 0, hi = done_pbase_paths_num;

        while (lo < hi) {
                int mid = lo + (hi - lo) / 2;
                unsigned probe = done_pbase_paths[mid];

                if (probe == hash)
                        return mid;
                if (probe < hash)
                        hi = mid;
                else
                        lo = mid + 1;
        }
        return -lo - 1;
}
1786
1787 static int check_pbase_path(unsigned hash)
1788 {
1789         int pos = done_pbase_path_pos(hash);
1790         if (0 <= pos)
1791                 return 1;
1792         pos = -pos - 1;
1793         ALLOC_GROW(done_pbase_paths,
1794                    done_pbase_paths_num + 1,
1795                    done_pbase_paths_alloc);
1796         done_pbase_paths_num++;
1797         if (pos < done_pbase_paths_num)
1798                 MOVE_ARRAY(done_pbase_paths + pos + 1, done_pbase_paths + pos,
1799                            done_pbase_paths_num - pos - 1);
1800         done_pbase_paths[pos] = hash;
1801         return 0;
1802 }
1803
1804 static void add_preferred_base_object(const char *name)
1805 {
1806         struct pbase_tree *it;
1807         size_t cmplen;
1808         unsigned hash = pack_name_hash(name);
1809
1810         if (!num_preferred_base || check_pbase_path(hash))
1811                 return;
1812
1813         cmplen = name_cmp_len(name);
1814         for (it = pbase_tree; it; it = it->next) {
1815                 if (cmplen == 0) {
1816                         add_object_entry(&it->pcache.oid, OBJ_TREE, NULL, 1);
1817                 }
1818                 else {
1819                         struct tree_desc tree;
1820                         init_tree_desc(&tree, it->pcache.tree_data, it->pcache.tree_size);
1821                         add_pbase_object(&tree, name, cmplen, name);
1822                 }
1823         }
1824 }
1825
1826 static void add_preferred_base(struct object_id *oid)
1827 {
1828         struct pbase_tree *it;
1829         void *data;
1830         unsigned long size;
1831         struct object_id tree_oid;
1832
1833         if (window <= num_preferred_base++)
1834                 return;
1835
1836         data = read_object_with_reference(the_repository, oid,
1837                                           OBJ_TREE, &size, &tree_oid);
1838         if (!data)
1839                 return;
1840
1841         for (it = pbase_tree; it; it = it->next) {
1842                 if (oideq(&it->pcache.oid, &tree_oid)) {
1843                         free(data);
1844                         return;
1845                 }
1846         }
1847
1848         CALLOC_ARRAY(it, 1);
1849         it->next = pbase_tree;
1850         pbase_tree = it;
1851
1852         oidcpy(&it->pcache.oid, &tree_oid);
1853         it->pcache.tree_data = data;
1854         it->pcache.tree_size = size;
1855 }
1856
1857 static void cleanup_preferred_base(void)
1858 {
1859         struct pbase_tree *it;
1860         unsigned i;
1861
1862         it = pbase_tree;
1863         pbase_tree = NULL;
1864         while (it) {
1865                 struct pbase_tree *tmp = it;
1866                 it = tmp->next;
1867                 free(tmp->pcache.tree_data);
1868                 free(tmp);
1869         }
1870
1871         for (i = 0; i < ARRAY_SIZE(pbase_tree_cache); i++) {
1872                 if (!pbase_tree_cache[i])
1873                         continue;
1874                 free(pbase_tree_cache[i]->tree_data);
1875                 FREE_AND_NULL(pbase_tree_cache[i]);
1876         }
1877
1878         FREE_AND_NULL(done_pbase_paths);
1879         done_pbase_paths_num = done_pbase_paths_alloc = 0;
1880 }
1881
1882 /*
1883  * Return 1 iff the object specified by "delta" can be sent
1884  * literally as a delta against the base in "base_sha1". If
1885  * so, then *base_out will point to the entry in our packing
1886  * list, or NULL if we must use the external-base list.
1887  *
1888  * Depth value does not matter - find_deltas() will
1889  * never consider reused delta as the base object to
1890  * deltify other objects against, in order to avoid
1891  * circular deltas.
1892  */
1893 static int can_reuse_delta(const struct object_id *base_oid,
1894                            struct object_entry *delta,
1895                            struct object_entry **base_out)
1896 {
1897         struct object_entry *base;
1898
1899         /*
1900          * First see if we're already sending the base (or it's explicitly in
1901          * our "excluded" list).
1902          */
1903         base = packlist_find(&to_pack, base_oid);
1904         if (base) {
1905                 if (!in_same_island(&delta->idx.oid, &base->idx.oid))
1906                         return 0;
1907                 *base_out = base;
1908                 return 1;
1909         }
1910
1911         /*
1912          * Otherwise, reachability bitmaps may tell us if the receiver has it,
1913          * even if it was buried too deep in history to make it into the
1914          * packing list.
1915          */
1916         if (thin && bitmap_has_oid_in_uninteresting(bitmap_git, base_oid)) {
1917                 if (use_delta_islands) {
1918                         if (!in_same_island(&delta->idx.oid, base_oid))
1919                                 return 0;
1920                 }
1921                 *base_out = NULL;
1922                 return 1;
1923         }
1924
1925         return 0;
1926 }
1927
1928 static void prefetch_to_pack(uint32_t object_index_start) {
1929         struct oid_array to_fetch = OID_ARRAY_INIT;
1930         uint32_t i;
1931
1932         for (i = object_index_start; i < to_pack.nr_objects; i++) {
1933                 struct object_entry *entry = to_pack.objects + i;
1934
1935                 if (!oid_object_info_extended(the_repository,
1936                                               &entry->idx.oid,
1937                                               NULL,
1938                                               OBJECT_INFO_FOR_PREFETCH))
1939                         continue;
1940                 oid_array_append(&to_fetch, &entry->idx.oid);
1941         }
1942         promisor_remote_get_direct(the_repository,
1943                                    to_fetch.oid, to_fetch.nr);
1944         oid_array_clear(&to_fetch);
1945 }
1946
/*
 * Fill in the type and size of "entry" and decide whether its existing
 * on-disk representation can be reused.
 *
 * For an entry that was found in a pack, the packed object header is
 * parsed directly:
 *
 *   - a non-delta object takes its type and size from that header;
 *
 *   - a delta is kept as a delta when reuse_delta is set, the entry is not
 *     a preferred base, and can_reuse_delta() accepts its base;
 *
 *   - otherwise, if the entry's type is already known, only the inflated
 *     size is read from the delta header.
 *
 * Everything else (including any parsing problem, via "give_up") falls
 * back to oid_object_info_extended(). If that fails and a promisor remote
 * is configured, prefetch_to_pack() is called with "object_index" and the
 * lookup is retried once. A final failure records type -1 on the entry.
 */
static void check_object(struct object_entry *entry, uint32_t object_index)
{
        unsigned long canonical_size;
        enum object_type type;
        struct object_info oi = {.typep = &type, .sizep = &canonical_size};

        if (IN_PACK(entry)) {
                struct packed_git *p = IN_PACK(entry);
                struct pack_window *w_curs = NULL;
                int have_base = 0;
                struct object_id base_ref;
                struct object_entry *base_entry;
                unsigned long used, used_0;
                unsigned long avail;
                off_t ofs;
                unsigned char *buf, c;
                /* shadows the outer "type"; holds the in-pack header type */
                enum object_type type;
                unsigned long in_pack_size;

                buf = use_pack(p, &w_curs, entry->in_pack_offset, &avail);

                /*
                 * We want in_pack_type even if we do not reuse delta
                 * since non-delta representations could still be reused.
                 */
                used = unpack_object_header_buffer(buf, avail,
                                                   &type,
                                                   &in_pack_size);
                if (used == 0)
                        goto give_up;

                if (type < 0)
                        BUG("invalid type %d", type);
                entry->in_pack_type = type;

                /*
                 * Determine if this is a delta and if so whether we can
                 * reuse it or not.  Otherwise let's find out as cheaply as
                 * possible what the actual type and size for this object is.
                 */
                switch (entry->in_pack_type) {
                default:
                        /* Not a delta hence we've already got all we need. */
                        oe_set_type(entry, entry->in_pack_type);
                        SET_SIZE(entry, in_pack_size);
                        entry->in_pack_header_size = used;
                        /* anything outside commit..blob is not a type we accept here */
                        if (oe_type(entry) < OBJ_COMMIT || oe_type(entry) > OBJ_BLOB)
                                goto give_up;
                        unuse_pack(&w_curs);
                        return;
                case OBJ_REF_DELTA:
                        /* the base's object id directly follows the header */
                        if (reuse_delta && !entry->preferred_base) {
                                oidread(&base_ref,
                                        use_pack(p, &w_curs,
                                                 entry->in_pack_offset + used,
                                                 NULL));
                                have_base = 1;
                        }
                        entry->in_pack_header_size = used + the_hash_algo->rawsz;
                        break;
                case OBJ_OFS_DELTA:
                        /*
                         * The header is followed by a variable-length
                         * encoding of the distance back to the base: 7 bits
                         * per byte, with the high bit set on every byte but
                         * the last.
                         */
                        buf = use_pack(p, &w_curs,
                                       entry->in_pack_offset + used, NULL);
                        used_0 = 0;
                        c = buf[used_0++];
                        ofs = c & 127;
                        while (c & 128) {
                                ofs += 1;
                                if (!ofs || MSB(ofs, 7)) {
                                        error(_("delta base offset overflow in pack for %s"),
                                              oid_to_hex(&entry->idx.oid));
                                        goto give_up;
                                }
                                c = buf[used_0++];
                                ofs = (ofs << 7) + (c & 127);
                        }
                        /* turn the relative distance into an absolute offset */
                        ofs = entry->in_pack_offset - ofs;
                        if (ofs <= 0 || ofs >= entry->in_pack_offset) {
                                error(_("delta base offset out of bound for %s"),
                                      oid_to_hex(&entry->idx.oid));
                                goto give_up;
                        }
                        if (reuse_delta && !entry->preferred_base) {
                                uint32_t pos;
                                if (offset_to_pack_pos(p, ofs, &pos) < 0)
                                        goto give_up;
                                if (!nth_packed_object_id(&base_ref, p,
                                                          pack_pos_to_index(p, pos)))
                                        have_base = 1;
                        }
                        entry->in_pack_header_size = used + used_0;
                        break;
                }

                if (have_base &&
                    can_reuse_delta(&base_ref, entry, &base_entry)) {
                        oe_set_type(entry, entry->in_pack_type);
                        SET_SIZE(entry, in_pack_size); /* delta size */
                        SET_DELTA_SIZE(entry, in_pack_size);

                        if (base_entry) {
                                /* link ourselves in as a child of the in-list base */
                                SET_DELTA(entry, base_entry);
                                entry->delta_sibling_idx = base_entry->delta_child_idx;
                                SET_DELTA_CHILD(base_entry, entry);
                        } else {
                                SET_DELTA_EXT(entry, &base_ref);
                        }

                        unuse_pack(&w_curs);
                        return;
                }

                if (oe_type(entry)) {
                        off_t delta_pos;

                        /*
                         * This must be a delta and we already know what the
                         * final object type is.  Let's extract the actual
                         * object size from the delta header.
                         */
                        delta_pos = entry->in_pack_offset + entry->in_pack_header_size;
                        canonical_size = get_size_from_delta(p, &w_curs, delta_pos);
                        if (canonical_size == 0)
                                goto give_up;
                        SET_SIZE(entry, canonical_size);
                        unuse_pack(&w_curs);
                        return;
                }

                /*
                 * No choice but to fall back to the recursive delta walk
                 * with oid_object_info() to find about the object type
                 * at this point...
                 */
                give_up:
                unuse_pack(&w_curs);
        }

        /*
         * Generic lookup. Fetching is suppressed for the lookup itself; a
         * missing object is instead fetched in bulk by prefetch_to_pack()
         * and then looked up once more.
         */
        if (oid_object_info_extended(the_repository, &entry->idx.oid, &oi,
                                     OBJECT_INFO_SKIP_FETCH_OBJECT | OBJECT_INFO_LOOKUP_REPLACE) < 0) {
                if (repo_has_promisor_remote(the_repository)) {
                        prefetch_to_pack(object_index);
                        if (oid_object_info_extended(the_repository, &entry->idx.oid, &oi,
                                                     OBJECT_INFO_SKIP_FETCH_OBJECT | OBJECT_INFO_LOOKUP_REPLACE) < 0)
                                type = -1;
                } else {
                        type = -1;
                }
        }
        oe_set_type(entry, type);
        if (entry->type_valid) {
                SET_SIZE(entry, canonical_size);
        } else {
                /*
                 * Bad object type is checked in prepare_pack().  This is
                 * to permit a missing preferred base object to be ignored
                 * as a preferred base.  Doing so can result in a larger
                 * pack file, but the transfer will still take place.
                 */
        }
}
2108
2109 static int pack_offset_sort(const void *_a, const void *_b)
2110 {
2111         const struct object_entry *a = *(struct object_entry **)_a;
2112         const struct object_entry *b = *(struct object_entry **)_b;
2113         const struct packed_git *a_in_pack = IN_PACK(a);
2114         const struct packed_git *b_in_pack = IN_PACK(b);
2115
2116         /* avoid filesystem trashing with loose objects */
2117         if (!a_in_pack && !b_in_pack)
2118                 return oidcmp(&a->idx.oid, &b->idx.oid);
2119
2120         if (a_in_pack < b_in_pack)
2121                 return -1;
2122         if (a_in_pack > b_in_pack)
2123                 return 1;
2124         return a->in_pack_offset < b->in_pack_offset ? -1 :
2125                         (a->in_pack_offset > b->in_pack_offset);
2126 }
2127
2128 /*
2129  * Drop an on-disk delta we were planning to reuse. Naively, this would
2130  * just involve blanking out the "delta" field, but we have to deal
2131  * with some extra book-keeping:
2132  *
2133  *   1. Removing ourselves from the delta_sibling linked list.
2134  *
2135  *   2. Updating our size/type to the non-delta representation. These were
2136  *      either not recorded initially (size) or overwritten with the delta type
2137  *      (type) when check_object() decided to reuse the delta.
2138  *
2139  *   3. Resetting our delta depth, as we are now a base object.
2140  */
2141 static void drop_reused_delta(struct object_entry *entry)
2142 {
2143         unsigned *idx = &to_pack.objects[entry->delta_idx - 1].delta_child_idx;
2144         struct object_info oi = OBJECT_INFO_INIT;
2145         enum object_type type;
2146         unsigned long size;
2147
2148         while (*idx) {
2149                 struct object_entry *oe = &to_pack.objects[*idx - 1];
2150
2151                 if (oe == entry)
2152                         *idx = oe->delta_sibling_idx;
2153                 else
2154                         idx = &oe->delta_sibling_idx;
2155         }
2156         SET_DELTA(entry, NULL);
2157         entry->depth = 0;
2158
2159         oi.sizep = &size;
2160         oi.typep = &type;
2161         if (packed_object_info(the_repository, IN_PACK(entry), entry->in_pack_offset, &oi) < 0) {
2162                 /*
2163                  * We failed to get the info from this pack for some reason;
2164                  * fall back to oid_object_info, which may find another copy.
2165                  * And if that fails, the error will be recorded in oe_type(entry)
2166                  * and dealt with in prepare_pack().
2167                  */
2168                 oe_set_type(entry,
2169                             oid_object_info(the_repository, &entry->idx.oid, &size));
2170         } else {
2171                 oe_set_type(entry, type);
2172         }
2173         SET_SIZE(entry, size);
2174 }
2175
2176 /*
2177  * Follow the chain of deltas from this entry onward, throwing away any links
2178  * that cause us to hit a cycle (as determined by the DFS state flags in
2179  * the entries).
2180  *
2181  * We also detect too-long reused chains that would violate our --depth
2182  * limit.
2183  */
2184 static void break_delta_chains(struct object_entry *entry)
2185 {
2186         /*
2187          * The actual depth of each object we will write is stored as an int,
2188          * as it cannot exceed our int "depth" limit. But before we break
2189          * changes based no that limit, we may potentially go as deep as the
2190          * number of objects, which is elsewhere bounded to a uint32_t.
2191          */
2192         uint32_t total_depth;
2193         struct object_entry *cur, *next;
2194
2195         for (cur = entry, total_depth = 0;
2196              cur;
2197              cur = DELTA(cur), total_depth++) {
2198                 if (cur->dfs_state == DFS_DONE) {
2199                         /*
2200                          * We've already seen this object and know it isn't
2201                          * part of a cycle. We do need to append its depth
2202                          * to our count.
2203                          */
2204                         total_depth += cur->depth;
2205                         break;
2206                 }
2207
2208                 /*
2209                  * We break cycles before looping, so an ACTIVE state (or any
2210                  * other cruft which made its way into the state variable)
2211                  * is a bug.
2212                  */
2213                 if (cur->dfs_state != DFS_NONE)
2214                         BUG("confusing delta dfs state in first pass: %d",
2215                             cur->dfs_state);
2216
2217                 /*
2218                  * Now we know this is the first time we've seen the object. If
2219                  * it's not a delta, we're done traversing, but we'll mark it
2220                  * done to save time on future traversals.
2221                  */
2222                 if (!DELTA(cur)) {
2223                         cur->dfs_state = DFS_DONE;
2224                         break;
2225                 }
2226
2227                 /*
2228                  * Mark ourselves as active and see if the next step causes
2229                  * us to cycle to another active object. It's important to do
2230                  * this _before_ we loop, because it impacts where we make the
2231                  * cut, and thus how our total_depth counter works.
2232                  * E.g., We may see a partial loop like:
2233                  *
2234                  *   A -> B -> C -> D -> B
2235                  *
2236                  * Cutting B->C breaks the cycle. But now the depth of A is
2237                  * only 1, and our total_depth counter is at 3. The size of the
2238                  * error is always one less than the size of the cycle we
2239                  * broke. Commits C and D were "lost" from A's chain.
2240                  *
2241                  * If we instead cut D->B, then the depth of A is correct at 3.
2242                  * We keep all commits in the chain that we examined.
2243                  */
2244                 cur->dfs_state = DFS_ACTIVE;
2245                 if (DELTA(cur)->dfs_state == DFS_ACTIVE) {
2246                         drop_reused_delta(cur);
2247                         cur->dfs_state = DFS_DONE;
2248                         break;
2249                 }
2250         }
2251
2252         /*
2253          * And now that we've gone all the way to the bottom of the chain, we
2254          * need to clear the active flags and set the depth fields as
2255          * appropriate. Unlike the loop above, which can quit when it drops a
2256          * delta, we need to keep going to look for more depth cuts. So we need
2257          * an extra "next" pointer to keep going after we reset cur->delta.
2258          */
2259         for (cur = entry; cur; cur = next) {
2260                 next = DELTA(cur);
2261
2262                 /*
2263                  * We should have a chain of zero or more ACTIVE states down to
2264                  * a final DONE. We can quit after the DONE, because either it
2265                  * has no bases, or we've already handled them in a previous
2266                  * call.
2267                  */
2268                 if (cur->dfs_state == DFS_DONE)
2269                         break;
2270                 else if (cur->dfs_state != DFS_ACTIVE)
2271                         BUG("confusing delta dfs state in second pass: %d",
2272                             cur->dfs_state);
2273
2274                 /*
2275                  * If the total_depth is more than depth, then we need to snip
2276                  * the chain into two or more smaller chains that don't exceed
2277                  * the maximum depth. Most of the resulting chains will contain
2278                  * (depth + 1) entries (i.e., depth deltas plus one base), and
2279                  * the last chain (i.e., the one containing entry) will contain
2280                  * whatever entries are left over, namely
2281                  * (total_depth % (depth + 1)) of them.
2282                  *
2283                  * Since we are iterating towards decreasing depth, we need to
2284                  * decrement total_depth as we go, and we need to write to the
2285                  * entry what its final depth will be after all of the
2286                  * snipping. Since we're snipping into chains of length (depth
2287                  * + 1) entries, the final depth of an entry will be its
2288                  * original depth modulo (depth + 1). Any time we encounter an
2289                  * entry whose final depth is supposed to be zero, we snip it
2290                  * from its delta base, thereby making it so.
2291                  */
2292                 cur->depth = (total_depth--) % (depth + 1);
2293                 if (!cur->depth)
2294                         drop_reused_delta(cur);
2295
2296                 cur->dfs_state = DFS_DONE;
2297         }
2298 }
2299
2300 static void get_object_details(void)
2301 {
2302         uint32_t i;
2303         struct object_entry **sorted_by_offset;
2304
2305         if (progress)
2306                 progress_state = start_progress(_("Counting objects"),
2307                                                 to_pack.nr_objects);
2308
2309         CALLOC_ARRAY(sorted_by_offset, to_pack.nr_objects);
2310         for (i = 0; i < to_pack.nr_objects; i++)
2311                 sorted_by_offset[i] = to_pack.objects + i;
2312         QSORT(sorted_by_offset, to_pack.nr_objects, pack_offset_sort);
2313
2314         for (i = 0; i < to_pack.nr_objects; i++) {
2315                 struct object_entry *entry = sorted_by_offset[i];
2316                 check_object(entry, i);
2317                 if (entry->type_valid &&
2318                     oe_size_greater_than(&to_pack, entry, big_file_threshold))
2319                         entry->no_try_delta = 1;
2320                 display_progress(progress_state, i + 1);
2321         }
2322         stop_progress(&progress_state);
2323
2324         /*
2325          * This must happen in a second pass, since we rely on the delta
2326          * information for the whole list being completed.
2327          */
2328         for (i = 0; i < to_pack.nr_objects; i++)
2329                 break_delta_chains(&to_pack.objects[i]);
2330
2331         free(sorted_by_offset);
2332 }
2333
2334 /*
2335  * We search for deltas in a list sorted by type, by filename hash, and then
2336  * by size, so that we see progressively smaller and smaller files.
2337  * That's because we prefer deltas to be from the bigger file
2338  * to the smaller -- deletes are potentially cheaper, but perhaps
2339  * more importantly, the bigger file is likely the more recent
2340  * one.  The deepest deltas are therefore the oldest objects which are
2341  * less susceptible to be accessed often.
2342  */
2343 static int type_size_sort(const void *_a, const void *_b)
2344 {
2345         const struct object_entry *a = *(struct object_entry **)_a;
2346         const struct object_entry *b = *(struct object_entry **)_b;
2347         const enum object_type a_type = oe_type(a);
2348         const enum object_type b_type = oe_type(b);
2349         const unsigned long a_size = SIZE(a);
2350         const unsigned long b_size = SIZE(b);
2351
2352         if (a_type > b_type)
2353                 return -1;
2354         if (a_type < b_type)
2355                 return 1;
2356         if (a->hash > b->hash)
2357                 return -1;
2358         if (a->hash < b->hash)
2359                 return 1;
2360         if (a->preferred_base > b->preferred_base)
2361                 return -1;
2362         if (a->preferred_base < b->preferred_base)
2363                 return 1;
2364         if (use_delta_islands) {
2365                 const int island_cmp = island_delta_cmp(&a->idx.oid, &b->idx.oid);
2366                 if (island_cmp)
2367                         return island_cmp;
2368         }
2369         if (a_size > b_size)
2370                 return -1;
2371         if (a_size < b_size)
2372                 return 1;
2373         return a < b ? -1 : (a > b);  /* newest first */
2374 }
2375
/*
 * One slot of the delta-search window used by find_deltas() and
 * try_delta().
 */
struct unpacked {
	/* Object occupying this slot; NULL while the slot is unused. */
	struct object_entry *entry;
	/* Object contents, read on demand by try_delta(); NULL until then. */
	void *data;
	/* Delta index built over data when the slot is first used as a base. */
	struct delta_index *index;
	/* Delta depth; try_delta() sets it to the base's depth plus one. */
	unsigned int depth;
};
2382
2383 static int delta_cacheable(unsigned long src_size, unsigned long trg_size,
2384                            unsigned long delta_size)
2385 {
2386         if (max_delta_cache_size && delta_cache_size + delta_size > max_delta_cache_size)
2387                 return 0;
2388
2389         if (delta_size < cache_max_small_delta_size)
2390                 return 1;
2391
2392         /* cache delta, if objects are large enough compared to delta size */
2393         if ((src_size >> 20) + (trg_size >> 21) > (delta_size >> 10))
2394                 return 1;
2395
2396         return 0;
2397 }
2398
/*
 * Guards delta_cache_size, the running total of bytes held in the
 * delta cache.
 */
static pthread_mutex_t cache_mutex;
#define cache_lock()		pthread_mutex_lock(&cache_mutex)
#define cache_unlock()		pthread_mutex_unlock(&cache_mutex)
2403
/*
 * Guards the partitioning of the object list between worker threads
 * (e.g. struct thread_param) as well as progress_state.
 */
static pthread_mutex_t progress_mutex;
#define progress_lock()		pthread_mutex_lock(&progress_mutex)
#define progress_unlock()	pthread_mutex_unlock(&progress_mutex)
2411
2412 /*
2413  * Access to struct object_entry is unprotected since each thread owns
2414  * a portion of the main object list. Just don't access object entries
2415  * ahead in the list because they can be stolen and would need
2416  * progress_mutex for protection.
2417  */
2418
2419 static inline int oe_size_less_than(struct packing_data *pack,
2420                                     const struct object_entry *lhs,
2421                                     unsigned long rhs)
2422 {
2423         if (lhs->size_valid)
2424                 return lhs->size_ < rhs;
2425         if (rhs < pack->oe_size_limit) /* rhs < 2^x <= lhs ? */
2426                 return 0;
2427         return oe_get_size_slow(pack, lhs) < rhs;
2428 }
2429
2430 static inline void oe_set_tree_depth(struct packing_data *pack,
2431                                      struct object_entry *e,
2432                                      unsigned int tree_depth)
2433 {
2434         if (!pack->tree_depth)
2435                 CALLOC_ARRAY(pack->tree_depth, pack->nr_alloc);
2436         pack->tree_depth[e - pack->objects] = tree_depth;
2437 }
2438
2439 /*
2440  * Return the size of the object without doing any delta
2441  * reconstruction (so non-deltas are true object sizes, but deltas
2442  * return the size of the delta data).
2443  */
2444 unsigned long oe_get_size_slow(struct packing_data *pack,
2445                                const struct object_entry *e)
2446 {
2447         struct packed_git *p;
2448         struct pack_window *w_curs;
2449         unsigned char *buf;
2450         enum object_type type;
2451         unsigned long used, avail, size;
2452
2453         if (e->type_ != OBJ_OFS_DELTA && e->type_ != OBJ_REF_DELTA) {
2454                 packing_data_lock(&to_pack);
2455                 if (oid_object_info(the_repository, &e->idx.oid, &size) < 0)
2456                         die(_("unable to get size of %s"),
2457                             oid_to_hex(&e->idx.oid));
2458                 packing_data_unlock(&to_pack);
2459                 return size;
2460         }
2461
2462         p = oe_in_pack(pack, e);
2463         if (!p)
2464                 BUG("when e->type is a delta, it must belong to a pack");
2465
2466         packing_data_lock(&to_pack);
2467         w_curs = NULL;
2468         buf = use_pack(p, &w_curs, e->in_pack_offset, &avail);
2469         used = unpack_object_header_buffer(buf, avail, &type, &size);
2470         if (used == 0)
2471                 die(_("unable to parse object header of %s"),
2472                     oid_to_hex(&e->idx.oid));
2473
2474         unuse_pack(&w_curs);
2475         packing_data_unlock(&to_pack);
2476         return size;
2477 }
2478
2479 static int try_delta(struct unpacked *trg, struct unpacked *src,
2480                      unsigned max_depth, unsigned long *mem_usage)
2481 {
2482         struct object_entry *trg_entry = trg->entry;
2483         struct object_entry *src_entry = src->entry;
2484         unsigned long trg_size, src_size, delta_size, sizediff, max_size, sz;
2485         unsigned ref_depth;
2486         enum object_type type;
2487         void *delta_buf;
2488
2489         /* Don't bother doing diffs between different types */
2490         if (oe_type(trg_entry) != oe_type(src_entry))
2491                 return -1;
2492
2493         /*
2494          * We do not bother to try a delta that we discarded on an
2495          * earlier try, but only when reusing delta data.  Note that
2496          * src_entry that is marked as the preferred_base should always
2497          * be considered, as even if we produce a suboptimal delta against
2498          * it, we will still save the transfer cost, as we already know
2499          * the other side has it and we won't send src_entry at all.
2500          */
2501         if (reuse_delta && IN_PACK(trg_entry) &&
2502             IN_PACK(trg_entry) == IN_PACK(src_entry) &&
2503             !src_entry->preferred_base &&
2504             trg_entry->in_pack_type != OBJ_REF_DELTA &&
2505             trg_entry->in_pack_type != OBJ_OFS_DELTA)
2506                 return 0;
2507
2508         /* Let's not bust the allowed depth. */
2509         if (src->depth >= max_depth)
2510                 return 0;
2511
2512         /* Now some size filtering heuristics. */
2513         trg_size = SIZE(trg_entry);
2514         if (!DELTA(trg_entry)) {
2515                 max_size = trg_size/2 - the_hash_algo->rawsz;
2516                 ref_depth = 1;
2517         } else {
2518                 max_size = DELTA_SIZE(trg_entry);
2519                 ref_depth = trg->depth;
2520         }
2521         max_size = (uint64_t)max_size * (max_depth - src->depth) /
2522                                                 (max_depth - ref_depth + 1);
2523         if (max_size == 0)
2524                 return 0;
2525         src_size = SIZE(src_entry);
2526         sizediff = src_size < trg_size ? trg_size - src_size : 0;
2527         if (sizediff >= max_size)
2528                 return 0;
2529         if (trg_size < src_size / 32)
2530                 return 0;
2531
2532         if (!in_same_island(&trg->entry->idx.oid, &src->entry->idx.oid))
2533                 return 0;
2534
2535         /* Load data if not already done */
2536         if (!trg->data) {
2537                 packing_data_lock(&to_pack);
2538                 trg->data = repo_read_object_file(the_repository,
2539                                                   &trg_entry->idx.oid, &type,
2540                                                   &sz);
2541                 packing_data_unlock(&to_pack);
2542                 if (!trg->data)
2543                         die(_("object %s cannot be read"),
2544                             oid_to_hex(&trg_entry->idx.oid));
2545                 if (sz != trg_size)
2546                         die(_("object %s inconsistent object length (%"PRIuMAX" vs %"PRIuMAX")"),
2547                             oid_to_hex(&trg_entry->idx.oid), (uintmax_t)sz,
2548                             (uintmax_t)trg_size);
2549                 *mem_usage += sz;
2550         }
2551         if (!src->data) {
2552                 packing_data_lock(&to_pack);
2553                 src->data = repo_read_object_file(the_repository,
2554                                                   &src_entry->idx.oid, &type,
2555                                                   &sz);
2556                 packing_data_unlock(&to_pack);
2557                 if (!src->data) {
2558                         if (src_entry->preferred_base) {
2559                                 static int warned = 0;
2560                                 if (!warned++)
2561                                         warning(_("object %s cannot be read"),
2562                                                 oid_to_hex(&src_entry->idx.oid));
2563                                 /*
2564                                  * Those objects are not included in the
2565                                  * resulting pack.  Be resilient and ignore
2566                                  * them if they can't be read, in case the
2567                                  * pack could be created nevertheless.
2568                                  */
2569                                 return 0;
2570                         }
2571                         die(_("object %s cannot be read"),
2572                             oid_to_hex(&src_entry->idx.oid));
2573                 }
2574                 if (sz != src_size)
2575                         die(_("object %s inconsistent object length (%"PRIuMAX" vs %"PRIuMAX")"),
2576                             oid_to_hex(&src_entry->idx.oid), (uintmax_t)sz,
2577                             (uintmax_t)src_size);
2578                 *mem_usage += sz;
2579         }
2580         if (!src->index) {
2581                 src->index = create_delta_index(src->data, src_size);
2582                 if (!src->index) {
2583                         static int warned = 0;
2584                         if (!warned++)
2585                                 warning(_("suboptimal pack - out of memory"));
2586                         return 0;
2587                 }
2588                 *mem_usage += sizeof_delta_index(src->index);
2589         }
2590
2591         delta_buf = create_delta(src->index, trg->data, trg_size, &delta_size, max_size);
2592         if (!delta_buf)
2593                 return 0;
2594
2595         if (DELTA(trg_entry)) {
2596                 /* Prefer only shallower same-sized deltas. */
2597                 if (delta_size == DELTA_SIZE(trg_entry) &&
2598                     src->depth + 1 >= trg->depth) {
2599                         free(delta_buf);
2600                         return 0;
2601                 }
2602         }
2603
2604         /*
2605          * Handle memory allocation outside of the cache
2606          * accounting lock.  Compiler will optimize the strangeness
2607          * away when NO_PTHREADS is defined.
2608          */
2609         free(trg_entry->delta_data);
2610         cache_lock();
2611         if (trg_entry->delta_data) {
2612                 delta_cache_size -= DELTA_SIZE(trg_entry);
2613                 trg_entry->delta_data = NULL;
2614         }
2615         if (delta_cacheable(src_size, trg_size, delta_size)) {
2616                 delta_cache_size += delta_size;
2617                 cache_unlock();
2618                 trg_entry->delta_data = xrealloc(delta_buf, delta_size);
2619         } else {
2620                 cache_unlock();
2621                 free(delta_buf);
2622         }
2623
2624         SET_DELTA(trg_entry, src_entry);
2625         SET_DELTA_SIZE(trg_entry, delta_size);
2626         trg->depth = src->depth + 1;
2627
2628         return 1;
2629 }
2630
/*
 * Walk the delta children of me recursively and return the deepest level
 * reached, where me itself counts as level n and each generation of
 * children adds one.  Returns n when me has no children.
 */
static unsigned int check_delta_limit(struct object_entry *me, unsigned int n)
{
	struct object_entry *kid;
	unsigned int deepest = n;

	for (kid = DELTA_CHILD(me); kid; kid = DELTA_SIBLING(kid)) {
		const unsigned int below = check_delta_limit(kid, n + 1);

		if (below > deepest)
			deepest = below;
	}
	return deepest;
}
2643
2644 static unsigned long free_unpacked(struct unpacked *n)
2645 {
2646         unsigned long freed_mem = sizeof_delta_index(n->index);
2647         free_delta_index(n->index);
2648         n->index = NULL;
2649         if (n->data) {
2650                 freed_mem += SIZE(n->entry);
2651                 FREE_AND_NULL(n->data);
2652         }
2653         n->entry = NULL;
2654         n->depth = 0;
2655         return freed_mem;
2656 }
2657
2658 static void find_deltas(struct object_entry **list, unsigned *list_size,
2659                         int window, int depth, unsigned *processed)
2660 {
2661         uint32_t i, idx = 0, count = 0;
2662         struct unpacked *array;
2663         unsigned long mem_usage = 0;
2664
2665         CALLOC_ARRAY(array, window);
2666
2667         for (;;) {
2668                 struct object_entry *entry;
2669                 struct unpacked *n = array + idx;
2670                 int j, max_depth, best_base = -1;
2671
2672                 progress_lock();
2673                 if (!*list_size) {
2674                         progress_unlock();
2675                         break;
2676                 }
2677                 entry = *list++;
2678                 (*list_size)--;
2679                 if (!entry->preferred_base) {
2680                         (*processed)++;
2681                         display_progress(progress_state, *processed);
2682                 }
2683                 progress_unlock();
2684
2685                 mem_usage -= free_unpacked(n);
2686                 n->entry = entry;
2687
2688                 while (window_memory_limit &&
2689                        mem_usage > window_memory_limit &&
2690                        count > 1) {
2691                         const uint32_t tail = (idx + window - count) % window;
2692                         mem_usage -= free_unpacked(array + tail);
2693                         count--;
2694                 }
2695
2696                 /* We do not compute delta to *create* objects we are not
2697                  * going to pack.
2698                  */
2699                 if (entry->preferred_base)
2700                         goto next;
2701
2702                 /*
2703                  * If the current object is at pack edge, take the depth the
2704                  * objects that depend on the current object into account
2705                  * otherwise they would become too deep.
2706                  */
2707                 max_depth = depth;
2708                 if (DELTA_CHILD(entry)) {
2709                         max_depth -= check_delta_limit(entry, 0);
2710                         if (max_depth <= 0)
2711                                 goto next;
2712                 }
2713
2714                 j = window;
2715                 while (--j > 0) {
2716                         int ret;
2717                         uint32_t other_idx = idx + j;
2718                         struct unpacked *m;
2719                         if (other_idx >= window)
2720                                 other_idx -= window;
2721                         m = array + other_idx;
2722                         if (!m->entry)
2723                                 break;
2724                         ret = try_delta(n, m, max_depth, &mem_usage);
2725                         if (ret < 0)
2726                                 break;
2727                         else if (ret > 0)
2728                                 best_base = other_idx;
2729                 }
2730
2731                 /*
2732                  * If we decided to cache the delta data, then it is best
2733                  * to compress it right away.  First because we have to do
2734                  * it anyway, and doing it here while we're threaded will
2735                  * save a lot of time in the non threaded write phase,
2736                  * as well as allow for caching more deltas within
2737                  * the same cache size limit.
2738                  * ...
2739                  * But only if not writing to stdout, since in that case
2740                  * the network is most likely throttling writes anyway,
2741                  * and therefore it is best to go to the write phase ASAP
2742                  * instead, as we can afford spending more time compressing
2743                  * between writes at that moment.
2744                  */
2745                 if (entry->delta_data && !pack_to_stdout) {
2746                         unsigned long size;
2747
2748                         size = do_compress(&entry->delta_data, DELTA_SIZE(entry));
2749                         if (size < (1U << OE_Z_DELTA_BITS)) {
2750                                 entry->z_delta_size = size;
2751                                 cache_lock();
2752                                 delta_cache_size -= DELTA_SIZE(entry);
2753                                 delta_cache_size += entry->z_delta_size;
2754                                 cache_unlock();
2755                         } else {
2756                                 FREE_AND_NULL(entry->delta_data);
2757                                 entry->z_delta_size = 0;
2758                         }
2759                 }
2760
2761                 /* if we made n a delta, and if n is already at max
2762                  * depth, leaving it in the window is pointless.  we
2763                  * should evict it first.
2764                  */
2765                 if (DELTA(entry) && max_depth <= n->depth)
2766                         continue;
2767
2768                 /*
2769                  * Move the best delta base up in the window, after the
2770                  * currently deltified object, to keep it longer.  It will
2771                  * be the first base object to be attempted next.
2772                  */
2773                 if (DELTA(entry)) {
2774                         struct unpacked swap = array[best_base];
2775                         int dist = (window + idx - best_base) % window;
2776                         int dst = best_base;
2777                         while (dist--) {
2778                                 int src = (dst + 1) % window;
2779                                 array[dst] = array[src];
2780                                 dst = src;
2781                         }
2782                         array[dst] = swap;
2783                 }
2784
2785                 next:
2786                 idx++;
2787                 if (count + 1 < window)
2788                         count++;
2789                 if (idx >= window)
2790                         idx = 0;
2791         }
2792
2793         for (i = 0; i < window; ++i) {
2794                 free_delta_index(array[i].index);
2795                 free(array[i].data);
2796         }
2797         free(array);
2798 }
2799
/*
 * The main object list is split into smaller lists, each of which is
 * handed to one worker.
 *
 * The main thread waits on the condition that (at least) one of the workers
 * has stopped working (which is indicated in the .working member of
 * struct thread_params).
 *
 * When a work thread has completed its work, it sets .working to 0,
 * signals the main thread, and waits on the condition that .data_ready
 * becomes 1.
 *
 * The main thread steals half of the work from the worker that has
 * the most work left and hands it to the idle worker.
 */

struct thread_params {
	/* Handle of the worker thread. */
	pthread_t thread;
	/* Start of the segment of the object list assigned to this worker. */
	struct object_entry **list;
	/* Number of entries in the assigned segment. */
	unsigned int list_size;
	/* Entries still to be processed; find_deltas() counts this down. */
	unsigned int remaining;
	/* Window size and maximum depth handed through to find_deltas(). */
	int window;
	int depth;
	/* Cleared by the worker once it has run out of work. */
	int working;
	/* Set by the main thread after it has (re)assigned work. */
	int data_ready;
	/* mutex/cond pair used to wait for and signal data_ready. */
	pthread_mutex_t mutex;
	pthread_cond_t cond;
	/* Counter of processed entries, passed through to find_deltas(). */
	unsigned int *processed;
};
2829
/*
 * Signalled by a worker after it has cleared its .working flag; the main
 * thread waits on it together with progress_mutex.
 */
static pthread_cond_t progress_cond;
2831
2832 /*
2833  * Mutex and conditional variable can't be statically-initialized on Windows.
2834  */
2835 static void init_threaded_search(void)
2836 {
2837         pthread_mutex_init(&cache_mutex, NULL);
2838         pthread_mutex_init(&progress_mutex, NULL);
2839         pthread_cond_init(&progress_cond, NULL);
2840 }
2841
2842 static void cleanup_threaded_search(void)
2843 {
2844         pthread_cond_destroy(&progress_cond);
2845         pthread_mutex_destroy(&cache_mutex);
2846         pthread_mutex_destroy(&progress_mutex);
2847 }
2848
2849 static void *threaded_find_deltas(void *arg)
2850 {
2851         struct thread_params *me = arg;
2852
2853         progress_lock();
2854         while (me->remaining) {
2855                 progress_unlock();
2856
2857                 find_deltas(me->list, &me->remaining,
2858                             me->window, me->depth, me->processed);
2859
2860                 progress_lock();
2861                 me->working = 0;
2862                 pthread_cond_signal(&progress_cond);
2863                 progress_unlock();
2864
2865                 /*
2866                  * We must not set ->data_ready before we wait on the
2867                  * condition because the main thread may have set it to 1
2868                  * before we get here. In order to be sure that new
2869                  * work is available if we see 1 in ->data_ready, it
2870                  * was initialized to 0 before this thread was spawned
2871                  * and we reset it to 0 right away.
2872                  */
2873                 pthread_mutex_lock(&me->mutex);
2874                 while (!me->data_ready)
2875                         pthread_cond_wait(&me->cond, &me->mutex);
2876                 me->data_ready = 0;
2877                 pthread_mutex_unlock(&me->mutex);
2878
2879                 progress_lock();
2880         }
2881         progress_unlock();
2882         /* leave ->working 1 so that this doesn't get more work assigned */
2883         return NULL;
2884 }
2885
2886 static void ll_find_deltas(struct object_entry **list, unsigned list_size,
2887                            int window, int depth, unsigned *processed)
2888 {
2889         struct thread_params *p;
2890         int i, ret, active_threads = 0;
2891
2892         init_threaded_search();
2893
2894         if (delta_search_threads <= 1) {
2895                 find_deltas(list, &list_size, window, depth, processed);
2896                 cleanup_threaded_search();
2897                 return;
2898         }
2899         if (progress > pack_to_stdout)
2900                 fprintf_ln(stderr, _("Delta compression using up to %d threads"),
2901                            delta_search_threads);
2902         CALLOC_ARRAY(p, delta_search_threads);
2903
2904         /* Partition the work amongst work threads. */
2905         for (i = 0; i < delta_search_threads; i++) {
2906                 unsigned sub_size = list_size / (delta_search_threads - i);
2907
2908                 /* don't use too small segments or no deltas will be found */
2909                 if (sub_size < 2*window && i+1 < delta_search_threads)
2910                         sub_size = 0;
2911
2912                 p[i].window = window;
2913                 p[i].depth = depth;
2914                 p[i].processed = processed;
2915                 p[i].working = 1;
2916                 p[i].data_ready = 0;
2917
2918                 /* try to split chunks on "path" boundaries */
2919                 while (sub_size && sub_size < list_size &&
2920                        list[sub_size]->hash &&
2921                        list[sub_size]->hash == list[sub_size-1]->hash)
2922                         sub_size++;
2923
2924                 p[i].list = list;
2925                 p[i].list_size = sub_size;
2926                 p[i].remaining = sub_size;
2927
2928                 list += sub_size;
2929                 list_size -= sub_size;
2930         }
2931
2932         /* Start work threads. */
2933         for (i = 0; i < delta_search_threads; i++) {
2934                 if (!p[i].list_size)
2935                         continue;
2936                 pthread_mutex_init(&p[i].mutex, NULL);
2937                 pthread_cond_init(&p[i].cond, NULL);
2938                 ret = pthread_create(&p[i].thread, NULL,
2939                                      threaded_find_deltas, &p[i]);
2940                 if (ret)
2941                         die(_("unable to create thread: %s"), strerror(ret));
2942                 active_threads++;
2943         }
2944
2945         /*
2946          * Now let's wait for work completion.  Each time a thread is done
2947          * with its work, we steal half of the remaining work from the
2948          * thread with the largest number of unprocessed objects and give
2949          * it to that newly idle thread.  This ensure good load balancing
2950          * until the remaining object list segments are simply too short
2951          * to be worth splitting anymore.
2952          */
2953         while (active_threads) {
2954                 struct thread_params *target = NULL;
2955                 struct thread_params *victim = NULL;
2956                 unsigned sub_size = 0;
2957
2958                 progress_lock();
2959                 for (;;) {
2960                         for (i = 0; !target && i < delta_search_threads; i++)
2961                                 if (!p[i].working)
2962                                         target = &p[i];
2963                         if (target)
2964                                 break;
2965                         pthread_cond_wait(&progress_cond, &progress_mutex);
2966                 }
2967
2968                 for (i = 0; i < delta_search_threads; i++)
2969                         if (p[i].remaining > 2*window &&
2970                             (!victim || victim->remaining < p[i].remaining))
2971                                 victim = &p[i];
2972                 if (victim) {
2973                         sub_size = victim->remaining / 2;
2974                         list = victim->list + victim->list_size - sub_size;
2975                         while (sub_size && list[0]->hash &&
2976                                list[0]->hash == list[-1]->hash) {
2977                                 list++;
2978                                 sub_size--;
2979                         }
2980                         if (!sub_size) {
2981                                 /*
2982                                  * It is possible for some "paths" to have
2983                                  * so many objects that no hash boundary
2984                                  * might be found.  Let's just steal the
2985                                  * exact half in that case.
2986                                  */
2987                                 sub_size = victim->remaining / 2;
2988                                 list -= sub_size;
2989                         }
2990                         target->list = list;
2991                         victim->list_size -= sub_size;
2992                         victim->remaining -= sub_size;
2993                 }
2994                 target->list_size = sub_size;
2995                 target->remaining = sub_size;
2996                 target->working = 1;
2997                 progress_unlock();
2998
2999                 pthread_mutex_lock(&target->mutex);
3000                 target->data_ready = 1;
3001                 pthread_cond_signal(&target->cond);
3002                 pthread_mutex_unlock(&target->mutex);
3003
3004                 if (!sub_size) {
3005                         pthread_join(target->thread, NULL);
3006                         pthread_cond_destroy(&target->cond);
3007                         pthread_mutex_destroy(&target->mutex);
3008                         active_threads--;
3009                 }
3010         }
3011         cleanup_threaded_search();
3012         free(p);
3013 }
3014
3015 static int obj_is_packed(const struct object_id *oid)
3016 {
3017         return packlist_find(&to_pack, oid) ||
3018                 (reuse_packfile_bitmap &&
3019                  bitmap_walk_contains(bitmap_git, reuse_packfile_bitmap, oid));
3020 }
3021
3022 static void add_tag_chain(const struct object_id *oid)
3023 {
3024         struct tag *tag;
3025
3026         /*
3027          * We catch duplicates already in add_object_entry(), but we'd
3028          * prefer to do this extra check to avoid having to parse the
3029          * tag at all if we already know that it's being packed (e.g., if
3030          * it was included via bitmaps, we would not have parsed it
3031          * previously).
3032          */
3033         if (obj_is_packed(oid))
3034                 return;
3035
3036         tag = lookup_tag(the_repository, oid);
3037         while (1) {
3038                 if (!tag || parse_tag(tag) || !tag->tagged)
3039                         die(_("unable to pack objects reachable from tag %s"),
3040                             oid_to_hex(oid));
3041
3042                 add_object_entry(&tag->object.oid, OBJ_TAG, NULL, 0);
3043
3044                 if (tag->tagged->type != OBJ_TAG)
3045                         return;
3046
3047                 tag = (struct tag *)tag->tagged;
3048         }
3049 }
3050
3051 static int add_ref_tag(const char *tag UNUSED, const struct object_id *oid,
3052                        int flag UNUSED, void *cb_data UNUSED)
3053 {
3054         struct object_id peeled;
3055
3056         if (!peel_iterated_oid(oid, &peeled) && obj_is_packed(&peeled))
3057                 add_tag_chain(oid);
3058         return 0;
3059 }
3060
3061 static void prepare_pack(int window, int depth)
3062 {
3063         struct object_entry **delta_list;
3064         uint32_t i, nr_deltas;
3065         unsigned n;
3066
3067         if (use_delta_islands)
3068                 resolve_tree_islands(the_repository, progress, &to_pack);
3069
3070         get_object_details();
3071
3072         /*
3073          * If we're locally repacking then we need to be doubly careful
3074          * from now on in order to make sure no stealth corruption gets
3075          * propagated to the new pack.  Clients receiving streamed packs
3076          * should validate everything they get anyway so no need to incur
3077          * the additional cost here in that case.
3078          */
3079         if (!pack_to_stdout)
3080                 do_check_packed_object_crc = 1;
3081
3082         if (!to_pack.nr_objects || !window || !depth)
3083                 return;
3084
3085         ALLOC_ARRAY(delta_list, to_pack.nr_objects);
3086         nr_deltas = n = 0;
3087
3088         for (i = 0; i < to_pack.nr_objects; i++) {
3089                 struct object_entry *entry = to_pack.objects + i;
3090
3091                 if (DELTA(entry))
3092                         /* This happens if we decided to reuse existing
3093                          * delta from a pack.  "reuse_delta &&" is implied.
3094                          */
3095                         continue;
3096
3097                 if (!entry->type_valid ||
3098                     oe_size_less_than(&to_pack, entry, 50))
3099                         continue;
3100
3101                 if (entry->no_try_delta)
3102                         continue;
3103
3104                 if (!entry->preferred_base) {
3105                         nr_deltas++;
3106                         if (oe_type(entry) < 0)
3107                                 die(_("unable to get type of object %s"),
3108                                     oid_to_hex(&entry->idx.oid));
3109                 } else {
3110                         if (oe_type(entry) < 0) {
3111                                 /*
3112                                  * This object is not found, but we
3113                                  * don't have to include it anyway.
3114                                  */
3115                                 continue;
3116                         }
3117                 }
3118
3119                 delta_list[n++] = entry;
3120         }
3121
3122         if (nr_deltas && n > 1) {
3123                 unsigned nr_done = 0;
3124
3125                 if (progress)
3126                         progress_state = start_progress(_("Compressing objects"),
3127                                                         nr_deltas);
3128                 QSORT(delta_list, n, type_size_sort);
3129                 ll_find_deltas(delta_list, n, window+1, depth, &nr_done);
3130                 stop_progress(&progress_state);
3131                 if (nr_done != nr_deltas)
3132                         die(_("inconsistency with delta count"));
3133         }
3134         free(delta_list);
3135 }
3136
3137 static int git_pack_config(const char *k, const char *v, void *cb)
3138 {
3139         if (!strcmp(k, "pack.window")) {
3140                 window = git_config_int(k, v);
3141                 return 0;
3142         }
3143         if (!strcmp(k, "pack.windowmemory")) {
3144                 window_memory_limit = git_config_ulong(k, v);
3145                 return 0;
3146         }
3147         if (!strcmp(k, "pack.depth")) {
3148                 depth = git_config_int(k, v);
3149                 return 0;
3150         }
3151         if (!strcmp(k, "pack.deltacachesize")) {
3152                 max_delta_cache_size = git_config_int(k, v);
3153                 return 0;
3154         }
3155         if (!strcmp(k, "pack.deltacachelimit")) {
3156                 cache_max_small_delta_size = git_config_int(k, v);
3157                 return 0;
3158         }
3159         if (!strcmp(k, "pack.writebitmaphashcache")) {
3160                 if (git_config_bool(k, v))
3161                         write_bitmap_options |= BITMAP_OPT_HASH_CACHE;
3162                 else
3163                         write_bitmap_options &= ~BITMAP_OPT_HASH_CACHE;
3164         }
3165
3166         if (!strcmp(k, "pack.writebitmaplookuptable")) {
3167                 if (git_config_bool(k, v))
3168                         write_bitmap_options |= BITMAP_OPT_LOOKUP_TABLE;
3169                 else
3170                         write_bitmap_options &= ~BITMAP_OPT_LOOKUP_TABLE;
3171         }
3172
3173         if (!strcmp(k, "pack.usebitmaps")) {
3174                 use_bitmap_index_default = git_config_bool(k, v);
3175                 return 0;
3176         }
3177         if (!strcmp(k, "pack.allowpackreuse")) {
3178                 allow_pack_reuse = git_config_bool(k, v);
3179                 return 0;
3180         }
3181         if (!strcmp(k, "pack.threads")) {
3182                 delta_search_threads = git_config_int(k, v);
3183                 if (delta_search_threads < 0)
3184                         die(_("invalid number of threads specified (%d)"),
3185                             delta_search_threads);
3186                 if (!HAVE_THREADS && delta_search_threads != 1) {
3187                         warning(_("no threads support, ignoring %s"), k);
3188                         delta_search_threads = 0;
3189                 }
3190                 return 0;
3191         }
3192         if (!strcmp(k, "pack.indexversion")) {
3193                 pack_idx_opts.version = git_config_int(k, v);
3194                 if (pack_idx_opts.version > 2)
3195                         die(_("bad pack.indexVersion=%"PRIu32),
3196                             pack_idx_opts.version);
3197                 return 0;
3198         }
3199         if (!strcmp(k, "pack.writereverseindex")) {
3200                 if (git_config_bool(k, v))
3201                         pack_idx_opts.flags |= WRITE_REV;
3202                 else
3203                         pack_idx_opts.flags &= ~WRITE_REV;
3204                 return 0;
3205         }
3206         if (!strcmp(k, "uploadpack.blobpackfileuri")) {
3207                 struct configured_exclusion *ex = xmalloc(sizeof(*ex));
3208                 const char *oid_end, *pack_end;
3209                 /*
3210                  * Stores the pack hash. This is not a true object ID, but is
3211                  * of the same form.
3212                  */
3213                 struct object_id pack_hash;
3214
3215                 if (parse_oid_hex(v, &ex->e.oid, &oid_end) ||
3216                     *oid_end != ' ' ||
3217                     parse_oid_hex(oid_end + 1, &pack_hash, &pack_end) ||
3218                     *pack_end != ' ')
3219                         die(_("value of uploadpack.blobpackfileuri must be "
3220                               "of the form '<object-hash> <pack-hash> <uri>' (got '%s')"), v);
3221                 if (oidmap_get(&configured_exclusions, &ex->e.oid))
3222                         die(_("object already configured in another "
3223                               "uploadpack.blobpackfileuri (got '%s')"), v);
3224                 ex->pack_hash_hex = xcalloc(1, pack_end - oid_end);
3225                 memcpy(ex->pack_hash_hex, oid_end + 1, pack_end - oid_end - 1);
3226                 ex->uri = xstrdup(pack_end + 1);
3227                 oidmap_put(&configured_exclusions, ex);
3228         }
3229         return git_default_config(k, v, cb);
3230 }
3231
/*
 * trace2 counters for --stdin-packs mode: entries taken from a pack by
 * add_object_entry_from_pack(), and name hints applied by
 * show_object_pack_hint().
 */
static int stdin_packs_found_nr, stdin_packs_hints_nr;
3235
3236 static int add_object_entry_from_pack(const struct object_id *oid,
3237                                       struct packed_git *p,
3238                                       uint32_t pos,
3239                                       void *_data)
3240 {
3241         off_t ofs;
3242         enum object_type type = OBJ_NONE;
3243
3244         display_progress(progress_state, ++nr_seen);
3245
3246         if (have_duplicate_entry(oid, 0))
3247                 return 0;
3248
3249         ofs = nth_packed_object_offset(p, pos);
3250         if (!want_object_in_pack(oid, 0, &p, &ofs))
3251                 return 0;
3252
3253         if (p) {
3254                 struct rev_info *revs = _data;
3255                 struct object_info oi = OBJECT_INFO_INIT;
3256
3257                 oi.typep = &type;
3258                 if (packed_object_info(the_repository, p, ofs, &oi) < 0) {
3259                         die(_("could not get type of object %s in pack %s"),
3260                             oid_to_hex(oid), p->pack_name);
3261                 } else if (type == OBJ_COMMIT) {
3262                         /*
3263                          * commits in included packs are used as starting points for the
3264                          * subsequent revision walk
3265                          */
3266                         add_pending_oid(revs, NULL, oid, 0);
3267                 }
3268
3269                 stdin_packs_found_nr++;
3270         }
3271
3272         create_object_entry(oid, type, 0, 0, 0, p, ofs);
3273
3274         return 0;
3275 }
3276
3277 static void show_commit_pack_hint(struct commit *commit UNUSED,
3278                                   void *data UNUSED)
3279 {
3280         /* nothing to do; commits don't have a namehash */
3281 }
3282
3283 static void show_object_pack_hint(struct object *object, const char *name,
3284                                   void *data UNUSED)
3285 {
3286         struct object_entry *oe = packlist_find(&to_pack, &object->oid);
3287         if (!oe)
3288                 return;
3289
3290         /*
3291          * Our 'to_pack' list was constructed by iterating all objects packed in
3292          * included packs, and so doesn't have a non-zero hash field that you
3293          * would typically pick up during a reachability traversal.
3294          *
3295          * Make a best-effort attempt to fill in the ->hash and ->no_try_delta
3296          * here using a now in order to perhaps improve the delta selection
3297          * process.
3298          */
3299         oe->hash = pack_name_hash(name);
3300         oe->no_try_delta = name && no_try_delta(name);
3301
3302         stdin_packs_hints_nr++;
3303 }
3304
3305 static int pack_mtime_cmp(const void *_a, const void *_b)
3306 {
3307         struct packed_git *a = ((const struct string_list_item*)_a)->util;
3308         struct packed_git *b = ((const struct string_list_item*)_b)->util;
3309
3310         /*
3311          * order packs by descending mtime so that objects are laid out
3312          * roughly as newest-to-oldest
3313          */
3314         if (a->mtime < b->mtime)
3315                 return 1;
3316         else if (b->mtime < a->mtime)
3317                 return -1;
3318         else
3319                 return 0;
3320 }
3321
3322 static void read_packs_list_from_stdin(void)
3323 {
3324         struct strbuf buf = STRBUF_INIT;
3325         struct string_list include_packs = STRING_LIST_INIT_DUP;
3326         struct string_list exclude_packs = STRING_LIST_INIT_DUP;
3327         struct string_list_item *item = NULL;
3328
3329         struct packed_git *p;
3330         struct rev_info revs;
3331
3332         repo_init_revisions(the_repository, &revs, NULL);
3333         /*
3334          * Use a revision walk to fill in the namehash of objects in the include
3335          * packs. To save time, we'll avoid traversing through objects that are
3336          * in excluded packs.
3337          *
3338          * That may cause us to avoid populating all of the namehash fields of
3339          * all included objects, but our goal is best-effort, since this is only
3340          * an optimization during delta selection.
3341          */
3342         revs.no_kept_objects = 1;
3343         revs.keep_pack_cache_flags |= IN_CORE_KEEP_PACKS;
3344         revs.blob_objects = 1;
3345         revs.tree_objects = 1;
3346         revs.tag_objects = 1;
3347         revs.ignore_missing_links = 1;
3348
3349         while (strbuf_getline(&buf, stdin) != EOF) {
3350                 if (!buf.len)
3351                         continue;
3352
3353                 if (*buf.buf == '^')
3354                         string_list_append(&exclude_packs, buf.buf + 1);
3355                 else
3356                         string_list_append(&include_packs, buf.buf);
3357
3358                 strbuf_reset(&buf);
3359         }
3360
3361         string_list_sort(&include_packs);
3362         string_list_remove_duplicates(&include_packs, 0);
3363         string_list_sort(&exclude_packs);
3364         string_list_remove_duplicates(&exclude_packs, 0);
3365
3366         for (p = get_all_packs(the_repository); p; p = p->next) {
3367                 const char *pack_name = pack_basename(p);
3368
3369                 if ((item = string_list_lookup(&include_packs, pack_name)))
3370                         item->util = p;
3371                 if ((item = string_list_lookup(&exclude_packs, pack_name)))
3372                         item->util = p;
3373         }
3374
3375         /*
3376          * Arguments we got on stdin may not even be packs. First
3377          * check that to avoid segfaulting later on in
3378          * e.g. pack_mtime_cmp(), excluded packs are handled below.
3379          *
3380          * Since we first parsed our STDIN and then sorted the input
3381          * lines the pack we error on will be whatever line happens to
3382          * sort first. This is lazy, it's enough that we report one
3383          * bad case here, we don't need to report the first/last one,
3384          * or all of them.
3385          */
3386         for_each_string_list_item(item, &include_packs) {
3387                 struct packed_git *p = item->util;
3388                 if (!p)
3389                         die(_("could not find pack '%s'"), item->string);
3390                 if (!is_pack_valid(p))
3391                         die(_("packfile %s cannot be accessed"), p->pack_name);
3392         }
3393
3394         /*
3395          * Then, handle all of the excluded packs, marking them as
3396          * kept in-core so that later calls to add_object_entry()
3397          * discards any objects that are also found in excluded packs.
3398          */
3399         for_each_string_list_item(item, &exclude_packs) {
3400                 struct packed_git *p = item->util;
3401                 if (!p)
3402                         die(_("could not find pack '%s'"), item->string);
3403                 p->pack_keep_in_core = 1;
3404         }
3405
3406         /*
3407          * Order packs by ascending mtime; use QSORT directly to access the
3408          * string_list_item's ->util pointer, which string_list_sort() does not
3409          * provide.
3410          */
3411         QSORT(include_packs.items, include_packs.nr, pack_mtime_cmp);
3412
3413         for_each_string_list_item(item, &include_packs) {
3414                 struct packed_git *p = item->util;
3415                 for_each_object_in_pack(p,
3416                                         add_object_entry_from_pack,
3417                                         &revs,
3418                                         FOR_EACH_OBJECT_PACK_ORDER);
3419         }
3420
3421         if (prepare_revision_walk(&revs))
3422                 die(_("revision walk setup failed"));
3423         traverse_commit_list(&revs,
3424                              show_commit_pack_hint,
3425                              show_object_pack_hint,
3426                              NULL);
3427
3428         trace2_data_intmax("pack-objects", the_repository, "stdin_packs_found",
3429                            stdin_packs_found_nr);
3430         trace2_data_intmax("pack-objects", the_repository, "stdin_packs_hints",
3431                            stdin_packs_hints_nr);
3432
3433         strbuf_release(&buf);
3434         string_list_clear(&include_packs, 0);
3435         string_list_clear(&exclude_packs, 0);
3436 }
3437
3438 static void add_cruft_object_entry(const struct object_id *oid, enum object_type type,
3439                                    struct packed_git *pack, off_t offset,
3440                                    const char *name, uint32_t mtime)
3441 {
3442         struct object_entry *entry;
3443
3444         display_progress(progress_state, ++nr_seen);
3445
3446         entry = packlist_find(&to_pack, oid);
3447         if (entry) {
3448                 if (name) {
3449                         entry->hash = pack_name_hash(name);
3450                         entry->no_try_delta = no_try_delta(name);
3451                 }
3452         } else {
3453                 if (!want_object_in_pack(oid, 0, &pack, &offset))
3454                         return;
3455                 if (!pack && type == OBJ_BLOB && !has_loose_object(oid)) {
3456                         /*
3457                          * If a traversed tree has a missing blob then we want
3458                          * to avoid adding that missing object to our pack.
3459                          *
3460                          * This only applies to missing blobs, not trees,
3461                          * because the traversal needs to parse sub-trees but
3462                          * not blobs.
3463                          *
3464                          * Note we only perform this check when we couldn't
3465                          * already find the object in a pack, so we're really
3466                          * limited to "ensure non-tip blobs which don't exist in
3467                          * packs do exist via loose objects". Confused?
3468                          */
3469                         return;
3470                 }
3471
3472                 entry = create_object_entry(oid, type, pack_name_hash(name),
3473                                             0, name && no_try_delta(name),
3474                                             pack, offset);
3475         }
3476
3477         if (mtime > oe_cruft_mtime(&to_pack, entry))
3478                 oe_set_cruft_mtime(&to_pack, entry, mtime);
3479         return;
3480 }
3481
3482 static void show_cruft_object(struct object *obj, const char *name, void *data UNUSED)
3483 {
3484         /*
3485          * if we did not record it earlier, it's at least as old as our
3486          * expiration value. Rather than find it exactly, just use that
3487          * value.  This may bump it forward from its real mtime, but it
3488          * will still be "too old" next time we run with the same
3489          * expiration.
3490          *
3491          * if obj does appear in the packing list, this call is a noop (or may
3492          * set the namehash).
3493          */
3494         add_cruft_object_entry(&obj->oid, obj->type, NULL, 0, name, cruft_expiration);
3495 }
3496
3497 static void show_cruft_commit(struct commit *commit, void *data)
3498 {
3499         show_cruft_object((struct object*)commit, NULL, data);
3500 }
3501
3502 static int cruft_include_check_obj(struct object *obj, void *data UNUSED)
3503 {
3504         return !has_object_kept_pack(&obj->oid, IN_CORE_KEEP_PACKS);
3505 }
3506
/* Commit flavor of cruft_include_check_obj(); returns its result. */
static int cruft_include_check(struct commit *commit, void *data)
{
        struct object *obj = (struct object *)commit;

        return cruft_include_check_obj(obj, data);
}
3511
3512 static void set_cruft_mtime(const struct object *object,
3513                             struct packed_git *pack,
3514                             off_t offset, time_t mtime)
3515 {
3516         add_cruft_object_entry(&object->oid, object->type, pack, offset, NULL,
3517                                mtime);
3518 }
3519
3520 static void mark_pack_kept_in_core(struct string_list *packs, unsigned keep)
3521 {
3522         struct string_list_item *item = NULL;
3523         for_each_string_list_item(item, packs) {
3524                 struct packed_git *p = item->util;
3525                 if (!p)
3526                         die(_("could not find pack '%s'"), item->string);
3527                 p->pack_keep_in_core = keep;
3528         }
3529 }
3530
3531 static void add_unreachable_loose_objects(void);
3532 static void add_objects_in_unpacked_packs(void);
3533
3534 static void enumerate_cruft_objects(void)
3535 {
3536         if (progress)
3537                 progress_state = start_progress(_("Enumerating cruft objects"), 0);
3538
3539         add_objects_in_unpacked_packs();
3540         add_unreachable_loose_objects();
3541
3542         stop_progress(&progress_state);
3543 }
3544
3545 static void enumerate_and_traverse_cruft_objects(struct string_list *fresh_packs)
3546 {
3547         struct packed_git *p;
3548         struct rev_info revs;
3549         int ret;
3550
3551         repo_init_revisions(the_repository, &revs, NULL);
3552
3553         revs.tag_objects = 1;
3554         revs.tree_objects = 1;
3555         revs.blob_objects = 1;
3556
3557         revs.include_check = cruft_include_check;
3558         revs.include_check_obj = cruft_include_check_obj;
3559
3560         revs.ignore_missing_links = 1;
3561
3562         if (progress)
3563                 progress_state = start_progress(_("Enumerating cruft objects"), 0);
3564         ret = add_unseen_recent_objects_to_traversal(&revs, cruft_expiration,
3565                                                      set_cruft_mtime, 1);
3566         stop_progress(&progress_state);
3567
3568         if (ret)
3569                 die(_("unable to add cruft objects"));
3570
3571         /*
3572          * Re-mark only the fresh packs as kept so that objects in
3573          * unknown packs do not halt the reachability traversal early.
3574          */
3575         for (p = get_all_packs(the_repository); p; p = p->next)
3576                 p->pack_keep_in_core = 0;
3577         mark_pack_kept_in_core(fresh_packs, 1);
3578
3579         if (prepare_revision_walk(&revs))
3580                 die(_("revision walk setup failed"));
3581         if (progress)
3582                 progress_state = start_progress(_("Traversing cruft objects"), 0);
3583         nr_seen = 0;
3584         traverse_commit_list(&revs, show_cruft_commit, show_cruft_object, NULL);
3585
3586         stop_progress(&progress_state);
3587 }
3588
3589 static void read_cruft_objects(void)
3590 {
3591         struct strbuf buf = STRBUF_INIT;
3592         struct string_list discard_packs = STRING_LIST_INIT_DUP;
3593         struct string_list fresh_packs = STRING_LIST_INIT_DUP;
3594         struct packed_git *p;
3595
3596         ignore_packed_keep_in_core = 1;
3597
3598         while (strbuf_getline(&buf, stdin) != EOF) {
3599                 if (!buf.len)
3600                         continue;
3601
3602                 if (*buf.buf == '-')
3603                         string_list_append(&discard_packs, buf.buf + 1);
3604                 else
3605                         string_list_append(&fresh_packs, buf.buf);
3606                 strbuf_reset(&buf);
3607         }
3608
3609         string_list_sort(&discard_packs);
3610         string_list_sort(&fresh_packs);
3611
3612         for (p = get_all_packs(the_repository); p; p = p->next) {
3613                 const char *pack_name = pack_basename(p);
3614                 struct string_list_item *item;
3615
3616                 item = string_list_lookup(&fresh_packs, pack_name);
3617                 if (!item)
3618                         item = string_list_lookup(&discard_packs, pack_name);
3619
3620                 if (item) {
3621                         item->util = p;
3622                 } else {
3623                         /*
3624                          * This pack wasn't mentioned in either the "fresh" or
3625                          * "discard" list, so the caller didn't know about it.
3626                          *
3627                          * Mark it as kept so that its objects are ignored by
3628                          * add_unseen_recent_objects_to_traversal(). We'll
3629                          * unmark it before starting the traversal so it doesn't
3630                          * halt the traversal early.
3631                          */
3632                         p->pack_keep_in_core = 1;
3633                 }
3634         }
3635
3636         mark_pack_kept_in_core(&fresh_packs, 1);
3637         mark_pack_kept_in_core(&discard_packs, 0);
3638
3639         if (cruft_expiration)
3640                 enumerate_and_traverse_cruft_objects(&fresh_packs);
3641         else
3642                 enumerate_cruft_objects();
3643
3644         strbuf_release(&buf);
3645         string_list_clear(&discard_packs, 0);
3646         string_list_clear(&fresh_packs, 0);
3647 }
3648
3649 static void read_object_list_from_stdin(void)
3650 {
3651         char line[GIT_MAX_HEXSZ + 1 + PATH_MAX + 2];
3652         struct object_id oid;
3653         const char *p;
3654
3655         for (;;) {
3656                 if (!fgets(line, sizeof(line), stdin)) {
3657                         if (feof(stdin))
3658                                 break;
3659                         if (!ferror(stdin))
3660                                 BUG("fgets returned NULL, not EOF, not error!");
3661                         if (errno != EINTR)
3662                                 die_errno("fgets");
3663                         clearerr(stdin);
3664                         continue;
3665                 }
3666                 if (line[0] == '-') {
3667                         if (get_oid_hex(line+1, &oid))
3668                                 die(_("expected edge object ID, got garbage:\n %s"),
3669                                     line);
3670                         add_preferred_base(&oid);
3671                         continue;
3672                 }
3673                 if (parse_oid_hex(line, &oid, &p))
3674                         die(_("expected object ID, got garbage:\n %s"), line);
3675
3676                 add_preferred_base_object(p + 1);
3677                 add_object_entry(&oid, OBJ_NONE, p + 1, 0);
3678         }
3679 }
3680
3681 static void show_commit(struct commit *commit, void *data UNUSED)
3682 {
3683         add_object_entry(&commit->object.oid, OBJ_COMMIT, NULL, 0);
3684
3685         if (write_bitmap_index)
3686                 index_commit_for_bitmap(commit);
3687
3688         if (use_delta_islands)
3689                 propagate_island_marks(commit);
3690 }
3691
3692 static void show_object(struct object *obj, const char *name,
3693                         void *data UNUSED)
3694 {
3695         add_preferred_base_object(name);
3696         add_object_entry(&obj->oid, obj->type, name, 0);
3697
3698         if (use_delta_islands) {
3699                 const char *p;
3700                 unsigned depth;
3701                 struct object_entry *ent;
3702
3703                 /* the empty string is a root tree, which is depth 0 */
3704                 depth = *name ? 1 : 0;
3705                 for (p = strchr(name, '/'); p; p = strchr(p + 1, '/'))
3706                         depth++;
3707
3708                 ent = packlist_find(&to_pack, &obj->oid);
3709                 if (ent && depth > oe_tree_depth(&to_pack, ent))
3710                         oe_set_tree_depth(&to_pack, ent, depth);
3711         }
3712 }
3713
3714 static void show_object__ma_allow_any(struct object *obj, const char *name, void *data)
3715 {
3716         assert(arg_missing_action == MA_ALLOW_ANY);
3717
3718         /*
3719          * Quietly ignore ALL missing objects.  This avoids problems with
3720          * staging them now and getting an odd error later.
3721          */
3722         if (!has_object(the_repository, &obj->oid, 0))
3723                 return;
3724
3725         show_object(obj, name, data);
3726 }
3727
3728 static void show_object__ma_allow_promisor(struct object *obj, const char *name, void *data)
3729 {
3730         assert(arg_missing_action == MA_ALLOW_PROMISOR);
3731
3732         /*
3733          * Quietly ignore EXPECTED missing objects.  This avoids problems with
3734          * staging them now and getting an odd error later.
3735          */
3736         if (!has_object(the_repository, &obj->oid, 0) && is_promisor_object(&obj->oid))
3737                 return;
3738
3739         show_object(obj, name, data);
3740 }
3741
3742 static int option_parse_missing_action(const struct option *opt,
3743                                        const char *arg, int unset)
3744 {
3745         assert(arg);
3746         assert(!unset);
3747
3748         if (!strcmp(arg, "error")) {
3749                 arg_missing_action = MA_ERROR;
3750                 fn_show_object = show_object;
3751                 return 0;
3752         }
3753
3754         if (!strcmp(arg, "allow-any")) {
3755                 arg_missing_action = MA_ALLOW_ANY;
3756                 fetch_if_missing = 0;
3757                 fn_show_object = show_object__ma_allow_any;
3758                 return 0;
3759         }
3760
3761         if (!strcmp(arg, "allow-promisor")) {
3762                 arg_missing_action = MA_ALLOW_PROMISOR;
3763                 fetch_if_missing = 0;
3764                 fn_show_object = show_object__ma_allow_promisor;
3765                 return 0;
3766         }
3767
3768         die(_("invalid value for '%s': '%s'"), "--missing", arg);
3769         return 0;
3770 }
3771
3772 static void show_edge(struct commit *commit)
3773 {
3774         add_preferred_base(&commit->object.oid);
3775 }
3776
3777 static int add_object_in_unpacked_pack(const struct object_id *oid,
3778                                        struct packed_git *pack,
3779                                        uint32_t pos,
3780                                        void *data UNUSED)
3781 {
3782         if (cruft) {
3783                 off_t offset;
3784                 time_t mtime;
3785
3786                 if (pack->is_cruft) {
3787                         if (load_pack_mtimes(pack) < 0)
3788                                 die(_("could not load cruft pack .mtimes"));
3789                         mtime = nth_packed_mtime(pack, pos);
3790                 } else {
3791                         mtime = pack->mtime;
3792                 }
3793                 offset = nth_packed_object_offset(pack, pos);
3794
3795                 add_cruft_object_entry(oid, OBJ_NONE, pack, offset,
3796                                        NULL, mtime);
3797         } else {
3798                 add_object_entry(oid, OBJ_NONE, "", 0);
3799         }
3800         return 0;
3801 }
3802
3803 static void add_objects_in_unpacked_packs(void)
3804 {
3805         if (for_each_packed_object(add_object_in_unpacked_pack, NULL,
3806                                    FOR_EACH_OBJECT_PACK_ORDER |
3807                                    FOR_EACH_OBJECT_LOCAL_ONLY |
3808                                    FOR_EACH_OBJECT_SKIP_IN_CORE_KEPT_PACKS |
3809                                    FOR_EACH_OBJECT_SKIP_ON_DISK_KEPT_PACKS))
3810                 die(_("cannot open pack index"));
3811 }
3812
3813 static int add_loose_object(const struct object_id *oid, const char *path,
3814                             void *data UNUSED)
3815 {
3816         enum object_type type = oid_object_info(the_repository, oid, NULL);
3817
3818         if (type < 0) {
3819                 warning(_("loose object at %s could not be examined"), path);
3820                 return 0;
3821         }
3822
3823         if (cruft) {
3824                 struct stat st;
3825                 if (stat(path, &st) < 0) {
3826                         if (errno == ENOENT)
3827                                 return 0;
3828                         return error_errno("unable to stat %s", oid_to_hex(oid));
3829                 }
3830
3831                 add_cruft_object_entry(oid, type, NULL, 0, NULL,
3832                                        st.st_mtime);
3833         } else {
3834                 add_object_entry(oid, type, "", 0);
3835         }
3836         return 0;
3837 }
3838
3839 /*
3840  * We actually don't even have to worry about reachability here.
3841  * add_object_entry will weed out duplicates, so we just add every
3842  * loose object we find.
3843  */
3844 static void add_unreachable_loose_objects(void)
3845 {
3846         for_each_loose_file_in_objdir(get_object_directory(),
3847                                       add_loose_object,
3848                                       NULL, NULL, NULL);
3849 }
3850
3851 static int has_sha1_pack_kept_or_nonlocal(const struct object_id *oid)
3852 {
3853         static struct packed_git *last_found = (void *)1;
3854         struct packed_git *p;
3855
3856         p = (last_found != (void *)1) ? last_found :
3857                                         get_all_packs(the_repository);
3858
3859         while (p) {
3860                 if ((!p->pack_local || p->pack_keep ||
3861                                 p->pack_keep_in_core) &&
3862                         find_pack_entry_one(oid->hash, p)) {
3863                         last_found = p;
3864                         return 1;
3865                 }
3866                 if (p == last_found)
3867                         p = get_all_packs(the_repository);
3868                 else
3869                         p = p->next;
3870                 if (p == last_found)
3871                         p = p->next;
3872         }
3873         return 0;
3874 }
3875
/*
 * Store a list of object IDs that should not be discarded
 * because they were either written too recently, or are
 * reachable from another object that was.
 *
 * This is filled by get_object_list.
 */
static struct oid_array recent_objects;
3884
3885 static int loosened_object_can_be_discarded(const struct object_id *oid,
3886                                             timestamp_t mtime)
3887 {
3888         if (!unpack_unreachable_expiration)
3889                 return 0;
3890         if (mtime > unpack_unreachable_expiration)
3891                 return 0;
3892         if (oid_array_lookup(&recent_objects, oid) >= 0)
3893                 return 0;
3894         return 1;
3895 }
3896
3897 static void loosen_unused_packed_objects(void)
3898 {
3899         struct packed_git *p;
3900         uint32_t i;
3901         uint32_t loosened_objects_nr = 0;
3902         struct object_id oid;
3903
3904         for (p = get_all_packs(the_repository); p; p = p->next) {
3905                 if (!p->pack_local || p->pack_keep || p->pack_keep_in_core)
3906                         continue;
3907
3908                 if (open_pack_index(p))
3909                         die(_("cannot open pack index"));
3910
3911                 for (i = 0; i < p->num_objects; i++) {
3912                         nth_packed_object_id(&oid, p, i);
3913                         if (!packlist_find(&to_pack, &oid) &&
3914                             !has_sha1_pack_kept_or_nonlocal(&oid) &&
3915                             !loosened_object_can_be_discarded(&oid, p->mtime)) {
3916                                 if (force_object_loose(&oid, p->mtime))
3917                                         die(_("unable to force loose object"));
3918                                 loosened_objects_nr++;
3919                         }
3920                 }
3921         }
3922
3923         trace2_data_intmax("pack-objects", the_repository,
3924                            "loosen_unused_packed_objects/loosened", loosened_objects_nr);
3925 }
3926
3927 /*
3928  * This tracks any options which pack-reuse code expects to be on, or which a
3929  * reader of the pack might not understand, and which would therefore prevent
3930  * blind reuse of what we have on disk.
3931  */
3932 static int pack_options_allow_reuse(void)
3933 {
3934         return allow_pack_reuse &&
3935                pack_to_stdout &&
3936                !ignore_packed_keep_on_disk &&
3937                !ignore_packed_keep_in_core &&
3938                (!local || !have_non_local_packs) &&
3939                !incremental;
3940 }
3941
3942 static int get_object_list_from_bitmap(struct rev_info *revs)
3943 {
3944         if (!(bitmap_git = prepare_bitmap_walk(revs, 0)))
3945                 return -1;
3946
3947         if (pack_options_allow_reuse() &&
3948             !reuse_partial_packfile_from_bitmap(
3949                         bitmap_git,
3950                         &reuse_packfile,
3951                         &reuse_packfile_objects,
3952                         &reuse_packfile_bitmap)) {
3953                 assert(reuse_packfile_objects);
3954                 nr_result += reuse_packfile_objects;
3955                 nr_seen += reuse_packfile_objects;
3956                 display_progress(progress_state, nr_seen);
3957         }
3958
3959         traverse_bitmap_commit_list(bitmap_git, revs,
3960                                     &add_object_entry_from_bitmap);
3961         return 0;
3962 }
3963
3964 static void record_recent_object(struct object *obj,
3965                                  const char *name UNUSED,
3966                                  void *data UNUSED)
3967 {
3968         oid_array_append(&recent_objects, &obj->oid);
3969 }
3970
3971 static void record_recent_commit(struct commit *commit, void *data UNUSED)
3972 {
3973         oid_array_append(&recent_objects, &commit->object.oid);
3974 }
3975
3976 static int mark_bitmap_preferred_tip(const char *refname,
3977                                      const struct object_id *oid,
3978                                      int flags UNUSED,
3979                                      void *data UNUSED)
3980 {
3981         struct object_id peeled;
3982         struct object *object;
3983
3984         if (!peel_iterated_oid(oid, &peeled))
3985                 oid = &peeled;
3986
3987         object = parse_object_or_die(oid, refname);
3988         if (object->type == OBJ_COMMIT)
3989                 object->flags |= NEEDS_BITMAP;
3990
3991         return 0;
3992 }
3993
3994 static void mark_bitmap_preferred_tips(void)
3995 {
3996         struct string_list_item *item;
3997         const struct string_list *preferred_tips;
3998
3999         preferred_tips = bitmap_preferred_tips(the_repository);
4000         if (!preferred_tips)
4001                 return;
4002
4003         for_each_string_list_item(item, preferred_tips) {
4004                 for_each_ref_in(item->string, mark_bitmap_preferred_tip, NULL);
4005         }
4006 }
4007
4008 static void get_object_list(struct rev_info *revs, int ac, const char **av)
4009 {
4010         struct setup_revision_opt s_r_opt = {
4011                 .allow_exclude_promisor_objects = 1,
4012         };
4013         char line[1000];
4014         int flags = 0;
4015         int save_warning;
4016
4017         save_commit_buffer = 0;
4018         setup_revisions(ac, av, revs, &s_r_opt);
4019
4020         /* make sure shallows are read */
4021         is_repository_shallow(the_repository);
4022
4023         save_warning = warn_on_object_refname_ambiguity;
4024         warn_on_object_refname_ambiguity = 0;
4025
4026         while (fgets(line, sizeof(line), stdin) != NULL) {
4027                 int len = strlen(line);
4028                 if (len && line[len - 1] == '\n')
4029                         line[--len] = 0;
4030                 if (!len)
4031                         break;
4032                 if (*line == '-') {
4033                         if (!strcmp(line, "--not")) {
4034                                 flags ^= UNINTERESTING;
4035                                 write_bitmap_index = 0;
4036                                 continue;
4037                         }
4038                         if (starts_with(line, "--shallow ")) {
4039                                 struct object_id oid;
4040                                 if (get_oid_hex(line + 10, &oid))
4041                                         die("not an object name '%s'", line + 10);
4042                                 register_shallow(the_repository, &oid);
4043                                 use_bitmap_index = 0;
4044                                 continue;
4045                         }
4046                         die(_("not a rev '%s'"), line);
4047                 }
4048                 if (handle_revision_arg(line, revs, flags, REVARG_CANNOT_BE_FILENAME))
4049                         die(_("bad revision '%s'"), line);
4050         }
4051
4052         warn_on_object_refname_ambiguity = save_warning;
4053
4054         if (use_bitmap_index && !get_object_list_from_bitmap(revs))
4055                 return;
4056
4057         if (use_delta_islands)
4058                 load_delta_islands(the_repository, progress);
4059
4060         if (write_bitmap_index)
4061                 mark_bitmap_preferred_tips();
4062
4063         if (prepare_revision_walk(revs))
4064                 die(_("revision walk setup failed"));
4065         mark_edges_uninteresting(revs, show_edge, sparse);
4066
4067         if (!fn_show_object)
4068                 fn_show_object = show_object;
4069         traverse_commit_list(revs,
4070                              show_commit, fn_show_object,
4071                              NULL);
4072
4073         if (unpack_unreachable_expiration) {
4074                 revs->ignore_missing_links = 1;
4075                 if (add_unseen_recent_objects_to_traversal(revs,
4076                                 unpack_unreachable_expiration, NULL, 0))
4077                         die(_("unable to add recent objects"));
4078                 if (prepare_revision_walk(revs))
4079                         die(_("revision walk setup failed"));
4080                 traverse_commit_list(revs, record_recent_commit,
4081                                      record_recent_object, NULL);
4082         }
4083
4084         if (keep_unreachable)
4085                 add_objects_in_unpacked_packs();
4086         if (pack_loose_unreachable)
4087                 add_unreachable_loose_objects();
4088         if (unpack_unreachable)
4089                 loosen_unused_packed_objects();
4090
4091         oid_array_clear(&recent_objects);
4092 }
4093
4094 static void add_extra_kept_packs(const struct string_list *names)
4095 {
4096         struct packed_git *p;
4097
4098         if (!names->nr)
4099                 return;
4100
4101         for (p = get_all_packs(the_repository); p; p = p->next) {
4102                 const char *name = basename(p->pack_name);
4103                 int i;
4104
4105                 if (!p->pack_local)
4106                         continue;
4107
4108                 for (i = 0; i < names->nr; i++)
4109                         if (!fspathcmp(name, names->items[i].string))
4110                                 break;
4111
4112                 if (i < names->nr) {
4113                         p->pack_keep_in_core = 1;
4114                         ignore_packed_keep_in_core = 1;
4115                         continue;
4116                 }
4117         }
4118 }
4119
4120 static int option_parse_index_version(const struct option *opt,
4121                                       const char *arg, int unset)
4122 {
4123         char *c;
4124         const char *val = arg;
4125
4126         BUG_ON_OPT_NEG(unset);
4127
4128         pack_idx_opts.version = strtoul(val, &c, 10);
4129         if (pack_idx_opts.version > 2)
4130                 die(_("unsupported index version %s"), val);
4131         if (*c == ',' && c[1])
4132                 pack_idx_opts.off32_limit = strtoul(c+1, &c, 0);
4133         if (*c || pack_idx_opts.off32_limit & 0x80000000)
4134                 die(_("bad index version '%s'"), val);
4135         return 0;
4136 }
4137
4138 static int option_parse_unpack_unreachable(const struct option *opt,
4139                                            const char *arg, int unset)
4140 {
4141         if (unset) {
4142                 unpack_unreachable = 0;
4143                 unpack_unreachable_expiration = 0;
4144         }
4145         else {
4146                 unpack_unreachable = 1;
4147                 if (arg)
4148                         unpack_unreachable_expiration = approxidate(arg);
4149         }
4150         return 0;
4151 }
4152
4153 static int option_parse_cruft_expiration(const struct option *opt,
4154                                          const char *arg, int unset)
4155 {
4156         if (unset) {
4157                 cruft = 0;
4158                 cruft_expiration = 0;
4159         } else {
4160                 cruft = 1;
4161                 if (arg)
4162                         cruft_expiration = approxidate(arg);
4163         }
4164         return 0;
4165 }
4166
4167 int cmd_pack_objects(int argc, const char **argv, const char *prefix)
4168 {
4169         int use_internal_rev_list = 0;
4170         int shallow = 0;
4171         int all_progress_implied = 0;
4172         struct strvec rp = STRVEC_INIT;
4173         int rev_list_unpacked = 0, rev_list_all = 0, rev_list_reflog = 0;
4174         int rev_list_index = 0;
4175         int stdin_packs = 0;
4176         struct string_list keep_pack_list = STRING_LIST_INIT_NODUP;
4177         struct list_objects_filter_options filter_options =
4178                 LIST_OBJECTS_FILTER_INIT;
4179
4180         struct option pack_objects_options[] = {
4181                 OPT_SET_INT('q', "quiet", &progress,
4182                             N_("do not show progress meter"), 0),
4183                 OPT_SET_INT(0, "progress", &progress,
4184                             N_("show progress meter"), 1),
4185                 OPT_SET_INT(0, "all-progress", &progress,
4186                             N_("show progress meter during object writing phase"), 2),
4187                 OPT_BOOL(0, "all-progress-implied",
4188                          &all_progress_implied,
4189                          N_("similar to --all-progress when progress meter is shown")),
4190                 OPT_CALLBACK_F(0, "index-version", NULL, N_("<version>[,<offset>]"),
4191                   N_("write the pack index file in the specified idx format version"),
4192                   PARSE_OPT_NONEG, option_parse_index_version),
4193                 OPT_MAGNITUDE(0, "max-pack-size", &pack_size_limit,
4194                               N_("maximum size of each output pack file")),
4195                 OPT_BOOL(0, "local", &local,
4196                          N_("ignore borrowed objects from alternate object store")),
4197                 OPT_BOOL(0, "incremental", &incremental,
4198                          N_("ignore packed objects")),
4199                 OPT_INTEGER(0, "window", &window,
4200                             N_("limit pack window by objects")),
4201                 OPT_MAGNITUDE(0, "window-memory", &window_memory_limit,
4202                               N_("limit pack window by memory in addition to object limit")),
4203                 OPT_INTEGER(0, "depth", &depth,
4204                             N_("maximum length of delta chain allowed in the resulting pack")),
4205                 OPT_BOOL(0, "reuse-delta", &reuse_delta,
4206                          N_("reuse existing deltas")),
4207                 OPT_BOOL(0, "reuse-object", &reuse_object,
4208                          N_("reuse existing objects")),
4209                 OPT_BOOL(0, "delta-base-offset", &allow_ofs_delta,
4210                          N_("use OFS_DELTA objects")),
4211                 OPT_INTEGER(0, "threads", &delta_search_threads,
4212                             N_("use threads when searching for best delta matches")),
4213                 OPT_BOOL(0, "non-empty", &non_empty,
4214                          N_("do not create an empty pack output")),
4215                 OPT_BOOL(0, "revs", &use_internal_rev_list,
4216                          N_("read revision arguments from standard input")),
4217                 OPT_SET_INT_F(0, "unpacked", &rev_list_unpacked,
4218                               N_("limit the objects to those that are not yet packed"),
4219                               1, PARSE_OPT_NONEG),
4220                 OPT_SET_INT_F(0, "all", &rev_list_all,
4221                               N_("include objects reachable from any reference"),
4222                               1, PARSE_OPT_NONEG),
4223                 OPT_SET_INT_F(0, "reflog", &rev_list_reflog,
4224                               N_("include objects referred by reflog entries"),
4225                               1, PARSE_OPT_NONEG),
4226                 OPT_SET_INT_F(0, "indexed-objects", &rev_list_index,
4227                               N_("include objects referred to by the index"),
4228                               1, PARSE_OPT_NONEG),
4229                 OPT_BOOL(0, "stdin-packs", &stdin_packs,
4230                          N_("read packs from stdin")),
4231                 OPT_BOOL(0, "stdout", &pack_to_stdout,
4232                          N_("output pack to stdout")),
4233                 OPT_BOOL(0, "include-tag", &include_tag,
4234                          N_("include tag objects that refer to objects to be packed")),
4235                 OPT_BOOL(0, "keep-unreachable", &keep_unreachable,
4236                          N_("keep unreachable objects")),
4237                 OPT_BOOL(0, "pack-loose-unreachable", &pack_loose_unreachable,
4238                          N_("pack loose unreachable objects")),
4239                 OPT_CALLBACK_F(0, "unpack-unreachable", NULL, N_("time"),
4240                   N_("unpack unreachable objects newer than <time>"),
4241                   PARSE_OPT_OPTARG, option_parse_unpack_unreachable),
4242                 OPT_BOOL(0, "cruft", &cruft, N_("create a cruft pack")),
4243                 OPT_CALLBACK_F(0, "cruft-expiration", NULL, N_("time"),
4244                   N_("expire cruft objects older than <time>"),
4245                   PARSE_OPT_OPTARG, option_parse_cruft_expiration),
4246                 OPT_BOOL(0, "sparse", &sparse,
4247                          N_("use the sparse reachability algorithm")),
4248                 OPT_BOOL(0, "thin", &thin,
4249                          N_("create thin packs")),
4250                 OPT_BOOL(0, "shallow", &shallow,
4251                          N_("create packs suitable for shallow fetches")),
4252                 OPT_BOOL(0, "honor-pack-keep", &ignore_packed_keep_on_disk,
4253                          N_("ignore packs that have companion .keep file")),
4254                 OPT_STRING_LIST(0, "keep-pack", &keep_pack_list, N_("name"),
4255                                 N_("ignore this pack")),
4256                 OPT_INTEGER(0, "compression", &pack_compression_level,
4257                             N_("pack compression level")),
4258                 OPT_SET_INT(0, "keep-true-parents", &grafts_replace_parents,
4259                             N_("do not hide commits by grafts"), 0),
4260                 OPT_BOOL(0, "use-bitmap-index", &use_bitmap_index,
4261                          N_("use a bitmap index if available to speed up counting objects")),
4262                 OPT_SET_INT(0, "write-bitmap-index", &write_bitmap_index,
4263                             N_("write a bitmap index together with the pack index"),
4264                             WRITE_BITMAP_TRUE),
4265                 OPT_SET_INT_F(0, "write-bitmap-index-quiet",
4266                               &write_bitmap_index,
4267                               N_("write a bitmap index if possible"),
4268                               WRITE_BITMAP_QUIET, PARSE_OPT_HIDDEN),
4269                 OPT_PARSE_LIST_OBJECTS_FILTER(&filter_options),
4270                 OPT_CALLBACK_F(0, "missing", NULL, N_("action"),
4271                   N_("handling for missing objects"), PARSE_OPT_NONEG,
4272                   option_parse_missing_action),
4273                 OPT_BOOL(0, "exclude-promisor-objects", &exclude_promisor_objects,
4274                          N_("do not pack objects in promisor packfiles")),
4275                 OPT_BOOL(0, "delta-islands", &use_delta_islands,
4276                          N_("respect islands during delta compression")),
4277                 OPT_STRING_LIST(0, "uri-protocol", &uri_protocols,
4278                                 N_("protocol"),
4279                                 N_("exclude any configured uploadpack.blobpackfileuri with this protocol")),
4280                 OPT_END(),
4281         };
4282
4283         if (DFS_NUM_STATES > (1 << OE_DFS_STATE_BITS))
4284                 BUG("too many dfs states, increase OE_DFS_STATE_BITS");
4285
4286         disable_replace_refs();
4287
4288         sparse = git_env_bool("GIT_TEST_PACK_SPARSE", -1);
4289         if (the_repository->gitdir) {
4290                 prepare_repo_settings(the_repository);
4291                 if (sparse < 0)
4292                         sparse = the_repository->settings.pack_use_sparse;
4293         }
4294
4295         reset_pack_idx_option(&pack_idx_opts);
4296         pack_idx_opts.flags |= WRITE_REV;
4297         git_config(git_pack_config, NULL);
4298         if (git_env_bool(GIT_TEST_NO_WRITE_REV_INDEX, 0))
4299                 pack_idx_opts.flags &= ~WRITE_REV;
4300
4301         progress = isatty(2);
4302         argc = parse_options(argc, argv, prefix, pack_objects_options,
4303                              pack_usage, 0);
4304
4305         if (argc) {
4306                 base_name = argv[0];
4307                 argc--;
4308         }
4309         if (pack_to_stdout != !base_name || argc)
4310                 usage_with_options(pack_usage, pack_objects_options);
4311
4312         if (depth < 0)
4313                 depth = 0;
4314         if (depth >= (1 << OE_DEPTH_BITS)) {
4315                 warning(_("delta chain depth %d is too deep, forcing %d"),
4316                         depth, (1 << OE_DEPTH_BITS) - 1);
4317                 depth = (1 << OE_DEPTH_BITS) - 1;
4318         }
4319         if (cache_max_small_delta_size >= (1U << OE_Z_DELTA_BITS)) {
4320                 warning(_("pack.deltaCacheLimit is too high, forcing %d"),
4321                         (1U << OE_Z_DELTA_BITS) - 1);
4322                 cache_max_small_delta_size = (1U << OE_Z_DELTA_BITS) - 1;
4323         }
4324         if (window < 0)
4325                 window = 0;
4326
4327         strvec_push(&rp, "pack-objects");
4328         if (thin) {
4329                 use_internal_rev_list = 1;
4330                 strvec_push(&rp, shallow
4331                                 ? "--objects-edge-aggressive"
4332                                 : "--objects-edge");
4333         } else
4334                 strvec_push(&rp, "--objects");
4335
4336         if (rev_list_all) {
4337                 use_internal_rev_list = 1;
4338                 strvec_push(&rp, "--all");
4339         }
4340         if (rev_list_reflog) {
4341                 use_internal_rev_list = 1;
4342                 strvec_push(&rp, "--reflog");
4343         }
4344         if (rev_list_index) {
4345                 use_internal_rev_list = 1;
4346                 strvec_push(&rp, "--indexed-objects");
4347         }
4348         if (rev_list_unpacked && !stdin_packs) {
4349                 use_internal_rev_list = 1;
4350                 strvec_push(&rp, "--unpacked");
4351         }
4352
4353         if (exclude_promisor_objects) {
4354                 use_internal_rev_list = 1;
4355                 fetch_if_missing = 0;
4356                 strvec_push(&rp, "--exclude-promisor-objects");
4357         }
4358         if (unpack_unreachable || keep_unreachable || pack_loose_unreachable)
4359                 use_internal_rev_list = 1;
4360
4361         if (!reuse_object)
4362                 reuse_delta = 0;
4363         if (pack_compression_level == -1)
4364                 pack_compression_level = Z_DEFAULT_COMPRESSION;
4365         else if (pack_compression_level < 0 || pack_compression_level > Z_BEST_COMPRESSION)
4366                 die(_("bad pack compression level %d"), pack_compression_level);
4367
4368         if (!delta_search_threads)      /* --threads=0 means autodetect */
4369                 delta_search_threads = online_cpus();
4370
4371         if (!HAVE_THREADS && delta_search_threads != 1)
4372                 warning(_("no threads support, ignoring --threads"));
4373         if (!pack_to_stdout && !pack_size_limit && !cruft)
4374                 pack_size_limit = pack_size_limit_cfg;
4375         if (pack_to_stdout && pack_size_limit)
4376                 die(_("--max-pack-size cannot be used to build a pack for transfer"));
4377         if (pack_size_limit && pack_size_limit < 1024*1024) {
4378                 warning(_("minimum pack size limit is 1 MiB"));
4379                 pack_size_limit = 1024*1024;
4380         }
4381
4382         if (!pack_to_stdout && thin)
4383                 die(_("--thin cannot be used to build an indexable pack"));
4384
4385         if (keep_unreachable && unpack_unreachable)
4386                 die(_("options '%s' and '%s' cannot be used together"), "--keep-unreachable", "--unpack-unreachable");
4387         if (!rev_list_all || !rev_list_reflog || !rev_list_index)
4388                 unpack_unreachable_expiration = 0;
4389
4390         if (filter_options.choice) {
4391                 if (!pack_to_stdout)
4392                         die(_("cannot use --filter without --stdout"));
4393                 if (stdin_packs)
4394                         die(_("cannot use --filter with --stdin-packs"));
4395         }
4396
4397         if (stdin_packs && use_internal_rev_list)
4398                 die(_("cannot use internal rev list with --stdin-packs"));
4399
4400         if (cruft) {
4401                 if (use_internal_rev_list)
4402                         die(_("cannot use internal rev list with --cruft"));
4403                 if (stdin_packs)
4404                         die(_("cannot use --stdin-packs with --cruft"));
4405                 if (pack_size_limit)
4406                         die(_("cannot use --max-pack-size with --cruft"));
4407         }
4408
4409         /*
4410          * "soft" reasons not to use bitmaps - for on-disk repack by default we want
4411          *
4412          * - to produce good pack (with bitmap index not-yet-packed objects are
4413          *   packed in suboptimal order).
4414          *
4415          * - to use more robust pack-generation codepath (avoiding possible
4416          *   bugs in bitmap code and possible bitmap index corruption).
4417          */
4418         if (!pack_to_stdout)
4419                 use_bitmap_index_default = 0;
4420
4421         if (use_bitmap_index < 0)
4422                 use_bitmap_index = use_bitmap_index_default;
4423
4424         /* "hard" reasons not to use bitmaps; these just won't work at all */
4425         if (!use_internal_rev_list || (!pack_to_stdout && write_bitmap_index) || is_repository_shallow(the_repository))
4426                 use_bitmap_index = 0;
4427
4428         if (pack_to_stdout || !rev_list_all)
4429                 write_bitmap_index = 0;
4430
4431         if (use_delta_islands)
4432                 strvec_push(&rp, "--topo-order");
4433
4434         if (progress && all_progress_implied)
4435                 progress = 2;
4436
4437         add_extra_kept_packs(&keep_pack_list);
4438         if (ignore_packed_keep_on_disk) {
4439                 struct packed_git *p;
4440                 for (p = get_all_packs(the_repository); p; p = p->next)
4441                         if (p->pack_local && p->pack_keep)
4442                                 break;
4443                 if (!p) /* no keep-able packs found */
4444                         ignore_packed_keep_on_disk = 0;
4445         }
4446         if (local) {
4447                 /*
4448                  * unlike ignore_packed_keep_on_disk above, we do not
4449                  * want to unset "local" based on looking at packs, as
4450                  * it also covers non-local objects
4451                  */
4452                 struct packed_git *p;
4453                 for (p = get_all_packs(the_repository); p; p = p->next) {
4454                         if (!p->pack_local) {
4455                                 have_non_local_packs = 1;
4456                                 break;
4457                         }
4458                 }
4459         }
4460
4461         trace2_region_enter("pack-objects", "enumerate-objects",
4462                             the_repository);
4463         prepare_packing_data(the_repository, &to_pack);
4464
4465         if (progress && !cruft)
4466                 progress_state = start_progress(_("Enumerating objects"), 0);
4467         if (stdin_packs) {
4468                 /* avoids adding objects in excluded packs */
4469                 ignore_packed_keep_in_core = 1;
4470                 read_packs_list_from_stdin();
4471                 if (rev_list_unpacked)
4472                         add_unreachable_loose_objects();
4473         } else if (cruft) {
4474                 read_cruft_objects();
4475         } else if (!use_internal_rev_list) {
4476                 read_object_list_from_stdin();
4477         } else {
4478                 struct rev_info revs;
4479
4480                 repo_init_revisions(the_repository, &revs, NULL);
4481                 list_objects_filter_copy(&revs.filter, &filter_options);
4482                 get_object_list(&revs, rp.nr, rp.v);
4483                 release_revisions(&revs);
4484         }
4485         cleanup_preferred_base();
4486         if (include_tag && nr_result)
4487                 for_each_tag_ref(add_ref_tag, NULL);
4488         stop_progress(&progress_state);
4489         trace2_region_leave("pack-objects", "enumerate-objects",
4490                             the_repository);
4491
4492         if (non_empty && !nr_result)
4493                 goto cleanup;
4494         if (nr_result) {
4495                 trace2_region_enter("pack-objects", "prepare-pack",
4496                                     the_repository);
4497                 prepare_pack(window, depth);
4498                 trace2_region_leave("pack-objects", "prepare-pack",
4499                                     the_repository);
4500         }
4501
4502         trace2_region_enter("pack-objects", "write-pack-file", the_repository);
4503         write_excluded_by_configs();
4504         write_pack_file();
4505         trace2_region_leave("pack-objects", "write-pack-file", the_repository);
4506
4507         if (progress)
4508                 fprintf_ln(stderr,
4509                            _("Total %"PRIu32" (delta %"PRIu32"),"
4510                              " reused %"PRIu32" (delta %"PRIu32"),"
4511                              " pack-reused %"PRIu32),
4512                            written, written_delta, reused, reused_delta,
4513                            reuse_packfile_objects);
4514
4515 cleanup:
4516         list_objects_filter_release(&filter_options);
4517         strvec_clear(&rp);
4518
4519         return 0;
4520 }