builtin/pack-objects.c

   1 #define USE_THE_REPOSITORY_VARIABLE
   2 #define DISABLE_SIGN_COMPARE_WARNINGS
   3
   4 #include "builtin.h"
   5 #include "environment.h"
   6 #include "gettext.h"
   7 #include "hex.h"
   8 #include "config.h"
   9 #include "attr.h"
  10 #include "object.h"
  11 #include "commit.h"
  12 #include "tag.h"
  13 #include "delta.h"
  14 #include "pack.h"
  15 #include "pack-revindex.h"
  16 #include "csum-file.h"
  17 #include "tree-walk.h"
  18 #include "diff.h"
  19 #include "revision.h"
  20 #include "list-objects.h"
  21 #include "list-objects-filter-options.h"
  22 #include "pack-objects.h"
  23 #include "progress.h"
  24 #include "refs.h"
  25 #include "streaming.h"
  26 #include "thread-utils.h"
  27 #include "pack-bitmap.h"
  28 #include "delta-islands.h"
  29 #include "reachable.h"
  30 #include "oid-array.h"
  31 #include "strvec.h"
  32 #include "list.h"
  33 #include "packfile.h"
  34 #include "object-file.h"
  35 #include "object-store.h"
  36 #include "replace-object.h"
  37 #include "dir.h"
  38 #include "midx.h"
  39 #include "trace2.h"
  40 #include "shallow.h"
  41 #include "promisor-remote.h"
  42 #include "pack-mtimes.h"
  43 #include "parse-options.h"
  44 #include "blob.h"
  45 #include "tree.h"
  46 #include "path-walk.h"
  47 #include "trace2.h"
  48
  49 /*
  50  * Objects we are going to pack are collected in the `to_pack` structure.
  51  * It contains an array (dynamically expanded) of the object data, and a map
  52  * that can resolve SHA1s to their position in the array.
  53  */
  54 static struct packing_data to_pack;
  55
  56 static inline struct object_entry *oe_delta(
  57                 const struct packing_data *pack,
  58                 const struct object_entry *e)
  59 {
  60         if (!e->delta_idx)
  61                 return NULL;
  62         if (e->ext_base)
  63                 return &pack->ext_bases[e->delta_idx - 1];
  64         else
  65                 return &pack->objects[e->delta_idx - 1];
  66 }
  67
  68 static inline unsigned long oe_delta_size(struct packing_data *pack,
  69                                           const struct object_entry *e)
  70 {
  71         if (e->delta_size_valid)
  72                 return e->delta_size_;
  73
  74         /*
  75          * pack->delta_size[] can't be NULL because oe_set_delta_size()
  76          * must have been called when a new delta is saved with
  77          * oe_set_delta().
  78          * If oe_delta() returns NULL (i.e. default state, which means
  79          * delta_size_valid is also false), then the caller must never
  80          * call oe_delta_size().
  81          */
  82         return pack->delta_size[e - pack->objects];
  83 }
  84
  85 unsigned long oe_get_size_slow(struct packing_data *pack,
  86                                const struct object_entry *e);
  87
  88 static inline unsigned long oe_size(struct packing_data *pack,
  89                                     const struct object_entry *e)
  90 {
  91         if (e->size_valid)
  92                 return e->size_;
  93
  94         return oe_get_size_slow(pack, e);
  95 }
  96
  97 static inline void oe_set_delta(struct packing_data *pack,
  98                                 struct object_entry *e,
  99                                 struct object_entry *delta)
 100 {
 101         if (delta)
 102                 e->delta_idx = (delta - pack->objects) + 1;
 103         else
 104                 e->delta_idx = 0;
 105 }
 106
 107 static inline struct object_entry *oe_delta_sibling(
 108                 const struct packing_data *pack,
 109                 const struct object_entry *e)
 110 {
 111         if (e->delta_sibling_idx)
 112                 return &pack->objects[e->delta_sibling_idx - 1];
 113         return NULL;
 114 }
 115
 116 static inline struct object_entry *oe_delta_child(
 117                 const struct packing_data *pack,
 118                 const struct object_entry *e)
 119 {
 120         if (e->delta_child_idx)
 121                 return &pack->objects[e->delta_child_idx - 1];
 122         return NULL;
 123 }
 124
 125 static inline void oe_set_delta_child(struct packing_data *pack,
 126                                       struct object_entry *e,
 127                                       struct object_entry *delta)
 128 {
 129         if (delta)
 130                 e->delta_child_idx = (delta - pack->objects) + 1;
 131         else
 132                 e->delta_child_idx = 0;
 133 }
 134
 135 static inline void oe_set_delta_sibling(struct packing_data *pack,
 136                                         struct object_entry *e,
 137                                         struct object_entry *delta)
 138 {
 139         if (delta)
 140                 e->delta_sibling_idx = (delta - pack->objects) + 1;
 141         else
 142                 e->delta_sibling_idx = 0;
 143 }
 144
 145 static inline void oe_set_size(struct packing_data *pack,
 146                                struct object_entry *e,
 147                                unsigned long size)
 148 {
 149         if (size < pack->oe_size_limit) {
 150                 e->size_ = size;
 151                 e->size_valid = 1;
 152         } else {
 153                 e->size_valid = 0;
 154                 if (oe_get_size_slow(pack, e) != size)
 155                         BUG("'size' is supposed to be the object size!");
 156         }
 157 }
 158
 159 static inline void oe_set_delta_size(struct packing_data *pack,
 160                                      struct object_entry *e,
 161                                      unsigned long size)
 162 {
 163         if (size < pack->oe_delta_size_limit) {
 164                 e->delta_size_ = size;
 165                 e->delta_size_valid = 1;
 166         } else {
 167                 packing_data_lock(pack);
 168                 if (!pack->delta_size)
 169                         ALLOC_ARRAY(pack->delta_size, pack->nr_alloc);
 170                 packing_data_unlock(pack);
 171
 172                 pack->delta_size[e - pack->objects] = size;
 173                 e->delta_size_valid = 0;
 174         }
 175 }
 176
 177 #define IN_PACK(obj) oe_in_pack(&to_pack, obj)
 178 #define SIZE(obj) oe_size(&to_pack, obj)
 179 #define SET_SIZE(obj,size) oe_set_size(&to_pack, obj, size)
 180 #define DELTA_SIZE(obj) oe_delta_size(&to_pack, obj)
 181 #define DELTA(obj) oe_delta(&to_pack, obj)
 182 #define DELTA_CHILD(obj) oe_delta_child(&to_pack, obj)
 183 #define DELTA_SIBLING(obj) oe_delta_sibling(&to_pack, obj)
 184 #define SET_DELTA(obj, val) oe_set_delta(&to_pack, obj, val)
 185 #define SET_DELTA_EXT(obj, oid) oe_set_delta_ext(&to_pack, obj, oid)
 186 #define SET_DELTA_SIZE(obj, val) oe_set_delta_size(&to_pack, obj, val)
 187 #define SET_DELTA_CHILD(obj, val) oe_set_delta_child(&to_pack, obj, val)
 188 #define SET_DELTA_SIBLING(obj, val) oe_set_delta_sibling(&to_pack, obj, val)
 189
 190 static const char *const pack_usage[] = {
 191         N_("git pack-objects [-q | --progress | --all-progress] [--all-progress-implied]\n"
 192            "                 [--no-reuse-delta] [--delta-base-offset] [--non-empty]\n"
 193            "                 [--local] [--incremental] [--window=<n>] [--depth=<n>]\n"
 194            "                 [--revs [--unpacked | --all]] [--keep-pack=<pack-name>]\n"
 195            "                 [--cruft] [--cruft-expiration=<time>]\n"
 196            "                 [--stdout [--filter=<filter-spec>] | <base-name>]\n"
 197            "                 [--shallow] [--keep-true-parents] [--[no-]sparse]\n"
 198            "                 [--name-hash-version=<n>] [--path-walk] < <object-list>"),
 199         NULL
 200 };
 201
 202 static struct pack_idx_entry **written_list;
 203 static uint32_t nr_result, nr_written, nr_seen;
 204 static struct bitmap_index *bitmap_git;
 205 static uint32_t write_layer;
 206
 207 static int non_empty;
 208 static int reuse_delta = 1, reuse_object = 1;
 209 static int keep_unreachable, unpack_unreachable, include_tag;
 210 static timestamp_t unpack_unreachable_expiration;
 211 static int pack_loose_unreachable;
 212 static int cruft;
 213 static int shallow = 0;
 214 static timestamp_t cruft_expiration;
 215 static int local;
 216 static int have_non_local_packs;
 217 static int incremental;
 218 static int ignore_packed_keep_on_disk;
 219 static int ignore_packed_keep_in_core;
 220 static int ignore_packed_keep_in_core_has_cruft;
 221 static int allow_ofs_delta;
 222 static struct pack_idx_option pack_idx_opts;
 223 static const char *base_name;
 224 static int progress = 1;
 225 static int window = 10;
 226 static unsigned long pack_size_limit;
 227 static int depth = 50;
 228 static int delta_search_threads;
 229 static int pack_to_stdout;
 230 static int sparse;
 231 static int thin;
 232 static int path_walk = -1;
 233 static int num_preferred_base;
 234 static struct progress *progress_state;
 235
 236 static struct bitmapped_pack *reuse_packfiles;
 237 static size_t reuse_packfiles_nr;
 238 static size_t reuse_packfiles_used_nr;
 239 static uint32_t reuse_packfile_objects;
 240 static struct bitmap *reuse_packfile_bitmap;
 241
 242 static int use_bitmap_index_default = 1;
 243 static int use_bitmap_index = -1;
 244 static enum {
 245         NO_PACK_REUSE = 0,
 246         SINGLE_PACK_REUSE,
 247         MULTI_PACK_REUSE,
 248 } allow_pack_reuse = SINGLE_PACK_REUSE;
 249 static enum {
 250         WRITE_BITMAP_FALSE = 0,
 251         WRITE_BITMAP_QUIET,
 252         WRITE_BITMAP_TRUE,
 253 } write_bitmap_index;
 254 static uint16_t write_bitmap_options = BITMAP_OPT_HASH_CACHE;
 255
 256 static int exclude_promisor_objects;
 257 static int exclude_promisor_objects_best_effort;
 258
 259 static int use_delta_islands;
 260
 261 static unsigned long delta_cache_size = 0;
 262 static unsigned long max_delta_cache_size = DEFAULT_DELTA_CACHE_SIZE;
 263 static unsigned long cache_max_small_delta_size = 1000;
 264
 265 static unsigned long window_memory_limit = 0;
 266
 267 static struct string_list uri_protocols = STRING_LIST_INIT_NODUP;
 268
 269 enum missing_action {
 270         MA_ERROR = 0,      /* fail if any missing objects are encountered */
 271         MA_ALLOW_ANY,      /* silently allow ALL missing objects */
 272         MA_ALLOW_PROMISOR, /* silently allow all missing PROMISOR objects */
 273 };
 274 static enum missing_action arg_missing_action;
 275 static show_object_fn fn_show_object;
 276
 277 struct configured_exclusion {
 278         struct oidmap_entry e;
 279         char *pack_hash_hex;
 280         char *uri;
 281 };
 282 static struct oidmap configured_exclusions;
 283
 284 static struct oidset excluded_by_config;
 285 static int name_hash_version = -1;
 286
 287 /**
 288  * Check whether the name_hash_version chosen by user input is appropriate,
 289  * and also validate whether it is compatible with other features.
 290  */
 291 static void validate_name_hash_version(void)
 292 {
 293         if (name_hash_version < 1 || name_hash_version > 2)
 294                 die(_("invalid --name-hash-version option: %d"), name_hash_version);
 295         if (write_bitmap_index && name_hash_version != 1) {
 296                 warning(_("currently, --write-bitmap-index requires --name-hash-version=1"));
 297                 name_hash_version = 1;
 298         }
 299 }
 300
 301 static inline uint32_t pack_name_hash_fn(const char *name)
 302 {
 303         static int seen_version = -1;
 304
 305         if (seen_version < 0)
 306                 seen_version = name_hash_version;
 307         else if (seen_version != name_hash_version)
 308                 BUG("name hash version changed from %d to %d mid-process",
 309                     seen_version, name_hash_version);
 310
 311         switch (name_hash_version) {
 312         case 1:
 313                 return pack_name_hash(name);
 314
 315         case 2:
 316                 return pack_name_hash_v2((const unsigned char *)name);
 317
 318         default:
 319                 BUG("invalid name-hash version: %d", name_hash_version);
 320         }
 321 }
 322
 323 /*
 324  * stats
 325  */
 326 static uint32_t written, written_delta;
 327 static uint32_t reused, reused_delta;
 328
 329 /*
 330  * Indexed commits
 331  */
 332 static struct commit **indexed_commits;
 333 static unsigned int indexed_commits_nr;
 334 static unsigned int indexed_commits_alloc;
 335
 336 static void index_commit_for_bitmap(struct commit *commit)
 337 {
 338         if (indexed_commits_nr >= indexed_commits_alloc) {
 339                 indexed_commits_alloc = (indexed_commits_alloc + 32) * 2;
 340                 REALLOC_ARRAY(indexed_commits, indexed_commits_alloc);
 341         }
 342
 343         indexed_commits[indexed_commits_nr++] = commit;
 344 }
 345
 346 static void *get_delta(struct object_entry *entry)
 347 {
 348         unsigned long size, base_size, delta_size;
 349         void *buf, *base_buf, *delta_buf;
 350         enum object_type type;
 351
 352         buf = repo_read_object_file(the_repository, &entry->idx.oid, &type,
 353                                     &size);
 354         if (!buf)
 355                 die(_("unable to read %s"), oid_to_hex(&entry->idx.oid));
 356         base_buf = repo_read_object_file(the_repository,
 357                                          &DELTA(entry)->idx.oid, &type,
 358                                          &base_size);
 359         if (!base_buf)
 360                 die("unable to read %s",
 361                     oid_to_hex(&DELTA(entry)->idx.oid));
 362         delta_buf = diff_delta(base_buf, base_size,
 363                                buf, size, &delta_size, 0);
 364         /*
 365          * We successfully computed this delta once but dropped it for
 366          * memory reasons. Something is very wrong if this time we
 367          * recompute and create a different delta.
 368          */
 369         if (!delta_buf || delta_size != DELTA_SIZE(entry))
 370                 BUG("delta size changed");
 371         free(buf);
 372         free(base_buf);
 373         return delta_buf;
 374 }
 375
 376 static unsigned long do_compress(void **pptr, unsigned long size)
 377 {
 378         git_zstream stream;
 379         void *in, *out;
 380         unsigned long maxsize;
 381
 382         git_deflate_init(&stream, pack_compression_level);
 383         maxsize = git_deflate_bound(&stream, size);
 384
 385         in = *pptr;
 386         out = xmalloc(maxsize);
 387         *pptr = out;
 388
 389         stream.next_in = in;
 390         stream.avail_in = size;
 391         stream.next_out = out;
 392         stream.avail_out = maxsize;
 393         while (git_deflate(&stream, Z_FINISH) == Z_OK)
 394                 ; /* nothing */
 395         git_deflate_end(&stream);
 396
 397         free(in);
 398         return stream.total_out;
 399 }
 400
 401 static unsigned long write_large_blob_data(struct git_istream *st, struct hashfile *f,
 402                                            const struct object_id *oid)
 403 {
 404         git_zstream stream;
 405         unsigned char ibuf[1024 * 16];
 406         unsigned char obuf[1024 * 16];
 407         unsigned long olen = 0;
 408
 409         git_deflate_init(&stream, pack_compression_level);
 410
 411         for (;;) {
 412                 ssize_t readlen;
 413                 int zret = Z_OK;
 414                 readlen = read_istream(st, ibuf, sizeof(ibuf));
 415                 if (readlen == -1)
 416                         die(_("unable to read %s"), oid_to_hex(oid));
 417
 418                 stream.next_in = ibuf;
 419                 stream.avail_in = readlen;
 420                 while ((stream.avail_in || readlen == 0) &&
 421                        (zret == Z_OK || zret == Z_BUF_ERROR)) {
 422                         stream.next_out = obuf;
 423                         stream.avail_out = sizeof(obuf);
 424                         zret = git_deflate(&stream, readlen ? 0 : Z_FINISH);
 425                         hashwrite(f, obuf, stream.next_out - obuf);
 426                         olen += stream.next_out - obuf;
 427                 }
 428                 if (stream.avail_in)
 429                         die(_("deflate error (%d)"), zret);
 430                 if (readlen == 0) {
 431                         if (zret != Z_STREAM_END)
 432                                 die(_("deflate error (%d)"), zret);
 433                         break;
 434                 }
 435         }
 436         git_deflate_end(&stream);
 437         return olen;
 438 }
 439
 440 /*
 441  * we are going to reuse the existing object data as is.  make
 442  * sure it is not corrupt.
 443  */
 444 static int check_pack_inflate(struct packed_git *p,
 445                 struct pack_window **w_curs,
 446                 off_t offset,
 447                 off_t len,
 448                 unsigned long expect)
 449 {
 450         git_zstream stream;
 451         unsigned char fakebuf[4096], *in;
 452         int st;
 453
 454         memset(&stream, 0, sizeof(stream));
 455         git_inflate_init(&stream);
 456         do {
 457                 in = use_pack(p, w_curs, offset, &stream.avail_in);
 458                 stream.next_in = in;
 459                 stream.next_out = fakebuf;
 460                 stream.avail_out = sizeof(fakebuf);
 461                 st = git_inflate(&stream, Z_FINISH);
 462                 offset += stream.next_in - in;
 463         } while (st == Z_OK || st == Z_BUF_ERROR);
 464         git_inflate_end(&stream);
 465         return (st == Z_STREAM_END &&
 466                 stream.total_out == expect &&
 467                 stream.total_in == len) ? 0 : -1;
 468 }
 469
 470 static void copy_pack_data(struct hashfile *f,
 471                 struct packed_git *p,
 472                 struct pack_window **w_curs,
 473                 off_t offset,
 474                 off_t len)
 475 {
 476         unsigned char *in;
 477         unsigned long avail;
 478
 479         while (len) {
 480                 in = use_pack(p, w_curs, offset, &avail);
 481                 if (avail > len)
 482                         avail = (unsigned long)len;
 483                 hashwrite(f, in, avail);
 484                 offset += avail;
 485                 len -= avail;
 486         }
 487 }
 488
 489 static inline int oe_size_greater_than(struct packing_data *pack,
 490                                        const struct object_entry *lhs,
 491                                        unsigned long rhs)
 492 {
 493         if (lhs->size_valid)
 494                 return lhs->size_ > rhs;
 495         if (rhs < pack->oe_size_limit) /* rhs < 2^x <= lhs ? */
 496                 return 1;
 497         return oe_get_size_slow(pack, lhs) > rhs;
 498 }
 499
 500 /* Return 0 if we will bust the pack-size limit */
 501 static unsigned long write_no_reuse_object(struct hashfile *f, struct object_entry *entry,
 502                                            unsigned long limit, int usable_delta)
 503 {
 504         unsigned long size, datalen;
 505         unsigned char header[MAX_PACK_OBJECT_HEADER],
 506                       dheader[MAX_PACK_OBJECT_HEADER];
 507         unsigned hdrlen;
 508         enum object_type type;
 509         void *buf;
 510         struct git_istream *st = NULL;
 511         const unsigned hashsz = the_hash_algo->rawsz;
 512
 513         if (!usable_delta) {
 514                 if (oe_type(entry) == OBJ_BLOB &&
 515                     oe_size_greater_than(&to_pack, entry,
 516                                          repo_settings_get_big_file_threshold(the_repository)) &&
 517                     (st = open_istream(the_repository, &entry->idx.oid, &type,
 518                                        &size, NULL)) != NULL)
 519                         buf = NULL;
 520                 else {
 521                         buf = repo_read_object_file(the_repository,
 522                                                     &entry->idx.oid, &type,
 523                                                     &size);
 524                         if (!buf)
 525                                 die(_("unable to read %s"),
 526                                     oid_to_hex(&entry->idx.oid));
 527                 }
 528                 /*
 529                  * make sure no cached delta data remains from a
 530                  * previous attempt before a pack split occurred.
 531                  */
 532                 FREE_AND_NULL(entry->delta_data);
 533                 entry->z_delta_size = 0;
 534         } else if (entry->delta_data) {
 535                 size = DELTA_SIZE(entry);
 536                 buf = entry->delta_data;
 537                 entry->delta_data = NULL;
 538                 type = (allow_ofs_delta && DELTA(entry)->idx.offset) ?
 539                         OBJ_OFS_DELTA : OBJ_REF_DELTA;
 540         } else {
 541                 buf = get_delta(entry);
 542                 size = DELTA_SIZE(entry);
 543                 type = (allow_ofs_delta && DELTA(entry)->idx.offset) ?
 544                         OBJ_OFS_DELTA : OBJ_REF_DELTA;
 545         }
 546
 547         if (st) /* large blob case, just assume we don't compress well */
 548                 datalen = size;
 549         else if (entry->z_delta_size)
 550                 datalen = entry->z_delta_size;
 551         else
 552                 datalen = do_compress(&buf, size);
 553
 554         /*
 555          * The object header is a byte of 'type' followed by zero or
 556          * more bytes of length.
 557          */
 558         hdrlen = encode_in_pack_object_header(header, sizeof(header),
 559                                               type, size);
 560
 561         if (type == OBJ_OFS_DELTA) {
 562                 /*
 563                  * Deltas with relative base contain an additional
 564                  * encoding of the relative offset for the delta
 565                  * base from this object's position in the pack.
 566                  */
 567                 off_t ofs = entry->idx.offset - DELTA(entry)->idx.offset;
 568                 unsigned pos = sizeof(dheader) - 1;
 569                 dheader[pos] = ofs & 127;
 570                 while (ofs >>= 7)
 571                         dheader[--pos] = 128 | (--ofs & 127);
 572                 if (limit && hdrlen + sizeof(dheader) - pos + datalen + hashsz >= limit) {
 573                         if (st)
 574                                 close_istream(st);
 575                         free(buf);
 576                         return 0;
 577                 }
 578                 hashwrite(f, header, hdrlen);
 579                 hashwrite(f, dheader + pos, sizeof(dheader) - pos);
 580                 hdrlen += sizeof(dheader) - pos;
 581         } else if (type == OBJ_REF_DELTA) {
 582                 /*
 583                  * Deltas with a base reference contain
 584                  * additional bytes for the base object ID.
 585                  */
 586                 if (limit && hdrlen + hashsz + datalen + hashsz >= limit) {
 587                         if (st)
 588                                 close_istream(st);
 589                         free(buf);
 590                         return 0;
 591                 }
 592                 hashwrite(f, header, hdrlen);
 593                 hashwrite(f, DELTA(entry)->idx.oid.hash, hashsz);
 594                 hdrlen += hashsz;
 595         } else {
 596                 if (limit && hdrlen + datalen + hashsz >= limit) {
 597                         if (st)
 598                                 close_istream(st);
 599                         free(buf);
 600                         return 0;
 601                 }
 602                 hashwrite(f, header, hdrlen);
 603         }
 604         if (st) {
 605                 datalen = write_large_blob_data(st, f, &entry->idx.oid);
 606                 close_istream(st);
 607         } else {
 608                 hashwrite(f, buf, datalen);
 609                 free(buf);
 610         }
 611
 612         return hdrlen + datalen;
 613 }
 614
 615 /* Return 0 if we will bust the pack-size limit */
 616 static off_t write_reuse_object(struct hashfile *f, struct object_entry *entry,
 617                                 unsigned long limit, int usable_delta)
 618 {
 619         struct packed_git *p = IN_PACK(entry);
 620         struct pack_window *w_curs = NULL;
 621         uint32_t pos;
 622         off_t offset;
 623         enum object_type type = oe_type(entry);
 624         off_t datalen;
 625         unsigned char header[MAX_PACK_OBJECT_HEADER],
 626                       dheader[MAX_PACK_OBJECT_HEADER];
 627         unsigned hdrlen;
 628         const unsigned hashsz = the_hash_algo->rawsz;
 629         unsigned long entry_size = SIZE(entry);
 630
 631         if (DELTA(entry))
 632                 type = (allow_ofs_delta && DELTA(entry)->idx.offset) ?
 633                         OBJ_OFS_DELTA : OBJ_REF_DELTA;
 634         hdrlen = encode_in_pack_object_header(header, sizeof(header),
 635                                               type, entry_size);
 636
 637         offset = entry->in_pack_offset;
 638         if (offset_to_pack_pos(p, offset, &pos) < 0)
 639                 die(_("write_reuse_object: could not locate %s, expected at "
 640                       "offset %"PRIuMAX" in pack %s"),
 641                     oid_to_hex(&entry->idx.oid), (uintmax_t)offset,
 642                     p->pack_name);
 643         datalen = pack_pos_to_offset(p, pos + 1) - offset;
 644         if (!pack_to_stdout && p->index_version > 1 &&
 645             check_pack_crc(p, &w_curs, offset, datalen,
 646                            pack_pos_to_index(p, pos))) {
 647                 error(_("bad packed object CRC for %s"),
 648                       oid_to_hex(&entry->idx.oid));
 649                 unuse_pack(&w_curs);
 650                 return write_no_reuse_object(f, entry, limit, usable_delta);
 651         }
 652
 653         offset += entry->in_pack_header_size;
 654         datalen -= entry->in_pack_header_size;
 655
 656         if (!pack_to_stdout && p->index_version == 1 &&
 657             check_pack_inflate(p, &w_curs, offset, datalen, entry_size)) {
 658                 error(_("corrupt packed object for %s"),
 659                       oid_to_hex(&entry->idx.oid));
 660                 unuse_pack(&w_curs);
 661                 return write_no_reuse_object(f, entry, limit, usable_delta);
 662         }
 663
 664         if (type == OBJ_OFS_DELTA) {
 665                 off_t ofs = entry->idx.offset - DELTA(entry)->idx.offset;
 666                 unsigned pos = sizeof(dheader) - 1;
 667                 dheader[pos] = ofs & 127;
 668                 while (ofs >>= 7)
 669                         dheader[--pos] = 128 | (--ofs & 127);
 670                 if (limit && hdrlen + sizeof(dheader) - pos + datalen + hashsz >= limit) {
 671                         unuse_pack(&w_curs);
 672                         return 0;
 673                 }
 674                 hashwrite(f, header, hdrlen);
 675                 hashwrite(f, dheader + pos, sizeof(dheader) - pos);
 676                 hdrlen += sizeof(dheader) - pos;
 677                 reused_delta++;
 678         } else if (type == OBJ_REF_DELTA) {
 679                 if (limit && hdrlen + hashsz + datalen + hashsz >= limit) {
 680                         unuse_pack(&w_curs);
 681                         return 0;
 682                 }
 683                 hashwrite(f, header, hdrlen);
 684                 hashwrite(f, DELTA(entry)->idx.oid.hash, hashsz);
 685                 hdrlen += hashsz;
 686                 reused_delta++;
 687         } else {
 688                 if (limit && hdrlen + datalen + hashsz >= limit) {
 689                         unuse_pack(&w_curs);
 690                         return 0;
 691                 }
 692                 hashwrite(f, header, hdrlen);
 693         }
 694         copy_pack_data(f, p, &w_curs, offset, datalen);
 695         unuse_pack(&w_curs);
 696         reused++;
 697         return hdrlen + datalen;
 698 }
 699
 700 /* Return 0 if we will bust the pack-size limit */
 701 static off_t write_object(struct hashfile *f,
 702                           struct object_entry *entry,
 703                           off_t write_offset)
 704 {
 705         unsigned long limit;
 706         off_t len;
 707         int usable_delta, to_reuse;
 708
 709         if (!pack_to_stdout)
 710                 crc32_begin(f);
 711
 712         /* apply size limit if limited packsize and not first object */
 713         if (!pack_size_limit || !nr_written)
 714                 limit = 0;
 715         else if (pack_size_limit <= write_offset)
 716                 /*
 717                  * the earlier object did not fit the limit; avoid
 718                  * mistaking this with unlimited (i.e. limit = 0).
 719                  */
 720                 limit = 1;
 721         else
 722                 limit = pack_size_limit - write_offset;
 723
 724         if (!DELTA(entry))
 725                 usable_delta = 0;       /* no delta */
 726         else if (!pack_size_limit)
 727                usable_delta = 1;        /* unlimited packfile */
 728         else if (DELTA(entry)->idx.offset == (off_t)-1)
 729                 usable_delta = 0;       /* base was written to another pack */
 730         else if (DELTA(entry)->idx.offset)
 731                 usable_delta = 1;       /* base already exists in this pack */
 732         else
 733                 usable_delta = 0;       /* base could end up in another pack */
 734
 735         if (!reuse_object)
 736                 to_reuse = 0;   /* explicit */
 737         else if (!IN_PACK(entry))
 738                 to_reuse = 0;   /* can't reuse what we don't have */
 739         else if (oe_type(entry) == OBJ_REF_DELTA ||
 740                  oe_type(entry) == OBJ_OFS_DELTA)
 741                                 /* check_object() decided it for us ... */
 742                 to_reuse = usable_delta;
 743                                 /* ... but pack split may override that */
 744         else if (oe_type(entry) != entry->in_pack_type)
 745                 to_reuse = 0;   /* pack has delta which is unusable */
 746         else if (DELTA(entry))
 747                 to_reuse = 0;   /* we want to pack afresh */
 748         else
 749                 to_reuse = 1;   /* we have it in-pack undeltified,
 750                                  * and we do not need to deltify it.
 751                                  */
 752
 753         if (!to_reuse)
 754                 len = write_no_reuse_object(f, entry, limit, usable_delta);
 755         else
 756                 len = write_reuse_object(f, entry, limit, usable_delta);
 757         if (!len)
 758                 return 0;
 759
 760         if (usable_delta)
 761                 written_delta++;
 762         written++;
 763         if (!pack_to_stdout)
 764                 entry->idx.crc32 = crc32_end(f);
 765         return len;
 766 }
 767
 768 enum write_one_status {
 769         WRITE_ONE_SKIP = -1, /* already written */
 770         WRITE_ONE_BREAK = 0, /* writing this will bust the limit; not written */
 771         WRITE_ONE_WRITTEN = 1, /* normal */
 772         WRITE_ONE_RECURSIVE = 2 /* already scheduled to be written */
 773 };
 774
 775 static enum write_one_status write_one(struct hashfile *f,
 776                                        struct object_entry *e,
 777                                        off_t *offset)
 778 {
 779         off_t size;
 780         int recursing;
 781
 782         /*
 783          * we set offset to 1 (which is an impossible value) to mark
 784          * the fact that this object is involved in "write its base
 785          * first before writing a deltified object" recursion.
 786          */
 787         recursing = (e->idx.offset == 1);
 788         if (recursing) {
 789                 warning(_("recursive delta detected for object %s"),
 790                         oid_to_hex(&e->idx.oid));
 791                 return WRITE_ONE_RECURSIVE;
 792         } else if (e->idx.offset || e->preferred_base) {
 793                 /* offset is non zero if object is written already. */
 794                 return WRITE_ONE_SKIP;
 795         }
 796
 797         /* if we are deltified, write out base object first. */
 798         if (DELTA(e)) {
 799                 e->idx.offset = 1; /* now recurse */
 800                 switch (write_one(f, DELTA(e), offset)) {
 801                 case WRITE_ONE_RECURSIVE:
 802                         /* we cannot depend on this one */
 803                         SET_DELTA(e, NULL);
 804                         break;
 805                 default:
 806                         break;
 807                 case WRITE_ONE_BREAK:
 808                         e->idx.offset = recursing;
 809                         return WRITE_ONE_BREAK;
 810                 }
 811         }
 812
 813         e->idx.offset = *offset;
 814         size = write_object(f, e, *offset);
 815         if (!size) {
 816                 e->idx.offset = recursing;
 817                 return WRITE_ONE_BREAK;
 818         }
 819         written_list[nr_written++] = &e->idx;
 820
 821         /* make sure off_t is sufficiently large not to wrap */
 822         if (signed_add_overflows(*offset, size))
 823                 die(_("pack too large for current definition of off_t"));
 824         *offset += size;
 825         return WRITE_ONE_WRITTEN;
 826 }
 827
 828 static int mark_tagged(const char *path UNUSED, const char *referent UNUSED, const struct object_id *oid,
 829                        int flag UNUSED, void *cb_data UNUSED)
 830 {
 831         struct object_id peeled;
 832         struct object_entry *entry = packlist_find(&to_pack, oid);
 833
 834         if (entry)
 835                 entry->tagged = 1;
 836         if (!peel_iterated_oid(the_repository, oid, &peeled)) {
 837                 entry = packlist_find(&to_pack, &peeled);
 838                 if (entry)
 839                         entry->tagged = 1;
 840         }
 841         return 0;
 842 }
 843
 844 static inline unsigned char oe_layer(struct packing_data *pack,
 845                                      struct object_entry *e)
 846 {
 847         if (!pack->layer)
 848                 return 0;
 849         return pack->layer[e - pack->objects];
 850 }
 851
 852 static inline void add_to_write_order(struct object_entry **wo,
 853                                unsigned int *endp,
 854                                struct object_entry *e)
 855 {
 856         if (e->filled || oe_layer(&to_pack, e) != write_layer)
 857                 return;
 858         wo[(*endp)++] = e;
 859         e->filled = 1;
 860 }
 861
 862 static void add_descendants_to_write_order(struct object_entry **wo,
 863                                            unsigned int *endp,
 864                                            struct object_entry *e)
 865 {
 866         int add_to_order = 1;
 867         while (e) {
 868                 if (add_to_order) {
 869                         struct object_entry *s;
 870                         /* add this node... */
 871                         add_to_write_order(wo, endp, e);
 872                         /* all its siblings... */
 873                         for (s = DELTA_SIBLING(e); s; s = DELTA_SIBLING(s)) {
 874                                 add_to_write_order(wo, endp, s);
 875                         }
 876                 }
 877                 /* drop down a level to add left subtree nodes if possible */
 878                 if (DELTA_CHILD(e)) {
 879                         add_to_order = 1;
 880                         e = DELTA_CHILD(e);
 881                 } else {
 882                         add_to_order = 0;
 883                         /* our sibling might have some children, it is next */
 884                         if (DELTA_SIBLING(e)) {
 885                                 e = DELTA_SIBLING(e);
 886                                 continue;
 887                         }
 888                         /* go back to our parent node */
 889                         e = DELTA(e);
 890                         while (e && !DELTA_SIBLING(e)) {
 891                                 /* we're on the right side of a subtree, keep
 892                                  * going up until we can go right again */
 893                                 e = DELTA(e);
 894                         }
 895                         if (!e) {
 896                                 /* done- we hit our original root node */
 897                                 return;
 898                         }
 899                         /* pass it off to sibling at this level */
 900                         e = DELTA_SIBLING(e);
 901                 }
 902         };
 903 }
 904
 905 static void add_family_to_write_order(struct object_entry **wo,
 906                                       unsigned int *endp,
 907                                       struct object_entry *e)
 908 {
 909         struct object_entry *root;
 910
 911         for (root = e; DELTA(root); root = DELTA(root))
 912                 ; /* nothing */
 913         add_descendants_to_write_order(wo, endp, root);
 914 }
 915
 916 static void compute_layer_order(struct object_entry **wo, unsigned int *wo_end)
 917 {
 918         unsigned int i, last_untagged;
 919         struct object_entry *objects = to_pack.objects;
 920
 921         for (i = 0; i < to_pack.nr_objects; i++) {
 922                 if (objects[i].tagged)
 923                         break;
 924                 add_to_write_order(wo, wo_end, &objects[i]);
 925         }
 926         last_untagged = i;
 927
 928         /*
 929          * Then fill all the tagged tips.
 930          */
 931         for (; i < to_pack.nr_objects; i++) {
 932                 if (objects[i].tagged)
 933                         add_to_write_order(wo, wo_end, &objects[i]);
 934         }
 935
 936         /*
 937          * And then all remaining commits and tags.
 938          */
 939         for (i = last_untagged; i < to_pack.nr_objects; i++) {
 940                 if (oe_type(&objects[i]) != OBJ_COMMIT &&
 941                     oe_type(&objects[i]) != OBJ_TAG)
 942                         continue;
 943                 add_to_write_order(wo, wo_end, &objects[i]);
 944         }
 945
 946         /*
 947          * And then all the trees.
 948          */
 949         for (i = last_untagged; i < to_pack.nr_objects; i++) {
 950                 if (oe_type(&objects[i]) != OBJ_TREE)
 951                         continue;
 952                 add_to_write_order(wo, wo_end, &objects[i]);
 953         }
 954
 955         /*
 956          * Finally all the rest in really tight order
 957          */
 958         for (i = last_untagged; i < to_pack.nr_objects; i++) {
 959                 if (!objects[i].filled && oe_layer(&to_pack, &objects[i]) == write_layer)
 960                         add_family_to_write_order(wo, wo_end, &objects[i]);
 961         }
 962 }
 963
 964 static struct object_entry **compute_write_order(void)
 965 {
 966         uint32_t max_layers = 1;
 967         unsigned int i, wo_end;
 968
 969         struct object_entry **wo;
 970         struct object_entry *objects = to_pack.objects;
 971
 972         for (i = 0; i < to_pack.nr_objects; i++) {
 973                 objects[i].tagged = 0;
 974                 objects[i].filled = 0;
 975                 SET_DELTA_CHILD(&objects[i], NULL);
 976                 SET_DELTA_SIBLING(&objects[i], NULL);
 977         }
 978
 979         /*
 980          * Fully connect delta_child/delta_sibling network.
 981          * Make sure delta_sibling is sorted in the original
 982          * recency order.
 983          */
 984         for (i = to_pack.nr_objects; i > 0;) {
 985                 struct object_entry *e = &objects[--i];
 986                 if (!DELTA(e))
 987                         continue;
 988                 /* Mark me as the first child */
 989                 e->delta_sibling_idx = DELTA(e)->delta_child_idx;
 990                 SET_DELTA_CHILD(DELTA(e), e);
 991         }
 992
 993         /*
 994          * Mark objects that are at the tip of tags.
 995          */
 996         refs_for_each_tag_ref(get_main_ref_store(the_repository), mark_tagged,
 997                               NULL);
 998
 999         if (use_delta_islands) {
1000                 max_layers = compute_pack_layers(&to_pack);
1001                 free_island_marks();
1002         }
1003
1004         ALLOC_ARRAY(wo, to_pack.nr_objects);
1005         wo_end = 0;
1006
1007         for (; write_layer < max_layers; ++write_layer)
1008                 compute_layer_order(wo, &wo_end);
1009
1010         if (wo_end != to_pack.nr_objects)
1011                 die(_("ordered %u objects, expected %"PRIu32),
1012                     wo_end, to_pack.nr_objects);
1013
1014         return wo;
1015 }
1016
1017
/*
 * A run of objects reused from an existing packfile.  All objects in a
 * chunk have the same relative position in the original packfile and
 * in the generated packfile.
 */
struct reused_chunk {
        /* Offset of the chunk's first object in the original packfile. */
        off_t original;
        /*
         * "original" minus the offset of the chunk's first object in
         * the generated packfile.
         */
        off_t difference;
};

/* The recorded chunks, how many are in use, and how many are allocated. */
static struct reused_chunk *reused_chunks;
static int reused_chunks_nr;
static int reused_chunks_alloc;
1034
1035 static void record_reused_object(off_t where, off_t offset)
1036 {
1037         if (reused_chunks_nr && reused_chunks[reused_chunks_nr-1].difference == offset)
1038                 return;
1039
1040         ALLOC_GROW(reused_chunks, reused_chunks_nr + 1,
1041                    reused_chunks_alloc);
1042         reused_chunks[reused_chunks_nr].original = where;
1043         reused_chunks[reused_chunks_nr].difference = offset;
1044         reused_chunks_nr++;
1045 }
1046
1047 /*
1048  * Binary search to find the chunk that "where" is in. Note
1049  * that we're not looking for an exact match, just the first
1050  * chunk that contains it (which implicitly ends at the start
1051  * of the next chunk.
1052  */
1053 static off_t find_reused_offset(off_t where)
1054 {
1055         int lo = 0, hi = reused_chunks_nr;
1056         while (lo < hi) {
1057                 int mi = lo + ((hi - lo) / 2);
1058                 if (where == reused_chunks[mi].original)
1059                         return reused_chunks[mi].difference;
1060                 if (where < reused_chunks[mi].original)
1061                         hi = mi;
1062                 else
1063                         lo = mi + 1;
1064         }
1065
1066         /*
1067          * The first chunk starts at zero, so we can't have gone below
1068          * there.
1069          */
1070         assert(lo);
1071         return reused_chunks[lo-1].difference;
1072 }
1073
1074 static void write_reused_pack_one(struct packed_git *reuse_packfile,
1075                                   size_t pos, struct hashfile *out,
1076                                   off_t pack_start,
1077                                   struct pack_window **w_curs)
1078 {
1079         off_t offset, next, cur;
1080         enum object_type type;
1081         unsigned long size;
1082
1083         offset = pack_pos_to_offset(reuse_packfile, pos);
1084         next = pack_pos_to_offset(reuse_packfile, pos + 1);
1085
1086         record_reused_object(offset,
1087                              offset - (hashfile_total(out) - pack_start));
1088
1089         cur = offset;
1090         type = unpack_object_header(reuse_packfile, w_curs, &cur, &size);
1091         assert(type >= 0);
1092
1093         if (type == OBJ_OFS_DELTA) {
1094                 off_t base_offset;
1095                 off_t fixup;
1096
1097                 unsigned char header[MAX_PACK_OBJECT_HEADER];
1098                 unsigned len;
1099
1100                 base_offset = get_delta_base(reuse_packfile, w_curs, &cur, type, offset);
1101                 assert(base_offset != 0);
1102
1103                 /* Convert to REF_DELTA if we must... */
1104                 if (!allow_ofs_delta) {
1105                         uint32_t base_pos;
1106                         struct object_id base_oid;
1107
1108                         if (offset_to_pack_pos(reuse_packfile, base_offset, &base_pos) < 0)
1109                                 die(_("expected object at offset %"PRIuMAX" "
1110                                       "in pack %s"),
1111                                     (uintmax_t)base_offset,
1112                                     reuse_packfile->pack_name);
1113
1114                         nth_packed_object_id(&base_oid, reuse_packfile,
1115                                              pack_pos_to_index(reuse_packfile, base_pos));
1116
1117                         len = encode_in_pack_object_header(header, sizeof(header),
1118                                                            OBJ_REF_DELTA, size);
1119                         hashwrite(out, header, len);
1120                         hashwrite(out, base_oid.hash, the_hash_algo->rawsz);
1121                         copy_pack_data(out, reuse_packfile, w_curs, cur, next - cur);
1122                         return;
1123                 }
1124
1125                 /* Otherwise see if we need to rewrite the offset... */
1126                 fixup = find_reused_offset(offset) -
1127                         find_reused_offset(base_offset);
1128                 if (fixup) {
1129                         unsigned char ofs_header[MAX_PACK_OBJECT_HEADER];
1130                         unsigned i, ofs_len;
1131                         off_t ofs = offset - base_offset - fixup;
1132
1133                         len = encode_in_pack_object_header(header, sizeof(header),
1134                                                            OBJ_OFS_DELTA, size);
1135
1136                         i = sizeof(ofs_header) - 1;
1137                         ofs_header[i] = ofs & 127;
1138                         while (ofs >>= 7)
1139                                 ofs_header[--i] = 128 | (--ofs & 127);
1140
1141                         ofs_len = sizeof(ofs_header) - i;
1142
1143                         hashwrite(out, header, len);
1144                         hashwrite(out, ofs_header + sizeof(ofs_header) - ofs_len, ofs_len);
1145                         copy_pack_data(out, reuse_packfile, w_curs, cur, next - cur);
1146                         return;
1147                 }
1148
1149                 /* ...otherwise we have no fixup, and can write it verbatim */
1150         }
1151
1152         copy_pack_data(out, reuse_packfile, w_curs, offset, next - offset);
1153 }
1154
1155 static size_t write_reused_pack_verbatim(struct bitmapped_pack *reuse_packfile,
1156                                          struct hashfile *out,
1157                                          struct pack_window **w_curs)
1158 {
1159         size_t pos = 0;
1160         size_t end;
1161
1162         if (reuse_packfile->bitmap_pos) {
1163                 /*
1164                  * We can't reuse whole chunks verbatim out of
1165                  * non-preferred packs since we can't guarantee that
1166                  * all duplicate objects were resolved in favor of
1167                  * that pack.
1168                  *
1169                  * Even if we have a whole eword_t worth of bits that
1170                  * could be reused, there may be objects between the
1171                  * objects corresponding to the first and last bit of
1172                  * that word which were selected from a different
1173                  * pack, causing us to send duplicate or unwanted
1174                  * objects.
1175                  *
1176                  * Handle non-preferred packs from within
1177                  * write_reused_pack(), which inspects and reuses
1178                  * individual bits.
1179                  */
1180                 return reuse_packfile->bitmap_pos / BITS_IN_EWORD;
1181         }
1182
1183         /*
1184          * Only read through the last word whose bits all correspond
1185          * to objects in the given packfile, since we must stop at a
1186          * word boundary.
1187          *
1188          * If there is no whole word to read (i.e. the packfile
1189          * contains fewer than BITS_IN_EWORD objects), then we'll
1190          * inspect bits one-by-one in write_reused_pack().
1191          */
1192         end = reuse_packfile->bitmap_nr / BITS_IN_EWORD;
1193         if (reuse_packfile_bitmap->word_alloc < end)
1194                 BUG("fewer words than expected in reuse_packfile_bitmap");
1195
1196         while (pos < end && reuse_packfile_bitmap->words[pos] == (eword_t)~0)
1197                 pos++;
1198
1199         if (pos) {
1200                 off_t to_write;
1201
1202                 written = (pos * BITS_IN_EWORD);
1203                 to_write = pack_pos_to_offset(reuse_packfile->p, written)
1204                         - sizeof(struct pack_header);
1205
1206                 /* We're recording one chunk, not one object. */
1207                 record_reused_object(sizeof(struct pack_header), 0);
1208                 hashflush(out);
1209                 copy_pack_data(out, reuse_packfile->p, w_curs,
1210                         sizeof(struct pack_header), to_write);
1211
1212                 display_progress(progress_state, written);
1213         }
1214         return pos;
1215 }
1216
1217 static void write_reused_pack(struct bitmapped_pack *reuse_packfile,
1218                               struct hashfile *f)
1219 {
1220         size_t i = reuse_packfile->bitmap_pos / BITS_IN_EWORD;
1221         uint32_t offset;
1222         off_t pack_start = hashfile_total(f) - sizeof(struct pack_header);
1223         struct pack_window *w_curs = NULL;
1224
1225         if (allow_ofs_delta)
1226                 i = write_reused_pack_verbatim(reuse_packfile, f, &w_curs);
1227
1228         for (; i < reuse_packfile_bitmap->word_alloc; ++i) {
1229                 eword_t word = reuse_packfile_bitmap->words[i];
1230                 size_t pos = (i * BITS_IN_EWORD);
1231
1232                 for (offset = 0; offset < BITS_IN_EWORD; ++offset) {
1233                         uint32_t pack_pos;
1234                         if ((word >> offset) == 0)
1235                                 break;
1236
1237                         offset += ewah_bit_ctz64(word >> offset);
1238                         if (pos + offset < reuse_packfile->bitmap_pos)
1239                                 continue;
1240                         if (pos + offset >= reuse_packfile->bitmap_pos + reuse_packfile->bitmap_nr)
1241                                 goto done;
1242
1243                         if (reuse_packfile->bitmap_pos) {
1244                                 /*
1245                                  * When doing multi-pack reuse on a
1246                                  * non-preferred pack, translate bit positions
1247                                  * from the MIDX pseudo-pack order back to their
1248                                  * pack-relative positions before attempting
1249                                  * reuse.
1250                                  */
1251                                 struct multi_pack_index *m = reuse_packfile->from_midx;
1252                                 uint32_t midx_pos;
1253                                 off_t pack_ofs;
1254
1255                                 if (!m)
1256                                         BUG("non-zero bitmap position without MIDX");
1257
1258                                 midx_pos = pack_pos_to_midx(m, pos + offset);
1259                                 pack_ofs = nth_midxed_offset(m, midx_pos);
1260
1261                                 if (offset_to_pack_pos(reuse_packfile->p,
1262                                                        pack_ofs, &pack_pos) < 0)
1263                                         BUG("could not find expected object at offset %"PRIuMAX" in pack %s",
1264                                             (uintmax_t)pack_ofs,
1265                                             pack_basename(reuse_packfile->p));
1266                         } else {
1267                                 /*
1268                                  * Can use bit positions directly, even for MIDX
1269                                  * bitmaps. See comment in try_partial_reuse()
1270                                  * for why.
1271                                  */
1272                                 pack_pos = pos + offset;
1273                         }
1274
1275                         write_reused_pack_one(reuse_packfile->p, pack_pos, f,
1276                                               pack_start, &w_curs);
1277                         display_progress(progress_state, ++written);
1278                 }
1279         }
1280
1281 done:
1282         unuse_pack(&w_curs);
1283 }
1284
1285 static void write_excluded_by_configs(void)
1286 {
1287         struct oidset_iter iter;
1288         const struct object_id *oid;
1289
1290         oidset_iter_init(&excluded_by_config, &iter);
1291         while ((oid = oidset_iter_next(&iter))) {
1292                 struct configured_exclusion *ex =
1293                         oidmap_get(&configured_exclusions, oid);
1294
1295                 if (!ex)
1296                         BUG("configured exclusion wasn't configured");
1297                 write_in_full(1, ex->pack_hash_hex, strlen(ex->pack_hash_hex));
1298                 write_in_full(1, " ", 1);
1299                 write_in_full(1, ex->uri, strlen(ex->uri));
1300                 write_in_full(1, "\n", 1);
1301         }
1302 }
1303
1304 static const char no_split_warning[] = N_(
1305 "disabling bitmap writing, packs are split due to pack.packSizeLimit"
1306 );
1307
1308 static void write_pack_file(void)
1309 {
1310         uint32_t i = 0, j;
1311         struct hashfile *f;
1312         off_t offset;
1313         uint32_t nr_remaining = nr_result;
1314         time_t last_mtime = 0;
1315         struct object_entry **write_order;
1316
1317         if (progress > pack_to_stdout)
1318                 progress_state = start_progress(the_repository,
1319                                                 _("Writing objects"), nr_result);
1320         ALLOC_ARRAY(written_list, to_pack.nr_objects);
1321         write_order = compute_write_order();
1322
1323         do {
1324                 unsigned char hash[GIT_MAX_RAWSZ];
1325                 char *pack_tmp_name = NULL;
1326
1327                 if (pack_to_stdout)
1328                         f = hashfd_throughput(the_repository->hash_algo, 1,
1329                                               "<stdout>", progress_state);
1330                 else
1331                         f = create_tmp_packfile(the_repository, &pack_tmp_name);
1332
1333                 offset = write_pack_header(f, nr_remaining);
1334
1335                 if (reuse_packfiles_nr) {
1336                         assert(pack_to_stdout);
1337                         for (j = 0; j < reuse_packfiles_nr; j++) {
1338                                 reused_chunks_nr = 0;
1339                                 write_reused_pack(&reuse_packfiles[j], f);
1340                                 if (reused_chunks_nr)
1341                                         reuse_packfiles_used_nr++;
1342                         }
1343                         offset = hashfile_total(f);
1344                 }
1345
1346                 nr_written = 0;
1347                 for (; i < to_pack.nr_objects; i++) {
1348                         struct object_entry *e = write_order[i];
1349                         if (write_one(f, e, &offset) == WRITE_ONE_BREAK)
1350                                 break;
1351                         display_progress(progress_state, written);
1352                 }
1353
1354                 if (pack_to_stdout) {
1355                         /*
1356                          * We never fsync when writing to stdout since we may
1357                          * not be writing to an actual pack file. For instance,
1358                          * the upload-pack code passes a pipe here. Calling
1359                          * fsync on a pipe results in unnecessary
1360                          * synchronization with the reader on some platforms.
1361                          */
1362                         finalize_hashfile(f, hash, FSYNC_COMPONENT_NONE,
1363                                           CSUM_HASH_IN_STREAM | CSUM_CLOSE);
1364                 } else if (nr_written == nr_remaining) {
1365                         finalize_hashfile(f, hash, FSYNC_COMPONENT_PACK,
1366                                           CSUM_HASH_IN_STREAM | CSUM_FSYNC | CSUM_CLOSE);
1367                 } else {
1368                         /*
1369                          * If we wrote the wrong number of entries in the
1370                          * header, rewrite it like in fast-import.
1371                          */
1372
1373                         int fd = finalize_hashfile(f, hash, FSYNC_COMPONENT_PACK, 0);
1374                         fixup_pack_header_footer(the_hash_algo, fd, hash,
1375                                                  pack_tmp_name, nr_written,
1376                                                  hash, offset);
1377                         close(fd);
1378                         if (write_bitmap_index) {
1379                                 if (write_bitmap_index != WRITE_BITMAP_QUIET)
1380                                         warning(_(no_split_warning));
1381                                 write_bitmap_index = 0;
1382                         }
1383                 }
1384
1385                 if (!pack_to_stdout) {
1386                         struct stat st;
1387                         struct strbuf tmpname = STRBUF_INIT;
1388                         struct bitmap_writer bitmap_writer;
1389                         char *idx_tmp_name = NULL;
1390
1391                         /*
1392                          * Packs are runtime accessed in their mtime
1393                          * order since newer packs are more likely to contain
1394                          * younger objects.  So if we are creating multiple
1395                          * packs then we should modify the mtime of later ones
1396                          * to preserve this property.
1397                          */
1398                         if (stat(pack_tmp_name, &st) < 0) {
1399                                 warning_errno(_("failed to stat %s"), pack_tmp_name);
1400                         } else if (!last_mtime) {
1401                                 last_mtime = st.st_mtime;
1402                         } else {
1403                                 struct utimbuf utb;
1404                                 utb.actime = st.st_atime;
1405                                 utb.modtime = --last_mtime;
1406                                 if (utime(pack_tmp_name, &utb) < 0)
1407                                         warning_errno(_("failed utime() on %s"), pack_tmp_name);
1408                         }
1409
1410                         strbuf_addf(&tmpname, "%s-%s.", base_name,
1411                                     hash_to_hex(hash));
1412
1413                         if (write_bitmap_index) {
1414                                 bitmap_writer_init(&bitmap_writer,
1415                                                    the_repository, &to_pack,
1416                                                    NULL);
1417                                 bitmap_writer_set_checksum(&bitmap_writer, hash);
1418                                 bitmap_writer_build_type_index(&bitmap_writer,
1419                                                                written_list);
1420                         }
1421
1422                         if (cruft)
1423                                 pack_idx_opts.flags |= WRITE_MTIMES;
1424
1425                         stage_tmp_packfiles(the_repository, &tmpname,
1426                                             pack_tmp_name, written_list,
1427                                             nr_written, &to_pack,
1428                                             &pack_idx_opts, hash,
1429                                             &idx_tmp_name);
1430
1431                         if (write_bitmap_index) {
1432                                 size_t tmpname_len = tmpname.len;
1433
1434                                 strbuf_addstr(&tmpname, "bitmap");
1435                                 stop_progress(&progress_state);
1436
1437                                 bitmap_writer_show_progress(&bitmap_writer,
1438                                                             progress);
1439                                 bitmap_writer_select_commits(&bitmap_writer,
1440                                                              indexed_commits,
1441                                                              indexed_commits_nr);
1442                                 if (bitmap_writer_build(&bitmap_writer) < 0)
1443                                         die(_("failed to write bitmap index"));
1444                                 bitmap_writer_finish(&bitmap_writer,
1445                                                      written_list,
1446                                                      tmpname.buf, write_bitmap_options);
1447                                 bitmap_writer_free(&bitmap_writer);
1448                                 write_bitmap_index = 0;
1449                                 strbuf_setlen(&tmpname, tmpname_len);
1450                         }
1451
1452                         rename_tmp_packfile_idx(&tmpname, &idx_tmp_name);
1453
1454                         free(idx_tmp_name);
1455                         strbuf_release(&tmpname);
1456                         free(pack_tmp_name);
1457                         puts(hash_to_hex(hash));
1458                 }
1459
1460                 /* mark written objects as written to previous pack */
1461                 for (j = 0; j < nr_written; j++) {
1462                         written_list[j]->offset = (off_t)-1;
1463                 }
1464                 nr_remaining -= nr_written;
1465         } while (nr_remaining && i < to_pack.nr_objects);
1466
1467         free(written_list);
1468         free(write_order);
1469         stop_progress(&progress_state);
1470         if (written != nr_result)
1471                 die(_("wrote %"PRIu32" objects while expecting %"PRIu32),
1472                     written, nr_result);
1473         trace2_data_intmax("pack-objects", the_repository,
1474                            "write_pack_file/wrote", nr_result);
1475 }
1476
1477 static int no_try_delta(const char *path)
1478 {
1479         static struct attr_check *check;
1480
1481         if (!check)
1482                 check = attr_check_initl("delta", NULL);
1483         git_check_attr(the_repository->index, path, check);
1484         if (ATTR_FALSE(check->items[0].value))
1485                 return 1;
1486         return 0;
1487 }
1488
1489 /*
1490  * When adding an object, check whether we have already added it
1491  * to our packing list. If so, we can skip. However, if we are
1492  * being asked to excludei t, but the previous mention was to include
1493  * it, make sure to adjust its flags and tweak our numbers accordingly.
1494  *
1495  * As an optimization, we pass out the index position where we would have
1496  * found the item, since that saves us from having to look it up again a
1497  * few lines later when we want to add the new entry.
1498  */
1499 static int have_duplicate_entry(const struct object_id *oid,
1500                                 int exclude)
1501 {
1502         struct object_entry *entry;
1503
1504         if (reuse_packfile_bitmap &&
1505             bitmap_walk_contains(bitmap_git, reuse_packfile_bitmap, oid))
1506                 return 1;
1507
1508         entry = packlist_find(&to_pack, oid);
1509         if (!entry)
1510                 return 0;
1511
1512         if (exclude) {
1513                 if (!entry->preferred_base)
1514                         nr_result--;
1515                 entry->preferred_base = 1;
1516         }
1517
1518         return 1;
1519 }
1520
1521 static int want_cruft_object_mtime(struct repository *r,
1522                                    const struct object_id *oid,
1523                                    unsigned flags, uint32_t mtime)
1524 {
1525         struct packed_git **cache;
1526
1527         for (cache = kept_pack_cache(r, flags); *cache; cache++) {
1528                 struct packed_git *p = *cache;
1529                 off_t ofs;
1530                 uint32_t candidate_mtime;
1531
1532                 ofs = find_pack_entry_one(oid, p);
1533                 if (!ofs)
1534                         continue;
1535
1536                 /*
1537                  * We have a copy of the object 'oid' in a non-cruft
1538                  * pack. We can avoid packing an additional copy
1539                  * regardless of what the existing copy's mtime is since
1540                  * it is outside of a cruft pack.
1541                  */
1542                 if (!p->is_cruft)
1543                         return 0;
1544
1545                 /*
1546                  * If we have a copy of the object 'oid' in a cruft
1547                  * pack, then either read the cruft pack's mtime for
1548                  * that object, or, if that can't be loaded, assume the
1549                  * pack's mtime itself.
1550                  */
1551                 if (!load_pack_mtimes(p)) {
1552                         uint32_t pos;
1553                         if (offset_to_pack_pos(p, ofs, &pos) < 0)
1554                                 continue;
1555                         candidate_mtime = nth_packed_mtime(p, pos);
1556                 } else {
1557                         candidate_mtime = p->mtime;
1558                 }
1559
1560                 /*
1561                  * We have a surviving copy of the object in a cruft
1562                  * pack whose mtime is greater than or equal to the one
1563                  * we are considering. We can thus avoid packing an
1564                  * additional copy of that object.
1565                  */
1566                 if (mtime <= candidate_mtime)
1567                         return 0;
1568         }
1569
1570         return -1;
1571 }
1572
1573 static int want_found_object(const struct object_id *oid, int exclude,
1574                              struct packed_git *p, uint32_t mtime)
1575 {
1576         if (exclude)
1577                 return 1;
1578         if (incremental)
1579                 return 0;
1580
1581         if (!is_pack_valid(p))
1582                 return -1;
1583
1584         /*
1585          * When asked to do --local (do not include an object that appears in a
1586          * pack we borrow from elsewhere) or --honor-pack-keep (do not include
1587          * an object that appears in a pack marked with .keep), finding a pack
1588          * that matches the criteria is sufficient for us to decide to omit it.
1589          * However, even if this pack does not satisfy the criteria, we need to
1590          * make sure no copy of this object appears in _any_ pack that makes us
1591          * to omit the object, so we need to check all the packs.
1592          *
1593          * We can however first check whether these options can possibly matter;
1594          * if they do not matter we know we want the object in generated pack.
1595          * Otherwise, we signal "-1" at the end to tell the caller that we do
1596          * not know either way, and it needs to check more packs.
1597          */
1598
1599         /*
1600          * Objects in packs borrowed from elsewhere are discarded regardless of
1601          * if they appear in other packs that weren't borrowed.
1602          */
1603         if (local && !p->pack_local)
1604                 return 0;
1605
1606         /*
1607          * Then handle .keep first, as we have a fast(er) path there.
1608          */
1609         if (ignore_packed_keep_on_disk || ignore_packed_keep_in_core) {
1610                 /*
1611                  * Set the flags for the kept-pack cache to be the ones we want
1612                  * to ignore.
1613                  *
1614                  * That is, if we are ignoring objects in on-disk keep packs,
1615                  * then we want to search through the on-disk keep and ignore
1616                  * the in-core ones.
1617                  */
1618                 unsigned flags = 0;
1619                 if (ignore_packed_keep_on_disk)
1620                         flags |= ON_DISK_KEEP_PACKS;
1621                 if (ignore_packed_keep_in_core)
1622                         flags |= IN_CORE_KEEP_PACKS;
1623
1624                 /*
1625                  * If the object is in a pack that we want to ignore, *and* we
1626                  * don't have any cruft packs that are being retained, we can
1627                  * abort quickly.
1628                  */
1629                 if (!ignore_packed_keep_in_core_has_cruft) {
1630                         if (ignore_packed_keep_on_disk && p->pack_keep)
1631                                 return 0;
1632                         if (ignore_packed_keep_in_core && p->pack_keep_in_core)
1633                                 return 0;
1634                         if (has_object_kept_pack(p->repo, oid, flags))
1635                                 return 0;
1636                 } else {
1637                         /*
1638                          * But if there is at least one cruft pack which
1639                          * is being kept, we only want to include the
1640                          * provided object if it has a strictly greater
1641                          * mtime than any existing cruft copy.
1642                          */
1643                         if (!want_cruft_object_mtime(p->repo, oid, flags,
1644                                                      mtime))
1645                                 return 0;
1646                 }
1647         }
1648
1649         /*
1650          * At this point we know definitively that either we don't care about
1651          * keep-packs, or the object is not in one. Keep checking other
1652          * conditions...
1653          */
1654         if (!local || !have_non_local_packs)
1655                 return 1;
1656
1657         /* we don't know yet; keep looking for more packs */
1658         return -1;
1659 }
1660
/*
 * Check a single pack "p" for "oid". When the object is found there and
 * no pack has been recorded yet, "p" and the offset are stored in
 * *found_pack and *found_offset, provided the pack is valid.
 *
 * Returns the verdict of want_found_object(), or -1 when the object is
 * not in "p" or "p" is not valid.
 */
static int want_object_in_pack_one(struct packed_git *p,
                                   const struct object_id *oid,
                                   int exclude,
                                   struct packed_git **found_pack,
                                   off_t *found_offset,
                                   uint32_t found_mtime)
{
        /* Reuse the known offset when "p" is the recorded pack. */
        off_t offset = (p == *found_pack) ? *found_offset
                                          : find_pack_entry_one(oid, p);

        if (!offset)
                return -1;

        if (!*found_pack) {
                if (!is_pack_valid(p))
                        return -1;
                *found_offset = offset;
                *found_pack = p;
        }

        return want_found_object(oid, exclude, p, found_mtime);
}
1686
1687 /*
1688  * Check whether we want the object in the pack (e.g., we do not want
1689  * objects found in non-local stores if the "--local" option was used).
1690  *
1691  * If the caller already knows an existing pack it wants to take the object
1692  * from, that is passed in *found_pack and *found_offset; otherwise this
1693  * function finds if there is any pack that has the object and returns the pack
1694  * and its offset in these variables.
1695  */
1696 static int want_object_in_pack_mtime(const struct object_id *oid,
1697                                      int exclude,
1698                                      struct packed_git **found_pack,
1699                                      off_t *found_offset,
1700                                      uint32_t found_mtime)
1701 {
1702         int want;
1703         struct list_head *pos;
1704         struct multi_pack_index *m;
1705
1706         if (!exclude && local && has_loose_object_nonlocal(oid))
1707                 return 0;
1708
1709         /*
1710          * If we already know the pack object lives in, start checks from that
1711          * pack - in the usual case when neither --local was given nor .keep files
1712          * are present we will determine the answer right now.
1713          */
1714         if (*found_pack) {
1715                 want = want_found_object(oid, exclude, *found_pack,
1716                                          found_mtime);
1717                 if (want != -1)
1718                         return want;
1719
1720                 *found_pack = NULL;
1721                 *found_offset = 0;
1722         }
1723
1724         for (m = get_multi_pack_index(the_repository); m; m = m->next) {
1725                 struct pack_entry e;
1726                 if (fill_midx_entry(the_repository, oid, &e, m)) {
1727                         want = want_object_in_pack_one(e.p, oid, exclude, found_pack, found_offset, found_mtime);
1728                         if (want != -1)
1729                                 return want;
1730                 }
1731         }
1732
1733         list_for_each(pos, get_packed_git_mru(the_repository)) {
1734                 struct packed_git *p = list_entry(pos, struct packed_git, mru);
1735                 want = want_object_in_pack_one(p, oid, exclude, found_pack, found_offset, found_mtime);
1736                 if (!exclude && want > 0)
1737                         list_move(&p->mru,
1738                                   get_packed_git_mru(the_repository));
1739                 if (want != -1)
1740                         return want;
1741         }
1742
1743         if (uri_protocols.nr) {
1744                 struct configured_exclusion *ex =
1745                         oidmap_get(&configured_exclusions, oid);
1746                 int i;
1747                 const char *p;
1748
1749                 if (ex) {
1750                         for (i = 0; i < uri_protocols.nr; i++) {
1751                                 if (skip_prefix(ex->uri,
1752                                                 uri_protocols.items[i].string,
1753                                                 &p) &&
1754                                     *p == ':') {
1755                                         oidset_insert(&excluded_by_config, oid);
1756                                         return 0;
1757                                 }
1758                         }
1759                 }
1760         }
1761
1762         return 1;
1763 }
1764
/*
 * Convenience form of want_object_in_pack_mtime() for callers that have
 * no mtime to offer; an mtime of 0 is passed along.
 */
static inline int want_object_in_pack(const struct object_id *oid,
                                      int exclude,
                                      struct packed_git **found_pack,
                                      off_t *found_offset)
{
        const uint32_t no_mtime = 0;

        return want_object_in_pack_mtime(oid, exclude, found_pack,
                                         found_offset, no_mtime);
}
1773
1774 static struct object_entry *create_object_entry(const struct object_id *oid,
1775                                                 enum object_type type,
1776                                                 uint32_t hash,
1777                                                 int exclude,
1778                                                 int no_try_delta,
1779                                                 struct packed_git *found_pack,
1780                                                 off_t found_offset)
1781 {
1782         struct object_entry *entry;
1783
1784         entry = packlist_alloc(&to_pack, oid);
1785         entry->hash = hash;
1786         oe_set_type(entry, type);
1787         if (exclude)
1788                 entry->preferred_base = 1;
1789         else
1790                 nr_result++;
1791         if (found_pack) {
1792                 oe_set_in_pack(&to_pack, entry, found_pack);
1793                 entry->in_pack_offset = found_offset;
1794         }
1795
1796         entry->no_try_delta = no_try_delta;
1797
1798         return entry;
1799 }
1800
1801 static const char no_closure_warning[] = N_(
1802 "disabling bitmap writing, as some objects are not being packed"
1803 );
1804
1805 static int add_object_entry(const struct object_id *oid, enum object_type type,
1806                             const char *name, int exclude)
1807 {
1808         struct packed_git *found_pack = NULL;
1809         off_t found_offset = 0;
1810
1811         display_progress(progress_state, ++nr_seen);
1812
1813         if (have_duplicate_entry(oid, exclude))
1814                 return 0;
1815
1816         if (!want_object_in_pack(oid, exclude, &found_pack, &found_offset)) {
1817                 /* The pack is missing an object, so it will not have closure */
1818                 if (write_bitmap_index) {
1819                         if (write_bitmap_index != WRITE_BITMAP_QUIET)
1820                                 warning(_(no_closure_warning));
1821                         write_bitmap_index = 0;
1822                 }
1823                 return 0;
1824         }
1825
1826         create_object_entry(oid, type, pack_name_hash_fn(name),
1827                             exclude, name && no_try_delta(name),
1828                             found_pack, found_offset);
1829         return 1;
1830 }
1831
1832 static int add_object_entry_from_bitmap(const struct object_id *oid,
1833                                         enum object_type type,
1834                                         int flags UNUSED, uint32_t name_hash,
1835                                         struct packed_git *pack, off_t offset,
1836                                         void *payload UNUSED)
1837 {
1838         display_progress(progress_state, ++nr_seen);
1839
1840         if (have_duplicate_entry(oid, 0))
1841                 return 0;
1842
1843         if (!want_object_in_pack(oid, 0, &pack, &offset))
1844                 return 0;
1845
1846         create_object_entry(oid, type, name_hash, 0, 0, pack, offset);
1847         return 1;
1848 }
1849
/*
 * A tree object held in memory for preferred-base lookups. Entries are
 * handed out by pbase_tree_get() and given back with pbase_tree_put().
 */
struct pbase_tree_cache {
        /* Object id of the tree whose contents are held here. */
        struct object_id oid;
        /*
         * Number of outstanding users; pbase_tree_get() only reuses a
         * table slot whose entry has ref == 0.
         */
        int ref;
        /*
         * Non-zero for an entry that is not stored in the table; such an
         * entry is freed by pbase_tree_put().
         */
        int temporary;
        /* Raw tree contents and their size in bytes. */
        void *tree_data;
        unsigned long tree_size;
};
1857
1858 static struct pbase_tree_cache *(pbase_tree_cache[256]);
1859 static int pbase_tree_cache_ix(const struct object_id *oid)
1860 {
1861         return oid->hash[0] % ARRAY_SIZE(pbase_tree_cache);
1862 }
1863 static int pbase_tree_cache_ix_incr(int ix)
1864 {
1865         return (ix+1) % ARRAY_SIZE(pbase_tree_cache);
1866 }
1867
/*
 * Singly linked list of the trees registered by add_preferred_base().
 * New nodes are pushed at the head; cleanup_preferred_base() frees the
 * whole list.
 */
static struct pbase_tree {
        struct pbase_tree *next;
        /* This is a phony "cache" entry; we are not
         * going to evict it or find it through _get()
         * mechanism -- this is for the toplevel node that
         * would almost always change with any commit.
         */
        struct pbase_tree_cache pcache;
} *pbase_tree;
1877
/*
 * Return an entry holding the contents of tree "oid", reading the object
 * when it is not already in the table. The entry comes back with a
 * reference held (ref incremented on a hit, set to 1 otherwise); callers
 * hand it back with pbase_tree_put().
 *
 * Returns NULL when the object cannot be read or is not a tree.
 */
static struct pbase_tree_cache *pbase_tree_get(const struct object_id *oid)
{
        struct pbase_tree_cache *ent, *nent;
        void *data;
        unsigned long size;
        enum object_type type;
        int neigh;
        int my_ix = pbase_tree_cache_ix(oid);
        /* Slot we may store into if the lookup misses; -1 means none. */
        int available_ix = -1;

        /* pbase-tree-cache acts as a limited hashtable.
         * your object will be found at your index or within a few
         * slots after that slot if it is cached.
         */
        for (neigh = 0; neigh < 8; neigh++) {
                ent = pbase_tree_cache[my_ix];
                if (ent && oideq(&ent->oid, oid)) {
                        /* Hit: take another reference and hand it out. */
                        ent->ref++;
                        return ent;
                }
                /*
                 * Miss at this slot. Remember it as the place to store
                 * into when no slot has been chosen yet and this one is
                 * empty or unreferenced, or when it is empty while the
                 * slot chosen earlier is occupied.
                 */
                else if (((available_ix < 0) && (!ent || !ent->ref)) ||
                         ((0 <= available_ix) &&
                          (!ent && pbase_tree_cache[available_ix])))
                        available_ix = my_ix;
                /* An empty slot ends the probe sequence. */
                if (!ent)
                        break;
                my_ix = pbase_tree_cache_ix_incr(my_ix);
        }

        /* Did not find one.  Either we got a bogus request or
         * we need to read and perhaps cache.
         */
        data = repo_read_object_file(the_repository, oid, &type, &size);
        if (!data)
                return NULL;
        if (type != OBJ_TREE) {
                free(data);
                return NULL;
        }

        /* We need to either cache or return a throwaway copy */

        if (available_ix < 0)
                ent = NULL;
        else {
                ent = pbase_tree_cache[available_ix];
                my_ix = available_ix;
        }

        if (!ent) {
                /*
                 * Either no slot is usable (throwaway entry) or the
                 * chosen slot is empty (fresh entry to be stored).
                 */
                nent = xmalloc(sizeof(*nent));
                nent->temporary = (available_ix < 0);
        }
        else {
                /* evict and reuse */
                free(ent->tree_data);
                nent = ent;
        }
        oidcpy(&nent->oid, oid);
        nent->tree_data = data;
        nent->tree_size = size;
        nent->ref = 1;
        /* Only non-temporary entries are placed in the table. */
        if (!nent->temporary)
                pbase_tree_cache[my_ix] = nent;
        return nent;
}
1944
1945 static void pbase_tree_put(struct pbase_tree_cache *cache)
1946 {
1947         if (!cache->temporary) {
1948                 cache->ref--;
1949                 return;
1950         }
1951         free(cache->tree_data);
1952         free(cache);
1953 }
1954
/*
 * Length of the leading part of "name" that contains neither a newline
 * nor a slash, i.e. the length of its first path component.
 */
static size_t name_cmp_len(const char *name)
{
        const char *end = name;

        while (*end && *end != '\n' && *end != '/')
                end++;
        return end - name;
}
1959
1960 static void add_pbase_object(struct tree_desc *tree,
1961                              const char *name,
1962                              size_t cmplen,
1963                              const char *fullname)
1964 {
1965         struct name_entry entry;
1966         int cmp;
1967
1968         while (tree_entry(tree,&entry)) {
1969                 if (S_ISGITLINK(entry.mode))
1970                         continue;
1971                 cmp = tree_entry_len(&entry) != cmplen ? 1 :
1972                       memcmp(name, entry.path, cmplen);
1973                 if (cmp > 0)
1974                         continue;
1975                 if (cmp < 0)
1976                         return;
1977                 if (name[cmplen] != '/') {
1978                         add_object_entry(&entry.oid,
1979                                          object_type(entry.mode),
1980                                          fullname, 1);
1981                         return;
1982                 }
1983                 if (S_ISDIR(entry.mode)) {
1984                         struct tree_desc sub;
1985                         struct pbase_tree_cache *tree;
1986                         const char *down = name+cmplen+1;
1987                         size_t downlen = name_cmp_len(down);
1988
1989                         tree = pbase_tree_get(&entry.oid);
1990                         if (!tree)
1991                                 return;
1992                         init_tree_desc(&sub, &tree->oid,
1993                                        tree->tree_data, tree->tree_size);
1994
1995                         add_pbase_object(&sub, down, downlen, fullname);
1996                         pbase_tree_put(tree);
1997                 }
1998         }
1999 }
2000
/*
 * Name hashes of the paths already handled as preferred bases. The
 * binary search below expects the array in descending order.
 */
static unsigned *done_pbase_paths;
static int done_pbase_paths_num;
static int done_pbase_paths_alloc;

/*
 * Binary-search done_pbase_paths for "hash".
 *
 * Returns the index of the match, or -(insertion point) - 1 when the
 * hash is not present.
 */
static int done_pbase_path_pos(unsigned hash)
{
        int first = 0;
        int past_last = done_pbase_paths_num;

        while (first < past_last) {
                int mid = first + (past_last - first) / 2;
                unsigned probe = done_pbase_paths[mid];

                if (probe == hash)
                        return mid;
                if (probe > hash)
                        first = mid + 1;
                else
                        past_last = mid;
        }
        return -first - 1;
}
2019
2020 static int check_pbase_path(unsigned hash)
2021 {
2022         int pos = done_pbase_path_pos(hash);
2023         if (0 <= pos)
2024                 return 1;
2025         pos = -pos - 1;
2026         ALLOC_GROW(done_pbase_paths,
2027                    done_pbase_paths_num + 1,
2028                    done_pbase_paths_alloc);
2029         done_pbase_paths_num++;
2030         if (pos < done_pbase_paths_num)
2031                 MOVE_ARRAY(done_pbase_paths + pos + 1, done_pbase_paths + pos,
2032                            done_pbase_paths_num - pos - 1);
2033         done_pbase_paths[pos] = hash;
2034         return 0;
2035 }
2036
2037 static void add_preferred_base_object(const char *name)
2038 {
2039         struct pbase_tree *it;
2040         size_t cmplen;
2041         unsigned hash = pack_name_hash_fn(name);
2042
2043         if (!num_preferred_base || check_pbase_path(hash))
2044                 return;
2045
2046         cmplen = name_cmp_len(name);
2047         for (it = pbase_tree; it; it = it->next) {
2048                 if (cmplen == 0) {
2049                         add_object_entry(&it->pcache.oid, OBJ_TREE, NULL, 1);
2050                 }
2051                 else {
2052                         struct tree_desc tree;
2053                         init_tree_desc(&tree, &it->pcache.oid,
2054                                        it->pcache.tree_data, it->pcache.tree_size);
2055                         add_pbase_object(&tree, name, cmplen, name);
2056                 }
2057         }
2058 }
2059
2060 static void add_preferred_base(struct object_id *oid)
2061 {
2062         struct pbase_tree *it;
2063         void *data;
2064         unsigned long size;
2065         struct object_id tree_oid;
2066
2067         if (window <= num_preferred_base++)
2068                 return;
2069
2070         data = read_object_with_reference(the_repository, oid,
2071                                           OBJ_TREE, &size, &tree_oid);
2072         if (!data)
2073                 return;
2074
2075         for (it = pbase_tree; it; it = it->next) {
2076                 if (oideq(&it->pcache.oid, &tree_oid)) {
2077                         free(data);
2078                         return;
2079                 }
2080         }
2081
2082         CALLOC_ARRAY(it, 1);
2083         it->next = pbase_tree;
2084         pbase_tree = it;
2085
2086         oidcpy(&it->pcache.oid, &tree_oid);
2087         it->pcache.tree_data = data;
2088         it->pcache.tree_size = size;
2089 }
2090
2091 static void cleanup_preferred_base(void)
2092 {
2093         struct pbase_tree *it;
2094         unsigned i;
2095
2096         it = pbase_tree;
2097         pbase_tree = NULL;
2098         while (it) {
2099                 struct pbase_tree *tmp = it;
2100                 it = tmp->next;
2101                 free(tmp->pcache.tree_data);
2102                 free(tmp);
2103         }
2104
2105         for (i = 0; i < ARRAY_SIZE(pbase_tree_cache); i++) {
2106                 if (!pbase_tree_cache[i])
2107                         continue;
2108                 free(pbase_tree_cache[i]->tree_data);
2109                 FREE_AND_NULL(pbase_tree_cache[i]);
2110         }
2111
2112         FREE_AND_NULL(done_pbase_paths);
2113         done_pbase_paths_num = done_pbase_paths_alloc = 0;
2114 }
2115
2116 /*
2117  * Return 1 iff the object specified by "delta" can be sent
2118  * literally as a delta against the base in "base_sha1". If
2119  * so, then *base_out will point to the entry in our packing
2120  * list, or NULL if we must use the external-base list.
2121  *
2122  * Depth value does not matter - find_deltas() will
2123  * never consider reused delta as the base object to
2124  * deltify other objects against, in order to avoid
2125  * circular deltas.
2126  */
2127 static int can_reuse_delta(const struct object_id *base_oid,
2128                            struct object_entry *delta,
2129                            struct object_entry **base_out)
2130 {
2131         struct object_entry *base;
2132
2133         /*
2134          * First see if we're already sending the base (or it's explicitly in
2135          * our "excluded" list).
2136          */
2137         base = packlist_find(&to_pack, base_oid);
2138         if (base) {
2139                 if (!in_same_island(&delta->idx.oid, &base->idx.oid))
2140                         return 0;
2141                 *base_out = base;
2142                 return 1;
2143         }
2144
2145         /*
2146          * Otherwise, reachability bitmaps may tell us if the receiver has it,
2147          * even if it was buried too deep in history to make it into the
2148          * packing list.
2149          */
2150         if (thin && bitmap_has_oid_in_uninteresting(bitmap_git, base_oid)) {
2151                 if (use_delta_islands) {
2152                         if (!in_same_island(&delta->idx.oid, base_oid))
2153                                 return 0;
2154                 }
2155                 *base_out = NULL;
2156                 return 1;
2157         }
2158
2159         return 0;
2160 }
2161
2162 static void prefetch_to_pack(uint32_t object_index_start) {
2163         struct oid_array to_fetch = OID_ARRAY_INIT;
2164         uint32_t i;
2165
2166         for (i = object_index_start; i < to_pack.nr_objects; i++) {
2167                 struct object_entry *entry = to_pack.objects + i;
2168
2169                 if (!oid_object_info_extended(the_repository,
2170                                               &entry->idx.oid,
2171                                               NULL,
2172                                               OBJECT_INFO_FOR_PREFETCH))
2173                         continue;
2174                 oid_array_append(&to_fetch, &entry->idx.oid);
2175         }
2176         promisor_remote_get_direct(the_repository,
2177                                    to_fetch.oid, to_fetch.nr);
2178         oid_array_clear(&to_fetch);
2179 }
2180
2181 static void check_object(struct object_entry *entry, uint32_t object_index)
2182 {
2183         unsigned long canonical_size;
2184         enum object_type type;
2185         struct object_info oi = {.typep = &type, .sizep = &canonical_size};
2186
2187         if (IN_PACK(entry)) {
2188                 struct packed_git *p = IN_PACK(entry);
2189                 struct pack_window *w_curs = NULL;
2190                 int have_base = 0;
2191                 struct object_id base_ref;
2192                 struct object_entry *base_entry;
2193                 unsigned long used, used_0;
2194                 unsigned long avail;
2195                 off_t ofs;
2196                 unsigned char *buf, c;
2197                 enum object_type type;
2198                 unsigned long in_pack_size;
2199
2200                 buf = use_pack(p, &w_curs, entry->in_pack_offset, &avail);
2201
2202                 /*
2203                  * We want in_pack_type even if we do not reuse delta
2204                  * since non-delta representations could still be reused.
2205                  */
2206                 used = unpack_object_header_buffer(buf, avail,
2207                                                    &type,
2208                                                    &in_pack_size);
2209                 if (used == 0)
2210                         goto give_up;
2211
2212                 if (type < 0)
2213                         BUG("invalid type %d", type);
2214                 entry->in_pack_type = type;
2215
2216                 /*
2217                  * Determine if this is a delta and if so whether we can
2218                  * reuse it or not.  Otherwise let's find out as cheaply as
2219                  * possible what the actual type and size for this object is.
2220                  */
2221                 switch (entry->in_pack_type) {
2222                 default:
2223                         /* Not a delta hence we've already got all we need. */
2224                         oe_set_type(entry, entry->in_pack_type);
2225                         SET_SIZE(entry, in_pack_size);
2226                         entry->in_pack_header_size = used;
2227                         if (oe_type(entry) < OBJ_COMMIT || oe_type(entry) > OBJ_BLOB)
2228                                 goto give_up;
2229                         unuse_pack(&w_curs);
2230                         return;
2231                 case OBJ_REF_DELTA:
2232                         if (reuse_delta && !entry->preferred_base) {
2233                                 oidread(&base_ref,
2234                                         use_pack(p, &w_curs,
2235                                                  entry->in_pack_offset + used,
2236                                                  NULL),
2237                                         the_repository->hash_algo);
2238                                 have_base = 1;
2239                         }
2240                         entry->in_pack_header_size = used + the_hash_algo->rawsz;
2241                         break;
2242                 case OBJ_OFS_DELTA:
2243                         buf = use_pack(p, &w_curs,
2244                                        entry->in_pack_offset + used, NULL);
2245                         used_0 = 0;
2246                         c = buf[used_0++];
2247                         ofs = c & 127;
2248                         while (c & 128) {
2249                                 ofs += 1;
2250                                 if (!ofs || MSB(ofs, 7)) {
2251                                         error(_("delta base offset overflow in pack for %s"),
2252                                               oid_to_hex(&entry->idx.oid));
2253                                         goto give_up;
2254                                 }
2255                                 c = buf[used_0++];
2256                                 ofs = (ofs << 7) + (c & 127);
2257                         }
2258                         ofs = entry->in_pack_offset - ofs;
2259                         if (ofs <= 0 || ofs >= entry->in_pack_offset) {
2260                                 error(_("delta base offset out of bound for %s"),
2261                                       oid_to_hex(&entry->idx.oid));
2262                                 goto give_up;
2263                         }
2264                         if (reuse_delta && !entry->preferred_base) {
2265                                 uint32_t pos;
2266                                 if (offset_to_pack_pos(p, ofs, &pos) < 0)
2267                                         goto give_up;
2268                                 if (!nth_packed_object_id(&base_ref, p,
2269                                                           pack_pos_to_index(p, pos)))
2270                                         have_base = 1;
2271                         }
2272                         entry->in_pack_header_size = used + used_0;
2273                         break;
2274                 }
2275
2276                 if (have_base &&
2277                     can_reuse_delta(&base_ref, entry, &base_entry)) {
2278                         oe_set_type(entry, entry->in_pack_type);
2279                         SET_SIZE(entry, in_pack_size); /* delta size */
2280                         SET_DELTA_SIZE(entry, in_pack_size);
2281
2282                         if (base_entry) {
2283                                 SET_DELTA(entry, base_entry);
2284                                 entry->delta_sibling_idx = base_entry->delta_child_idx;
2285                                 SET_DELTA_CHILD(base_entry, entry);
2286                         } else {
2287                                 SET_DELTA_EXT(entry, &base_ref);
2288                         }
2289
2290                         unuse_pack(&w_curs);
2291                         return;
2292                 }
2293
2294                 if (oe_type(entry)) {
2295                         off_t delta_pos;
2296
2297                         /*
2298                          * This must be a delta and we already know what the
2299                          * final object type is.  Let's extract the actual
2300                          * object size from the delta header.
2301                          */
2302                         delta_pos = entry->in_pack_offset + entry->in_pack_header_size;
2303                         canonical_size = get_size_from_delta(p, &w_curs, delta_pos);
2304                         if (canonical_size == 0)
2305                                 goto give_up;
2306                         SET_SIZE(entry, canonical_size);
2307                         unuse_pack(&w_curs);
2308                         return;
2309                 }
2310
2311                 /*
2312                  * No choice but to fall back to the recursive delta walk
2313                  * with oid_object_info() to find about the object type
2314                  * at this point...
2315                  */
2316                 give_up:
2317                 unuse_pack(&w_curs);
2318         }
2319
2320         if (oid_object_info_extended(the_repository, &entry->idx.oid, &oi,
2321                                      OBJECT_INFO_SKIP_FETCH_OBJECT | OBJECT_INFO_LOOKUP_REPLACE) < 0) {
2322                 if (repo_has_promisor_remote(the_repository)) {
2323                         prefetch_to_pack(object_index);
2324                         if (oid_object_info_extended(the_repository, &entry->idx.oid, &oi,
2325                                                      OBJECT_INFO_SKIP_FETCH_OBJECT | OBJECT_INFO_LOOKUP_REPLACE) < 0)
2326                                 type = -1;
2327                 } else {
2328                         type = -1;
2329                 }
2330         }
2331         oe_set_type(entry, type);
2332         if (entry->type_valid) {
2333                 SET_SIZE(entry, canonical_size);
2334         } else {
2335                 /*
2336                  * Bad object type is checked in prepare_pack().  This is
2337                  * to permit a missing preferred base object to be ignored
2338                  * as a preferred base.  Doing so can result in a larger
2339                  * pack file, but the transfer will still take place.
2340                  */
2341         }
2342 }
2343
2344 static int pack_offset_sort(const void *_a, const void *_b)
2345 {
2346         const struct object_entry *a = *(struct object_entry **)_a;
2347         const struct object_entry *b = *(struct object_entry **)_b;
2348         const struct packed_git *a_in_pack = IN_PACK(a);
2349         const struct packed_git *b_in_pack = IN_PACK(b);
2350
2351         /* avoid filesystem trashing with loose objects */
2352         if (!a_in_pack && !b_in_pack)
2353                 return oidcmp(&a->idx.oid, &b->idx.oid);
2354
2355         if (a_in_pack < b_in_pack)
2356                 return -1;
2357         if (a_in_pack > b_in_pack)
2358                 return 1;
2359         return a->in_pack_offset < b->in_pack_offset ? -1 :
2360                         (a->in_pack_offset > b->in_pack_offset);
2361 }
2362
2363 /*
2364  * Drop an on-disk delta we were planning to reuse. Naively, this would
2365  * just involve blanking out the "delta" field, but we have to deal
2366  * with some extra book-keeping:
2367  *
2368  *   1. Removing ourselves from the delta_sibling linked list.
2369  *
2370  *   2. Updating our size/type to the non-delta representation. These were
2371  *      either not recorded initially (size) or overwritten with the delta type
2372  *      (type) when check_object() decided to reuse the delta.
2373  *
2374  *   3. Resetting our delta depth, as we are now a base object.
2375  */
2376 static void drop_reused_delta(struct object_entry *entry)
2377 {
2378         unsigned *idx = &to_pack.objects[entry->delta_idx - 1].delta_child_idx;
2379         struct object_info oi = OBJECT_INFO_INIT;
2380         enum object_type type;
2381         unsigned long size;
2382
2383         while (*idx) {
2384                 struct object_entry *oe = &to_pack.objects[*idx - 1];
2385
2386                 if (oe == entry)
2387                         *idx = oe->delta_sibling_idx;
2388                 else
2389                         idx = &oe->delta_sibling_idx;
2390         }
2391         SET_DELTA(entry, NULL);
2392         entry->depth = 0;
2393
2394         oi.sizep = &size;
2395         oi.typep = &type;
2396         if (packed_object_info(the_repository, IN_PACK(entry), entry->in_pack_offset, &oi) < 0) {
2397                 /*
2398                  * We failed to get the info from this pack for some reason;
2399                  * fall back to oid_object_info, which may find another copy.
2400                  * And if that fails, the error will be recorded in oe_type(entry)
2401                  * and dealt with in prepare_pack().
2402                  */
2403                 oe_set_type(entry,
2404                             oid_object_info(the_repository, &entry->idx.oid, &size));
2405         } else {
2406                 oe_set_type(entry, type);
2407         }
2408         SET_SIZE(entry, size);
2409 }
2410
2411 /*
2412  * Follow the chain of deltas from this entry onward, throwing away any links
2413  * that cause us to hit a cycle (as determined by the DFS state flags in
2414  * the entries).
2415  *
2416  * We also detect too-long reused chains that would violate our --depth
2417  * limit.
2418  */
2419 static void break_delta_chains(struct object_entry *entry)
2420 {
2421         /*
2422          * The actual depth of each object we will write is stored as an int,
2423          * as it cannot exceed our int "depth" limit. But before we break
2424          * changes based no that limit, we may potentially go as deep as the
2425          * number of objects, which is elsewhere bounded to a uint32_t.
2426          */
2427         uint32_t total_depth;
2428         struct object_entry *cur, *next;
2429
2430         for (cur = entry, total_depth = 0;
2431              cur;
2432              cur = DELTA(cur), total_depth++) {
2433                 if (cur->dfs_state == DFS_DONE) {
2434                         /*
2435                          * We've already seen this object and know it isn't
2436                          * part of a cycle. We do need to append its depth
2437                          * to our count.
2438                          */
2439                         total_depth += cur->depth;
2440                         break;
2441                 }
2442
2443                 /*
2444                  * We break cycles before looping, so an ACTIVE state (or any
2445                  * other cruft which made its way into the state variable)
2446                  * is a bug.
2447                  */
2448                 if (cur->dfs_state != DFS_NONE)
2449                         BUG("confusing delta dfs state in first pass: %d",
2450                             cur->dfs_state);
2451
2452                 /*
2453                  * Now we know this is the first time we've seen the object. If
2454                  * it's not a delta, we're done traversing, but we'll mark it
2455                  * done to save time on future traversals.
2456                  */
2457                 if (!DELTA(cur)) {
2458                         cur->dfs_state = DFS_DONE;
2459                         break;
2460                 }
2461
2462                 /*
2463                  * Mark ourselves as active and see if the next step causes
2464                  * us to cycle to another active object. It's important to do
2465                  * this _before_ we loop, because it impacts where we make the
2466                  * cut, and thus how our total_depth counter works.
2467                  * E.g., We may see a partial loop like:
2468                  *
2469                  *   A -> B -> C -> D -> B
2470                  *
2471                  * Cutting B->C breaks the cycle. But now the depth of A is
2472                  * only 1, and our total_depth counter is at 3. The size of the
2473                  * error is always one less than the size of the cycle we
2474                  * broke. Commits C and D were "lost" from A's chain.
2475                  *
2476                  * If we instead cut D->B, then the depth of A is correct at 3.
2477                  * We keep all commits in the chain that we examined.
2478                  */
2479                 cur->dfs_state = DFS_ACTIVE;
2480                 if (DELTA(cur)->dfs_state == DFS_ACTIVE) {
2481                         drop_reused_delta(cur);
2482                         cur->dfs_state = DFS_DONE;
2483                         break;
2484                 }
2485         }
2486
2487         /*
2488          * And now that we've gone all the way to the bottom of the chain, we
2489          * need to clear the active flags and set the depth fields as
2490          * appropriate. Unlike the loop above, which can quit when it drops a
2491          * delta, we need to keep going to look for more depth cuts. So we need
2492          * an extra "next" pointer to keep going after we reset cur->delta.
2493          */
2494         for (cur = entry; cur; cur = next) {
2495                 next = DELTA(cur);
2496
2497                 /*
2498                  * We should have a chain of zero or more ACTIVE states down to
2499                  * a final DONE. We can quit after the DONE, because either it
2500                  * has no bases, or we've already handled them in a previous
2501                  * call.
2502                  */
2503                 if (cur->dfs_state == DFS_DONE)
2504                         break;
2505                 else if (cur->dfs_state != DFS_ACTIVE)
2506                         BUG("confusing delta dfs state in second pass: %d",
2507                             cur->dfs_state);
2508
2509                 /*
2510                  * If the total_depth is more than depth, then we need to snip
2511                  * the chain into two or more smaller chains that don't exceed
2512                  * the maximum depth. Most of the resulting chains will contain
2513                  * (depth + 1) entries (i.e., depth deltas plus one base), and
2514                  * the last chain (i.e., the one containing entry) will contain
2515                  * whatever entries are left over, namely
2516                  * (total_depth % (depth + 1)) of them.
2517                  *
2518                  * Since we are iterating towards decreasing depth, we need to
2519                  * decrement total_depth as we go, and we need to write to the
2520                  * entry what its final depth will be after all of the
2521                  * snipping. Since we're snipping into chains of length (depth
2522                  * + 1) entries, the final depth of an entry will be its
2523                  * original depth modulo (depth + 1). Any time we encounter an
2524                  * entry whose final depth is supposed to be zero, we snip it
2525                  * from its delta base, thereby making it so.
2526                  */
2527                 cur->depth = (total_depth--) % (depth + 1);
2528                 if (!cur->depth)
2529                         drop_reused_delta(cur);
2530
2531                 cur->dfs_state = DFS_DONE;
2532         }
2533 }
2534
2535 static void get_object_details(void)
2536 {
2537         uint32_t i;
2538         struct object_entry **sorted_by_offset;
2539
2540         if (progress)
2541                 progress_state = start_progress(the_repository,
2542                                                 _("Counting objects"),
2543                                                 to_pack.nr_objects);
2544
2545         CALLOC_ARRAY(sorted_by_offset, to_pack.nr_objects);
2546         for (i = 0; i < to_pack.nr_objects; i++)
2547                 sorted_by_offset[i] = to_pack.objects + i;
2548         QSORT(sorted_by_offset, to_pack.nr_objects, pack_offset_sort);
2549
2550         for (i = 0; i < to_pack.nr_objects; i++) {
2551                 struct object_entry *entry = sorted_by_offset[i];
2552                 check_object(entry, i);
2553                 if (entry->type_valid &&
2554                     oe_size_greater_than(&to_pack, entry,
2555                                          repo_settings_get_big_file_threshold(the_repository)))
2556                         entry->no_try_delta = 1;
2557                 display_progress(progress_state, i + 1);
2558         }
2559         stop_progress(&progress_state);
2560
2561         /*
2562          * This must happen in a second pass, since we rely on the delta
2563          * information for the whole list being completed.
2564          */
2565         for (i = 0; i < to_pack.nr_objects; i++)
2566                 break_delta_chains(&to_pack.objects[i]);
2567
2568         free(sorted_by_offset);
2569 }
2570
2571 /*
2572  * We search for deltas in a list sorted by type, by filename hash, and then
2573  * by size, so that we see progressively smaller and smaller files.
2574  * That's because we prefer deltas to be from the bigger file
2575  * to the smaller -- deletes are potentially cheaper, but perhaps
2576  * more importantly, the bigger file is likely the more recent
2577  * one.  The deepest deltas are therefore the oldest objects which are
2578  * less susceptible to be accessed often.
2579  */
2580 static int type_size_sort(const void *_a, const void *_b)
2581 {
2582         const struct object_entry *a = *(struct object_entry **)_a;
2583         const struct object_entry *b = *(struct object_entry **)_b;
2584         const enum object_type a_type = oe_type(a);
2585         const enum object_type b_type = oe_type(b);
2586         const unsigned long a_size = SIZE(a);
2587         const unsigned long b_size = SIZE(b);
2588
2589         if (a_type > b_type)
2590                 return -1;
2591         if (a_type < b_type)
2592                 return 1;
2593         if (a->hash > b->hash)
2594                 return -1;
2595         if (a->hash < b->hash)
2596                 return 1;
2597         if (a->preferred_base > b->preferred_base)
2598                 return -1;
2599         if (a->preferred_base < b->preferred_base)
2600                 return 1;
2601         if (use_delta_islands) {
2602                 const int island_cmp = island_delta_cmp(&a->idx.oid, &b->idx.oid);
2603                 if (island_cmp)
2604                         return island_cmp;
2605         }
2606         if (a_size > b_size)
2607                 return -1;
2608         if (a_size < b_size)
2609                 return 1;
2610         return a < b ? -1 : (a > b);  /* newest first */
2611 }
2612
/*
 * One slot of the delta search window handled by find_deltas() and
 * try_delta().
 */
struct unpacked {
        /* object occupying this slot; NULL while the slot is unused */
        struct object_entry *entry;
        /* object contents, read on demand by try_delta(); NULL until then */
        void *data;
        /* delta index over `data`, created on demand by try_delta() */
        struct delta_index *index;
        /* delta depth of `entry`; reset to 0 by free_unpacked() */
        unsigned depth;
};
2619
2620 static int delta_cacheable(unsigned long src_size, unsigned long trg_size,
2621                            unsigned long delta_size)
2622 {
2623         if (max_delta_cache_size && delta_cache_size + delta_size > max_delta_cache_size)
2624                 return 0;
2625
2626         if (delta_size < cache_max_small_delta_size)
2627                 return 1;
2628
2629         /* cache delta, if objects are large enough compared to delta size */
2630         if ((src_size >> 20) + (trg_size >> 21) > (delta_size >> 10))
2631                 return 1;
2632
2633         return 0;
2634 }
2635
/*
 * Protect delta_cache_size. try_delta() and find_deltas() take this lock
 * through cache_lock()/cache_unlock() around their updates of that counter.
 */
static pthread_mutex_t cache_mutex;
#define cache_lock()            pthread_mutex_lock(&cache_mutex)
#define cache_unlock()          pthread_mutex_unlock(&cache_mutex)

/*
 * Protect object list partitioning (e.g. struct thread_param) and
 * progress_state. find_deltas() holds this lock while it takes the next
 * entry off its list and reports progress.
 */
static pthread_mutex_t progress_mutex;
#define progress_lock()         pthread_mutex_lock(&progress_mutex)
#define progress_unlock()       pthread_mutex_unlock(&progress_mutex)

/*
 * Access to struct object_entry is unprotected since each thread owns
 * a portion of the main object list. Just don't access object entries
 * ahead in the list because they can be stolen and would need
 * progress_mutex for protection.
 */
2655
2656 static inline int oe_size_less_than(struct packing_data *pack,
2657                                     const struct object_entry *lhs,
2658                                     unsigned long rhs)
2659 {
2660         if (lhs->size_valid)
2661                 return lhs->size_ < rhs;
2662         if (rhs < pack->oe_size_limit) /* rhs < 2^x <= lhs ? */
2663                 return 0;
2664         return oe_get_size_slow(pack, lhs) < rhs;
2665 }
2666
2667 static inline void oe_set_tree_depth(struct packing_data *pack,
2668                                      struct object_entry *e,
2669                                      unsigned int tree_depth)
2670 {
2671         if (!pack->tree_depth)
2672                 CALLOC_ARRAY(pack->tree_depth, pack->nr_alloc);
2673         pack->tree_depth[e - pack->objects] = tree_depth;
2674 }
2675
2676 /*
2677  * Return the size of the object without doing any delta
2678  * reconstruction (so non-deltas are true object sizes, but deltas
2679  * return the size of the delta data).
2680  */
2681 unsigned long oe_get_size_slow(struct packing_data *pack,
2682                                const struct object_entry *e)
2683 {
2684         struct packed_git *p;
2685         struct pack_window *w_curs;
2686         unsigned char *buf;
2687         enum object_type type;
2688         unsigned long used, avail, size;
2689
2690         if (e->type_ != OBJ_OFS_DELTA && e->type_ != OBJ_REF_DELTA) {
2691                 packing_data_lock(&to_pack);
2692                 if (oid_object_info(the_repository, &e->idx.oid, &size) < 0)
2693                         die(_("unable to get size of %s"),
2694                             oid_to_hex(&e->idx.oid));
2695                 packing_data_unlock(&to_pack);
2696                 return size;
2697         }
2698
2699         p = oe_in_pack(pack, e);
2700         if (!p)
2701                 BUG("when e->type is a delta, it must belong to a pack");
2702
2703         packing_data_lock(&to_pack);
2704         w_curs = NULL;
2705         buf = use_pack(p, &w_curs, e->in_pack_offset, &avail);
2706         used = unpack_object_header_buffer(buf, avail, &type, &size);
2707         if (used == 0)
2708                 die(_("unable to parse object header of %s"),
2709                     oid_to_hex(&e->idx.oid));
2710
2711         unuse_pack(&w_curs);
2712         packing_data_unlock(&to_pack);
2713         return size;
2714 }
2715
2716 static int try_delta(struct unpacked *trg, struct unpacked *src,
2717                      unsigned max_depth, unsigned long *mem_usage)
2718 {
2719         struct object_entry *trg_entry = trg->entry;
2720         struct object_entry *src_entry = src->entry;
2721         unsigned long trg_size, src_size, delta_size, sizediff, max_size, sz;
2722         unsigned ref_depth;
2723         enum object_type type;
2724         void *delta_buf;
2725
2726         /* Don't bother doing diffs between different types */
2727         if (oe_type(trg_entry) != oe_type(src_entry))
2728                 return -1;
2729
2730         /*
2731          * We do not bother to try a delta that we discarded on an
2732          * earlier try, but only when reusing delta data.  Note that
2733          * src_entry that is marked as the preferred_base should always
2734          * be considered, as even if we produce a suboptimal delta against
2735          * it, we will still save the transfer cost, as we already know
2736          * the other side has it and we won't send src_entry at all.
2737          */
2738         if (reuse_delta && IN_PACK(trg_entry) &&
2739             IN_PACK(trg_entry) == IN_PACK(src_entry) &&
2740             !src_entry->preferred_base &&
2741             trg_entry->in_pack_type != OBJ_REF_DELTA &&
2742             trg_entry->in_pack_type != OBJ_OFS_DELTA)
2743                 return 0;
2744
2745         /* Let's not bust the allowed depth. */
2746         if (src->depth >= max_depth)
2747                 return 0;
2748
2749         /* Now some size filtering heuristics. */
2750         trg_size = SIZE(trg_entry);
2751         if (!DELTA(trg_entry)) {
2752                 max_size = trg_size/2 - the_hash_algo->rawsz;
2753                 ref_depth = 1;
2754         } else {
2755                 max_size = DELTA_SIZE(trg_entry);
2756                 ref_depth = trg->depth;
2757         }
2758         max_size = (uint64_t)max_size * (max_depth - src->depth) /
2759                                                 (max_depth - ref_depth + 1);
2760         if (max_size == 0)
2761                 return 0;
2762         src_size = SIZE(src_entry);
2763         sizediff = src_size < trg_size ? trg_size - src_size : 0;
2764         if (sizediff >= max_size)
2765                 return 0;
2766         if (trg_size < src_size / 32)
2767                 return 0;
2768
2769         if (!in_same_island(&trg->entry->idx.oid, &src->entry->idx.oid))
2770                 return 0;
2771
2772         /* Load data if not already done */
2773         if (!trg->data) {
2774                 packing_data_lock(&to_pack);
2775                 trg->data = repo_read_object_file(the_repository,
2776                                                   &trg_entry->idx.oid, &type,
2777                                                   &sz);
2778                 packing_data_unlock(&to_pack);
2779                 if (!trg->data)
2780                         die(_("object %s cannot be read"),
2781                             oid_to_hex(&trg_entry->idx.oid));
2782                 if (sz != trg_size)
2783                         die(_("object %s inconsistent object length (%"PRIuMAX" vs %"PRIuMAX")"),
2784                             oid_to_hex(&trg_entry->idx.oid), (uintmax_t)sz,
2785                             (uintmax_t)trg_size);
2786                 *mem_usage += sz;
2787         }
2788         if (!src->data) {
2789                 packing_data_lock(&to_pack);
2790                 src->data = repo_read_object_file(the_repository,
2791                                                   &src_entry->idx.oid, &type,
2792                                                   &sz);
2793                 packing_data_unlock(&to_pack);
2794                 if (!src->data) {
2795                         if (src_entry->preferred_base) {
2796                                 static int warned = 0;
2797                                 if (!warned++)
2798                                         warning(_("object %s cannot be read"),
2799                                                 oid_to_hex(&src_entry->idx.oid));
2800                                 /*
2801                                  * Those objects are not included in the
2802                                  * resulting pack.  Be resilient and ignore
2803                                  * them if they can't be read, in case the
2804                                  * pack could be created nevertheless.
2805                                  */
2806                                 return 0;
2807                         }
2808                         die(_("object %s cannot be read"),
2809                             oid_to_hex(&src_entry->idx.oid));
2810                 }
2811                 if (sz != src_size)
2812                         die(_("object %s inconsistent object length (%"PRIuMAX" vs %"PRIuMAX")"),
2813                             oid_to_hex(&src_entry->idx.oid), (uintmax_t)sz,
2814                             (uintmax_t)src_size);
2815                 *mem_usage += sz;
2816         }
2817         if (!src->index) {
2818                 src->index = create_delta_index(src->data, src_size);
2819                 if (!src->index) {
2820                         static int warned = 0;
2821                         if (!warned++)
2822                                 warning(_("suboptimal pack - out of memory"));
2823                         return 0;
2824                 }
2825                 *mem_usage += sizeof_delta_index(src->index);
2826         }
2827
2828         delta_buf = create_delta(src->index, trg->data, trg_size, &delta_size, max_size);
2829         if (!delta_buf)
2830                 return 0;
2831
2832         if (DELTA(trg_entry)) {
2833                 /* Prefer only shallower same-sized deltas. */
2834                 if (delta_size == DELTA_SIZE(trg_entry) &&
2835                     src->depth + 1 >= trg->depth) {
2836                         free(delta_buf);
2837                         return 0;
2838                 }
2839         }
2840
2841         /*
2842          * Handle memory allocation outside of the cache
2843          * accounting lock.  Compiler will optimize the strangeness
2844          * away when NO_PTHREADS is defined.
2845          */
2846         free(trg_entry->delta_data);
2847         cache_lock();
2848         if (trg_entry->delta_data) {
2849                 delta_cache_size -= DELTA_SIZE(trg_entry);
2850                 trg_entry->delta_data = NULL;
2851         }
2852         if (delta_cacheable(src_size, trg_size, delta_size)) {
2853                 delta_cache_size += delta_size;
2854                 cache_unlock();
2855                 trg_entry->delta_data = xrealloc(delta_buf, delta_size);
2856         } else {
2857                 cache_unlock();
2858                 free(delta_buf);
2859         }
2860
2861         SET_DELTA(trg_entry, src_entry);
2862         SET_DELTA_SIZE(trg_entry, delta_size);
2863         trg->depth = src->depth + 1;
2864
2865         return 1;
2866 }
2867
/*
 * Return the greatest depth found among `me` and all objects that are
 * (directly or transitively) recorded as delta children of `me`, where
 * `me` itself counts as depth `n` and each child level adds one.
 */
static unsigned int check_delta_limit(struct object_entry *me, unsigned int n)
{
        unsigned int deepest = n;
        struct object_entry *kid;

        for (kid = DELTA_CHILD(me); kid; kid = DELTA_SIBLING(kid)) {
                const unsigned int below = check_delta_limit(kid, n + 1);

                if (below > deepest)
                        deepest = below;
        }
        return deepest;
}
2880
2881 static unsigned long free_unpacked(struct unpacked *n)
2882 {
2883         unsigned long freed_mem = sizeof_delta_index(n->index);
2884         free_delta_index(n->index);
2885         n->index = NULL;
2886         if (n->data) {
2887                 freed_mem += SIZE(n->entry);
2888                 FREE_AND_NULL(n->data);
2889         }
2890         n->entry = NULL;
2891         n->depth = 0;
2892         return freed_mem;
2893 }
2894
2895 static void find_deltas(struct object_entry **list, unsigned *list_size,
2896                         int window, int depth, unsigned *processed)
2897 {
2898         uint32_t i, idx = 0, count = 0;
2899         struct unpacked *array;
2900         unsigned long mem_usage = 0;
2901
2902         CALLOC_ARRAY(array, window);
2903
2904         for (;;) {
2905                 struct object_entry *entry;
2906                 struct unpacked *n = array + idx;
2907                 int j, max_depth, best_base = -1;
2908
2909                 progress_lock();
2910                 if (!*list_size) {
2911                         progress_unlock();
2912                         break;
2913                 }
2914                 entry = *list++;
2915                 (*list_size)--;
2916                 if (!entry->preferred_base) {
2917                         (*processed)++;
2918                         display_progress(progress_state, *processed);
2919                 }
2920                 progress_unlock();
2921
2922                 mem_usage -= free_unpacked(n);
2923                 n->entry = entry;
2924
2925                 while (window_memory_limit &&
2926                        mem_usage > window_memory_limit &&
2927                        count > 1) {
2928                         const uint32_t tail = (idx + window - count) % window;
2929                         mem_usage -= free_unpacked(array + tail);
2930                         count--;
2931                 }
2932
2933                 /* We do not compute delta to *create* objects we are not
2934                  * going to pack.
2935                  */
2936                 if (entry->preferred_base)
2937                         goto next;
2938
2939                 /*
2940                  * If the current object is at pack edge, take the depth the
2941                  * objects that depend on the current object into account
2942                  * otherwise they would become too deep.
2943                  */
2944                 max_depth = depth;
2945                 if (DELTA_CHILD(entry)) {
2946                         max_depth -= check_delta_limit(entry, 0);
2947                         if (max_depth <= 0)
2948                                 goto next;
2949                 }
2950
2951                 j = window;
2952                 while (--j > 0) {
2953                         int ret;
2954                         uint32_t other_idx = idx + j;
2955                         struct unpacked *m;
2956                         if (other_idx >= window)
2957                                 other_idx -= window;
2958                         m = array + other_idx;
2959                         if (!m->entry)
2960                                 break;
2961                         ret = try_delta(n, m, max_depth, &mem_usage);
2962                         if (ret < 0)
2963                                 break;
2964                         else if (ret > 0)
2965                                 best_base = other_idx;
2966                 }
2967
2968                 /*
2969                  * If we decided to cache the delta data, then it is best
2970                  * to compress it right away.  First because we have to do
2971                  * it anyway, and doing it here while we're threaded will
2972                  * save a lot of time in the non threaded write phase,
2973                  * as well as allow for caching more deltas within
2974                  * the same cache size limit.
2975                  * ...
2976                  * But only if not writing to stdout, since in that case
2977                  * the network is most likely throttling writes anyway,
2978                  * and therefore it is best to go to the write phase ASAP
2979                  * instead, as we can afford spending more time compressing
2980                  * between writes at that moment.
2981                  */
2982                 if (entry->delta_data && !pack_to_stdout) {
2983                         unsigned long size;
2984
2985                         size = do_compress(&entry->delta_data, DELTA_SIZE(entry));
2986                         if (size < (1U << OE_Z_DELTA_BITS)) {
2987                                 entry->z_delta_size = size;
2988                                 cache_lock();
2989                                 delta_cache_size -= DELTA_SIZE(entry);
2990                                 delta_cache_size += entry->z_delta_size;
2991                                 cache_unlock();
2992                         } else {
2993                                 FREE_AND_NULL(entry->delta_data);
2994                                 entry->z_delta_size = 0;
2995                         }
2996                 }
2997
2998                 /* if we made n a delta, and if n is already at max
2999                  * depth, leaving it in the window is pointless.  we
3000                  * should evict it first.
3001                  */
3002                 if (DELTA(entry) && max_depth <= n->depth)
3003                         continue;
3004
3005                 /*
3006                  * Move the best delta base up in the window, after the
3007                  * currently deltified object, to keep it longer.  It will
3008                  * be the first base object to be attempted next.
3009                  */
3010                 if (DELTA(entry)) {
3011                         struct unpacked swap = array[best_base];
3012                         int dist = (window + idx - best_base) % window;
3013                         int dst = best_base;
3014                         while (dist--) {
3015                                 int src = (dst + 1) % window;
3016                                 array[dst] = array[src];
3017                                 dst = src;
3018                         }
3019                         array[dst] = swap;
3020                 }
3021
3022                 next:
3023                 idx++;
3024                 if (count + 1 < window)
3025                         count++;
3026                 if (idx >= window)
3027                         idx = 0;
3028         }
3029
3030         for (i = 0; i < window; ++i) {
3031                 free_delta_index(array[i].index);
3032                 free(array[i].data);
3033         }
3034         free(array);
3035 }
3036
3037 /*
3038  * The main object list is split into smaller lists, each is handed to
3039  * one worker.
3040  *
3041  * The main thread waits on the condition that (at least) one of the workers
3042  * has stopped working (which is indicated in the .working member of
3043  * struct thread_params).
3044  *
3045  * When a work thread has completed its work, it sets .working to 0 and
3046  * signals the main thread and waits on the condition that .data_ready
3047  * becomes 1.
3048  *
3049  * The main thread steals half of the work from the worker that has
3050  * most work left to hand it to the idle worker.
3051  */
3052
/*
 * Bookkeeping for one delta-search worker.  One instance per worker is
 * allocated by the ll_find_deltas*() drivers and handed to the thread
 * as its start argument.
 */
struct thread_params {
        pthread_t thread;               /* joined by the driver once the worker has no more work */
        struct object_entry **list;     /* start of the slice given to threaded_find_deltas() */
        struct packing_region *regions; /* next region for threaded_find_deltas_by_path() */
        unsigned list_size;             /* size of the slice currently assigned */
        unsigned remaining;             /* items of the slice not yet processed */
        int window;                     /* forwarded to find_deltas() */
        int depth;                      /* forwarded to find_deltas() */
        int working;                    /* cleared by the worker when it ran out of work */
        int data_ready;                 /* set by the driver after (re)assigning work */
        pthread_mutex_t mutex;          /* held while reading or writing data_ready */
        pthread_cond_t cond;            /* signalled by the driver after setting data_ready */
        unsigned *processed;            /* progress counter forwarded to the search routines */
};
3067
3068 static pthread_cond_t progress_cond;
3069
3070 /*
3071  * Mutex and conditional variable can't be statically-initialized on Windows.
3072  */
3073 static void init_threaded_search(void)
3074 {
3075         pthread_mutex_init(&cache_mutex, NULL);
3076         pthread_mutex_init(&progress_mutex, NULL);
3077         pthread_cond_init(&progress_cond, NULL);
3078 }
3079
3080 static void cleanup_threaded_search(void)
3081 {
3082         pthread_cond_destroy(&progress_cond);
3083         pthread_mutex_destroy(&cache_mutex);
3084         pthread_mutex_destroy(&progress_mutex);
3085 }
3086
3087 static void *threaded_find_deltas(void *arg)
3088 {
3089         struct thread_params *me = arg;
3090
3091         progress_lock();
3092         while (me->remaining) {
3093                 progress_unlock();
3094
3095                 find_deltas(me->list, &me->remaining,
3096                             me->window, me->depth, me->processed);
3097
3098                 progress_lock();
3099                 me->working = 0;
3100                 pthread_cond_signal(&progress_cond);
3101                 progress_unlock();
3102
3103                 /*
3104                  * We must not set ->data_ready before we wait on the
3105                  * condition because the main thread may have set it to 1
3106                  * before we get here. In order to be sure that new
3107                  * work is available if we see 1 in ->data_ready, it
3108                  * was initialized to 0 before this thread was spawned
3109                  * and we reset it to 0 right away.
3110                  */
3111                 pthread_mutex_lock(&me->mutex);
3112                 while (!me->data_ready)
3113                         pthread_cond_wait(&me->cond, &me->mutex);
3114                 me->data_ready = 0;
3115                 pthread_mutex_unlock(&me->mutex);
3116
3117                 progress_lock();
3118         }
3119         progress_unlock();
3120         /* leave ->working 1 so that this doesn't get more work assigned */
3121         return NULL;
3122 }
3123
/*
 * Run the delta search over `list` (`list_size` entries), spreading the
 * work over up to `delta_search_threads` worker threads.
 *
 * With at most one thread configured this simply calls find_deltas() in
 * the calling thread.  Otherwise the list is cut into one contiguous
 * slice per worker, the workers are started, and the calling thread
 * rebalances work between them until every worker has been joined.
 *
 * `window`, `depth` and `processed` are forwarded to find_deltas().
 * Dies if a worker thread cannot be created.
 */
static void ll_find_deltas(struct object_entry **list, unsigned list_size,
                           int window, int depth, unsigned *processed)
{
        struct thread_params *p;
        int i, ret, active_threads = 0;

        init_threaded_search();

        /* single-threaded: do all the work right here */
        if (delta_search_threads <= 1) {
                find_deltas(list, &list_size, window, depth, processed);
                cleanup_threaded_search();
                return;
        }
        if (progress > pack_to_stdout)
                fprintf_ln(stderr, _("Delta compression using up to %d threads"),
                           delta_search_threads);
        CALLOC_ARRAY(p, delta_search_threads);

        /* Partition the work amongst work threads. */
        for (i = 0; i < delta_search_threads; i++) {
                /* even share of what is left among the workers not yet served */
                unsigned sub_size = list_size / (delta_search_threads - i);

                /* don't use too small segments or no deltas will be found */
                if (sub_size < 2*window && i+1 < delta_search_threads)
                        sub_size = 0;

                p[i].window = window;
                p[i].depth = depth;
                p[i].processed = processed;
                p[i].working = 1;
                p[i].data_ready = 0;

                /* try to split chunks on "path" boundaries */
                while (sub_size && sub_size < list_size &&
                       list[sub_size]->hash &&
                       list[sub_size]->hash == list[sub_size-1]->hash)
                        sub_size++;

                p[i].list = list;
                p[i].list_size = sub_size;
                p[i].remaining = sub_size;

                /* the next worker starts where this slice ends */
                list += sub_size;
                list_size -= sub_size;
        }

        /* Start work threads. */
        for (i = 0; i < delta_search_threads; i++) {
                /* workers that received an empty slice are never spawned */
                if (!p[i].list_size)
                        continue;
                pthread_mutex_init(&p[i].mutex, NULL);
                pthread_cond_init(&p[i].cond, NULL);
                ret = pthread_create(&p[i].thread, NULL,
                                     threaded_find_deltas, &p[i]);
                if (ret)
                        die(_("unable to create thread: %s"), strerror(ret));
                active_threads++;
        }

        /*
         * Now let's wait for work completion.  Each time a thread is done
         * with its work, we steal half of the remaining work from the
         * thread with the largest number of unprocessed objects and give
         * it to that newly idle thread.  This ensures good load balancing
         * until the remaining object list segments are simply too short
         * to be worth splitting anymore.
         */
        while (active_threads) {
                struct thread_params *target = NULL;
                struct thread_params *victim = NULL;
                unsigned sub_size = 0;

                /* sleep until some worker has cleared its ->working flag */
                progress_lock();
                for (;;) {
                        for (i = 0; !target && i < delta_search_threads; i++)
                                if (!p[i].working)
                                        target = &p[i];
                        if (target)
                                break;
                        pthread_cond_wait(&progress_cond, &progress_mutex);
                }

                /* pick the worker with the most work left, if worth splitting */
                for (i = 0; i < delta_search_threads; i++)
                        if (p[i].remaining > 2*window &&
                            (!victim || victim->remaining < p[i].remaining))
                                victim = &p[i];
                if (victim) {
                        sub_size = victim->remaining / 2;
                        /* take the tail end of the victim's slice */
                        list = victim->list + victim->list_size - sub_size;
                        /* move the cut forward to the next hash boundary */
                        while (sub_size && list[0]->hash &&
                               list[0]->hash == list[-1]->hash) {
                                list++;
                                sub_size--;
                        }
                        if (!sub_size) {
                                /*
                                 * It is possible for some "paths" to have
                                 * so many objects that no hash boundary
                                 * might be found.  Let's just steal the
                                 * exact half in that case.
                                 */
                                sub_size = victim->remaining / 2;
                                list -= sub_size;
                        }
                        target->list = list;
                        victim->list_size -= sub_size;
                        victim->remaining -= sub_size;
                }
                /* sub_size is still 0 here when there was nothing to steal */
                target->list_size = sub_size;
                target->remaining = sub_size;
                target->working = 1;
                progress_unlock();

                /* wake the idle worker so it looks at its new assignment */
                pthread_mutex_lock(&target->mutex);
                target->data_ready = 1;
                pthread_cond_signal(&target->cond);
                pthread_mutex_unlock(&target->mutex);

                /* a worker woken with no work exits; reap it */
                if (!sub_size) {
                        pthread_join(target->thread, NULL);
                        pthread_cond_destroy(&target->cond);
                        pthread_mutex_destroy(&target->mutex);
                        active_threads--;
                }
        }
        cleanup_threaded_search();
        free(p);
}
3252
3253 static int obj_is_packed(const struct object_id *oid)
3254 {
3255         return packlist_find(&to_pack, oid) ||
3256                 (reuse_packfile_bitmap &&
3257                  bitmap_walk_contains(bitmap_git, reuse_packfile_bitmap, oid));
3258 }
3259
3260 static void add_tag_chain(const struct object_id *oid)
3261 {
3262         struct tag *tag;
3263
3264         /*
3265          * We catch duplicates already in add_object_entry(), but we'd
3266          * prefer to do this extra check to avoid having to parse the
3267          * tag at all if we already know that it's being packed (e.g., if
3268          * it was included via bitmaps, we would not have parsed it
3269          * previously).
3270          */
3271         if (obj_is_packed(oid))
3272                 return;
3273
3274         tag = lookup_tag(the_repository, oid);
3275         while (1) {
3276                 if (!tag || parse_tag(tag) || !tag->tagged)
3277                         die(_("unable to pack objects reachable from tag %s"),
3278                             oid_to_hex(oid));
3279
3280                 add_object_entry(&tag->object.oid, OBJ_TAG, NULL, 0);
3281
3282                 if (tag->tagged->type != OBJ_TAG)
3283                         return;
3284
3285                 tag = (struct tag *)tag->tagged;
3286         }
3287 }
3288
3289 static int add_ref_tag(const char *tag UNUSED, const char *referent UNUSED, const struct object_id *oid,
3290                        int flag UNUSED, void *cb_data UNUSED)
3291 {
3292         struct object_id peeled;
3293
3294         if (!peel_iterated_oid(the_repository, oid, &peeled) && obj_is_packed(&peeled))
3295                 add_tag_chain(oid);
3296         return 0;
3297 }
3298
3299 static int should_attempt_deltas(struct object_entry *entry)
3300 {
3301         if (DELTA(entry))
3302                 /* This happens if we decided to reuse existing
3303                  * delta from a pack. "reuse_delta &&" is implied.
3304                  */
3305                 return 0;
3306
3307         if (!entry->type_valid ||
3308             oe_size_less_than(&to_pack, entry, 50))
3309                 return 0;
3310
3311         if (entry->no_try_delta)
3312                 return 0;
3313
3314         if (!entry->preferred_base) {
3315                 if (oe_type(entry) < 0)
3316                         die(_("unable to get type of object %s"),
3317                                 oid_to_hex(&entry->idx.oid));
3318         } else if (oe_type(entry) < 0) {
3319                 /*
3320                  * This object is not found, but we
3321                  * don't have to include it anyway.
3322                  */
3323                 return 0;
3324         }
3325
3326         return 1;
3327 }
3328
3329 static void find_deltas_for_region(struct object_entry *list,
3330                                    struct packing_region *region,
3331                                    unsigned int *processed)
3332 {
3333         struct object_entry **delta_list;
3334         unsigned int delta_list_nr = 0;
3335
3336         ALLOC_ARRAY(delta_list, region->nr);
3337         for (size_t i = 0; i < region->nr; i++) {
3338                 struct object_entry *entry = list + region->start + i;
3339                 if (should_attempt_deltas(entry))
3340                         delta_list[delta_list_nr++] = entry;
3341         }
3342
3343         QSORT(delta_list, delta_list_nr, type_size_sort);
3344         find_deltas(delta_list, &delta_list_nr, window, depth, processed);
3345         free(delta_list);
3346 }
3347
3348 static void find_deltas_by_region(struct object_entry *list,
3349                                   struct packing_region *regions,
3350                                   size_t start, size_t nr)
3351 {
3352         unsigned int processed = 0;
3353         size_t progress_nr;
3354
3355         if (!nr)
3356                 return;
3357
3358         progress_nr = regions[nr - 1].start + regions[nr - 1].nr;
3359
3360         if (progress)
3361                 progress_state = start_progress(the_repository,
3362                                                 _("Compressing objects by path"),
3363                                                 progress_nr);
3364
3365         while (nr--)
3366                 find_deltas_for_region(list,
3367                                        &regions[start++],
3368                                        &processed);
3369
3370         display_progress(progress_state, progress_nr);
3371         stop_progress(&progress_state);
3372 }
3373
3374 static void *threaded_find_deltas_by_path(void *arg)
3375 {
3376         struct thread_params *me = arg;
3377
3378         progress_lock();
3379         while (me->remaining) {
3380                 while (me->remaining) {
3381                         progress_unlock();
3382                         find_deltas_for_region(to_pack.objects,
3383                                                me->regions,
3384                                                me->processed);
3385                         progress_lock();
3386                         me->remaining--;
3387                         me->regions++;
3388                 }
3389
3390                 me->working = 0;
3391                 pthread_cond_signal(&progress_cond);
3392                 progress_unlock();
3393
3394                 /*
3395                  * We must not set ->data_ready before we wait on the
3396                  * condition because the main thread may have set it to 1
3397                  * before we get here. In order to be sure that new
3398                  * work is available if we see 1 in ->data_ready, it
3399                  * was initialized to 0 before this thread was spawned
3400                  * and we reset it to 0 right away.
3401                  */
3402                 pthread_mutex_lock(&me->mutex);
3403                 while (!me->data_ready)
3404                         pthread_cond_wait(&me->cond, &me->mutex);
3405                 me->data_ready = 0;
3406                 pthread_mutex_unlock(&me->mutex);
3407
3408                 progress_lock();
3409         }
3410         progress_unlock();
3411         /* leave ->working 1 so that this doesn't get more work assigned */
3412         return NULL;
3413 }
3414
3415 static void ll_find_deltas_by_region(struct object_entry *list,
3416                                      struct packing_region *regions,
3417                                      uint32_t start, uint32_t nr)
3418 {
3419         struct thread_params *p;
3420         int i, ret, active_threads = 0;
3421         unsigned int processed = 0;
3422         uint32_t progress_nr;
3423         init_threaded_search();
3424
3425         if (!nr)
3426                 return;
3427
3428         progress_nr =  regions[nr - 1].start + regions[nr - 1].nr;
3429         if (delta_search_threads <= 1) {
3430                 find_deltas_by_region(list, regions, start, nr);
3431                 cleanup_threaded_search();
3432                 return;
3433         }
3434
3435         if (progress > pack_to_stdout)
3436                 fprintf_ln(stderr,
3437                            Q_("Path-based delta compression using up to %d thread",
3438                               "Path-based delta compression using up to %d threads",
3439                               delta_search_threads),
3440                            delta_search_threads);
3441         CALLOC_ARRAY(p, delta_search_threads);
3442
3443         if (progress)
3444                 progress_state = start_progress(the_repository,
3445                                                 _("Compressing objects by path"),
3446                                                 progress_nr);
3447         /* Partition the work amongst work threads. */
3448         for (i = 0; i < delta_search_threads; i++) {
3449                 unsigned sub_size = nr / (delta_search_threads - i);
3450
3451                 p[i].window = window;
3452                 p[i].depth = depth;
3453                 p[i].processed = &processed;
3454                 p[i].working = 1;
3455                 p[i].data_ready = 0;
3456
3457                 p[i].regions = regions;
3458                 p[i].list_size = sub_size;
3459                 p[i].remaining = sub_size;
3460
3461                 regions += sub_size;
3462                 nr -= sub_size;
3463         }
3464
3465         /* Start work threads. */
3466         for (i = 0; i < delta_search_threads; i++) {
3467                 if (!p[i].list_size)
3468                         continue;
3469                 pthread_mutex_init(&p[i].mutex, NULL);
3470                 pthread_cond_init(&p[i].cond, NULL);
3471                 ret = pthread_create(&p[i].thread, NULL,
3472                                      threaded_find_deltas_by_path, &p[i]);
3473                 if (ret)
3474                         die(_("unable to create thread: %s"), strerror(ret));
3475                 active_threads++;
3476         }
3477
3478         /*
3479          * Now let's wait for work completion.  Each time a thread is done
3480          * with its work, we steal half of the remaining work from the
3481          * thread with the largest number of unprocessed objects and give
3482          * it to that newly idle thread.  This ensure good load balancing
3483          * until the remaining object list segments are simply too short
3484          * to be worth splitting anymore.
3485          */
3486         while (active_threads) {
3487                 struct thread_params *target = NULL;
3488                 struct thread_params *victim = NULL;
3489                 unsigned sub_size = 0;
3490
3491                 progress_lock();
3492                 for (;;) {
3493                         for (i = 0; !target && i < delta_search_threads; i++)
3494                                 if (!p[i].working)
3495                                         target = &p[i];
3496                         if (target)
3497                                 break;
3498                         pthread_cond_wait(&progress_cond, &progress_mutex);
3499                 }
3500
3501                 for (i = 0; i < delta_search_threads; i++)
3502                         if (p[i].remaining > 2*window &&
3503                             (!victim || victim->remaining < p[i].remaining))
3504                                 victim = &p[i];
3505                 if (victim) {
3506                         sub_size = victim->remaining / 2;
3507                         target->regions = victim->regions + victim->remaining - sub_size;
3508                         victim->list_size -= sub_size;
3509                         victim->remaining -= sub_size;
3510                 }
3511                 target->list_size = sub_size;
3512                 target->remaining = sub_size;
3513                 target->working = 1;
3514                 progress_unlock();
3515
3516                 pthread_mutex_lock(&target->mutex);
3517                 target->data_ready = 1;
3518                 pthread_cond_signal(&target->cond);
3519                 pthread_mutex_unlock(&target->mutex);
3520
3521                 if (!sub_size) {
3522                         pthread_join(target->thread, NULL);
3523                         pthread_cond_destroy(&target->cond);
3524                         pthread_mutex_destroy(&target->mutex);
3525                         active_threads--;
3526                 }
3527         }
3528         cleanup_threaded_search();
3529         free(p);
3530
3531         display_progress(progress_state, progress_nr);
3532         stop_progress(&progress_state);
3533 }
3534
3535 static void prepare_pack(int window, int depth)
3536 {
3537         struct object_entry **delta_list;
3538         uint32_t i, nr_deltas;
3539         unsigned n;
3540
3541         if (use_delta_islands)
3542                 resolve_tree_islands(the_repository, progress, &to_pack);
3543
3544         get_object_details();
3545
3546         /*
3547          * If we're locally repacking then we need to be doubly careful
3548          * from now on in order to make sure no stealth corruption gets
3549          * propagated to the new pack.  Clients receiving streamed packs
3550          * should validate everything they get anyway so no need to incur
3551          * the additional cost here in that case.
3552          */
3553         if (!pack_to_stdout)
3554                 do_check_packed_object_crc = 1;
3555
3556         if (!to_pack.nr_objects || !window || !depth)
3557                 return;
3558
3559         if (path_walk)
3560                 ll_find_deltas_by_region(to_pack.objects, to_pack.regions,
3561                                          0, to_pack.nr_regions);
3562
3563         ALLOC_ARRAY(delta_list, to_pack.nr_objects);
3564         nr_deltas = n = 0;
3565
3566         for (i = 0; i < to_pack.nr_objects; i++) {
3567                 struct object_entry *entry = to_pack.objects + i;
3568
3569                 if (!should_attempt_deltas(entry))
3570                         continue;
3571
3572                 if (!entry->preferred_base)
3573                         nr_deltas++;
3574
3575                 delta_list[n++] = entry;
3576         }
3577
3578         if (nr_deltas && n > 1) {
3579                 unsigned nr_done = 0;
3580
3581                 if (progress)
3582                         progress_state = start_progress(the_repository,
3583                                                         _("Compressing objects"),
3584                                                         nr_deltas);
3585                 QSORT(delta_list, n, type_size_sort);
3586                 ll_find_deltas(delta_list, n, window+1, depth, &nr_done);
3587                 stop_progress(&progress_state);
3588                 if (nr_done != nr_deltas)
3589                         die(_("inconsistency with delta count"));
3590         }
3591         free(delta_list);
3592 }
3593
3594 static int git_pack_config(const char *k, const char *v,
3595                            const struct config_context *ctx, void *cb)
3596 {
3597         if (!strcmp(k, "pack.window")) {
3598                 window = git_config_int(k, v, ctx->kvi);
3599                 return 0;
3600         }
3601         if (!strcmp(k, "pack.windowmemory")) {
3602                 window_memory_limit = git_config_ulong(k, v, ctx->kvi);
3603                 return 0;
3604         }
3605         if (!strcmp(k, "pack.depth")) {
3606                 depth = git_config_int(k, v, ctx->kvi);
3607                 return 0;
3608         }
3609         if (!strcmp(k, "pack.deltacachesize")) {
3610                 max_delta_cache_size = git_config_int(k, v, ctx->kvi);
3611                 return 0;
3612         }
3613         if (!strcmp(k, "pack.deltacachelimit")) {
3614                 cache_max_small_delta_size = git_config_int(k, v, ctx->kvi);
3615                 return 0;
3616         }
3617         if (!strcmp(k, "pack.writebitmaphashcache")) {
3618                 if (git_config_bool(k, v))
3619                         write_bitmap_options |= BITMAP_OPT_HASH_CACHE;
3620                 else
3621                         write_bitmap_options &= ~BITMAP_OPT_HASH_CACHE;
3622         }
3623
3624         if (!strcmp(k, "pack.writebitmaplookuptable")) {
3625                 if (git_config_bool(k, v))
3626                         write_bitmap_options |= BITMAP_OPT_LOOKUP_TABLE;
3627                 else
3628                         write_bitmap_options &= ~BITMAP_OPT_LOOKUP_TABLE;
3629         }
3630
3631         if (!strcmp(k, "pack.usebitmaps")) {
3632                 use_bitmap_index_default = git_config_bool(k, v);
3633                 return 0;
3634         }
3635         if (!strcmp(k, "pack.allowpackreuse")) {
3636                 int res = git_parse_maybe_bool_text(v);
3637                 if (res < 0) {
3638                         if (!strcasecmp(v, "single"))
3639                                 allow_pack_reuse = SINGLE_PACK_REUSE;
3640                         else if (!strcasecmp(v, "multi"))
3641                                 allow_pack_reuse = MULTI_PACK_REUSE;
3642                         else
3643                                 die(_("invalid pack.allowPackReuse value: '%s'"), v);
3644                 } else if (res) {
3645                         allow_pack_reuse = SINGLE_PACK_REUSE;
3646                 } else {
3647                         allow_pack_reuse = NO_PACK_REUSE;
3648                 }
3649                 return 0;
3650         }
3651         if (!strcmp(k, "pack.threads")) {
3652                 delta_search_threads = git_config_int(k, v, ctx->kvi);
3653                 if (delta_search_threads < 0)
3654                         die(_("invalid number of threads specified (%d)"),
3655                             delta_search_threads);
3656                 if (!HAVE_THREADS && delta_search_threads != 1) {
3657                         warning(_("no threads support, ignoring %s"), k);
3658                         delta_search_threads = 0;
3659                 }
3660                 return 0;
3661         }
3662         if (!strcmp(k, "pack.indexversion")) {
3663                 pack_idx_opts.version = git_config_int(k, v, ctx->kvi);
3664                 if (pack_idx_opts.version > 2)
3665                         die(_("bad pack.indexVersion=%"PRIu32),
3666                             pack_idx_opts.version);
3667                 return 0;
3668         }
3669         if (!strcmp(k, "pack.writereverseindex")) {
3670                 if (git_config_bool(k, v))
3671                         pack_idx_opts.flags |= WRITE_REV;
3672                 else
3673                         pack_idx_opts.flags &= ~WRITE_REV;
3674                 return 0;
3675         }
3676         if (!strcmp(k, "uploadpack.blobpackfileuri")) {
3677                 struct configured_exclusion *ex;
3678                 const char *oid_end, *pack_end;
3679                 /*
3680                  * Stores the pack hash. This is not a true object ID, but is
3681                  * of the same form.
3682                  */
3683                 struct object_id pack_hash;
3684
3685                 if (!v)
3686                         return config_error_nonbool(k);
3687
3688                 ex = xmalloc(sizeof(*ex));
3689                 if (parse_oid_hex(v, &ex->e.oid, &oid_end) ||
3690                     *oid_end != ' ' ||
3691                     parse_oid_hex(oid_end + 1, &pack_hash, &pack_end) ||
3692                     *pack_end != ' ')
3693                         die(_("value of uploadpack.blobpackfileuri must be "
3694                               "of the form '<object-hash> <pack-hash> <uri>' (got '%s')"), v);
3695                 if (oidmap_get(&configured_exclusions, &ex->e.oid))
3696                         die(_("object already configured in another "
3697                               "uploadpack.blobpackfileuri (got '%s')"), v);
3698                 ex->pack_hash_hex = xcalloc(1, pack_end - oid_end);
3699                 memcpy(ex->pack_hash_hex, oid_end + 1, pack_end - oid_end - 1);
3700                 ex->uri = xstrdup(pack_end + 1);
3701                 oidmap_put(&configured_exclusions, ex);
3702         }
3703         return git_default_config(k, v, ctx, cb);
3704 }
3705
3706 /* Counters for trace2 output when in --stdin-packs mode. */
3707 static int stdin_packs_found_nr;
3708 static int stdin_packs_hints_nr;
3709
3710 static int add_object_entry_from_pack(const struct object_id *oid,
3711                                       struct packed_git *p,
3712                                       uint32_t pos,
3713                                       void *_data)
3714 {
3715         off_t ofs;
3716         enum object_type type = OBJ_NONE;
3717
3718         display_progress(progress_state, ++nr_seen);
3719
3720         if (have_duplicate_entry(oid, 0))
3721                 return 0;
3722
3723         ofs = nth_packed_object_offset(p, pos);
3724         if (!want_object_in_pack(oid, 0, &p, &ofs))
3725                 return 0;
3726
3727         if (p) {
3728                 struct rev_info *revs = _data;
3729                 struct object_info oi = OBJECT_INFO_INIT;
3730
3731                 oi.typep = &type;
3732                 if (packed_object_info(the_repository, p, ofs, &oi) < 0) {
3733                         die(_("could not get type of object %s in pack %s"),
3734                             oid_to_hex(oid), p->pack_name);
3735                 } else if (type == OBJ_COMMIT) {
3736                         /*
3737                          * commits in included packs are used as starting points for the
3738                          * subsequent revision walk
3739                          */
3740                         add_pending_oid(revs, NULL, oid, 0);
3741                 }
3742
3743                 stdin_packs_found_nr++;
3744         }
3745
3746         create_object_entry(oid, type, 0, 0, 0, p, ofs);
3747
3748         return 0;
3749 }
3750
3751 static void show_commit_pack_hint(struct commit *commit UNUSED,
3752                                   void *data UNUSED)
3753 {
3754         /* nothing to do; commits don't have a namehash */
3755 }
3756
3757 static void show_object_pack_hint(struct object *object, const char *name,
3758                                   void *data UNUSED)
3759 {
3760         struct object_entry *oe = packlist_find(&to_pack, &object->oid);
3761         if (!oe)
3762                 return;
3763
3764         /*
3765          * Our 'to_pack' list was constructed by iterating all objects packed in
3766          * included packs, and so doesn't have a non-zero hash field that you
3767          * would typically pick up during a reachability traversal.
3768          *
3769          * Make a best-effort attempt to fill in the ->hash and ->no_try_delta
3770          * here using a now in order to perhaps improve the delta selection
3771          * process.
3772          */
3773         oe->hash = pack_name_hash_fn(name);
3774         oe->no_try_delta = name && no_try_delta(name);
3775
3776         stdin_packs_hints_nr++;
3777 }
3778
3779 static int pack_mtime_cmp(const void *_a, const void *_b)
3780 {
3781         struct packed_git *a = ((const struct string_list_item*)_a)->util;
3782         struct packed_git *b = ((const struct string_list_item*)_b)->util;
3783
3784         /*
3785          * order packs by descending mtime so that objects are laid out
3786          * roughly as newest-to-oldest
3787          */
3788         if (a->mtime < b->mtime)
3789                 return 1;
3790         else if (b->mtime < a->mtime)
3791                 return -1;
3792         else
3793                 return 0;
3794 }
3795
3796 static void read_packs_list_from_stdin(void)
3797 {
3798         struct strbuf buf = STRBUF_INIT;
3799         struct string_list include_packs = STRING_LIST_INIT_DUP;
3800         struct string_list exclude_packs = STRING_LIST_INIT_DUP;
3801         struct string_list_item *item = NULL;
3802
3803         struct packed_git *p;
3804         struct rev_info revs;
3805
3806         repo_init_revisions(the_repository, &revs, NULL);
3807         /*
3808          * Use a revision walk to fill in the namehash of objects in the include
3809          * packs. To save time, we'll avoid traversing through objects that are
3810          * in excluded packs.
3811          *
3812          * That may cause us to avoid populating all of the namehash fields of
3813          * all included objects, but our goal is best-effort, since this is only
3814          * an optimization during delta selection.
3815          */
3816         revs.no_kept_objects = 1;
3817         revs.keep_pack_cache_flags |= IN_CORE_KEEP_PACKS;
3818         revs.blob_objects = 1;
3819         revs.tree_objects = 1;
3820         revs.tag_objects = 1;
3821         revs.ignore_missing_links = 1;
3822
3823         while (strbuf_getline(&buf, stdin) != EOF) {
3824                 if (!buf.len)
3825                         continue;
3826
3827                 if (*buf.buf == '^')
3828                         string_list_append(&exclude_packs, buf.buf + 1);
3829                 else
3830                         string_list_append(&include_packs, buf.buf);
3831
3832                 strbuf_reset(&buf);
3833         }
3834
3835         string_list_sort(&include_packs);
3836         string_list_remove_duplicates(&include_packs, 0);
3837         string_list_sort(&exclude_packs);
3838         string_list_remove_duplicates(&exclude_packs, 0);
3839
3840         for (p = get_all_packs(the_repository); p; p = p->next) {
3841                 const char *pack_name = pack_basename(p);
3842
3843                 if ((item = string_list_lookup(&include_packs, pack_name)))
3844                         item->util = p;
3845                 if ((item = string_list_lookup(&exclude_packs, pack_name)))
3846                         item->util = p;
3847         }
3848
3849         /*
3850          * Arguments we got on stdin may not even be packs. First
3851          * check that to avoid segfaulting later on in
3852          * e.g. pack_mtime_cmp(), excluded packs are handled below.
3853          *
3854          * Since we first parsed our STDIN and then sorted the input
3855          * lines the pack we error on will be whatever line happens to
3856          * sort first. This is lazy, it's enough that we report one
3857          * bad case here, we don't need to report the first/last one,
3858          * or all of them.
3859          */
3860         for_each_string_list_item(item, &include_packs) {
3861                 struct packed_git *p = item->util;
3862                 if (!p)
3863                         die(_("could not find pack '%s'"), item->string);
3864                 if (!is_pack_valid(p))
3865                         die(_("packfile %s cannot be accessed"), p->pack_name);
3866         }
3867
3868         /*
3869          * Then, handle all of the excluded packs, marking them as
3870          * kept in-core so that later calls to add_object_entry()
3871          * discards any objects that are also found in excluded packs.
3872          */
3873         for_each_string_list_item(item, &exclude_packs) {
3874                 struct packed_git *p = item->util;
3875                 if (!p)
3876                         die(_("could not find pack '%s'"), item->string);
3877                 p->pack_keep_in_core = 1;
3878         }
3879
3880         /*
3881          * Order packs by ascending mtime; use QSORT directly to access the
3882          * string_list_item's ->util pointer, which string_list_sort() does not
3883          * provide.
3884          */
3885         QSORT(include_packs.items, include_packs.nr, pack_mtime_cmp);
3886
3887         for_each_string_list_item(item, &include_packs) {
3888                 struct packed_git *p = item->util;
3889                 for_each_object_in_pack(p,
3890                                         add_object_entry_from_pack,
3891                                         &revs,
3892                                         FOR_EACH_OBJECT_PACK_ORDER);
3893         }
3894
3895         if (prepare_revision_walk(&revs))
3896                 die(_("revision walk setup failed"));
3897         traverse_commit_list(&revs,
3898                              show_commit_pack_hint,
3899                              show_object_pack_hint,
3900                              NULL);
3901
3902         trace2_data_intmax("pack-objects", the_repository, "stdin_packs_found",
3903                            stdin_packs_found_nr);
3904         trace2_data_intmax("pack-objects", the_repository, "stdin_packs_hints",
3905                            stdin_packs_hints_nr);
3906
3907         strbuf_release(&buf);
3908         string_list_clear(&include_packs, 0);
3909         string_list_clear(&exclude_packs, 0);
3910 }
3911
3912 static void add_cruft_object_entry(const struct object_id *oid, enum object_type type,
3913                                    struct packed_git *pack, off_t offset,
3914                                    const char *name, uint32_t mtime)
3915 {
3916         struct object_entry *entry;
3917
3918         display_progress(progress_state, ++nr_seen);
3919
3920         entry = packlist_find(&to_pack, oid);
3921         if (entry) {
3922                 if (name) {
3923                         entry->hash = pack_name_hash_fn(name);
3924                         entry->no_try_delta = no_try_delta(name);
3925                 }
3926         } else {
3927                 if (!want_object_in_pack_mtime(oid, 0, &pack, &offset, mtime))
3928                         return;
3929                 if (!pack && type == OBJ_BLOB && !has_loose_object(oid)) {
3930                         /*
3931                          * If a traversed tree has a missing blob then we want
3932                          * to avoid adding that missing object to our pack.
3933                          *
3934                          * This only applies to missing blobs, not trees,
3935                          * because the traversal needs to parse sub-trees but
3936                          * not blobs.
3937                          *
3938                          * Note we only perform this check when we couldn't
3939                          * already find the object in a pack, so we're really
3940                          * limited to "ensure non-tip blobs which don't exist in
3941                          * packs do exist via loose objects". Confused?
3942                          */
3943                         return;
3944                 }
3945
3946                 entry = create_object_entry(oid, type, pack_name_hash_fn(name),
3947                                             0, name && no_try_delta(name),
3948                                             pack, offset);
3949         }
3950
3951         if (mtime > oe_cruft_mtime(&to_pack, entry))
3952                 oe_set_cruft_mtime(&to_pack, entry, mtime);
3953         return;
3954 }
3955
3956 static void show_cruft_object(struct object *obj, const char *name, void *data UNUSED)
3957 {
3958         /*
3959          * if we did not record it earlier, it's at least as old as our
3960          * expiration value. Rather than find it exactly, just use that
3961          * value.  This may bump it forward from its real mtime, but it
3962          * will still be "too old" next time we run with the same
3963          * expiration.
3964          *
3965          * if obj does appear in the packing list, this call is a noop (or may
3966          * set the namehash).
3967          */
3968         add_cruft_object_entry(&obj->oid, obj->type, NULL, 0, name, cruft_expiration);
3969 }
3970
3971 static void show_cruft_commit(struct commit *commit, void *data)
3972 {
3973         show_cruft_object((struct object*)commit, NULL, data);
3974 }
3975
3976 static int cruft_include_check_obj(struct object *obj, void *data UNUSED)
3977 {
3978         return !has_object_kept_pack(to_pack.repo, &obj->oid, IN_CORE_KEEP_PACKS);
3979 }
3980
3981 static int cruft_include_check(struct commit *commit, void *data)
3982 {
3983         return cruft_include_check_obj((struct object*)commit, data);
3984 }
3985
3986 static void set_cruft_mtime(const struct object *object,
3987                             struct packed_git *pack,
3988                             off_t offset, time_t mtime)
3989 {
3990         add_cruft_object_entry(&object->oid, object->type, pack, offset, NULL,
3991                                mtime);
3992 }
3993
3994 static void mark_pack_kept_in_core(struct string_list *packs, unsigned keep)
3995 {
3996         struct string_list_item *item = NULL;
3997         for_each_string_list_item(item, packs) {
3998                 struct packed_git *p = item->util;
3999                 if (!p)
4000                         die(_("could not find pack '%s'"), item->string);
4001                 if (p->is_cruft && keep)
4002                         ignore_packed_keep_in_core_has_cruft = 1;
4003                 p->pack_keep_in_core = keep;
4004         }
4005 }
4006
/*
 * Forward declarations: enumerate_cruft_objects() below calls both of
 * these before their definitions appear later in the file.
 */
static void add_unreachable_loose_objects(void);
static void add_objects_in_unpacked_packs(void);
4009
4010 static void enumerate_cruft_objects(void)
4011 {
4012         if (progress)
4013                 progress_state = start_progress(the_repository,
4014                                                 _("Enumerating cruft objects"), 0);
4015
4016         add_objects_in_unpacked_packs();
4017         add_unreachable_loose_objects();
4018
4019         stop_progress(&progress_state);
4020 }
4021
4022 static void enumerate_and_traverse_cruft_objects(struct string_list *fresh_packs)
4023 {
4024         struct packed_git *p;
4025         struct rev_info revs;
4026         int ret;
4027
4028         repo_init_revisions(the_repository, &revs, NULL);
4029
4030         revs.tag_objects = 1;
4031         revs.tree_objects = 1;
4032         revs.blob_objects = 1;
4033
4034         revs.include_check = cruft_include_check;
4035         revs.include_check_obj = cruft_include_check_obj;
4036
4037         revs.ignore_missing_links = 1;
4038
4039         if (progress)
4040                 progress_state = start_progress(the_repository,
4041                                                 _("Enumerating cruft objects"), 0);
4042         ret = add_unseen_recent_objects_to_traversal(&revs, cruft_expiration,
4043                                                      set_cruft_mtime, 1);
4044         stop_progress(&progress_state);
4045
4046         if (ret)
4047                 die(_("unable to add cruft objects"));
4048
4049         /*
4050          * Re-mark only the fresh packs as kept so that objects in
4051          * unknown packs do not halt the reachability traversal early.
4052          */
4053         for (p = get_all_packs(the_repository); p; p = p->next)
4054                 p->pack_keep_in_core = 0;
4055         mark_pack_kept_in_core(fresh_packs, 1);
4056
4057         if (prepare_revision_walk(&revs))
4058                 die(_("revision walk setup failed"));
4059         if (progress)
4060                 progress_state = start_progress(the_repository,
4061                                                 _("Traversing cruft objects"), 0);
4062         nr_seen = 0;
4063         traverse_commit_list(&revs, show_cruft_commit, show_cruft_object, NULL);
4064
4065         stop_progress(&progress_state);
4066 }
4067
4068 static void read_cruft_objects(void)
4069 {
4070         struct strbuf buf = STRBUF_INIT;
4071         struct string_list discard_packs = STRING_LIST_INIT_DUP;
4072         struct string_list fresh_packs = STRING_LIST_INIT_DUP;
4073         struct packed_git *p;
4074
4075         ignore_packed_keep_in_core = 1;
4076
4077         while (strbuf_getline(&buf, stdin) != EOF) {
4078                 if (!buf.len)
4079                         continue;
4080
4081                 if (*buf.buf == '-')
4082                         string_list_append(&discard_packs, buf.buf + 1);
4083                 else
4084                         string_list_append(&fresh_packs, buf.buf);
4085         }
4086
4087         string_list_sort(&discard_packs);
4088         string_list_sort(&fresh_packs);
4089
4090         for (p = get_all_packs(the_repository); p; p = p->next) {
4091                 const char *pack_name = pack_basename(p);
4092                 struct string_list_item *item;
4093
4094                 item = string_list_lookup(&fresh_packs, pack_name);
4095                 if (!item)
4096                         item = string_list_lookup(&discard_packs, pack_name);
4097
4098                 if (item) {
4099                         item->util = p;
4100                 } else {
4101                         /*
4102                          * This pack wasn't mentioned in either the "fresh" or
4103                          * "discard" list, so the caller didn't know about it.
4104                          *
4105                          * Mark it as kept so that its objects are ignored by
4106                          * add_unseen_recent_objects_to_traversal(). We'll
4107                          * unmark it before starting the traversal so it doesn't
4108                          * halt the traversal early.
4109                          */
4110                         p->pack_keep_in_core = 1;
4111                 }
4112         }
4113
4114         mark_pack_kept_in_core(&fresh_packs, 1);
4115         mark_pack_kept_in_core(&discard_packs, 0);
4116
4117         if (cruft_expiration)
4118                 enumerate_and_traverse_cruft_objects(&fresh_packs);
4119         else
4120                 enumerate_cruft_objects();
4121
4122         strbuf_release(&buf);
4123         string_list_clear(&discard_packs, 0);
4124         string_list_clear(&fresh_packs, 0);
4125 }
4126
4127 static void read_object_list_from_stdin(void)
4128 {
4129         char line[GIT_MAX_HEXSZ + 1 + PATH_MAX + 2];
4130         struct object_id oid;
4131         const char *p;
4132
4133         for (;;) {
4134                 if (!fgets(line, sizeof(line), stdin)) {
4135                         if (feof(stdin))
4136                                 break;
4137                         if (!ferror(stdin))
4138                                 BUG("fgets returned NULL, not EOF, not error!");
4139                         if (errno != EINTR)
4140                                 die_errno("fgets");
4141                         clearerr(stdin);
4142                         continue;
4143                 }
4144                 if (line[0] == '-') {
4145                         if (get_oid_hex(line+1, &oid))
4146                                 die(_("expected edge object ID, got garbage:\n %s"),
4147                                     line);
4148                         add_preferred_base(&oid);
4149                         continue;
4150                 }
4151                 if (parse_oid_hex(line, &oid, &p))
4152                         die(_("expected object ID, got garbage:\n %s"), line);
4153
4154                 add_preferred_base_object(p + 1);
4155                 add_object_entry(&oid, OBJ_NONE, p + 1, 0);
4156         }
4157 }
4158
4159 static void show_commit(struct commit *commit, void *data UNUSED)
4160 {
4161         add_object_entry(&commit->object.oid, OBJ_COMMIT, NULL, 0);
4162
4163         if (write_bitmap_index)
4164                 index_commit_for_bitmap(commit);
4165
4166         if (use_delta_islands)
4167                 propagate_island_marks(the_repository, commit);
4168 }
4169
4170 static void show_object(struct object *obj, const char *name,
4171                         void *data UNUSED)
4172 {
4173         add_preferred_base_object(name);
4174         add_object_entry(&obj->oid, obj->type, name, 0);
4175
4176         if (use_delta_islands) {
4177                 const char *p;
4178                 unsigned depth;
4179                 struct object_entry *ent;
4180
4181                 /* the empty string is a root tree, which is depth 0 */
4182                 depth = *name ? 1 : 0;
4183                 for (p = strchr(name, '/'); p; p = strchr(p + 1, '/'))
4184                         depth++;
4185
4186                 ent = packlist_find(&to_pack, &obj->oid);
4187                 if (ent && depth > oe_tree_depth(&to_pack, ent))
4188                         oe_set_tree_depth(&to_pack, ent, depth);
4189         }
4190 }
4191
4192 static void show_object__ma_allow_any(struct object *obj, const char *name, void *data)
4193 {
4194         assert(arg_missing_action == MA_ALLOW_ANY);
4195
4196         /*
4197          * Quietly ignore ALL missing objects.  This avoids problems with
4198          * staging them now and getting an odd error later.
4199          */
4200         if (!has_object(the_repository, &obj->oid, 0))
4201                 return;
4202
4203         show_object(obj, name, data);
4204 }
4205
4206 static void show_object__ma_allow_promisor(struct object *obj, const char *name, void *data)
4207 {
4208         assert(arg_missing_action == MA_ALLOW_PROMISOR);
4209
4210         /*
4211          * Quietly ignore EXPECTED missing objects.  This avoids problems with
4212          * staging them now and getting an odd error later.
4213          */
4214         if (!has_object(the_repository, &obj->oid, 0) &&
4215             is_promisor_object(to_pack.repo, &obj->oid))
4216                 return;
4217
4218         show_object(obj, name, data);
4219 }
4220
4221 static int option_parse_missing_action(const struct option *opt UNUSED,
4222                                        const char *arg, int unset)
4223 {
4224         assert(arg);
4225         assert(!unset);
4226
4227         if (!strcmp(arg, "error")) {
4228                 arg_missing_action = MA_ERROR;
4229                 fn_show_object = show_object;
4230                 return 0;
4231         }
4232
4233         if (!strcmp(arg, "allow-any")) {
4234                 arg_missing_action = MA_ALLOW_ANY;
4235                 fetch_if_missing = 0;
4236                 fn_show_object = show_object__ma_allow_any;
4237                 return 0;
4238         }
4239
4240         if (!strcmp(arg, "allow-promisor")) {
4241                 arg_missing_action = MA_ALLOW_PROMISOR;
4242                 fetch_if_missing = 0;
4243                 fn_show_object = show_object__ma_allow_promisor;
4244                 return 0;
4245         }
4246
4247         die(_("invalid value for '%s': '%s'"), "--missing", arg);
4248         return 0;
4249 }
4250
4251 static void show_edge(struct commit *commit)
4252 {
4253         add_preferred_base(&commit->object.oid);
4254 }
4255
4256 static int add_object_in_unpacked_pack(const struct object_id *oid,
4257                                        struct packed_git *pack,
4258                                        uint32_t pos,
4259                                        void *data UNUSED)
4260 {
4261         if (cruft) {
4262                 off_t offset;
4263                 time_t mtime;
4264
4265                 if (pack->is_cruft) {
4266                         if (load_pack_mtimes(pack) < 0)
4267                                 die(_("could not load cruft pack .mtimes"));
4268                         mtime = nth_packed_mtime(pack, pos);
4269                 } else {
4270                         mtime = pack->mtime;
4271                 }
4272                 offset = nth_packed_object_offset(pack, pos);
4273
4274                 add_cruft_object_entry(oid, OBJ_NONE, pack, offset,
4275                                        NULL, mtime);
4276         } else {
4277                 add_object_entry(oid, OBJ_NONE, "", 0);
4278         }
4279         return 0;
4280 }
4281
4282 static void add_objects_in_unpacked_packs(void)
4283 {
4284         if (for_each_packed_object(to_pack.repo,
4285                                    add_object_in_unpacked_pack,
4286                                    NULL,
4287                                    FOR_EACH_OBJECT_PACK_ORDER |
4288                                    FOR_EACH_OBJECT_LOCAL_ONLY |
4289                                    FOR_EACH_OBJECT_SKIP_IN_CORE_KEPT_PACKS |
4290                                    FOR_EACH_OBJECT_SKIP_ON_DISK_KEPT_PACKS))
4291                 die(_("cannot open pack index"));
4292 }
4293
4294 static int add_loose_object(const struct object_id *oid, const char *path,
4295                             void *data UNUSED)
4296 {
4297         enum object_type type = oid_object_info(the_repository, oid, NULL);
4298
4299         if (type < 0) {
4300                 warning(_("loose object at %s could not be examined"), path);
4301                 return 0;
4302         }
4303
4304         if (cruft) {
4305                 struct stat st;
4306                 if (stat(path, &st) < 0) {
4307                         if (errno == ENOENT)
4308                                 return 0;
4309                         return error_errno("unable to stat %s", oid_to_hex(oid));
4310                 }
4311
4312                 add_cruft_object_entry(oid, type, NULL, 0, NULL,
4313                                        st.st_mtime);
4314         } else {
4315                 add_object_entry(oid, type, "", 0);
4316         }
4317         return 0;
4318 }
4319
4320 /*
4321  * We actually don't even have to worry about reachability here.
4322  * add_object_entry will weed out duplicates, so we just add every
4323  * loose object we find.
4324  */
4325 static void add_unreachable_loose_objects(void)
4326 {
4327         for_each_loose_file_in_objdir(repo_get_object_directory(the_repository),
4328                                       add_loose_object,
4329                                       NULL, NULL, NULL);
4330 }
4331
4332 static int has_sha1_pack_kept_or_nonlocal(const struct object_id *oid)
4333 {
4334         static struct packed_git *last_found = (void *)1;
4335         struct packed_git *p;
4336
4337         p = (last_found != (void *)1) ? last_found :
4338                                         get_all_packs(the_repository);
4339
4340         while (p) {
4341                 if ((!p->pack_local || p->pack_keep ||
4342                                 p->pack_keep_in_core) &&
4343                         find_pack_entry_one(oid, p)) {
4344                         last_found = p;
4345                         return 1;
4346                 }
4347                 if (p == last_found)
4348                         p = get_all_packs(the_repository);
4349                 else
4350                         p = p->next;
4351                 if (p == last_found)
4352                         p = p->next;
4353         }
4354         return 0;
4355 }
4356
/*
 * Store a list of object IDs that should not be discarded
 * because they were either written too recently, or are
 * reachable from another object that was.
 *
 * This is filled by get_object_list.
 */
static struct oid_array recent_objects;
4365
4366 static int loosened_object_can_be_discarded(const struct object_id *oid,
4367                                             timestamp_t mtime)
4368 {
4369         if (!unpack_unreachable_expiration)
4370                 return 0;
4371         if (mtime > unpack_unreachable_expiration)
4372                 return 0;
4373         if (oid_array_lookup(&recent_objects, oid) >= 0)
4374                 return 0;
4375         return 1;
4376 }
4377
4378 static void loosen_unused_packed_objects(void)
4379 {
4380         struct packed_git *p;
4381         uint32_t i;
4382         uint32_t loosened_objects_nr = 0;
4383         struct object_id oid;
4384
4385         for (p = get_all_packs(the_repository); p; p = p->next) {
4386                 if (!p->pack_local || p->pack_keep || p->pack_keep_in_core)
4387                         continue;
4388
4389                 if (open_pack_index(p))
4390                         die(_("cannot open pack index"));
4391
4392                 for (i = 0; i < p->num_objects; i++) {
4393                         nth_packed_object_id(&oid, p, i);
4394                         if (!packlist_find(&to_pack, &oid) &&
4395                             !has_sha1_pack_kept_or_nonlocal(&oid) &&
4396                             !loosened_object_can_be_discarded(&oid, p->mtime)) {
4397                                 if (force_object_loose(&oid, p->mtime))
4398                                         die(_("unable to force loose object"));
4399                                 loosened_objects_nr++;
4400                         }
4401                 }
4402         }
4403
4404         trace2_data_intmax("pack-objects", the_repository,
4405                            "loosen_unused_packed_objects/loosened", loosened_objects_nr);
4406 }
4407
4408 /*
4409  * This tracks any options which pack-reuse code expects to be on, or which a
4410  * reader of the pack might not understand, and which would therefore prevent
4411  * blind reuse of what we have on disk.
4412  */
4413 static int pack_options_allow_reuse(void)
4414 {
4415         return allow_pack_reuse != NO_PACK_REUSE &&
4416                pack_to_stdout &&
4417                !ignore_packed_keep_on_disk &&
4418                !ignore_packed_keep_in_core &&
4419                (!local || !have_non_local_packs) &&
4420                !incremental;
4421 }
4422
4423 static int get_object_list_from_bitmap(struct rev_info *revs)
4424 {
4425         if (!(bitmap_git = prepare_bitmap_walk(revs, 0)))
4426                 return -1;
4427
4428         /*
4429          * For now, force the name-hash version to be 1 since that
4430          * is the version implied by the bitmap format. Later, the
4431          * format can include this version explicitly in its format,
4432          * allowing readers to know the version that was used during
4433          * the bitmap write.
4434          */
4435         name_hash_version = 1;
4436
4437         if (pack_options_allow_reuse())
4438                 reuse_partial_packfile_from_bitmap(bitmap_git,
4439                                                    &reuse_packfiles,
4440                                                    &reuse_packfiles_nr,
4441                                                    &reuse_packfile_bitmap,
4442                                                    allow_pack_reuse == MULTI_PACK_REUSE);
4443
4444         if (reuse_packfiles) {
4445                 reuse_packfile_objects = bitmap_popcount(reuse_packfile_bitmap);
4446                 if (!reuse_packfile_objects)
4447                         BUG("expected non-empty reuse bitmap");
4448
4449                 nr_result += reuse_packfile_objects;
4450                 nr_seen += reuse_packfile_objects;
4451                 display_progress(progress_state, nr_seen);
4452         }
4453
4454         traverse_bitmap_commit_list(bitmap_git, revs,
4455                                     &add_object_entry_from_bitmap);
4456         return 0;
4457 }
4458
4459 static void record_recent_object(struct object *obj,
4460                                  const char *name UNUSED,
4461                                  void *data UNUSED)
4462 {
4463         oid_array_append(&recent_objects, &obj->oid);
4464 }
4465
4466 static void record_recent_commit(struct commit *commit, void *data UNUSED)
4467 {
4468         oid_array_append(&recent_objects, &commit->object.oid);
4469 }
4470
4471 static int mark_bitmap_preferred_tip(const char *refname,
4472                                      const char *referent UNUSED,
4473                                      const struct object_id *oid,
4474                                      int flags UNUSED,
4475                                      void *data UNUSED)
4476 {
4477         struct object_id peeled;
4478         struct object *object;
4479
4480         if (!peel_iterated_oid(the_repository, oid, &peeled))
4481                 oid = &peeled;
4482
4483         object = parse_object_or_die(the_repository, oid, refname);
4484         if (object->type == OBJ_COMMIT)
4485                 object->flags |= NEEDS_BITMAP;
4486
4487         return 0;
4488 }
4489
4490 static void mark_bitmap_preferred_tips(void)
4491 {
4492         struct string_list_item *item;
4493         const struct string_list *preferred_tips;
4494
4495         preferred_tips = bitmap_preferred_tips(the_repository);
4496         if (!preferred_tips)
4497                 return;
4498
4499         for_each_string_list_item(item, preferred_tips) {
4500                 refs_for_each_ref_in(get_main_ref_store(the_repository),
4501                                      item->string, mark_bitmap_preferred_tip,
4502                                      NULL);
4503         }
4504 }
4505
4506 static inline int is_oid_uninteresting(struct repository *repo,
4507                                        struct object_id *oid)
4508 {
4509         struct object *o = lookup_object(repo, oid);
4510         return !o || (o->flags & UNINTERESTING);
4511 }
4512
4513 static int add_objects_by_path(const char *path,
4514                                struct oid_array *oids,
4515                                enum object_type type,
4516                                void *data)
4517 {
4518         size_t oe_start = to_pack.nr_objects;
4519         size_t oe_end;
4520         unsigned int *processed = data;
4521
4522         /*
4523          * First, add all objects to the packing data, including the ones
4524          * marked UNINTERESTING (translated to 'exclude') as they can be
4525          * used as delta bases.
4526          */
4527         for (size_t i = 0; i < oids->nr; i++) {
4528                 int exclude;
4529                 struct object_info oi = OBJECT_INFO_INIT;
4530                 struct object_id *oid = &oids->oid[i];
4531
4532                 /* Skip objects that do not exist locally. */
4533                 if ((exclude_promisor_objects || arg_missing_action != MA_ERROR) &&
4534                     oid_object_info_extended(the_repository, oid, &oi,
4535                                              OBJECT_INFO_FOR_PREFETCH) < 0)
4536                         continue;
4537
4538                 exclude = is_oid_uninteresting(the_repository, oid);
4539
4540                 if (exclude && !thin)
4541                         continue;
4542
4543                 add_object_entry(oid, type, path, exclude);
4544         }
4545
4546         oe_end = to_pack.nr_objects;
4547
4548         /* We can skip delta calculations if it is a no-op. */
4549         if (oe_end == oe_start || !window)
4550                 return 0;
4551
4552         ALLOC_GROW(to_pack.regions,
4553                    to_pack.nr_regions + 1,
4554                    to_pack.nr_regions_alloc);
4555
4556         to_pack.regions[to_pack.nr_regions].start = oe_start;
4557         to_pack.regions[to_pack.nr_regions].nr = oe_end - oe_start;
4558         to_pack.nr_regions++;
4559
4560         *processed += oids->nr;
4561         display_progress(progress_state, *processed);
4562
4563         return 0;
4564 }
4565
4566 static void get_object_list_path_walk(struct rev_info *revs)
4567 {
4568         struct path_walk_info info = PATH_WALK_INFO_INIT;
4569         unsigned int processed = 0;
4570         int result;
4571
4572         info.revs = revs;
4573         info.path_fn = add_objects_by_path;
4574         info.path_fn_data = &processed;
4575
4576         /*
4577          * Allow the --[no-]sparse option to be interesting here, if only
4578          * for testing purposes. Paths with no interesting objects will not
4579          * contribute to the resulting pack, but only create noisy preferred
4580          * base objects.
4581          */
4582         info.prune_all_uninteresting = sparse;
4583         info.edge_aggressive = shallow;
4584
4585         trace2_region_enter("pack-objects", "path-walk", revs->repo);
4586         result = walk_objects_by_path(&info);
4587         trace2_region_leave("pack-objects", "path-walk", revs->repo);
4588
4589         if (result)
4590                 die(_("failed to pack objects via path-walk"));
4591 }
4592
4593 static void get_object_list(struct rev_info *revs, int ac, const char **av)
4594 {
4595         struct setup_revision_opt s_r_opt = {
4596                 .allow_exclude_promisor_objects = 1,
4597         };
4598         char line[1000];
4599         int flags = 0;
4600         int save_warning;
4601
4602         save_commit_buffer = 0;
4603         setup_revisions(ac, av, revs, &s_r_opt);
4604
4605         /* make sure shallows are read */
4606         is_repository_shallow(the_repository);
4607
4608         save_warning = warn_on_object_refname_ambiguity;
4609         warn_on_object_refname_ambiguity = 0;
4610
4611         while (fgets(line, sizeof(line), stdin) != NULL) {
4612                 int len = strlen(line);
4613                 if (len && line[len - 1] == '\n')
4614                         line[--len] = 0;
4615                 if (!len)
4616                         break;
4617                 if (*line == '-') {
4618                         if (!strcmp(line, "--not")) {
4619                                 flags ^= UNINTERESTING;
4620                                 write_bitmap_index = 0;
4621                                 continue;
4622                         }
4623                         if (starts_with(line, "--shallow ")) {
4624                                 struct object_id oid;
4625                                 if (get_oid_hex(line + 10, &oid))
4626                                         die("not an object name '%s'", line + 10);
4627                                 register_shallow(the_repository, &oid);
4628                                 use_bitmap_index = 0;
4629                                 continue;
4630                         }
4631                         die(_("not a rev '%s'"), line);
4632                 }
4633                 if (handle_revision_arg(line, revs, flags, REVARG_CANNOT_BE_FILENAME))
4634                         die(_("bad revision '%s'"), line);
4635         }
4636
4637         warn_on_object_refname_ambiguity = save_warning;
4638
4639         if (use_bitmap_index && !get_object_list_from_bitmap(revs))
4640                 return;
4641
4642         if (use_delta_islands)
4643                 load_delta_islands(the_repository, progress);
4644
4645         if (write_bitmap_index)
4646                 mark_bitmap_preferred_tips();
4647
4648         if (!fn_show_object)
4649                 fn_show_object = show_object;
4650
4651         if (path_walk) {
4652                 get_object_list_path_walk(revs);
4653         } else {
4654                 if (prepare_revision_walk(revs))
4655                         die(_("revision walk setup failed"));
4656                 mark_edges_uninteresting(revs, show_edge, sparse);
4657                 traverse_commit_list(revs,
4658                                 show_commit, fn_show_object,
4659                                 NULL);
4660         }
4661
4662         if (unpack_unreachable_expiration) {
4663                 revs->ignore_missing_links = 1;
4664                 if (add_unseen_recent_objects_to_traversal(revs,
4665                                 unpack_unreachable_expiration, NULL, 0))
4666                         die(_("unable to add recent objects"));
4667                 if (prepare_revision_walk(revs))
4668                         die(_("revision walk setup failed"));
4669                 traverse_commit_list(revs, record_recent_commit,
4670                                      record_recent_object, NULL);
4671         }
4672
4673         if (keep_unreachable)
4674                 add_objects_in_unpacked_packs();
4675         if (pack_loose_unreachable)
4676                 add_unreachable_loose_objects();
4677         if (unpack_unreachable)
4678                 loosen_unused_packed_objects();
4679
4680         oid_array_clear(&recent_objects);
4681 }
4682
4683 static void add_extra_kept_packs(const struct string_list *names)
4684 {
4685         struct packed_git *p;
4686
4687         if (!names->nr)
4688                 return;
4689
4690         for (p = get_all_packs(the_repository); p; p = p->next) {
4691                 const char *name = basename(p->pack_name);
4692                 int i;
4693
4694                 if (!p->pack_local)
4695                         continue;
4696
4697                 for (i = 0; i < names->nr; i++)
4698                         if (!fspathcmp(name, names->items[i].string))
4699                                 break;
4700
4701                 if (i < names->nr) {
4702                         p->pack_keep_in_core = 1;
4703                         ignore_packed_keep_in_core = 1;
4704                         continue;
4705                 }
4706         }
4707 }
4708
4709 static int option_parse_quiet(const struct option *opt, const char *arg,
4710                               int unset)
4711 {
4712         int *val = opt->value;
4713
4714         BUG_ON_OPT_ARG(arg);
4715
4716         if (!unset)
4717                 *val = 0;
4718         else if (!*val)
4719                 *val = 1;
4720         return 0;
4721 }
4722
4723 static int option_parse_index_version(const struct option *opt,
4724                                       const char *arg, int unset)
4725 {
4726         struct pack_idx_option *popts = opt->value;
4727         char *c;
4728         const char *val = arg;
4729
4730         BUG_ON_OPT_NEG(unset);
4731
4732         popts->version = strtoul(val, &c, 10);
4733         if (popts->version > 2)
4734                 die(_("unsupported index version %s"), val);
4735         if (*c == ',' && c[1])
4736                 popts->off32_limit = strtoul(c+1, &c, 0);
4737         if (*c || popts->off32_limit & 0x80000000)
4738                 die(_("bad index version '%s'"), val);
4739         return 0;
4740 }
4741
4742 static int option_parse_unpack_unreachable(const struct option *opt UNUSED,
4743                                            const char *arg, int unset)
4744 {
4745         if (unset) {
4746                 unpack_unreachable = 0;
4747                 unpack_unreachable_expiration = 0;
4748         }
4749         else {
4750                 unpack_unreachable = 1;
4751                 if (arg)
4752                         unpack_unreachable_expiration = approxidate(arg);
4753         }
4754         return 0;
4755 }
4756
4757 static int option_parse_cruft_expiration(const struct option *opt UNUSED,
4758                                          const char *arg, int unset)
4759 {
4760         if (unset) {
4761                 cruft = 0;
4762                 cruft_expiration = 0;
4763         } else {
4764                 cruft = 1;
4765                 if (arg)
4766                         cruft_expiration = approxidate(arg);
4767         }
4768         return 0;
4769 }
4770
4771 static int is_not_in_promisor_pack_obj(struct object *obj, void *data UNUSED)
4772 {
4773         struct object_info info = OBJECT_INFO_INIT;
4774         if (oid_object_info_extended(the_repository, &obj->oid, &info, 0))
4775                 BUG("should_include_obj should only be called on existing objects");
4776         return info.whence != OI_PACKED || !info.u.packed.pack->pack_promisor;
4777 }
4778
/*
 * Commit-typed wrapper around is_not_in_promisor_pack_obj(): the commit is
 * passed on as a generic object together with `data`.
 */
static int is_not_in_promisor_pack(struct commit *commit, void *data)
{
        struct object *obj = (struct object *) commit;

        return is_not_in_promisor_pack_obj(obj, data);
}
4782
/*
 * Entry point of the pack-objects builtin.
 *
 * In order, this function:
 *   1. loads defaults from the environment, repository settings and config,
 *      then parses the command line into the file-level option variables;
 *   2. normalizes option values and dies on combinations that cannot work;
 *   3. enumerates the objects to pack via one of read_packs_list_from_stdin(),
 *      read_cruft_objects(), read_object_list_from_stdin() or
 *      get_object_list() (a revision walk fed with the `rp` arguments);
 *   4. runs prepare_pack() when there are objects, writes the pack, and
 *      emits a summary line (if progress is on) plus trace2 statistics.
 *
 * Wherever the function returns normally, it returns 0.
 */
4783 int cmd_pack_objects(int argc,
4784                      const char **argv,
4785                      const char *prefix,
4786                      struct repository *repo UNUSED)
4787 {
4788         int use_internal_rev_list = 0;
4789         int all_progress_implied = 0;
4790         struct strvec rp = STRVEC_INIT;
4791         int rev_list_unpacked = 0, rev_list_all = 0, rev_list_reflog = 0;
4792         int rev_list_index = 0;
4793         int stdin_packs = 0;
4794         struct string_list keep_pack_list = STRING_LIST_INIT_NODUP;
4795         struct list_objects_filter_options filter_options =
4796                 LIST_OBJECTS_FILTER_INIT;
4797
4798         struct option pack_objects_options[] = {
4799                 OPT_CALLBACK_F('q', "quiet", &progress, NULL,
4800                                N_("do not show progress meter"),
4801                                PARSE_OPT_NOARG, option_parse_quiet),
4802                 OPT_SET_INT(0, "progress", &progress,
4803                             N_("show progress meter"), 1),
4804                 OPT_SET_INT(0, "all-progress", &progress,
4805                             N_("show progress meter during object writing phase"), 2),
4806                 OPT_BOOL(0, "all-progress-implied",
4807                          &all_progress_implied,
4808                          N_("similar to --all-progress when progress meter is shown")),
4809                 OPT_CALLBACK_F(0, "index-version", &pack_idx_opts, N_("<version>[,<offset>]"),
4810                   N_("write the pack index file in the specified idx format version"),
4811                   PARSE_OPT_NONEG, option_parse_index_version),
4812                 OPT_UNSIGNED(0, "max-pack-size", &pack_size_limit,
4813                              N_("maximum size of each output pack file")),
4814                 OPT_BOOL(0, "local", &local,
4815                          N_("ignore borrowed objects from alternate object store")),
4816                 OPT_BOOL(0, "incremental", &incremental,
4817                          N_("ignore packed objects")),
4818                 OPT_INTEGER(0, "window", &window,
4819                             N_("limit pack window by objects")),
4820                 OPT_UNSIGNED(0, "window-memory", &window_memory_limit,
4821                              N_("limit pack window by memory in addition to object limit")),
4822                 OPT_INTEGER(0, "depth", &depth,
4823                             N_("maximum length of delta chain allowed in the resulting pack")),
4824                 OPT_BOOL(0, "reuse-delta", &reuse_delta,
4825                          N_("reuse existing deltas")),
4826                 OPT_BOOL(0, "reuse-object", &reuse_object,
4827                          N_("reuse existing objects")),
4828                 OPT_BOOL(0, "delta-base-offset", &allow_ofs_delta,
4829                          N_("use OFS_DELTA objects")),
4830                 OPT_INTEGER(0, "threads", &delta_search_threads,
4831                             N_("use threads when searching for best delta matches")),
4832                 OPT_BOOL(0, "non-empty", &non_empty,
4833                          N_("do not create an empty pack output")),
4834                 OPT_BOOL(0, "revs", &use_internal_rev_list,
4835                          N_("read revision arguments from standard input")),
4836                 OPT_SET_INT_F(0, "unpacked", &rev_list_unpacked,
4837                               N_("limit the objects to those that are not yet packed"),
4838                               1, PARSE_OPT_NONEG),
4839                 OPT_SET_INT_F(0, "all", &rev_list_all,
4840                               N_("include objects reachable from any reference"),
4841                               1, PARSE_OPT_NONEG),
4842                 OPT_SET_INT_F(0, "reflog", &rev_list_reflog,
4843                               N_("include objects referred by reflog entries"),
4844                               1, PARSE_OPT_NONEG),
4845                 OPT_SET_INT_F(0, "indexed-objects", &rev_list_index,
4846                               N_("include objects referred to by the index"),
4847                               1, PARSE_OPT_NONEG),
4848                 OPT_BOOL(0, "stdin-packs", &stdin_packs,
4849                          N_("read packs from stdin")),
4850                 OPT_BOOL(0, "stdout", &pack_to_stdout,
4851                          N_("output pack to stdout")),
4852                 OPT_BOOL(0, "include-tag", &include_tag,
4853                          N_("include tag objects that refer to objects to be packed")),
4854                 OPT_BOOL(0, "keep-unreachable", &keep_unreachable,
4855                          N_("keep unreachable objects")),
4856                 OPT_BOOL(0, "pack-loose-unreachable", &pack_loose_unreachable,
4857                          N_("pack loose unreachable objects")),
4858                 OPT_CALLBACK_F(0, "unpack-unreachable", NULL, N_("time"),
4859                   N_("unpack unreachable objects newer than <time>"),
4860                   PARSE_OPT_OPTARG, option_parse_unpack_unreachable),
4861                 OPT_BOOL(0, "cruft", &cruft, N_("create a cruft pack")),
4862                 OPT_CALLBACK_F(0, "cruft-expiration", NULL, N_("time"),
4863                   N_("expire cruft objects older than <time>"),
4864                   PARSE_OPT_OPTARG, option_parse_cruft_expiration),
4865                 OPT_BOOL(0, "sparse", &sparse,
4866                          N_("use the sparse reachability algorithm")),
4867                 OPT_BOOL(0, "thin", &thin,
4868                          N_("create thin packs")),
4869                 OPT_BOOL(0, "path-walk", &path_walk,
4870                          N_("use the path-walk API to walk objects when possible")),
4871                 OPT_BOOL(0, "shallow", &shallow,
4872                          N_("create packs suitable for shallow fetches")),
4873                 OPT_BOOL(0, "honor-pack-keep", &ignore_packed_keep_on_disk,
4874                          N_("ignore packs that have companion .keep file")),
4875                 OPT_STRING_LIST(0, "keep-pack", &keep_pack_list, N_("name"),
4876                                 N_("ignore this pack")),
4877                 OPT_INTEGER(0, "compression", &pack_compression_level,
4878                             N_("pack compression level")),
4879                 OPT_BOOL(0, "keep-true-parents", &grafts_keep_true_parents,
4880                          N_("do not hide commits by grafts")),
4881                 OPT_BOOL(0, "use-bitmap-index", &use_bitmap_index,
4882                          N_("use a bitmap index if available to speed up counting objects")),
4883                 OPT_SET_INT(0, "write-bitmap-index", &write_bitmap_index,
4884                             N_("write a bitmap index together with the pack index"),
4885                             WRITE_BITMAP_TRUE),
4886                 OPT_SET_INT_F(0, "write-bitmap-index-quiet",
4887                               &write_bitmap_index,
4888                               N_("write a bitmap index if possible"),
4889                               WRITE_BITMAP_QUIET, PARSE_OPT_HIDDEN),
4890                 OPT_PARSE_LIST_OBJECTS_FILTER(&filter_options),
4891                 OPT_CALLBACK_F(0, "missing", NULL, N_("action"),
4892                   N_("handling for missing objects"), PARSE_OPT_NONEG,
4893                   option_parse_missing_action),
4894                 OPT_BOOL(0, "exclude-promisor-objects", &exclude_promisor_objects,
4895                          N_("do not pack objects in promisor packfiles")),
4896                 OPT_BOOL(0, "exclude-promisor-objects-best-effort",
4897                          &exclude_promisor_objects_best_effort,
4898                          N_("implies --missing=allow-any")),
4899                 OPT_BOOL(0, "delta-islands", &use_delta_islands,
4900                          N_("respect islands during delta compression")),
4901                 OPT_STRING_LIST(0, "uri-protocol", &uri_protocols,
4902                                 N_("protocol"),
4903                                 N_("exclude any configured uploadpack.blobpackfileuri with this protocol")),
4904                 OPT_INTEGER(0, "name-hash-version", &name_hash_version,
4905                          N_("use the specified name-hash function to group similar objects")),
4906                 OPT_END(),
4907         };
4908
4909         if (DFS_NUM_STATES > (1 << OE_DFS_STATE_BITS))
4910                 BUG("too many dfs states, increase OE_DFS_STATE_BITS");
4911
4912         disable_replace_refs();
4913
             /*
              * Defaults, lowest precedence first: environment, repository
              * settings (only when a gitdir exists), then config. The command
              * line is parsed afterwards.
              */
4914         sparse = git_env_bool("GIT_TEST_PACK_SPARSE", -1);
4915         if (the_repository->gitdir) {
4916                 prepare_repo_settings(the_repository);
4917                 if (sparse < 0)
4918                         sparse = the_repository->settings.pack_use_sparse;
4919                 if (the_repository->settings.pack_use_multi_pack_reuse)
4920                         allow_pack_reuse = MULTI_PACK_REUSE;
4921         }
4922
4923         reset_pack_idx_option(&pack_idx_opts);
4924         pack_idx_opts.flags |= WRITE_REV;
4925         git_config(git_pack_config, NULL);
4926         if (git_env_bool(GIT_TEST_NO_WRITE_REV_INDEX, 0))
4927                 pack_idx_opts.flags &= ~WRITE_REV;
4928
             /*
              * Progress defaults to on only when fd 2 is a terminal; the
              * -q/--progress/--all-progress options parsed next can override it.
              */
4929         progress = isatty(2);
4930         argc = parse_options(argc, argv, prefix, pack_objects_options,
4931                              pack_usage, 0);
4932
             /* A single leftover positional argument is taken as the base name. */
4933         if (argc) {
4934                 base_name = argv[0];
4935                 argc--;
4936         }
             /*
              * Exactly one of --stdout and a base name must be given, and no
              * further positional arguments may remain.
              */
4937         if (pack_to_stdout != !base_name || argc)
4938                 usage_with_options(pack_usage, pack_objects_options);
4939
             /*
              * path_walk is still negative here when --[no-]path-walk was not
              * given (NOTE(review): its initial value is defined outside this
              * function -- confirm). Pick a default in that case.
              */
4940         if (path_walk < 0) {
4941                 if (use_bitmap_index > 0 ||
4942                     !use_internal_rev_list)
4943                         path_walk = 0;
4944                 else if (the_repository->gitdir &&
4945                          the_repository->settings.pack_use_path_walk)
4946                         path_walk = 1;
4947                 else
4948                         path_walk = git_env_bool("GIT_TEST_PACK_PATH_WALK", 0);
4949         }
4950
             /* Clamp depth, the delta cache limit and window to supported ranges. */
4951         if (depth < 0)
4952                 depth = 0;
4953         if (depth >= (1 << OE_DEPTH_BITS)) {
4954                 warning(_("delta chain depth %d is too deep, forcing %d"),
4955                         depth, (1 << OE_DEPTH_BITS) - 1);
4956                 depth = (1 << OE_DEPTH_BITS) - 1;
4957         }
4958         if (cache_max_small_delta_size >= (1U << OE_Z_DELTA_BITS)) {
4959                 warning(_("pack.deltaCacheLimit is too high, forcing %d"),
4960                         (1U << OE_Z_DELTA_BITS) - 1);
4961                 cache_max_small_delta_size = (1U << OE_Z_DELTA_BITS) - 1;
4962         }
4963         if (window < 0)
4964                 window = 0;
4965
             /*
              * rp collects the argument vector that is handed to
              * get_object_list() further down when the internal revision
              * walk is used.
              */
4966         strvec_push(&rp, "pack-objects");
4967
             /* --path-walk yields (with a warning) to --filter and --delta-islands. */
4968         if (path_walk) {
4969                 const char *option = NULL;
4970                 if (filter_options.choice)
4971                         option = "--filter";
4972                 else if (use_delta_islands)
4973                         option = "--delta-islands";
4974
4975                 if (option) {
4976                         warning(_("cannot use %s with %s"),
4977                                 option, "--path-walk");
4978                         path_walk = 0;
4979                 }
4980         }
4981         if (path_walk) {
4982                 strvec_push(&rp, "--boundary");
4983                  /*
4984                   * We must disable the bitmaps because we are removing
4985                   * the --objects / --objects-edge[-aggressive] options.
4986                   */
4987                 use_bitmap_index = 0;
4988         } else if (thin) {
4989                 use_internal_rev_list = 1;
4990                 strvec_push(&rp, shallow
4991                                 ? "--objects-edge-aggressive"
4992                                 : "--objects-edge");
4993         } else
4994                 strvec_push(&rp, "--objects");
4995
             /* Each of the following options forces the internal revision walk. */
4996         if (rev_list_all) {
4997                 use_internal_rev_list = 1;
4998                 strvec_push(&rp, "--all");
4999         }
5000         if (rev_list_reflog) {
5001                 use_internal_rev_list = 1;
5002                 strvec_push(&rp, "--reflog");
5003         }
5004         if (rev_list_index) {
5005                 use_internal_rev_list = 1;
5006                 strvec_push(&rp, "--indexed-objects");
5007         }
5008         if (rev_list_unpacked && !stdin_packs) {
5009                 use_internal_rev_list = 1;
5010                 strvec_push(&rp, "--unpacked");
5011         }
5012
5013         if (exclude_promisor_objects && exclude_promisor_objects_best_effort)
5014                 die(_("options '%s' and '%s' cannot be used together"),
5015                     "--exclude-promisor-objects", "--exclude-promisor-objects-best-effort");
5016         if (exclude_promisor_objects) {
5017                 use_internal_rev_list = 1;
5018                 fetch_if_missing = 0;
5019                 strvec_push(&rp, "--exclude-promisor-objects");
5020         } else if (exclude_promisor_objects_best_effort) {
5021                 use_internal_rev_list = 1;
5022                 fetch_if_missing = 0;
5023                 option_parse_missing_action(NULL, "allow-any", 0);
5024                 /* revs configured below */
5025         }
5026         if (unpack_unreachable || keep_unreachable || pack_loose_unreachable)
5027                 use_internal_rev_list = 1;
5028
5029         if (!reuse_object)
5030                 reuse_delta = 0;
5031         if (pack_compression_level == -1)
5032                 pack_compression_level = Z_DEFAULT_COMPRESSION;
5033         else if (pack_compression_level < 0 || pack_compression_level > Z_BEST_COMPRESSION)
5034                 die(_("bad pack compression level %d"), pack_compression_level);
5035
5036         if (!delta_search_threads)      /* --threads=0 means autodetect */
5037                 delta_search_threads = online_cpus();
5038
5039         if (!HAVE_THREADS && delta_search_threads != 1)
5040                 warning(_("no threads support, ignoring --threads"));
             /* The configured size limit only applies to on-disk packs. */
5041         if (!pack_to_stdout && !pack_size_limit)
5042                 pack_size_limit = pack_size_limit_cfg;
5043         if (pack_to_stdout && pack_size_limit)
5044                 die(_("--max-pack-size cannot be used to build a pack for transfer"));
5045         if (pack_size_limit && pack_size_limit < 1024*1024) {
5046                 warning(_("minimum pack size limit is 1 MiB"));
5047                 pack_size_limit = 1024*1024;
5048         }
5049
5050         if (!pack_to_stdout && thin)
5051                 die(_("--thin cannot be used to build an indexable pack"));
5052
5053         if (keep_unreachable && unpack_unreachable)
5054                 die(_("options '%s' and '%s' cannot be used together"), "--keep-unreachable", "--unpack-unreachable");
             /*
              * The expiration is kept only when --all, --reflog and
              * --indexed-objects were all given; otherwise it is reset to 0.
              */
5055         if (!rev_list_all || !rev_list_reflog || !rev_list_index)
5056                 unpack_unreachable_expiration = 0;
5057
5058         if (stdin_packs && filter_options.choice)
5059                 die(_("cannot use --filter with --stdin-packs"));
5060
5061         if (stdin_packs && use_internal_rev_list)
5062                 die(_("cannot use internal rev list with --stdin-packs"));
5063
5064         if (cruft) {
5065                 if (use_internal_rev_list)
5066                         die(_("cannot use internal rev list with --cruft"));
5067                 if (stdin_packs)
5068                         die(_("cannot use --stdin-packs with --cruft"));
5069         }
5070
5071         /*
5072          * "soft" reasons not to use bitmaps - for on-disk repack by default we want
5073          *
5074          * - to produce good pack (with bitmap index not-yet-packed objects are
5075          *   packed in suboptimal order).
5076          *
5077          * - to use more robust pack-generation codepath (avoiding possible
5078          *   bugs in bitmap code and possible bitmap index corruption).
5079          */
5080         if (!pack_to_stdout)
5081                 use_bitmap_index_default = 0;
5082
5083         if (use_bitmap_index < 0)
5084                 use_bitmap_index = use_bitmap_index_default;
5085
5086         /* "hard" reasons not to use bitmaps; these just won't work at all */
5087         if (!use_internal_rev_list || (!pack_to_stdout && write_bitmap_index) || is_repository_shallow(the_repository))
5088                 use_bitmap_index = 0;
5089
             /* A bitmap is only written for an on-disk pack built with --all. */
5090         if (pack_to_stdout || !rev_list_all)
5091                 write_bitmap_index = 0;
5092
5093         if (name_hash_version < 0)
5094                 name_hash_version = (int)git_env_ulong("GIT_TEST_NAME_HASH_VERSION", 1);
5095
5096         validate_name_hash_version();
5097
5098         if (use_delta_islands)
5099                 strvec_push(&rp, "--topo-order");
5100
5101         if (progress && all_progress_implied)
5102                 progress = 2;
5103
5104         add_extra_kept_packs(&keep_pack_list);
5105         if (ignore_packed_keep_on_disk) {
5106                 struct packed_git *p;
5107                 for (p = get_all_packs(the_repository); p; p = p->next)
5108                         if (p->pack_local && p->pack_keep)
5109                                 break;
5110                 if (!p) /* no keep-able packs found */
5111                         ignore_packed_keep_on_disk = 0;
5112         }
5113         if (local) {
5114                 /*
5115                  * unlike ignore_packed_keep_on_disk above, we do not
5116                  * want to unset "local" based on looking at packs, as
5117                  * it also covers non-local objects
5118                  */
5119                 struct packed_git *p;
5120                 for (p = get_all_packs(the_repository); p; p = p->next) {
5121                         if (!p->pack_local) {
5122                                 have_non_local_packs = 1;
5123                                 break;
5124                         }
5125                 }
5126         }
5127
5128         trace2_region_enter("pack-objects", "enumerate-objects",
5129                             the_repository);
5130         prepare_packing_data(the_repository, &to_pack);
5131
5132         if (progress && !cruft)
5133                 progress_state = start_progress(the_repository,
5134                                                 _("Enumerating objects"), 0);
             /*
              * Pick the object source: --stdin-packs, --cruft, a plain object
              * list on stdin, or the internal revision walk driven by rp.
              */
5135         if (stdin_packs) {
5136                 /* avoids adding objects in excluded packs */
5137                 ignore_packed_keep_in_core = 1;
5138                 read_packs_list_from_stdin();
5139                 if (rev_list_unpacked)
5140                         add_unreachable_loose_objects();
5141         } else if (cruft) {
5142                 read_cruft_objects();
5143         } else if (!use_internal_rev_list) {
5144                 read_object_list_from_stdin();
5145         } else {
5146                 struct rev_info revs;
5147
5148                 repo_init_revisions(the_repository, &revs, NULL);
5149                 list_objects_filter_copy(&revs.filter, &filter_options);
5150                 if (exclude_promisor_objects_best_effort) {
5151                         revs.include_check = is_not_in_promisor_pack;
5152                         revs.include_check_obj = is_not_in_promisor_pack_obj;
5153                 }
5154                 get_object_list(&revs, rp.nr, rp.v);
5155                 release_revisions(&revs);
5156         }
5157         cleanup_preferred_base();
5158         if (include_tag && nr_result)
5159                 refs_for_each_tag_ref(get_main_ref_store(the_repository),
5160                                       add_ref_tag, NULL);
5161         stop_progress(&progress_state);
5162         trace2_region_leave("pack-objects", "enumerate-objects",
5163                             the_repository);
5164
             /* --non-empty with nothing to pack: skip preparing and writing. */
5165         if (non_empty && !nr_result)
5166                 goto cleanup;
5167         if (nr_result) {
5168                 trace2_region_enter("pack-objects", "prepare-pack",
5169                                     the_repository);
5170                 prepare_pack(window, depth);
5171                 trace2_region_leave("pack-objects", "prepare-pack",
5172                                     the_repository);
5173         }
5174
5175         trace2_region_enter("pack-objects", "write-pack-file", the_repository);
5176         write_excluded_by_configs();
5177         write_pack_file();
5178         trace2_region_leave("pack-objects", "write-pack-file", the_repository);
5179
5180         if (progress)
5181                 fprintf_ln(stderr,
5182                            _("Total %"PRIu32" (delta %"PRIu32"),"
5183                              " reused %"PRIu32" (delta %"PRIu32"),"
5184                              " pack-reused %"PRIu32" (from %"PRIuMAX")"),
5185                            written, written_delta, reused, reused_delta,
5186                            reuse_packfile_objects,
5187                            (uintmax_t)reuse_packfiles_used_nr);
5188
5189         trace2_data_intmax("pack-objects", the_repository, "written", written);
5190         trace2_data_intmax("pack-objects", the_repository, "written/delta", written_delta);
5191         trace2_data_intmax("pack-objects", the_repository, "reused", reused);
5192         trace2_data_intmax("pack-objects", the_repository, "reused/delta", reused_delta);
5193         trace2_data_intmax("pack-objects", the_repository, "pack-reused", reuse_packfile_objects);
5194         trace2_data_intmax("pack-objects", the_repository, "packs-reused", reuse_packfiles_used_nr);
5195
5196 cleanup:
5197         clear_packing_data(&to_pack);
5198         list_objects_filter_release(&filter_options);
5199         string_list_clear(&keep_pack_list, 0);
5200         strvec_clear(&rp);
5201
5202         return 0;
5203 }