]> git.ipfire.org Git - thirdparty/git.git/blobdiff - builtin/pack-objects.c
Merge branch 'jk/nth-packed-object-id'
[thirdparty/git.git] / builtin / pack-objects.c
index 393c20a2d78b50f8852ed11bc9e943b27ba58d2e..02aa6ee4808a96f264a861bc49789341179056be 100644 (file)
@@ -92,10 +92,11 @@ static struct progress *progress_state;
 
 static struct packed_git *reuse_packfile;
 static uint32_t reuse_packfile_objects;
-static off_t reuse_packfile_offset;
+static struct bitmap *reuse_packfile_bitmap;
 
 static int use_bitmap_index_default = 1;
 static int use_bitmap_index = -1;
+static int allow_pack_reuse = 1;
 static enum {
        WRITE_BITMAP_FALSE = 0,
        WRITE_BITMAP_QUIET,
@@ -303,7 +304,8 @@ static unsigned long write_no_reuse_object(struct hashfile *f, struct object_ent
        if (!usable_delta) {
                if (oe_type(entry) == OBJ_BLOB &&
                    oe_size_greater_than(&to_pack, entry, big_file_threshold) &&
-                   (st = open_istream(&entry->idx.oid, &type, &size, NULL)) != NULL)
+                   (st = open_istream(the_repository, &entry->idx.oid, &type,
+                                      &size, NULL)) != NULL)
                        buf = NULL;
                else {
                        buf = read_object_file(&entry->idx.oid, &type, &size);
@@ -784,57 +786,186 @@ static struct object_entry **compute_write_order(void)
        return wo;
 }
 
-static off_t write_reused_pack(struct hashfile *f)
+
+/*
+ * A reused set of objects. All objects in a chunk have the same
+ * relative position in the original packfile and the generated
+ * packfile.
+ */
+
+static struct reused_chunk {
+       /* The offset of the first object of this chunk in the original
+        * packfile. */
+       off_t original;
+       /* The offset of the first object of this chunk in the generated
+        * packfile minus "original". */
+       off_t difference;
+} *reused_chunks;
+static int reused_chunks_nr;
+static int reused_chunks_alloc;
+
+static void record_reused_object(off_t where, off_t offset)
 {
-       unsigned char buffer[8192];
-       off_t to_write, total;
-       int fd;
+       if (reused_chunks_nr && reused_chunks[reused_chunks_nr-1].difference == offset)
+               return;
 
-       if (!is_pack_valid(reuse_packfile))
-               die(_("packfile is invalid: %s"), reuse_packfile->pack_name);
+       ALLOC_GROW(reused_chunks, reused_chunks_nr + 1,
+                  reused_chunks_alloc);
+       reused_chunks[reused_chunks_nr].original = where;
+       reused_chunks[reused_chunks_nr].difference = offset;
+       reused_chunks_nr++;
+}
 
-       fd = git_open(reuse_packfile->pack_name);
-       if (fd < 0)
-               die_errno(_("unable to open packfile for reuse: %s"),
-                         reuse_packfile->pack_name);
+/*
+ * Binary search to find the chunk that "where" is in. Note
+ * that we're not looking for an exact match, just the first
+ * chunk that contains it (which implicitly ends at the start
+ * of the next chunk.
+ */
+static off_t find_reused_offset(off_t where)
+{
+       int lo = 0, hi = reused_chunks_nr;
+       while (lo < hi) {
+               int mi = lo + ((hi - lo) / 2);
+               if (where == reused_chunks[mi].original)
+                       return reused_chunks[mi].difference;
+               if (where < reused_chunks[mi].original)
+                       hi = mi;
+               else
+                       lo = mi + 1;
+       }
 
-       if (lseek(fd, sizeof(struct pack_header), SEEK_SET) == -1)
-               die_errno(_("unable to seek in reused packfile"));
+       /*
+        * The first chunk starts at zero, so we can't have gone below
+        * there.
+        */
+       assert(lo);
+       return reused_chunks[lo-1].difference;
+}
 
-       if (reuse_packfile_offset < 0)
-               reuse_packfile_offset = reuse_packfile->pack_size - the_hash_algo->rawsz;
+static void write_reused_pack_one(size_t pos, struct hashfile *out,
+                                 struct pack_window **w_curs)
+{
+       off_t offset, next, cur;
+       enum object_type type;
+       unsigned long size;
 
-       total = to_write = reuse_packfile_offset - sizeof(struct pack_header);
+       offset = reuse_packfile->revindex[pos].offset;
+       next = reuse_packfile->revindex[pos + 1].offset;
 
-       while (to_write) {
-               int read_pack = xread(fd, buffer, sizeof(buffer));
+       record_reused_object(offset, offset - hashfile_total(out));
 
-               if (read_pack <= 0)
-                       die_errno(_("unable to read from reused packfile"));
+       cur = offset;
+       type = unpack_object_header(reuse_packfile, w_curs, &cur, &size);
+       assert(type >= 0);
 
-               if (read_pack > to_write)
-                       read_pack = to_write;
+       if (type == OBJ_OFS_DELTA) {
+               off_t base_offset;
+               off_t fixup;
 
-               hashwrite(f, buffer, read_pack);
-               to_write -= read_pack;
+               unsigned char header[MAX_PACK_OBJECT_HEADER];
+               unsigned len;
+
+               base_offset = get_delta_base(reuse_packfile, w_curs, &cur, type, offset);
+               assert(base_offset != 0);
+
+               /* Convert to REF_DELTA if we must... */
+               if (!allow_ofs_delta) {
+                       int base_pos = find_revindex_position(reuse_packfile, base_offset);
+                       struct object_id base_oid;
+
+                       nth_packed_object_id(&base_oid, reuse_packfile,
+                                            reuse_packfile->revindex[base_pos].nr);
+
+                       len = encode_in_pack_object_header(header, sizeof(header),
+                                                          OBJ_REF_DELTA, size);
+                       hashwrite(out, header, len);
+                       hashwrite(out, base_oid.hash, 20);
+                       copy_pack_data(out, reuse_packfile, w_curs, cur, next - cur);
+                       return;
+               }
+
+               /* Otherwise see if we need to rewrite the offset... */
+               fixup = find_reused_offset(offset) -
+                       find_reused_offset(base_offset);
+               if (fixup) {
+                       unsigned char ofs_header[10];
+                       unsigned i, ofs_len;
+                       off_t ofs = offset - base_offset - fixup;
+
+                       len = encode_in_pack_object_header(header, sizeof(header),
+                                                          OBJ_OFS_DELTA, size);
+
+                       i = sizeof(ofs_header) - 1;
+                       ofs_header[i] = ofs & 127;
+                       while (ofs >>= 7)
+                               ofs_header[--i] = 128 | (--ofs & 127);
+
+                       ofs_len = sizeof(ofs_header) - i;
+
+                       hashwrite(out, header, len);
+                       hashwrite(out, ofs_header + sizeof(ofs_header) - ofs_len, ofs_len);
+                       copy_pack_data(out, reuse_packfile, w_curs, cur, next - cur);
+                       return;
+               }
+
+               /* ...otherwise we have no fixup, and can write it verbatim */
+       }
+
+       copy_pack_data(out, reuse_packfile, w_curs, offset, next - offset);
+}
+
+static size_t write_reused_pack_verbatim(struct hashfile *out,
+                                        struct pack_window **w_curs)
+{
+       size_t pos = 0;
+
+       while (pos < reuse_packfile_bitmap->word_alloc &&
+                       reuse_packfile_bitmap->words[pos] == (eword_t)~0)
+               pos++;
+
+       if (pos) {
+               off_t to_write;
+
+               written = (pos * BITS_IN_EWORD);
+               to_write = reuse_packfile->revindex[written].offset
+                       - sizeof(struct pack_header);
+
+               /* We're recording one chunk, not one object. */
+               record_reused_object(sizeof(struct pack_header), 0);
+               hashflush(out);
+               copy_pack_data(out, reuse_packfile, w_curs,
+                       sizeof(struct pack_header), to_write);
 
-               /*
-                * We don't know the actual number of objects written,
-                * only how many bytes written, how many bytes total, and
-                * how many objects total. So we can fake it by pretending all
-                * objects we are writing are the same size. This gives us a
-                * smooth progress meter, and at the end it matches the true
-                * answer.
-                */
-               written = reuse_packfile_objects *
-                               (((double)(total - to_write)) / total);
                display_progress(progress_state, written);
        }
+       return pos;
+}
+
+static void write_reused_pack(struct hashfile *f)
+{
+       size_t i = 0;
+       uint32_t offset;
+       struct pack_window *w_curs = NULL;
+
+       if (allow_ofs_delta)
+               i = write_reused_pack_verbatim(f, &w_curs);
 
-       close(fd);
-       written = reuse_packfile_objects;
-       display_progress(progress_state, written);
-       return reuse_packfile_offset - sizeof(struct pack_header);
+       for (; i < reuse_packfile_bitmap->word_alloc; ++i) {
+               eword_t word = reuse_packfile_bitmap->words[i];
+               size_t pos = (i * BITS_IN_EWORD);
+
+               for (offset = 0; offset < BITS_IN_EWORD; ++offset) {
+                       if ((word >> offset) == 0)
+                               break;
+
+                       offset += ewah_bit_ctz64(word >> offset);
+                       write_reused_pack_one(pos + offset, f, &w_curs);
+                       display_progress(progress_state, ++written);
+               }
+       }
+
+       unuse_pack(&w_curs);
 }
 
 static const char no_split_warning[] = N_(
@@ -867,11 +998,9 @@ static void write_pack_file(void)
                offset = write_pack_header(f, nr_remaining);
 
                if (reuse_packfile) {
-                       off_t packfile_size;
                        assert(pack_to_stdout);
-
-                       packfile_size = write_reused_pack(f);
-                       offset += packfile_size;
+                       write_reused_pack(f);
+                       offset = hashfile_total(f);
                }
 
                nr_written = 0;
@@ -1000,6 +1129,10 @@ static int have_duplicate_entry(const struct object_id *oid,
 {
        struct object_entry *entry;
 
+       if (reuse_packfile_bitmap &&
+           bitmap_walk_contains(bitmap_git, reuse_packfile_bitmap, oid))
+               return 1;
+
        entry = packlist_find(&to_pack, oid);
        if (!entry)
                return 0;
@@ -1486,23 +1619,17 @@ static void cleanup_preferred_base(void)
  * deltify other objects against, in order to avoid
  * circular deltas.
  */
-static int can_reuse_delta(const unsigned char *base_sha1,
+static int can_reuse_delta(const struct object_id *base_oid,
                           struct object_entry *delta,
                           struct object_entry **base_out)
 {
        struct object_entry *base;
-       struct object_id base_oid;
-
-       if (!base_sha1)
-               return 0;
-
-       oidread(&base_oid, base_sha1);
 
        /*
         * First see if we're already sending the base (or it's explicitly in
         * our "excluded" list).
         */
-       base = packlist_find(&to_pack, &base_oid);
+       base = packlist_find(&to_pack, base_oid);
        if (base) {
                if (!in_same_island(&delta->idx.oid, &base->idx.oid))
                        return 0;
@@ -1515,9 +1642,9 @@ static int can_reuse_delta(const unsigned char *base_sha1,
         * even if it was buried too deep in history to make it into the
         * packing list.
         */
-       if (thin && bitmap_has_oid_in_uninteresting(bitmap_git, &base_oid)) {
+       if (thin && bitmap_has_oid_in_uninteresting(bitmap_git, base_oid)) {
                if (use_delta_islands) {
-                       if (!in_same_island(&delta->idx.oid, &base_oid))
+                       if (!in_same_island(&delta->idx.oid, base_oid))
                                return 0;
                }
                *base_out = NULL;
@@ -1534,7 +1661,8 @@ static void check_object(struct object_entry *entry)
        if (IN_PACK(entry)) {
                struct packed_git *p = IN_PACK(entry);
                struct pack_window *w_curs = NULL;
-               const unsigned char *base_ref = NULL;
+               int have_base = 0;
+               struct object_id base_ref;
                struct object_entry *base_entry;
                unsigned long used, used_0;
                unsigned long avail;
@@ -1575,9 +1703,13 @@ static void check_object(struct object_entry *entry)
                        unuse_pack(&w_curs);
                        return;
                case OBJ_REF_DELTA:
-                       if (reuse_delta && !entry->preferred_base)
-                               base_ref = use_pack(p, &w_curs,
-                                               entry->in_pack_offset + used, NULL);
+                       if (reuse_delta && !entry->preferred_base) {
+                               oidread(&base_ref,
+                                       use_pack(p, &w_curs,
+                                                entry->in_pack_offset + used,
+                                                NULL));
+                               have_base = 1;
+                       }
                        entry->in_pack_header_size = used + the_hash_algo->rawsz;
                        break;
                case OBJ_OFS_DELTA:
@@ -1607,13 +1739,15 @@ static void check_object(struct object_entry *entry)
                                revidx = find_pack_revindex(p, ofs);
                                if (!revidx)
                                        goto give_up;
-                               base_ref = nth_packed_object_sha1(p, revidx->nr);
+                               if (!nth_packed_object_id(&base_ref, p, revidx->nr))
+                                       have_base = 1;
                        }
                        entry->in_pack_header_size = used + used_0;
                        break;
                }
 
-               if (can_reuse_delta(base_ref, entry, &base_entry)) {
+               if (have_base &&
+                   can_reuse_delta(&base_ref, entry, &base_entry)) {
                        oe_set_type(entry, entry->in_pack_type);
                        SET_SIZE(entry, in_pack_size); /* delta size */
                        SET_DELTA_SIZE(entry, in_pack_size);
@@ -1623,7 +1757,7 @@ static void check_object(struct object_entry *entry)
                                entry->delta_sibling_idx = base_entry->delta_child_idx;
                                SET_DELTA_CHILD(base_entry, entry);
                        } else {
-                               SET_DELTA_EXT(entry, base_ref);
+                               SET_DELTA_EXT(entry, &base_ref);
                        }
 
                        unuse_pack(&w_curs);
@@ -2552,6 +2686,13 @@ static void ll_find_deltas(struct object_entry **list, unsigned list_size,
        free(p);
 }
 
+static int obj_is_packed(const struct object_id *oid)
+{
+       return packlist_find(&to_pack, oid) ||
+               (reuse_packfile_bitmap &&
+                bitmap_walk_contains(bitmap_git, reuse_packfile_bitmap, oid));
+}
+
 static void add_tag_chain(const struct object_id *oid)
 {
        struct tag *tag;
@@ -2563,7 +2704,7 @@ static void add_tag_chain(const struct object_id *oid)
         * it was included via bitmaps, we would not have parsed it
         * previously).
         */
-       if (packlist_find(&to_pack, oid))
+       if (obj_is_packed(oid))
                return;
 
        tag = lookup_tag(the_repository, oid);
@@ -2587,7 +2728,7 @@ static int add_ref_tag(const char *path, const struct object_id *oid, int flag,
 
        if (starts_with(path, "refs/tags/") && /* is a tag? */
            !peel_ref(path, &peeled)    && /* peelable? */
-           packlist_find(&to_pack, &peeled))      /* object packed? */
+           obj_is_packed(&peeled)) /* object packed? */
                add_tag_chain(oid);
        return 0;
 }
@@ -2655,6 +2796,7 @@ static void prepare_pack(int window, int depth)
 
        if (nr_deltas && n > 1) {
                unsigned nr_done = 0;
+
                if (progress)
                        progress_state = start_progress(_("Compressing objects"),
                                                        nr_deltas);
@@ -2699,6 +2841,10 @@ static int git_pack_config(const char *k, const char *v, void *cb)
                use_bitmap_index_default = git_config_bool(k, v);
                return 0;
        }
+       if (!strcmp(k, "pack.allowpackreuse")) {
+               allow_pack_reuse = git_config_bool(k, v);
+               return 0;
+       }
        if (!strcmp(k, "pack.threads")) {
                delta_search_threads = git_config_int(k, v);
                if (delta_search_threads < 0)
@@ -2909,7 +3055,7 @@ static void add_objects_in_unpacked_packs(void)
                           in_pack.alloc);
 
                for (i = 0; i < p->num_objects; i++) {
-                       nth_packed_object_oid(&oid, p, i);
+                       nth_packed_object_id(&oid, p, i);
                        o = lookup_unknown_object(&oid);
                        if (!(o->flags & OBJECT_ADDED))
                                mark_in_pack_object(o, p, &in_pack);
@@ -3013,7 +3159,7 @@ static void loosen_unused_packed_objects(void)
                        die(_("cannot open pack index"));
 
                for (i = 0; i < p->num_objects; i++) {
-                       nth_packed_object_oid(&oid, p, i);
+                       nth_packed_object_id(&oid, p, i);
                        if (!packlist_find(&to_pack, &oid) &&
                            !has_sha1_pack_kept_or_nonlocal(&oid) &&
                            !loosened_object_can_be_discarded(&oid, p->mtime))
@@ -3030,8 +3176,8 @@ static void loosen_unused_packed_objects(void)
  */
 static int pack_options_allow_reuse(void)
 {
-       return pack_to_stdout &&
-              allow_ofs_delta &&
+       return allow_pack_reuse &&
+              pack_to_stdout &&
               !ignore_packed_keep_on_disk &&
               !ignore_packed_keep_in_core &&
               (!local || !have_non_local_packs) &&
@@ -3040,7 +3186,7 @@ static int pack_options_allow_reuse(void)
 
 static int get_object_list_from_bitmap(struct rev_info *revs)
 {
-       if (!(bitmap_git = prepare_bitmap_walk(revs)))
+       if (!(bitmap_git = prepare_bitmap_walk(revs, &filter_options)))
                return -1;
 
        if (pack_options_allow_reuse() &&
@@ -3048,13 +3194,14 @@ static int get_object_list_from_bitmap(struct rev_info *revs)
                        bitmap_git,
                        &reuse_packfile,
                        &reuse_packfile_objects,
-                       &reuse_packfile_offset)) {
+                       &reuse_packfile_bitmap)) {
                assert(reuse_packfile_objects);
                nr_result += reuse_packfile_objects;
                display_progress(progress_state, nr_result);
        }
 
-       traverse_bitmap_commit_list(bitmap_git, &add_object_entry_from_bitmap);
+       traverse_bitmap_commit_list(bitmap_git, revs,
+                                   &add_object_entry_from_bitmap);
        return 0;
 }
 
@@ -3418,7 +3565,6 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix)
        if (filter_options.choice) {
                if (!pack_to_stdout)
                        die(_("cannot use --filter without --stdout"));
-               use_bitmap_index = 0;
        }
 
        /*
@@ -3509,7 +3655,9 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix)
        if (progress)
                fprintf_ln(stderr,
                           _("Total %"PRIu32" (delta %"PRIu32"),"
-                            " reused %"PRIu32" (delta %"PRIu32")"),
-                          written, written_delta, reused, reused_delta);
+                            " reused %"PRIu32" (delta %"PRIu32"),"
+                            " pack-reused %"PRIu32),
+                          written, written_delta, reused, reused_delta,
+                          reuse_packfile_objects);
        return 0;
 }