]> git.ipfire.org Git - thirdparty/git.git/blobdiff - pack-bitmap-write.c
Sync with 2.30.3
[thirdparty/git.git] / pack-bitmap-write.c
index 7b2dc3e7dc768a2d2405a9ae3b1d13f86069a93d..88d9e696a546a8db20d3ff7147a1bb15f8651d36 100644 (file)
@@ -1,4 +1,5 @@
 #include "cache.h"
+#include "object-store.h"
 #include "commit.h"
 #include "tag.h"
 #include "diff.h"
@@ -8,8 +9,10 @@
 #include "pack-revindex.h"
 #include "pack.h"
 #include "pack-bitmap.h"
-#include "sha1-lookup.h"
+#include "hash-lookup.h"
 #include "pack-objects.h"
+#include "commit-reach.h"
+#include "prio-queue.h"
 
 struct bitmapped_commit {
        struct commit *commit;
@@ -26,8 +29,7 @@ struct bitmap_writer {
        struct ewah_bitmap *blobs;
        struct ewah_bitmap *tags;
 
-       khash_sha1 *bitmaps;
-       khash_sha1 *reused;
+       kh_oid_map_t *bitmaps;
        struct packing_data *to_pack;
 
        struct bitmapped_commit *selected;
@@ -35,7 +37,7 @@ struct bitmap_writer {
 
        struct progress *progress;
        int show_progress;
-       unsigned char pack_checksum[20];
+       unsigned char pack_checksum[GIT_MAX_RAWSZ];
 };
 
 static struct bitmap_writer writer;
@@ -75,7 +77,7 @@ void bitmap_writer_build_type_index(struct packing_data *to_pack,
                        break;
 
                default:
-                       real_type = oid_object_info(the_repository,
+                       real_type = oid_object_info(to_pack->repo,
                                                    &entry->idx.oid, NULL);
                        break;
                }
@@ -108,10 +110,8 @@ void bitmap_writer_build_type_index(struct packing_data *to_pack,
 /**
  * Compute the actual bitmaps
  */
-static struct object **seen_objects;
-static unsigned int seen_objects_nr, seen_objects_alloc;
 
-static inline void push_bitmapped_commit(struct commit *commit, struct ewah_bitmap *reused)
+static inline void push_bitmapped_commit(struct commit *commit)
 {
        if (writer.selected_nr >= writer.selected_alloc) {
                writer.selected_alloc = (writer.selected_alloc + 32) * 2;
@@ -119,93 +119,24 @@ static inline void push_bitmapped_commit(struct commit *commit, struct ewah_bitm
        }
 
        writer.selected[writer.selected_nr].commit = commit;
-       writer.selected[writer.selected_nr].bitmap = reused;
+       writer.selected[writer.selected_nr].bitmap = NULL;
        writer.selected[writer.selected_nr].flags = 0;
 
        writer.selected_nr++;
 }
 
-static inline void mark_as_seen(struct object *object)
+static uint32_t find_object_pos(const struct object_id *oid)
 {
-       ALLOC_GROW(seen_objects, seen_objects_nr + 1, seen_objects_alloc);
-       seen_objects[seen_objects_nr++] = object;
-}
-
-static inline void reset_all_seen(void)
-{
-       unsigned int i;
-       for (i = 0; i < seen_objects_nr; ++i) {
-               seen_objects[i]->flags &= ~(SEEN | ADDED | SHOWN);
-       }
-       seen_objects_nr = 0;
-}
-
-static uint32_t find_object_pos(const unsigned char *sha1)
-{
-       struct object_entry *entry = packlist_find(writer.to_pack, sha1, NULL);
+       struct object_entry *entry = packlist_find(writer.to_pack, oid);
 
        if (!entry) {
                die("Failed to write bitmap index. Packfile doesn't have full closure "
-                       "(object %s is missing)", sha1_to_hex(sha1));
+                       "(object %s is missing)", oid_to_hex(oid));
        }
 
        return oe_in_pack_pos(writer.to_pack, entry);
 }
 
-static void show_object(struct object *object, const char *name, void *data)
-{
-       struct bitmap *base = data;
-       bitmap_set(base, find_object_pos(object->oid.hash));
-       mark_as_seen(object);
-}
-
-static void show_commit(struct commit *commit, void *data)
-{
-       mark_as_seen((struct object *)commit);
-}
-
-static int
-add_to_include_set(struct bitmap *base, struct commit *commit)
-{
-       khiter_t hash_pos;
-       uint32_t bitmap_pos = find_object_pos(commit->object.oid.hash);
-
-       if (bitmap_get(base, bitmap_pos))
-               return 0;
-
-       hash_pos = kh_get_sha1(writer.bitmaps, commit->object.oid.hash);
-       if (hash_pos < kh_end(writer.bitmaps)) {
-               struct bitmapped_commit *bc = kh_value(writer.bitmaps, hash_pos);
-               bitmap_or_ewah(base, bc->bitmap);
-               return 0;
-       }
-
-       bitmap_set(base, bitmap_pos);
-       return 1;
-}
-
-static int
-should_include(struct commit *commit, void *_data)
-{
-       struct bitmap *base = _data;
-
-       if (!add_to_include_set(base, commit)) {
-               struct commit_list *parent = commit->parents;
-
-               mark_as_seen((struct object *)commit);
-
-               while (parent) {
-                       parent->item->object.flags |= SEEN;
-                       mark_as_seen((struct object *)parent->item);
-                       parent = parent->next;
-               }
-
-               return 0;
-       }
-
-       return 1;
-}
-
 static void compute_xor_offsets(void)
 {
        static const int MAX_XOR_OFFSET_SEARCH = 10;
@@ -246,79 +177,326 @@ static void compute_xor_offsets(void)
        }
 }
 
-void bitmap_writer_build(struct packing_data *to_pack)
-{
-       static const double REUSE_BITMAP_THRESHOLD = 0.2;
+struct bb_commit {
+       struct commit_list *reverse_edges;
+       struct bitmap *commit_mask;
+       struct bitmap *bitmap;
+       unsigned selected:1,
+                maximal:1;
+       unsigned idx; /* within selected array */
+};
 
-       int i, reuse_after, need_reset;
-       struct bitmap *base = bitmap_new();
-       struct rev_info revs;
+define_commit_slab(bb_data, struct bb_commit);
 
-       writer.bitmaps = kh_init_sha1();
-       writer.to_pack = to_pack;
+struct bitmap_builder {
+       struct bb_data data;
+       struct commit **commits;
+       size_t commits_nr, commits_alloc;
+};
 
-       if (writer.show_progress)
-               writer.progress = start_progress("Building bitmaps", writer.selected_nr);
+static void bitmap_builder_init(struct bitmap_builder *bb,
+                               struct bitmap_writer *writer,
+                               struct bitmap_index *old_bitmap)
+{
+       struct rev_info revs;
+       struct commit *commit;
+       struct commit_list *reusable = NULL;
+       struct commit_list *r;
+       unsigned int i, num_maximal = 0;
 
-       init_revisions(&revs, NULL);
-       revs.tag_objects = 1;
-       revs.tree_objects = 1;
-       revs.blob_objects = 1;
-       revs.no_walk = 0;
+       memset(bb, 0, sizeof(*bb));
+       init_bb_data(&bb->data);
 
-       revs.include_check = should_include;
        reset_revision_walk();
+       repo_init_revisions(writer->to_pack->repo, &revs, NULL);
+       revs.topo_order = 1;
+       revs.first_parent_only = 1;
 
-       reuse_after = writer.selected_nr * REUSE_BITMAP_THRESHOLD;
-       need_reset = 0;
+       for (i = 0; i < writer->selected_nr; i++) {
+               struct commit *c = writer->selected[i].commit;
+               struct bb_commit *ent = bb_data_at(&bb->data, c);
 
-       for (i = writer.selected_nr - 1; i >= 0; --i) {
-               struct bitmapped_commit *stored;
-               struct object *object;
+               ent->selected = 1;
+               ent->maximal = 1;
+               ent->idx = i;
 
-               khiter_t hash_pos;
-               int hash_ret;
+               ent->commit_mask = bitmap_new();
+               bitmap_set(ent->commit_mask, i);
+
+               add_pending_object(&revs, &c->object, "");
+       }
+
+       if (prepare_revision_walk(&revs))
+               die("revision walk setup failed");
+
+       while ((commit = get_revision(&revs))) {
+               struct commit_list *p = commit->parents;
+               struct bb_commit *c_ent;
+
+               parse_commit_or_die(commit);
+
+               c_ent = bb_data_at(&bb->data, commit);
+
+               /*
+                * If there is no commit_mask, there is no reason to iterate
+                * over this commit; it is not selected (if it were, it would
+                * not have a blank commit mask) and all its children have
+                * existing bitmaps (see the comment starting with "This commit
+                * has an existing bitmap" below), so it does not contribute
+                * anything to the final bitmap file or its descendants.
+                */
+               if (!c_ent->commit_mask)
+                       continue;
+
+               if (old_bitmap && bitmap_for_commit(old_bitmap, commit)) {
+                       /*
+                        * This commit has an existing bitmap, so we can
+                        * get its bits immediately without an object
+                        * walk. That is, it is reusable as-is and there is no
+                        * need to continue walking beyond it.
+                        *
+                        * Mark it as such and add it to bb->commits separately
+                        * to avoid allocating a position in the commit mask.
+                        */
+                       commit_list_insert(commit, &reusable);
+                       goto next;
+               }
+
+               if (c_ent->maximal) {
+                       num_maximal++;
+                       ALLOC_GROW(bb->commits, bb->commits_nr + 1, bb->commits_alloc);
+                       bb->commits[bb->commits_nr++] = commit;
+               }
+
+               if (p) {
+                       struct bb_commit *p_ent = bb_data_at(&bb->data, p->item);
+                       int c_not_p, p_not_c;
+
+                       if (!p_ent->commit_mask) {
+                               p_ent->commit_mask = bitmap_new();
+                               c_not_p = 1;
+                               p_not_c = 0;
+                       } else {
+                               c_not_p = bitmap_is_subset(c_ent->commit_mask, p_ent->commit_mask);
+                               p_not_c = bitmap_is_subset(p_ent->commit_mask, c_ent->commit_mask);
+                       }
+
+                       if (!c_not_p)
+                               continue;
+
+                       bitmap_or(p_ent->commit_mask, c_ent->commit_mask);
+
+                       if (p_not_c)
+                               p_ent->maximal = 1;
+                       else {
+                               p_ent->maximal = 0;
+                               free_commit_list(p_ent->reverse_edges);
+                               p_ent->reverse_edges = NULL;
+                       }
 
-               stored = &writer.selected[i];
-               object = (struct object *)stored->commit;
+                       if (c_ent->maximal) {
+                               commit_list_insert(commit, &p_ent->reverse_edges);
+                       } else {
+                               struct commit_list *cc = c_ent->reverse_edges;
 
-               if (stored->bitmap == NULL) {
-                       if (i < writer.selected_nr - 1 &&
-                           (need_reset ||
-                            !in_merge_bases(writer.selected[i + 1].commit,
-                                            stored->commit))) {
-                           bitmap_reset(base);
-                           reset_all_seen();
+                               for (; cc; cc = cc->next) {
+                                       if (!commit_list_contains(cc->item, p_ent->reverse_edges))
+                                               commit_list_insert(cc->item, &p_ent->reverse_edges);
+                               }
                        }
+               }
 
-                       add_pending_object(&revs, object, "");
-                       revs.include_check_data = base;
+next:
+               bitmap_free(c_ent->commit_mask);
+               c_ent->commit_mask = NULL;
+       }
 
-                       if (prepare_revision_walk(&revs))
-                               die("revision walk setup failed");
+       for (r = reusable; r; r = r->next) {
+               ALLOC_GROW(bb->commits, bb->commits_nr + 1, bb->commits_alloc);
+               bb->commits[bb->commits_nr++] = r->item;
+       }
 
-                       traverse_commit_list(&revs, show_commit, show_object, base);
+       trace2_data_intmax("pack-bitmap-write", the_repository,
+                          "num_selected_commits", writer->selected_nr);
+       trace2_data_intmax("pack-bitmap-write", the_repository,
+                          "num_maximal_commits", num_maximal);
 
-                       object_array_clear(&revs.pending);
+       free_commit_list(reusable);
+}
 
-                       stored->bitmap = bitmap_to_ewah(base);
-                       need_reset = 0;
-               } else
-                       need_reset = 1;
+static void bitmap_builder_clear(struct bitmap_builder *bb)
+{
+       clear_bb_data(&bb->data);
+       free(bb->commits);
+       bb->commits_nr = bb->commits_alloc = 0;
+}
+
+static void fill_bitmap_tree(struct bitmap *bitmap,
+                            struct tree *tree)
+{
+       uint32_t pos;
+       struct tree_desc desc;
+       struct name_entry entry;
+
+       /*
+        * If our bit is already set, then there is nothing to do. Both this
+        * tree and all of its children will be set.
+        */
+       pos = find_object_pos(&tree->object.oid);
+       if (bitmap_get(bitmap, pos))
+               return;
+       bitmap_set(bitmap, pos);
+
+       if (parse_tree(tree) < 0)
+               die("unable to load tree object %s",
+                   oid_to_hex(&tree->object.oid));
+       init_tree_desc(&desc, tree->buffer, tree->size);
+
+       while (tree_entry(&desc, &entry)) {
+               switch (object_type(entry.mode)) {
+               case OBJ_TREE:
+                       fill_bitmap_tree(bitmap,
+                                        lookup_tree(the_repository, &entry.oid));
+                       break;
+               case OBJ_BLOB:
+                       bitmap_set(bitmap, find_object_pos(&entry.oid));
+                       break;
+               default:
+                       /* Gitlink, etc; not reachable */
+                       break;
+               }
+       }
 
-               if (i >= reuse_after)
-                       stored->flags |= BITMAP_FLAG_REUSE;
+       free_tree_buffer(tree);
+}
 
-               hash_pos = kh_put_sha1(writer.bitmaps, object->oid.hash, &hash_ret);
-               if (hash_ret == 0)
-                       die("Duplicate entry when writing index: %s",
-                           oid_to_hex(&object->oid));
+static void fill_bitmap_commit(struct bb_commit *ent,
+                              struct commit *commit,
+                              struct prio_queue *queue,
+                              struct prio_queue *tree_queue,
+                              struct bitmap_index *old_bitmap,
+                              const uint32_t *mapping)
+{
+       if (!ent->bitmap)
+               ent->bitmap = bitmap_new();
+
+       prio_queue_put(queue, commit);
+
+       while (queue->nr) {
+               struct commit_list *p;
+               struct commit *c = prio_queue_get(queue);
+
+               if (old_bitmap && mapping) {
+                       struct ewah_bitmap *old = bitmap_for_commit(old_bitmap, c);
+                       /*
+                        * If this commit has an old bitmap, then translate that
+                        * bitmap and add its bits to this one. No need to walk
+                        * parents or the tree for this commit.
+                        */
+                       if (old && !rebuild_bitmap(mapping, old, ent->bitmap))
+                               continue;
+               }
 
-               kh_value(writer.bitmaps, hash_pos) = stored;
-               display_progress(writer.progress, writer.selected_nr - i);
+               /*
+                * Mark ourselves and queue our tree. The commit
+                * walk ensures we cover all parents.
+                */
+               bitmap_set(ent->bitmap, find_object_pos(&c->object.oid));
+               prio_queue_put(tree_queue, get_commit_tree(c));
+
+               for (p = c->parents; p; p = p->next) {
+                       int pos = find_object_pos(&p->item->object.oid);
+                       if (!bitmap_get(ent->bitmap, pos)) {
+                               bitmap_set(ent->bitmap, pos);
+                               prio_queue_put(queue, p->item);
+                       }
+               }
        }
 
-       bitmap_free(base);
+       while (tree_queue->nr)
+               fill_bitmap_tree(ent->bitmap, prio_queue_get(tree_queue));
+}
+
+static void store_selected(struct bb_commit *ent, struct commit *commit)
+{
+       struct bitmapped_commit *stored = &writer.selected[ent->idx];
+       khiter_t hash_pos;
+       int hash_ret;
+
+       stored->bitmap = bitmap_to_ewah(ent->bitmap);
+
+       hash_pos = kh_put_oid_map(writer.bitmaps, commit->object.oid, &hash_ret);
+       if (hash_ret == 0)
+               die("Duplicate entry when writing index: %s",
+                   oid_to_hex(&commit->object.oid));
+       kh_value(writer.bitmaps, hash_pos) = stored;
+}
+
+void bitmap_writer_build(struct packing_data *to_pack)
+{
+       struct bitmap_builder bb;
+       size_t i;
+       int nr_stored = 0; /* for progress */
+       struct prio_queue queue = { compare_commits_by_gen_then_commit_date };
+       struct prio_queue tree_queue = { NULL };
+       struct bitmap_index *old_bitmap;
+       uint32_t *mapping;
+
+       writer.bitmaps = kh_init_oid_map();
+       writer.to_pack = to_pack;
+
+       if (writer.show_progress)
+               writer.progress = start_progress("Building bitmaps", writer.selected_nr);
+       trace2_region_enter("pack-bitmap-write", "building_bitmaps_total",
+                           the_repository);
+
+       old_bitmap = prepare_bitmap_git(to_pack->repo);
+       if (old_bitmap)
+               mapping = create_bitmap_mapping(old_bitmap, to_pack);
+       else
+               mapping = NULL;
+
+       bitmap_builder_init(&bb, &writer, old_bitmap);
+       for (i = bb.commits_nr; i > 0; i--) {
+               struct commit *commit = bb.commits[i-1];
+               struct bb_commit *ent = bb_data_at(&bb.data, commit);
+               struct commit *child;
+               int reused = 0;
+
+               fill_bitmap_commit(ent, commit, &queue, &tree_queue,
+                                  old_bitmap, mapping);
+
+               if (ent->selected) {
+                       store_selected(ent, commit);
+                       nr_stored++;
+                       display_progress(writer.progress, nr_stored);
+               }
+
+               while ((child = pop_commit(&ent->reverse_edges))) {
+                       struct bb_commit *child_ent =
+                               bb_data_at(&bb.data, child);
+
+                       if (child_ent->bitmap)
+                               bitmap_or(child_ent->bitmap, ent->bitmap);
+                       else if (reused)
+                               child_ent->bitmap = bitmap_dup(ent->bitmap);
+                       else {
+                               child_ent->bitmap = ent->bitmap;
+                               reused = 1;
+                       }
+               }
+               if (!reused)
+                       bitmap_free(ent->bitmap);
+               ent->bitmap = NULL;
+       }
+       clear_prio_queue(&queue);
+       clear_prio_queue(&tree_queue);
+       bitmap_builder_clear(&bb);
+       free(mapping);
+
+       trace2_region_leave("pack-bitmap-write", "building_bitmaps_total",
+                           the_repository);
+
        stop_progress(&writer.progress);
 
        compute_xor_offsets();
@@ -358,29 +536,6 @@ static int date_compare(const void *_a, const void *_b)
        return (long)b->date - (long)a->date;
 }
 
-void bitmap_writer_reuse_bitmaps(struct packing_data *to_pack)
-{
-       if (prepare_bitmap_git() < 0)
-               return;
-
-       writer.reused = kh_init_sha1();
-       rebuild_existing_bitmaps(to_pack, writer.reused, writer.show_progress);
-}
-
-static struct ewah_bitmap *find_reused_bitmap(const unsigned char *sha1)
-{
-       khiter_t hash_pos;
-
-       if (!writer.reused)
-               return NULL;
-
-       hash_pos = kh_get_sha1(writer.reused, sha1);
-       if (hash_pos >= kh_end(writer.reused))
-               return NULL;
-
-       return kh_value(writer.reused, hash_pos);
-}
-
 void bitmap_writer_select_commits(struct commit **indexed_commits,
                                  unsigned int indexed_commits_nr,
                                  int max_bitmaps)
@@ -394,12 +549,11 @@ void bitmap_writer_select_commits(struct commit **indexed_commits,
 
        if (indexed_commits_nr < 100) {
                for (i = 0; i < indexed_commits_nr; ++i)
-                       push_bitmapped_commit(indexed_commits[i], NULL);
+                       push_bitmapped_commit(indexed_commits[i]);
                return;
        }
 
        for (;;) {
-               struct ewah_bitmap *reused_bitmap = NULL;
                struct commit *chosen = NULL;
 
                next = next_commit_index(i);
@@ -414,15 +568,13 @@ void bitmap_writer_select_commits(struct commit **indexed_commits,
 
                if (next == 0) {
                        chosen = indexed_commits[i];
-                       reused_bitmap = find_reused_bitmap(chosen->object.oid.hash);
                } else {
                        chosen = indexed_commits[i + next];
 
                        for (j = 0; j <= next; ++j) {
                                struct commit *cm = indexed_commits[i + j];
 
-                               reused_bitmap = find_reused_bitmap(cm->object.oid.hash);
-                               if (reused_bitmap || (cm->object.flags & NEEDS_BITMAP) != 0) {
+                               if ((cm->object.flags & NEEDS_BITMAP) != 0) {
                                        chosen = cm;
                                        break;
                                }
@@ -432,7 +584,7 @@ void bitmap_writer_select_commits(struct commit **indexed_commits,
                        }
                }
 
-               push_bitmapped_commit(chosen, reused_bitmap);
+               push_bitmapped_commit(chosen);
 
                i += next + 1;
                display_progress(writer.progress, i);
@@ -458,10 +610,10 @@ static inline void dump_bitmap(struct hashfile *f, struct ewah_bitmap *bitmap)
                die("Failed to write bitmap index");
 }
 
-static const unsigned char *sha1_access(size_t pos, void *table)
+static const struct object_id *oid_access(size_t pos, const void *table)
 {
-       struct pack_idx_entry **index = table;
-       return index[pos]->oid.hash;
+       const struct pack_idx_entry * const *index = table;
+       return &index[pos]->oid;
 }
 
 static void write_selected_commits_v1(struct hashfile *f,
@@ -474,7 +626,7 @@ static void write_selected_commits_v1(struct hashfile *f,
                struct bitmapped_commit *stored = &writer.selected[i];
 
                int commit_pos =
-                       sha1_pos(stored->commit->object.oid.hash, index, index_nr, sha1_access);
+                       oid_pos(&stored->commit->object.oid, index, index_nr, oid_access);
 
                if (commit_pos < 0)
                        BUG("trying to write commit not in index");
@@ -495,8 +647,7 @@ static void write_hash_cache(struct hashfile *f,
 
        for (i = 0; i < index_nr; ++i) {
                struct object_entry *entry = (struct object_entry *)index[i];
-               uint32_t hash_value = htonl(entry->hash);
-               hashwrite(f, &hash_value, sizeof(hash_value));
+               hashwrite_be32(f, entry->hash);
        }
 }
 
@@ -527,7 +678,7 @@ void bitmap_writer_finish(struct pack_idx_entry **index,
        header.entry_count = htonl(writer.selected_nr);
        hashcpy(header.checksum, writer.pack_checksum);
 
-       hashwrite(f, &header, sizeof(header));
+       hashwrite(f, &header, sizeof(header) - GIT_MAX_RAWSZ + the_hash_algo->rawsz);
        dump_bitmap(f, writer.commits);
        dump_bitmap(f, writer.trees);
        dump_bitmap(f, writer.blobs);