]> git.ipfire.org Git - thirdparty/git.git/commitdiff
pseudo-merge: implement support for finding existing merges
authorTaylor Blau <me@ttaylorr.com>
Thu, 23 May 2024 21:27:21 +0000 (17:27 -0400)
committerJunio C Hamano <gitster@pobox.com>
Fri, 24 May 2024 18:40:44 +0000 (11:40 -0700)
This patch implements support for reusing existing pseudo-merge commits
when writing bitmaps when there is an existing pseudo-merge bitmap which
has exactly the same set of parents as one that we are about to write.

Note that unstable pseudo-merges are likely to change between
consecutive repacks, and so are generally poor candidates for reuse.
However, stable pseudo-merges (see the configuration option
'bitmapPseudoMerge.<name>.stableThreshold') are by definition unlikely
to change between runs (as they represent long-running branches).

Because there is no index from a *set* of pseudo-merge parents to a
matching pseudo-merge bitmap, we have to construct the bitmap
corresponding to the set of parents for each pending pseudo-merge commit
and see if a matching bitmap exists.

This is technically quadratic in the number of pseudo-merges, but is OK
in practice for a couple of reasons:

  - non-matching pseudo-merge bitmaps are rejected quickly as soon as
    they differ in a single bit

  - already-matched pseudo-merge bitmaps are discarded from subsequent
    rounds of search

  - the number of pseudo-merges is generally small, even for large
    repositories

In order to do this, implement (a) a function that finds a matching
pseudo-merge given some uncompressed bitset describing its parents, (b)
a function that computes the bitset of parents for a given pseudo-merge
commit, and (c) call that function before computing the set of reachable
objects for some pending pseudo-merge.

Signed-off-by: Taylor Blau <me@ttaylorr.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
pack-bitmap-write.c
pack-bitmap.c
pack-bitmap.h
pseudo-merge.c
pseudo-merge.h
t/t5333-pseudo-merge-bitmaps.sh

index 47250398aa2e2f8ceb69d59fbc6cfc27edacca98..6e8060f8a0b99f33d16d428c47cf7aef39d7494f 100644 (file)
 #include "tree-walk.h"
 #include "pseudo-merge.h"
 #include "oid-array.h"
+#include "config.h"
+#include "alloc.h"
+#include "refs.h"
+#include "strmap.h"
 
 struct bitmapped_commit {
        struct commit *commit;
@@ -465,6 +469,7 @@ static int fill_bitmap_tree(struct bitmap_writer *writer,
 }
 
 static int reused_bitmaps_nr;
+static int reused_pseudo_merge_bitmaps_nr;
 
 static int fill_bitmap_commit(struct bitmap_writer *writer,
                              struct bb_commit *ent,
@@ -490,7 +495,7 @@ static int fill_bitmap_commit(struct bitmap_writer *writer,
                        struct bitmap *remapped = bitmap_new();
 
                        if (commit->object.flags & BITMAP_PSEUDO_MERGE)
-                               old = NULL;
+                               old = pseudo_merge_bitmap_for_commit(old_bitmap, c);
                        else
                                old = bitmap_for_commit(old_bitmap, c);
                        /*
@@ -501,7 +506,10 @@ static int fill_bitmap_commit(struct bitmap_writer *writer,
                        if (old && !rebuild_bitmap(mapping, old, remapped)) {
                                bitmap_or(ent->bitmap, remapped);
                                bitmap_free(remapped);
-                               reused_bitmaps_nr++;
+                               if (commit->object.flags & BITMAP_PSEUDO_MERGE)
+                                       reused_pseudo_merge_bitmaps_nr++;
+                               else
+                                       reused_bitmaps_nr++;
                                continue;
                        }
                        bitmap_free(remapped);
@@ -631,6 +639,9 @@ int bitmap_writer_build(struct bitmap_writer *writer,
                            the_repository);
        trace2_data_intmax("pack-bitmap-write", the_repository,
                           "building_bitmaps_reused", reused_bitmaps_nr);
+       trace2_data_intmax("pack-bitmap-write", the_repository,
+                          "building_bitmaps_pseudo_merge_reused",
+                          reused_pseudo_merge_bitmaps_nr);
 
        stop_progress(&writer->progress);
 
index 1966b3b95f11f38e6353aad706306a6367e5d05d..70230e2647927d7ad03509a2843a178bf837b884 100644 (file)
@@ -1316,6 +1316,37 @@ cleanup:
        return cb.base;
 }
 
+struct ewah_bitmap *pseudo_merge_bitmap_for_commit(struct bitmap_index *bitmap_git,
+                                                  struct commit *commit)
+{
+       struct commit_list *p;
+       struct bitmap *parents;
+       struct pseudo_merge *match = NULL;
+
+       if (!bitmap_git->pseudo_merges.nr)
+               return NULL;
+
+       parents = bitmap_new();
+
+       for (p = commit->parents; p; p = p->next) {
+               int pos = bitmap_position(bitmap_git, &p->item->object.oid);
+               if (pos < 0 || pos >= bitmap_num_objects(bitmap_git))
+                       goto done;
+
+               bitmap_set(parents, pos);
+       }
+
+       match = pseudo_merge_for_parents(&bitmap_git->pseudo_merges,
+                                               parents);
+
+done:
+       bitmap_free(parents);
+       if (match)
+               return pseudo_merge_bitmap(&bitmap_git->pseudo_merges, match);
+
+       return NULL;
+}
+
 static void unsatisfy_all_pseudo_merges(struct bitmap_index *bitmap_git)
 {
        uint32_t i;
@@ -2809,6 +2840,7 @@ void free_bitmap_index(struct bitmap_index *b)
                 */
                close_midx_revindex(b->midx);
        }
+       free_pseudo_merge_map(&b->pseudo_merges);
        free(b);
 }
 
index 4466b5ad0fbf69582f9d34e84f8b8058d9d56494..1171e6d989379376a6bc839646f8eca0dd15ae35 100644 (file)
@@ -142,6 +142,8 @@ int rebuild_bitmap(const uint32_t *reposition,
                   struct bitmap *dest);
 struct ewah_bitmap *bitmap_for_commit(struct bitmap_index *bitmap_git,
                                      struct commit *commit);
+struct ewah_bitmap *pseudo_merge_bitmap_for_commit(struct bitmap_index *bitmap_git,
+                                                  struct commit *commit);
 void bitmap_writer_select_commits(struct bitmap_writer *writer,
                                  struct commit **indexed_commits,
                                  unsigned int indexed_commits_nr);
index 7d131011497126691764fdd2784002141e781721..a117520996c3771ec579502e6413e8f595d2df9b 100644 (file)
@@ -699,3 +699,58 @@ int cascade_pseudo_merges(const struct pseudo_merge_map *pm,
 
        return ret;
 }
+
+struct pseudo_merge *pseudo_merge_for_parents(const struct pseudo_merge_map *pm,
+                                             struct bitmap *parents)
+{
+       struct pseudo_merge *match = NULL;
+       size_t i;
+
+       if (!pm->nr)
+               return NULL;
+
+       /*
+        * NOTE: this loop is quadratic in the worst-case (where no
+        * matching pseudo-merge bitmaps are found), but in practice
+        * this is OK for a few reasons:
+        *
+        *   - Rejecting pseudo-merge bitmaps that do not match the
+        *     given commit is done quickly (i.e. `bitmap_equals_ewah()`
+        *     returns early when we know the two bitmaps aren't equal.
+        *
+        *   - Already matched pseudo-merge bitmaps (which we track with
+        *     the `->satisfied` bit here) are skipped as potential
+        *     candidates.
+        *
+        *   - The number of pseudo-merges should be small (in the
+        *     hundreds for most repositories).
+        *
+        * If in the future this semi-quadratic behavior does become a
+        * problem, another approach would be to keep track of which
+        * pseudo-merges are still "viable" after enumerating the
+        * pseudo-merge commit's parents:
+        *
+        *   - A pseudo-merge bitmap becomes non-viable when the bit(s)
+        *     corresponding to one or more parent(s) of the given
+        *     commit are not set in a candidate pseudo-merge's commits
+        *     bitmap.
+        *
+        *   - After processing all bits, enumerate the remaining set of
+        *     viable pseudo-merge bitmaps, and check that their
+        *     popcount() matches the number of parents in the given
+        *     commit.
+        */
+       for (i = 0; i < pm->nr; i++) {
+               struct pseudo_merge *candidate = use_pseudo_merge(pm, &pm->v[i]);
+               if (!candidate || candidate->satisfied)
+                       continue;
+               if (!bitmap_equals_ewah(parents, candidate->commits))
+                       continue;
+
+               match = candidate;
+               match->satisfied = 1;
+               break;
+       }
+
+       return match;
+}
index 755edc054aef4844cc86d85acaae7534154270be..2aca01d056666e4341a773888f843616751d475e 100644 (file)
@@ -206,4 +206,11 @@ int cascade_pseudo_merges(const struct pseudo_merge_map *pm,
                          struct bitmap *result,
                          struct bitmap *roots);
 
+/*
+ * Returns a pseudo-merge which contains the exact set of commits
+ * listed in the "parents" bitamp, or NULL if none could be found.
+ */
+struct pseudo_merge *pseudo_merge_for_parents(const struct pseudo_merge_map *pm,
+                                             struct bitmap *parents);
+
 #endif
index 4c9aebcffdc691d9c455785a3a0f0fca429d5227..f052f395a778fd3107c6fb5ae5e5fad23091e961 100755 (executable)
@@ -22,6 +22,10 @@ test_pseudo_merges_cascades () {
        test_trace2_data bitmap pseudo_merges_cascades "$1"
 }
 
+test_pseudo_merges_reused () {
+       test_trace2_data pack-bitmap-write building_bitmaps_pseudo_merge_reused "$1"
+}
+
 tag_everything () {
        git rev-list --all --no-object-names >in &&
        perl -lne '
@@ -325,4 +329,65 @@ test_expect_success 'pseudo-merge overlap stale traversal' '
        )
 '
 
+test_expect_success 'pseudo-merge reuse' '
+       git init pseudo-merge-reuse &&
+       (
+               cd pseudo-merge-reuse &&
+
+               stable="1641013200" && # 2022-01-01
+               unstable="1672549200" && # 2023-01-01
+
+               GIT_COMMITTER_DATE="$stable +0000" &&
+               export GIT_COMMITTER_DATE &&
+               test_commit_bulk --notick 128 &&
+               GIT_COMMITTER_DATE="$unstable +0000" &&
+               export GIT_COMMITTER_DATE &&
+               test_commit_bulk --notick 128 &&
+
+               tag_everything &&
+
+               git \
+                       -c bitmapPseudoMerge.test.pattern="refs/tags/" \
+                       -c bitmapPseudoMerge.test.maxMerges=1 \
+                       -c bitmapPseudoMerge.test.threshold=now \
+                       -c bitmapPseudoMerge.test.stableThreshold=$(($unstable - 1)) \
+                       -c bitmapPseudoMerge.test.stableSize=512 \
+                       repack -adb &&
+
+               test_pseudo_merges >merges &&
+               test_line_count = 2 merges &&
+
+               test_pseudo_merge_commits 0 >stable-oids.before &&
+               test_pseudo_merge_commits 1 >unstable-oids.before &&
+
+               : >trace2.txt &&
+               GIT_TRACE2_EVENT=$PWD/trace2.txt git \
+                       -c bitmapPseudoMerge.test.pattern="refs/tags/" \
+                       -c bitmapPseudoMerge.test.maxMerges=2 \
+                       -c bitmapPseudoMerge.test.threshold=now \
+                       -c bitmapPseudoMerge.test.stableThreshold=$(($unstable - 1)) \
+                       -c bitmapPseudoMerge.test.stableSize=512 \
+                       repack -adb &&
+
+               test_pseudo_merges_reused 1 <trace2.txt &&
+
+               test_pseudo_merges >merges &&
+               test_line_count = 3 merges &&
+
+               test_pseudo_merge_commits 0 >stable-oids.after &&
+               for i in 1 2
+               do
+                       test_pseudo_merge_commits $i || return 1
+               done >unstable-oids.after &&
+
+               sort -u <stable-oids.before >expect &&
+               sort -u <stable-oids.after >actual &&
+               test_cmp expect actual &&
+
+               sort -u <unstable-oids.before >expect &&
+               sort -u <unstable-oids.after >actual &&
+               test_cmp expect actual
+       )
+'
+
 test_done