]> git.ipfire.org Git - thirdparty/git.git/commitdiff
midx-write: skip rewriting MIDX with `--stdin-packs` unless needed
authorPatrick Steinhardt <ps@pks.im>
Wed, 10 Dec 2025 12:52:20 +0000 (13:52 +0100)
committerJunio C Hamano <gitster@pobox.com>
Thu, 11 Dec 2025 03:09:59 +0000 (12:09 +0900)
In `write_midx_internal()` we know to skip rewriting the multi-pack
index in case the existing one already covers all packs. This logic does
not know to handle `git multi-pack-index write --stdin-packs` though, so
we end up always rewriting the MIDX in this case even if the MIDX would
not change.

With our default maintenance strategy this isn't really much of a
problem, as git-gc(1) does not use the "--stdin-packs" option. But that
is changing with geometric repacking, where "--stdin-packs" is used to
explicitly select the packfiles part of the geometric sequence.

This issue can be demonstrated trivially with a benchmark in the Git
repository: executing `git repack --geometric=2 --write-midx -d` in the
Git repository takes more than 3 seconds only to end up with the same
multi-pack index as we already had before.

The logic that decides if we need to rewrite the MIDX only checks
whether the number of packfiles covered will change. That check is of
course too lenient for "--stdin-packs", as it could happen that we want
to cover a different-but-same-size set of packfiles. But there is no
inherent reason why we cannot handle "--stdin-packs".

Improve the logic to not only check for the number of packs, but to also
verify that we are asked to generate a MIDX for the _same_ packs. This
allows us to also skip no-op rewrites for "--stdin-packs".

Helped-by: Taylor Blau <me@ttaylorr.com>
Signed-off-by: Patrick Steinhardt <ps@pks.im>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
midx-write.c
t/t5319-multi-pack-index.sh
t/t7703-repack-geometric.sh

index c1eed04691f0a7de20973b2a0fb92b23082956c4..40abe3868c4d6f357568af6b99159f16818cfdd4 100644 (file)
@@ -1015,9 +1015,10 @@ static void clear_midx_files(struct odb_source *source,
        strbuf_release(&buf);
 }
 
-static bool midx_needs_update(struct write_midx_context *ctx)
+static bool midx_needs_update(struct multi_pack_index *midx, struct write_midx_context *ctx)
 {
-       struct multi_pack_index *midx = ctx->m;
+       struct strset packs = STRSET_INIT;
+       struct strbuf buf = STRBUF_INIT;
        bool needed = true;
 
        /*
@@ -1028,25 +1029,48 @@ static bool midx_needs_update(struct write_midx_context *ctx)
        if (ctx->incremental)
                goto out;
 
-       /*
-        * If there is no MIDX then either it doesn't exist, or we're doing a
-        * geometric repack. We cannot (yet) determine whether we need to
-        * update the multi-pack index in the second case.
-        */
-       if (!midx)
-               goto out;
-
        /*
         * Otherwise, we need to verify that the packs covered by the existing
-        * MIDX match the packs that we already have. This test is somewhat
-        * lenient and will be fixed.
+        * MIDX match the packs that we already have. The logic to do so is way
+        * more complicated than it has any right to be. This is because:
+        *
+        *   - We cannot assume any ordering.
+        *
+        *   - The MIDX packs may not be loaded at all, and loading them would
+        *     be wasteful. So we need to use the pack names tracked by the
+        *     MIDX itself.
+        *
+        *   - The MIDX pack names are tracking the ".idx" files, whereas the
+        *     packs themselves are tracking the ".pack" files. So we need to
+        *     strip suffixes.
         */
        if (ctx->nr != midx->num_packs + midx->num_packs_in_base)
                goto out;
 
+       for (uint32_t i = 0; i < ctx->nr; i++) {
+               strbuf_reset(&buf);
+               strbuf_addstr(&buf, pack_basename(ctx->info[i].p));
+               strbuf_strip_suffix(&buf, ".pack");
+
+               if (!strset_add(&packs, buf.buf))
+                       BUG("same pack added twice?");
+       }
+
+       for (uint32_t i = 0; i < ctx->nr; i++) {
+               strbuf_reset(&buf);
+               strbuf_addstr(&buf, midx->pack_names[i]);
+               strbuf_strip_suffix(&buf, ".idx");
+
+               if (!strset_contains(&packs, buf.buf))
+                       goto out;
+               strset_remove(&packs, buf.buf);
+       }
+
        needed = false;
 
 out:
+       strbuf_release(&buf);
+       strset_clear(&packs);
        return needed;
 }
 
@@ -1067,6 +1091,7 @@ static int write_midx_internal(struct odb_source *source,
        struct write_midx_context ctx = {
                .preferred_pack_idx = NO_PREFERRED_PACK,
         };
+       struct multi_pack_index *midx_to_free = NULL;
        int bitmapped_packs_concat_len = 0;
        int pack_name_concat_len = 0;
        int dropped_packs = 0;
@@ -1147,25 +1172,39 @@ static int write_midx_internal(struct odb_source *source,
        for_each_file_in_pack_dir(source->path, add_pack_to_midx, &ctx);
        stop_progress(&ctx.progress);
 
-       if (!packs_to_include && !packs_to_drop && !midx_needs_update(&ctx)) {
-               struct bitmap_index *bitmap_git;
-               int bitmap_exists;
-               int want_bitmap = flags & MIDX_WRITE_BITMAP;
-
-               bitmap_git = prepare_midx_bitmap_git(ctx.m);
-               bitmap_exists = bitmap_git && bitmap_is_midx(bitmap_git);
-               free_bitmap_index(bitmap_git);
-
-               if (bitmap_exists || !want_bitmap) {
-                       /*
-                        * The correct MIDX already exists, and so does a
-                        * corresponding bitmap (or one wasn't requested).
-                        */
-                       if (!want_bitmap)
-                               clear_midx_files_ext(source, "bitmap", NULL);
-                       result = 0;
-                       goto cleanup;
+       if (!packs_to_drop) {
+               /*
+                * If there is no MIDX then either it doesn't exist, or we're
+                * doing a geometric repack. Try to load it from the source to
+                * tell these two cases apart.
+                */
+               struct multi_pack_index *midx = ctx.m;
+               if (!midx)
+                       midx = midx_to_free = load_multi_pack_index(ctx.source);
+
+               if (midx && !midx_needs_update(midx, &ctx)) {
+                       struct bitmap_index *bitmap_git;
+                       int bitmap_exists;
+                       int want_bitmap = flags & MIDX_WRITE_BITMAP;
+
+                       bitmap_git = prepare_midx_bitmap_git(midx);
+                       bitmap_exists = bitmap_git && bitmap_is_midx(bitmap_git);
+                       free_bitmap_index(bitmap_git);
+
+                       if (bitmap_exists || !want_bitmap) {
+                               /*
+                                * The correct MIDX already exists, and so does a
+                                * corresponding bitmap (or one wasn't requested).
+                                */
+                               if (!want_bitmap)
+                                       clear_midx_files_ext(source, "bitmap", NULL);
+                               result = 0;
+                               goto cleanup;
+                       }
                }
+
+               close_midx(midx_to_free);
+               midx_to_free = NULL;
        }
 
        if (ctx.incremental && !ctx.nr) {
@@ -1521,6 +1560,7 @@ cleanup:
                free(keep_hashes);
        }
        strbuf_release(&midx_name);
+       close_midx(midx_to_free);
 
        trace2_region_leave("midx", "write_midx_internal", r);
 
index 9492a9737b5f8ec2e9319834f04775aa98ad20c7..794f8b5ab4e136254abf6951cd7fe6690884f6b8 100755 (executable)
@@ -366,6 +366,57 @@ test_expect_success 'preferred pack cannot be determined without bitmap' '
        )
 '
 
+test_midx_is_retained () {
+       test-tool chmtime =0 .git/objects/pack/multi-pack-index &&
+       ls -l .git/objects/pack/multi-pack-index >expect &&
+       git multi-pack-index write "$@" &&
+       ls -l .git/objects/pack/multi-pack-index >actual &&
+       test_cmp expect actual
+}
+
+test_midx_is_rewritten () {
+       test-tool chmtime =0 .git/objects/pack/multi-pack-index &&
+       ls -l .git/objects/pack/multi-pack-index >expect &&
+       git multi-pack-index write "$@" &&
+       ls -l .git/objects/pack/multi-pack-index >actual &&
+       ! test_cmp expect actual
+}
+
+test_expect_success 'up-to-date multi-pack-index is retained' '
+       test_when_finished "rm -fr midx-up-to-date" &&
+       git init midx-up-to-date &&
+       (
+               cd midx-up-to-date &&
+
+               # Write the initial pack that contains the most objects.
+               test_commit first &&
+               test_commit second &&
+               git repack -Ad --write-midx &&
+               test_midx_is_retained &&
+
+               # Writing a new bitmap index should cause us to regenerate the MIDX.
+               test_midx_is_rewritten --bitmap &&
+               test_midx_is_retained --bitmap &&
+
+               # Ensure that writing a new packfile causes us to rewrite the index.
+               test_commit incremental &&
+               git repack -d &&
+               test_midx_is_rewritten &&
+               test_midx_is_retained &&
+
+               for pack in .git/objects/pack/*.idx
+               do
+                       basename "$pack" || exit 1
+               done >stdin &&
+               test_line_count = 2 stdin &&
+               test_midx_is_retained --stdin-packs <stdin &&
+               head -n1 stdin >stdin.trimmed &&
+               test_midx_is_rewritten --stdin-packs <stdin.trimmed
+       )
+'
+
+test_done
+
 test_expect_success 'verify multi-pack-index success' '
        git multi-pack-index verify --object-dir=$objdir
 '
index 9fc1626fbfde8989348537903a266eb6cae8600d..98806cdb6fe9b78775bdf5dc83cf22dd71555c08 100755 (executable)
@@ -287,6 +287,41 @@ test_expect_success '--geometric with pack.packSizeLimit' '
        )
 '
 
+test_expect_success '--geometric --write-midx retains up-to-date MIDX without bitmap index' '
+       test_when_finished "rm -fr repo" &&
+       git init repo &&
+       (
+               cd repo &&
+               test_commit initial &&
+
+               test_path_is_missing .git/objects/pack/multi-pack-index &&
+               git repack --geometric=2 --write-midx --no-write-bitmap-index &&
+               test_path_is_file .git/objects/pack/multi-pack-index &&
+               test-tool chmtime =0 .git/objects/pack/multi-pack-index &&
+
+               ls -l .git/objects/pack/ >expect &&
+               git repack --geometric=2 --write-midx --no-write-bitmap-index &&
+               ls -l .git/objects/pack/ >actual &&
+               test_cmp expect actual
+       )
+'
+
+test_expect_success '--geometric --write-midx retains up-to-date MIDX with bitmap index' '
+       test_when_finished "rm -fr repo" &&
+       git init repo &&
+       test_commit -C repo initial &&
+
+       test_path_is_missing repo/.git/objects/pack/multi-pack-index &&
+       git -C repo repack --geometric=2 --write-midx --write-bitmap-index &&
+       test_path_is_file repo/.git/objects/pack/multi-pack-index &&
+       test-tool chmtime =0 repo/.git/objects/pack/multi-pack-index &&
+
+       ls -l repo/.git/objects/pack/ >expect &&
+       git -C repo repack --geometric=2 --write-midx --write-bitmap-index &&
+       ls -l repo/.git/objects/pack/ >actual &&
+       test_cmp expect actual
+'
+
 test_expect_success '--geometric --write-midx with packfiles in main and alternate ODB' '
        test_when_finished "rm -fr shared member" &&