From: Patrick Steinhardt Date: Wed, 10 Dec 2025 12:52:20 +0000 (+0100) Subject: midx-write: skip rewriting MIDX with `--stdin-packs` unless needed X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=6ce9d558ced275a707393d044e5b0035412f8360;p=thirdparty%2Fgit.git midx-write: skip rewriting MIDX with `--stdin-packs` unless needed In `write_midx_internal()` we know to skip rewriting the multi-pack index in case the existing one already covers all packs. This logic does not know to handle `git multi-pack-index write --stdin-packs` though, so we end up always rewriting the MIDX in this case even if the MIDX would not change. With our default maintenance strategy this isn't really much of a problem, as git-gc(1) does not use the "--stdin-packs" option. But that is changing with geometric repacking, where "--stdin-packs" is used to explicitly select the packfiles part of the geometric sequence. This issue can be demonstrated trivially with a benchmark in the Git repository: executing `git repack --geometric=2 --write-midx -d` in the Git repository takes more than 3 seconds only to end up with the same multi-pack index as we already had before. The logic that decides if we need to rewrite the MIDX only checks whether the number of packfiles covered will change. That check is of course too lenient for "--stdin-packs", as it could happen that we want to cover a different-but-same-size set of packfiles. But there is no inherent reason why we cannot handle "--stdin-packs". Improve the logic to not only check for the number of packs, but to also verify that we are asked to generate a MIDX for the _same_ packs. This allows us to also skip no-op rewrites for "--stdin-packs". Helped-by: Taylor Blau Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- diff --git a/midx-write.c b/midx-write.c index c1eed04691..40abe3868c 100644 --- a/midx-write.c +++ b/midx-write.c @@ -1015,9 +1015,10 @@ static void clear_midx_files(struct odb_source *source, strbuf_release(&buf); } -static bool midx_needs_update(struct write_midx_context *ctx) +static bool midx_needs_update(struct multi_pack_index *midx, struct write_midx_context *ctx) { - struct multi_pack_index *midx = ctx->m; + struct strset packs = STRSET_INIT; + struct strbuf buf = STRBUF_INIT; bool needed = true; /* @@ -1028,25 +1029,48 @@ static bool midx_needs_update(struct write_midx_context *ctx) if (ctx->incremental) goto out; - /* - * If there is no MIDX then either it doesn't exist, or we're doing a - * geometric repack. We cannot (yet) determine whether we need to - * update the multi-pack index in the second case. - */ - if (!midx) - goto out; - /* * Otherwise, we need to verify that the packs covered by the existing - * MIDX match the packs that we already have. This test is somewhat - * lenient and will be fixed. + * MIDX match the packs that we already have. The logic to do so is way + * more complicated than it has any right to be. This is because: + * + * - We cannot assume any ordering. + * + * - The MIDX packs may not be loaded at all, and loading them would + * be wasteful. So we need to use the pack names tracked by the + * MIDX itself. + * + * - The MIDX pack names are tracking the ".idx" files, whereas the + * packs themselves are tracking the ".pack" files. So we need to + * strip suffixes. */ if (ctx->nr != midx->num_packs + midx->num_packs_in_base) goto out; + for (uint32_t i = 0; i < ctx->nr; i++) { + strbuf_reset(&buf); + strbuf_addstr(&buf, pack_basename(ctx->info[i].p)); + strbuf_strip_suffix(&buf, ".pack"); + + if (!strset_add(&packs, buf.buf)) + BUG("same pack added twice?"); + } + + for (uint32_t i = 0; i < ctx->nr; i++) { + strbuf_reset(&buf); + strbuf_addstr(&buf, midx->pack_names[i]); + strbuf_strip_suffix(&buf, ".idx"); + + if (!strset_contains(&packs, buf.buf)) + goto out; + strset_remove(&packs, buf.buf); + } + needed = false; out: + strbuf_release(&buf); + strset_clear(&packs); return needed; } @@ -1067,6 +1091,7 @@ static int write_midx_internal(struct odb_source *source, struct write_midx_context ctx = { .preferred_pack_idx = NO_PREFERRED_PACK, }; + struct multi_pack_index *midx_to_free = NULL; int bitmapped_packs_concat_len = 0; int pack_name_concat_len = 0; int dropped_packs = 0; @@ -1147,25 +1172,39 @@ static int write_midx_internal(struct odb_source *source, for_each_file_in_pack_dir(source->path, add_pack_to_midx, &ctx); stop_progress(&ctx.progress); - if (!packs_to_include && !packs_to_drop && !midx_needs_update(&ctx)) { - struct bitmap_index *bitmap_git; - int bitmap_exists; - int want_bitmap = flags & MIDX_WRITE_BITMAP; - - bitmap_git = prepare_midx_bitmap_git(ctx.m); - bitmap_exists = bitmap_git && bitmap_is_midx(bitmap_git); - free_bitmap_index(bitmap_git); - - if (bitmap_exists || !want_bitmap) { - /* - * The correct MIDX already exists, and so does a - * corresponding bitmap (or one wasn't requested). - */ - if (!want_bitmap) - clear_midx_files_ext(source, "bitmap", NULL); - result = 0; - goto cleanup; + if (!packs_to_drop) { + /* + * If there is no MIDX then either it doesn't exist, or we're + * doing a geometric repack. Try to load it from the source to + * tell these two cases apart. + */ + struct multi_pack_index *midx = ctx.m; + if (!midx) + midx = midx_to_free = load_multi_pack_index(ctx.source); + + if (midx && !midx_needs_update(midx, &ctx)) { + struct bitmap_index *bitmap_git; + int bitmap_exists; + int want_bitmap = flags & MIDX_WRITE_BITMAP; + + bitmap_git = prepare_midx_bitmap_git(midx); + bitmap_exists = bitmap_git && bitmap_is_midx(bitmap_git); + free_bitmap_index(bitmap_git); + + if (bitmap_exists || !want_bitmap) { + /* + * The correct MIDX already exists, and so does a + * corresponding bitmap (or one wasn't requested). + */ + if (!want_bitmap) + clear_midx_files_ext(source, "bitmap", NULL); + result = 0; + goto cleanup; + } } + + close_midx(midx_to_free); + midx_to_free = NULL; } if (ctx.incremental && !ctx.nr) { @@ -1521,6 +1560,7 @@ cleanup: free(keep_hashes); } strbuf_release(&midx_name); + close_midx(midx_to_free); trace2_region_leave("midx", "write_midx_internal", r); diff --git a/t/t5319-multi-pack-index.sh b/t/t5319-multi-pack-index.sh index 9492a9737b..794f8b5ab4 100755 --- a/t/t5319-multi-pack-index.sh +++ b/t/t5319-multi-pack-index.sh @@ -366,6 +366,57 @@ test_expect_success 'preferred pack cannot be determined without bitmap' ' ) ' +test_midx_is_retained () { + test-tool chmtime =0 .git/objects/pack/multi-pack-index && + ls -l .git/objects/pack/multi-pack-index >expect && + git multi-pack-index write "$@" && + ls -l .git/objects/pack/multi-pack-index >actual && + test_cmp expect actual +} + +test_midx_is_rewritten () { + test-tool chmtime =0 .git/objects/pack/multi-pack-index && + ls -l .git/objects/pack/multi-pack-index >expect && + git multi-pack-index write "$@" && + ls -l .git/objects/pack/multi-pack-index >actual && + ! test_cmp expect actual +} + +test_expect_success 'up-to-date multi-pack-index is retained' ' + test_when_finished "rm -fr midx-up-to-date" && + git init midx-up-to-date && + ( + cd midx-up-to-date && + + # Write the initial pack that contains the most objects. + test_commit first && + test_commit second && + git repack -Ad --write-midx && + test_midx_is_retained && + + # Writing a new bitmap index should cause us to regenerate the MIDX. + test_midx_is_rewritten --bitmap && + test_midx_is_retained --bitmap && + + # Ensure that writing a new packfile causes us to rewrite the index. + test_commit incremental && + git repack -d && + test_midx_is_rewritten && + test_midx_is_retained && + + for pack in .git/objects/pack/*.idx + do + basename "$pack" || exit 1 + done >stdin && + test_line_count = 2 stdin && + test_midx_is_retained --stdin-packs stdin.trimmed && + test_midx_is_rewritten --stdin-packs expect && + git repack --geometric=2 --write-midx --no-write-bitmap-index && + ls -l .git/objects/pack/ >actual && + test_cmp expect actual + ) +' + +test_expect_success '--geometric --write-midx retains up-to-date MIDX with bitmap index' ' + test_when_finished "rm -fr repo" && + git init repo && + test_commit -C repo initial && + + test_path_is_missing repo/.git/objects/pack/multi-pack-index && + git -C repo repack --geometric=2 --write-midx --write-bitmap-index && + test_path_is_file repo/.git/objects/pack/multi-pack-index && + test-tool chmtime =0 repo/.git/objects/pack/multi-pack-index && + + ls -l repo/.git/objects/pack/ >expect && + git -C repo repack --geometric=2 --write-midx --write-bitmap-index && + ls -l repo/.git/objects/pack/ >actual && + test_cmp expect actual +' + test_expect_success '--geometric --write-midx with packfiles in main and alternate ODB' ' test_when_finished "rm -fr shared member" &&