]> git.ipfire.org Git - thirdparty/git.git/commitdiff
repack: 'write_midx_included_packs' API from the builtin
authorTaylor Blau <me@ttaylorr.com>
Wed, 15 Oct 2025 22:29:13 +0000 (18:29 -0400)
committerJunio C Hamano <gitster@pobox.com>
Thu, 16 Oct 2025 17:08:56 +0000 (10:08 -0700)
Now that we have sufficiently cleaned up the write_midx_included_packs()
function, we can move it (along with the struct repack_write_midx_opts)
out of the builtin, and into the repack.h header.

Since this function (and the static ones that it depends on) are
MIDX-specific details of the repacking process, move them to the
repack-midx.c compilation unit instead of the general repack.c one.

Signed-off-by: Taylor Blau <me@ttaylorr.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
builtin/repack.c
repack-midx.c
repack.h

index fad10be42ac5cb5a24af21c96b6e4b4904b2ac60..2f49a18283535c4f0de4412eb421478d8a92d2f6 100644 (file)
@@ -107,311 +107,6 @@ static int repack_config(const char *var, const char *value,
        return git_default_config(var, value, ctx, cb);
 }
 
-struct repack_write_midx_opts {
-       struct existing_packs *existing;
-       struct pack_geometry *geometry;
-       struct string_list *names;
-       const char *refs_snapshot;
-       const char *packdir;
-       int show_progress;
-       int write_bitmaps;
-       int midx_must_contain_cruft;
-};
-
-static int midx_has_unknown_packs(struct string_list *include,
-                                 struct pack_geometry *geometry,
-                                 struct existing_packs *existing)
-{
-       struct string_list_item *item;
-
-       string_list_sort(include);
-
-       for_each_string_list_item(item, &existing->midx_packs) {
-               const char *pack_name = item->string;
-
-               /*
-                * Determine whether or not each MIDX'd pack from the existing
-                * MIDX (if any) is represented in the new MIDX. For each pack
-                * in the MIDX, it must either be:
-                *
-                *  - In the "include" list of packs to be included in the new
-                *    MIDX. Note this function is called before the include
-                *    list is populated with any cruft pack(s).
-                *
-                *  - Below the geometric split line (if using pack geometry),
-                *    indicating that the pack won't be included in the new
-                *    MIDX, but its contents were rolled up as part of the
-                *    geometric repack.
-                *
-                *  - In the existing non-kept packs list (if not using pack
-                *    geometry), and marked as non-deleted.
-                */
-               if (string_list_has_string(include, pack_name)) {
-                       continue;
-               } else if (geometry) {
-                       struct strbuf buf = STRBUF_INIT;
-                       uint32_t j;
-
-                       for (j = 0; j < geometry->split; j++) {
-                               strbuf_reset(&buf);
-                               strbuf_addstr(&buf, pack_basename(geometry->pack[j]));
-                               strbuf_strip_suffix(&buf, ".pack");
-                               strbuf_addstr(&buf, ".idx");
-
-                               if (!strcmp(pack_name, buf.buf)) {
-                                       strbuf_release(&buf);
-                                       break;
-                               }
-                       }
-
-                       strbuf_release(&buf);
-
-                       if (j < geometry->split)
-                               continue;
-               } else {
-                       struct string_list_item *item;
-
-                       item = string_list_lookup(&existing->non_kept_packs,
-                                                 pack_name);
-                       if (item && !existing_pack_is_marked_for_deletion(item))
-                               continue;
-               }
-
-               /*
-                * If we got to this point, the MIDX includes some pack that we
-                * don't know about.
-                */
-               return 1;
-       }
-
-       return 0;
-}
-
-static void midx_included_packs(struct string_list *include,
-                               struct repack_write_midx_opts *opts)
-{
-       struct existing_packs *existing = opts->existing;
-       struct pack_geometry *geometry = opts->geometry;
-       struct string_list *names = opts->names;
-       struct string_list_item *item;
-       struct strbuf buf = STRBUF_INIT;
-
-       for_each_string_list_item(item, &existing->kept_packs) {
-               strbuf_reset(&buf);
-               strbuf_addf(&buf, "%s.idx", item->string);
-               string_list_insert(include, buf.buf);
-       }
-
-       for_each_string_list_item(item, names) {
-               strbuf_reset(&buf);
-               strbuf_addf(&buf, "pack-%s.idx", item->string);
-               string_list_insert(include, buf.buf);
-       }
-
-       if (geometry->split_factor) {
-               uint32_t i;
-
-               for (i = geometry->split; i < geometry->pack_nr; i++) {
-                       struct packed_git *p = geometry->pack[i];
-
-                       /*
-                        * The multi-pack index never refers to packfiles part
-                        * of an alternate object database, so we skip these.
-                        * While git-multi-pack-index(1) would silently ignore
-                        * them anyway, this allows us to skip executing the
-                        * command completely when we have only non-local
-                        * packfiles.
-                        */
-                       if (!p->pack_local)
-                               continue;
-
-                       strbuf_reset(&buf);
-                       strbuf_addstr(&buf, pack_basename(p));
-                       strbuf_strip_suffix(&buf, ".pack");
-                       strbuf_addstr(&buf, ".idx");
-
-                       string_list_insert(include, buf.buf);
-               }
-       } else {
-               for_each_string_list_item(item, &existing->non_kept_packs) {
-                       if (existing_pack_is_marked_for_deletion(item))
-                               continue;
-
-                       strbuf_reset(&buf);
-                       strbuf_addf(&buf, "%s.idx", item->string);
-                       string_list_insert(include, buf.buf);
-               }
-       }
-
-       if (opts->midx_must_contain_cruft ||
-           midx_has_unknown_packs(include, geometry, existing)) {
-               /*
-                * If there are one or more unknown pack(s) present (see
-                * midx_has_unknown_packs() for what makes a pack
-                * "unknown") in the MIDX before the repack, keep them
-                * as they may be required to form a reachability
-                * closure if the MIDX is bitmapped.
-                *
-                * For example, a cruft pack can be required to form a
-                * reachability closure if the MIDX is bitmapped and one
-                * or more of the bitmap's selected commits reaches a
-                * once-cruft object that was later made reachable.
-                */
-               for_each_string_list_item(item, &existing->cruft_packs) {
-                       /*
-                        * When doing a --geometric repack, there is no
-                        * need to check for deleted packs, since we're
-                        * by definition not doing an ALL_INTO_ONE
-                        * repack (hence no packs will be deleted).
-                        * Otherwise we must check for and exclude any
-                        * packs which are enqueued for deletion.
-                        *
-                        * So we could omit the conditional below in the
-                        * --geometric case, but doing so is unnecessary
-                        *  since no packs are marked as pending
-                        *  deletion (since we only call
-                        *  `existing_packs_mark_for_deletion()` when
-                        *  doing an all-into-one repack).
-                        */
-                       if (existing_pack_is_marked_for_deletion(item))
-                               continue;
-
-                       strbuf_reset(&buf);
-                       strbuf_addf(&buf, "%s.idx", item->string);
-                       string_list_insert(include, buf.buf);
-               }
-       } else {
-               /*
-                * Modern versions of Git (with the appropriate
-                * configuration setting) will write new copies of
-                * once-cruft objects when doing a --geometric repack.
-                *
-                * If the MIDX has no cruft pack, new packs written
-                * during a --geometric repack will not rely on the
-                * cruft pack to form a reachability closure, so we can
-                * avoid including them in the MIDX in that case.
-                */
-               ;
-       }
-
-       strbuf_release(&buf);
-}
-
-static void remove_redundant_bitmaps(struct string_list *include,
-                                    const char *packdir)
-{
-       struct strbuf path = STRBUF_INIT;
-       struct string_list_item *item;
-       size_t packdir_len;
-
-       strbuf_addstr(&path, packdir);
-       strbuf_addch(&path, '/');
-       packdir_len = path.len;
-
-       /*
-        * Remove any pack bitmaps corresponding to packs which are now
-        * included in the MIDX.
-        */
-       for_each_string_list_item(item, include) {
-               strbuf_addstr(&path, item->string);
-               strbuf_strip_suffix(&path, ".idx");
-               strbuf_addstr(&path, ".bitmap");
-
-               if (unlink(path.buf) && errno != ENOENT)
-                       warning_errno(_("could not remove stale bitmap: %s"),
-                                     path.buf);
-
-               strbuf_setlen(&path, packdir_len);
-       }
-       strbuf_release(&path);
-}
-
-static int write_midx_included_packs(struct repack_write_midx_opts *opts)
-{
-       struct child_process cmd = CHILD_PROCESS_INIT;
-       struct string_list include = STRING_LIST_INIT_DUP;
-       struct string_list_item *item;
-       struct packed_git *preferred = pack_geometry_preferred_pack(opts->geometry);
-       FILE *in;
-       int ret = 0;
-
-       midx_included_packs(&include, opts);
-       if (!include.nr)
-               goto done;
-
-       cmd.in = -1;
-       cmd.git_cmd = 1;
-
-       strvec_push(&cmd.args, "multi-pack-index");
-       strvec_pushl(&cmd.args, "write", "--stdin-packs", NULL);
-
-       if (opts->show_progress)
-               strvec_push(&cmd.args, "--progress");
-       else
-               strvec_push(&cmd.args, "--no-progress");
-
-       if (opts->write_bitmaps)
-               strvec_push(&cmd.args, "--bitmap");
-
-       if (preferred)
-               strvec_pushf(&cmd.args, "--preferred-pack=%s",
-                            pack_basename(preferred));
-       else if (opts->names->nr) {
-               /* The largest pack was repacked, meaning that either
-                * one or two packs exist depending on whether the
-                * repository has a cruft pack or not.
-                *
-                * Select the non-cruft one as preferred to encourage
-                * pack-reuse among packs containing reachable objects
-                * over unreachable ones.
-                *
-                * (Note we could write multiple packs here if
-                * `--max-pack-size` was given, but any one of them
-                * will suffice, so pick the first one.)
-                */
-               for_each_string_list_item(item, opts->names) {
-                       struct generated_pack *pack = item->util;
-                       if (generated_pack_has_ext(pack, ".mtimes"))
-                               continue;
-
-                       strvec_pushf(&cmd.args, "--preferred-pack=pack-%s.pack",
-                                    item->string);
-                       break;
-               }
-       } else {
-               /*
-                * No packs were kept, and no packs were written. The
-                * only thing remaining are .keep packs (unless
-                * --pack-kept-objects was given).
-                *
-                * Set the `--preferred-pack` arbitrarily here.
-                */
-               ;
-       }
-
-       if (opts->refs_snapshot)
-               strvec_pushf(&cmd.args, "--refs-snapshot=%s",
-                            opts->refs_snapshot);
-
-       ret = start_command(&cmd);
-       if (ret)
-               goto done;
-
-       in = xfdopen(cmd.in, "w");
-       for_each_string_list_item(item, &include)
-               fprintf(in, "%s\n", item->string);
-       fclose(in);
-
-       ret = finish_command(&cmd);
-done:
-       if (!ret && opts->write_bitmaps)
-               remove_redundant_bitmaps(&include, opts->packdir);
-
-       string_list_clear(&include, 0);
-
-       return ret;
-}
-
 static int finish_pack_objects_cmd(const struct git_hash_algo *algop,
                                   struct child_process *cmd,
                                   struct string_list *names,
index 354df729a5efc7dfa20e47dd63751b9f782529a1..6f6202c5bccd89a9087bd86a80b7f56d0678f7fb 100644 (file)
@@ -6,6 +6,7 @@
 #include "oidset.h"
 #include "pack-bitmap.h"
 #include "refs.h"
+#include "run-command.h"
 #include "tempfile.h"
 
 struct midx_snapshot_ref_data {
@@ -75,3 +76,297 @@ void midx_snapshot_refs(struct repository *repo, struct tempfile *f)
 
        oidset_clear(&data.seen);
 }
+
+static int midx_has_unknown_packs(struct string_list *include,
+                                 struct pack_geometry *geometry,
+                                 struct existing_packs *existing)
+{
+       struct string_list_item *item;
+
+       string_list_sort(include);
+
+       for_each_string_list_item(item, &existing->midx_packs) {
+               const char *pack_name = item->string;
+
+               /*
+                * Determine whether or not each MIDX'd pack from the existing
+                * MIDX (if any) is represented in the new MIDX. For each pack
+                * in the MIDX, it must either be:
+                *
+                *  - In the "include" list of packs to be included in the new
+                *    MIDX. Note this function is called before the include
+                *    list is populated with any cruft pack(s).
+                *
+                *  - Below the geometric split line (if using pack geometry),
+                *    indicating that the pack won't be included in the new
+                *    MIDX, but its contents were rolled up as part of the
+                *    geometric repack.
+                *
+                *  - In the existing non-kept packs list (if not using pack
+                *    geometry), and marked as non-deleted.
+                */
+               if (string_list_has_string(include, pack_name)) {
+                       continue;
+               } else if (geometry) {
+                       struct strbuf buf = STRBUF_INIT;
+                       uint32_t j;
+
+                       for (j = 0; j < geometry->split; j++) {
+                               strbuf_reset(&buf);
+                               strbuf_addstr(&buf, pack_basename(geometry->pack[j]));
+                               strbuf_strip_suffix(&buf, ".pack");
+                               strbuf_addstr(&buf, ".idx");
+
+                               if (!strcmp(pack_name, buf.buf)) {
+                                       strbuf_release(&buf);
+                                       break;
+                               }
+                       }
+
+                       strbuf_release(&buf);
+
+                       if (j < geometry->split)
+                               continue;
+               } else {
+                       struct string_list_item *item;
+
+                       item = string_list_lookup(&existing->non_kept_packs,
+                                                 pack_name);
+                       if (item && !existing_pack_is_marked_for_deletion(item))
+                               continue;
+               }
+
+               /*
+                * If we got to this point, the MIDX includes some pack that we
+                * don't know about.
+                */
+               return 1;
+       }
+
+       return 0;
+}
+
+static void midx_included_packs(struct string_list *include,
+                               struct repack_write_midx_opts *opts)
+{
+       struct existing_packs *existing = opts->existing;
+       struct pack_geometry *geometry = opts->geometry;
+       struct string_list *names = opts->names;
+       struct string_list_item *item;
+       struct strbuf buf = STRBUF_INIT;
+
+       for_each_string_list_item(item, &existing->kept_packs) {
+               strbuf_reset(&buf);
+               strbuf_addf(&buf, "%s.idx", item->string);
+               string_list_insert(include, buf.buf);
+       }
+
+       for_each_string_list_item(item, names) {
+               strbuf_reset(&buf);
+               strbuf_addf(&buf, "pack-%s.idx", item->string);
+               string_list_insert(include, buf.buf);
+       }
+
+       if (geometry->split_factor) {
+               uint32_t i;
+
+               for (i = geometry->split; i < geometry->pack_nr; i++) {
+                       struct packed_git *p = geometry->pack[i];
+
+                       /*
+                        * The multi-pack index never refers to packfiles part
+                        * of an alternate object database, so we skip these.
+                        * While git-multi-pack-index(1) would silently ignore
+                        * them anyway, this allows us to skip executing the
+                        * command completely when we have only non-local
+                        * packfiles.
+                        */
+                       if (!p->pack_local)
+                               continue;
+
+                       strbuf_reset(&buf);
+                       strbuf_addstr(&buf, pack_basename(p));
+                       strbuf_strip_suffix(&buf, ".pack");
+                       strbuf_addstr(&buf, ".idx");
+
+                       string_list_insert(include, buf.buf);
+               }
+       } else {
+               for_each_string_list_item(item, &existing->non_kept_packs) {
+                       if (existing_pack_is_marked_for_deletion(item))
+                               continue;
+
+                       strbuf_reset(&buf);
+                       strbuf_addf(&buf, "%s.idx", item->string);
+                       string_list_insert(include, buf.buf);
+               }
+       }
+
+       if (opts->midx_must_contain_cruft ||
+           midx_has_unknown_packs(include, geometry, existing)) {
+               /*
+                * If there are one or more unknown pack(s) present (see
+                * midx_has_unknown_packs() for what makes a pack
+                * "unknown") in the MIDX before the repack, keep them
+                * as they may be required to form a reachability
+                * closure if the MIDX is bitmapped.
+                *
+                * For example, a cruft pack can be required to form a
+                * reachability closure if the MIDX is bitmapped and one
+                * or more of the bitmap's selected commits reaches a
+                * once-cruft object that was later made reachable.
+                */
+               for_each_string_list_item(item, &existing->cruft_packs) {
+                       /*
+                        * When doing a --geometric repack, there is no
+                        * need to check for deleted packs, since we're
+                        * by definition not doing an ALL_INTO_ONE
+                        * repack (hence no packs will be deleted).
+                        * Otherwise we must check for and exclude any
+                        * packs which are enqueued for deletion.
+                        *
+                        * So we could omit the conditional below in the
+                        * --geometric case, but doing so is unnecessary
+                        *  since no packs are marked as pending
+                        *  deletion (since we only call
+                        *  `existing_packs_mark_for_deletion()` when
+                        *  doing an all-into-one repack).
+                        */
+                       if (existing_pack_is_marked_for_deletion(item))
+                               continue;
+
+                       strbuf_reset(&buf);
+                       strbuf_addf(&buf, "%s.idx", item->string);
+                       string_list_insert(include, buf.buf);
+               }
+       } else {
+               /*
+                * Modern versions of Git (with the appropriate
+                * configuration setting) will write new copies of
+                * once-cruft objects when doing a --geometric repack.
+                *
+                * If the MIDX has no cruft pack, new packs written
+                * during a --geometric repack will not rely on the
+                * cruft pack to form a reachability closure, so we can
+                * avoid including them in the MIDX in that case.
+                */
+               ;
+       }
+
+       strbuf_release(&buf);
+}
+
+static void remove_redundant_bitmaps(struct string_list *include,
+                                    const char *packdir)
+{
+       struct strbuf path = STRBUF_INIT;
+       struct string_list_item *item;
+       size_t packdir_len;
+
+       strbuf_addstr(&path, packdir);
+       strbuf_addch(&path, '/');
+       packdir_len = path.len;
+
+       /*
+        * Remove any pack bitmaps corresponding to packs which are now
+        * included in the MIDX.
+        */
+       for_each_string_list_item(item, include) {
+               strbuf_addstr(&path, item->string);
+               strbuf_strip_suffix(&path, ".idx");
+               strbuf_addstr(&path, ".bitmap");
+
+               if (unlink(path.buf) && errno != ENOENT)
+                       warning_errno(_("could not remove stale bitmap: %s"),
+                                     path.buf);
+
+               strbuf_setlen(&path, packdir_len);
+       }
+       strbuf_release(&path);
+}
+
+int write_midx_included_packs(struct repack_write_midx_opts *opts)
+{
+       struct child_process cmd = CHILD_PROCESS_INIT;
+       struct string_list include = STRING_LIST_INIT_DUP;
+       struct string_list_item *item;
+       struct packed_git *preferred = pack_geometry_preferred_pack(opts->geometry);
+       FILE *in;
+       int ret = 0;
+
+       midx_included_packs(&include, opts);
+       if (!include.nr)
+               goto done;
+
+       cmd.in = -1;
+       cmd.git_cmd = 1;
+
+       strvec_push(&cmd.args, "multi-pack-index");
+       strvec_pushl(&cmd.args, "write", "--stdin-packs", NULL);
+
+       if (opts->show_progress)
+               strvec_push(&cmd.args, "--progress");
+       else
+               strvec_push(&cmd.args, "--no-progress");
+
+       if (opts->write_bitmaps)
+               strvec_push(&cmd.args, "--bitmap");
+
+       if (preferred)
+               strvec_pushf(&cmd.args, "--preferred-pack=%s",
+                            pack_basename(preferred));
+       else if (opts->names->nr) {
+               /* The largest pack was repacked, meaning that either
+                * one or two packs exist depending on whether the
+                * repository has a cruft pack or not.
+                *
+                * Select the non-cruft one as preferred to encourage
+                * pack-reuse among packs containing reachable objects
+                * over unreachable ones.
+                *
+                * (Note we could write multiple packs here if
+                * `--max-pack-size` was given, but any one of them
+                * will suffice, so pick the first one.)
+                */
+               for_each_string_list_item(item, opts->names) {
+                       struct generated_pack *pack = item->util;
+                       if (generated_pack_has_ext(pack, ".mtimes"))
+                               continue;
+
+                       strvec_pushf(&cmd.args, "--preferred-pack=pack-%s.pack",
+                                    item->string);
+                       break;
+               }
+       } else {
+               /*
+                * No packs were kept, and no packs were written. The
+                * only thing remaining are .keep packs (unless
+                * --pack-kept-objects was given).
+                *
+                * Set the `--preferred-pack` arbitrarily here.
+                */
+               ;
+       }
+
+       if (opts->refs_snapshot)
+               strvec_pushf(&cmd.args, "--refs-snapshot=%s",
+                            opts->refs_snapshot);
+
+       ret = start_command(&cmd);
+       if (ret)
+               goto done;
+
+       in = xfdopen(cmd.in, "w");
+       for_each_string_list_item(item, &include)
+               fprintf(in, "%s\n", item->string);
+       fclose(in);
+
+       ret = finish_command(&cmd);
+done:
+       if (!ret && opts->write_bitmaps)
+               remove_redundant_bitmaps(&include, opts->packdir);
+
+       string_list_clear(&include, 0);
+
+       return ret;
+}
index 6aa5b4e0f0fd2c529b1abc43703727a97e03fbed..25a31ac0a05fe26f13fd68a38032f0276aefe065 100644 (file)
--- a/repack.h
+++ b/repack.h
@@ -101,6 +101,18 @@ void pack_geometry_release(struct pack_geometry *geometry);
 
 struct tempfile;
 
+struct repack_write_midx_opts {
+       struct existing_packs *existing;
+       struct pack_geometry *geometry;
+       struct string_list *names;
+       const char *refs_snapshot;
+       const char *packdir;
+       int show_progress;
+       int write_bitmaps;
+       int midx_must_contain_cruft;
+};
+
 void midx_snapshot_refs(struct repository *repo, struct tempfile *f);
+int write_midx_included_packs(struct repack_write_midx_opts *opts);
 
 #endif /* REPACK_H */