]> git.ipfire.org Git - thirdparty/git.git/commitdiff
pack-objects: refactor `read_packs_list_from_stdin()` to use `strmap`
authorTaylor Blau <me@ttaylorr.com>
Fri, 27 Mar 2026 20:06:46 +0000 (16:06 -0400)
committerJunio C Hamano <gitster@pobox.com>
Fri, 27 Mar 2026 20:40:39 +0000 (13:40 -0700)
The '--stdin-packs' mode of pack-objects maintains two separate
string_lists: one for included packs, and one for excluded packs. Each
list stores the pack basename as a string and the corresponding
`packed_git` pointer in its `->util` field.

This works, but makes it awkward to extend the set of pack "kinds" that
pack-objects can accept via stdin, since each new kind would need its
own string_list and duplicated handling. A future commit will want to do
just this, so prepare for that change by handling the various "kinds" of
packs specified over stdin in a more generic fashion.

Namely, replace the two `string_list`s with a single `strmap` keyed on
the pack basename, with values pointing to a new `struct
stdin_pack_info`. This struct tracks both the `packed_git` pointer and a
`kind` bitfield indicating whether the pack was specified as included or
excluded.

Extract the logic for sorting packs by mtime and adding their objects
into a separate `stdin_packs_add_pack_entries()` helper.

While we could have used a `string_list`, we must handle the case where
the same pack is specified more than once. With a `string_list` only, we
would have to pay a quadratic cost to either (a) insert elements into
their sorted positions, or (b) a repeated linear search, which is
accidentally quadratic. For that reason, use a strmap instead.

This patch does not include any functional changes.

Signed-off-by: Taylor Blau <me@ttaylorr.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
builtin/pack-objects.c

index 9a89bc5c4c924aeb9a23200c107fade382f7c5ec..945100b405c536bc90693d241cb17b86c7b0015d 100644 (file)
@@ -28,6 +28,7 @@
 #include "reachable.h"
 #include "oid-array.h"
 #include "strvec.h"
+#include "strmap.h"
 #include "list.h"
 #include "packfile.h"
 #include "object-file.h"
@@ -3820,107 +3821,151 @@ static void show_commit_pack_hint(struct commit *commit, void *data)
 
 }
 
+/*
+ * stdin_pack_info_kind specifies how a pack specified over stdin
+ * should be treated when pack-objects is invoked with --stdin-packs.
+ *
+ *  - STDIN_PACK_INCLUDE: objects in any packs with this flag bit set
+ *    should be included in the output pack, unless they appear in an
+ *    excluded pack.
+ *
+ *  - STDIN_PACK_EXCLUDE_CLOSED: objects in any packs with this flag
+ *    bit set should be excluded from the output pack.
+ *
+ * Objects in packs whose 'kind' bits include STDIN_PACK_INCLUDE are
+ * used as traversal tips when invoked with --stdin-packs=follow.
+ */
+enum stdin_pack_info_kind {
+       STDIN_PACK_INCLUDE = (1<<0),
+       STDIN_PACK_EXCLUDE_CLOSED = (1<<1),
+};
+
+struct stdin_pack_info {
+       struct packed_git *p;
+       enum stdin_pack_info_kind kind;
+};
+
 static int pack_mtime_cmp(const void *_a, const void *_b)
 {
-       struct packed_git *a = ((const struct string_list_item*)_a)->util;
-       struct packed_git *b = ((const struct string_list_item*)_b)->util;
+       struct stdin_pack_info *a = ((const struct string_list_item*)_a)->util;
+       struct stdin_pack_info *b = ((const struct string_list_item*)_b)->util;
 
        /*
         * order packs by descending mtime so that objects are laid out
         * roughly as newest-to-oldest
         */
-       if (a->mtime < b->mtime)
+       if (a->p->mtime < b->p->mtime)
                return 1;
-       else if (b->mtime < a->mtime)
+       else if (b->p->mtime < a->p->mtime)
                return -1;
        else
                return 0;
 }
 
-static void read_packs_list_from_stdin(struct rev_info *revs)
+static void stdin_packs_add_pack_entries(struct strmap *packs,
+                                        struct rev_info *revs)
+{
+       struct string_list keys = STRING_LIST_INIT_NODUP;
+       struct string_list_item *item;
+       struct hashmap_iter iter;
+       struct strmap_entry *entry;
+
+       strmap_for_each_entry(packs, &iter, entry) {
+               struct stdin_pack_info *info = entry->value;
+               if (!info->p)
+                       die(_("could not find pack '%s'"), entry->key);
+
+               string_list_append(&keys, entry->key)->util = info;
+       }
+
+       /*
+        * Order packs by ascending mtime; use QSORT directly to access the
+        * string_list_item's ->util pointer, which string_list_sort() does not
+        * provide.
+        */
+       QSORT(keys.items, keys.nr, pack_mtime_cmp);
+
+       for_each_string_list_item(item, &keys) {
+               struct stdin_pack_info *info = item->util;
+
+               if (info->kind & STDIN_PACK_INCLUDE)
+                       for_each_object_in_pack(info->p,
+                                               add_object_entry_from_pack,
+                                               revs,
+                                               ODB_FOR_EACH_OBJECT_PACK_ORDER);
+       }
+
+       string_list_clear(&keys, 0);
+}
+
+static void stdin_packs_read_input(struct rev_info *revs)
 {
        struct strbuf buf = STRBUF_INIT;
-       struct string_list include_packs = STRING_LIST_INIT_DUP;
-       struct string_list exclude_packs = STRING_LIST_INIT_DUP;
-       struct string_list_item *item = NULL;
+       struct strmap packs = STRMAP_INIT;
        struct packed_git *p;
 
        while (strbuf_getline(&buf, stdin) != EOF) {
-               if (!buf.len)
+               struct stdin_pack_info *info;
+               enum stdin_pack_info_kind kind = STDIN_PACK_INCLUDE;
+               const char *key = buf.buf;
+
+               if (!*key)
                        continue;
+               else if (*key == '^')
+                       kind = STDIN_PACK_EXCLUDE_CLOSED;
 
-               if (*buf.buf == '^')
-                       string_list_append(&exclude_packs, buf.buf + 1);
-               else
-                       string_list_append(&include_packs, buf.buf);
+               if (kind != STDIN_PACK_INCLUDE)
+                       key++;
+
+               info = strmap_get(&packs, key);
+               if (!info) {
+                       CALLOC_ARRAY(info, 1);
+                       strmap_put(&packs, key, info);
+               }
+
+               info->kind |= kind;
 
                strbuf_reset(&buf);
        }
 
-       string_list_sort_u(&include_packs, 0);
-       string_list_sort_u(&exclude_packs, 0);
-
        repo_for_each_pack(the_repository, p) {
-               const char *pack_name = pack_basename(p);
+               struct stdin_pack_info *info;
+
+               info = strmap_get(&packs, pack_basename(p));
+               if (!info)
+                       continue;
 
-               if ((item = string_list_lookup(&include_packs, pack_name))) {
+               if (info->kind & STDIN_PACK_INCLUDE) {
                        if (exclude_promisor_objects && p->pack_promisor)
                                die(_("packfile %s is a promisor but --exclude-promisor-objects was given"), p->pack_name);
-                       item->util = p;
+
+                       /*
+                        * Arguments we got on stdin may not even be
+                        * packs. First check that to avoid segfaulting
+                        * later on in e.g.  pack_mtime_cmp(), excluded
+                        * packs are handled below.
+                        */
+                       if (!is_pack_valid(p))
+                               die(_("packfile %s cannot be accessed"), p->pack_name);
                }
-               if ((item = string_list_lookup(&exclude_packs, pack_name)))
-                       item->util = p;
-       }
 
-       /*
-        * Arguments we got on stdin may not even be packs. First
-        * check that to avoid segfaulting later on in
-        * e.g. pack_mtime_cmp(), excluded packs are handled below.
-        *
-        * Since we first parsed our STDIN and then sorted the input
-        * lines the pack we error on will be whatever line happens to
-        * sort first. This is lazy, it's enough that we report one
-        * bad case here, we don't need to report the first/last one,
-        * or all of them.
-        */
-       for_each_string_list_item(item, &include_packs) {
-               struct packed_git *p = item->util;
-               if (!p)
-                       die(_("could not find pack '%s'"), item->string);
-               if (!is_pack_valid(p))
-                       die(_("packfile %s cannot be accessed"), p->pack_name);
-       }
+               if (info->kind & STDIN_PACK_EXCLUDE_CLOSED) {
+                       /*
+                        * Marking excluded packs as kept in-core so
+                        * that later calls to add_object_entry()
+                        * discards any objects that are also found in
+                        * excluded packs.
+                        */
+                       p->pack_keep_in_core = 1;
+               }
 
-       /*
-        * Then, handle all of the excluded packs, marking them as
-        * kept in-core so that later calls to add_object_entry()
-        * discards any objects that are also found in excluded packs.
-        */
-       for_each_string_list_item(item, &exclude_packs) {
-               struct packed_git *p = item->util;
-               if (!p)
-                       die(_("could not find pack '%s'"), item->string);
-               p->pack_keep_in_core = 1;
+               info->p = p;
        }
 
-       /*
-        * Order packs by ascending mtime; use QSORT directly to access the
-        * string_list_item's ->util pointer, which string_list_sort() does not
-        * provide.
-        */
-       QSORT(include_packs.items, include_packs.nr, pack_mtime_cmp);
-
-       for_each_string_list_item(item, &include_packs) {
-               struct packed_git *p = item->util;
-               for_each_object_in_pack(p,
-                                       add_object_entry_from_pack,
-                                       revs,
-                                       ODB_FOR_EACH_OBJECT_PACK_ORDER);
-       }
+       stdin_packs_add_pack_entries(&packs, revs);
 
        strbuf_release(&buf);
-       string_list_clear(&include_packs, 0);
-       string_list_clear(&exclude_packs, 0);
+       strmap_clear(&packs, 1);
 }
 
 static void add_unreachable_loose_objects(struct rev_info *revs);
@@ -3957,7 +4002,7 @@ static void read_stdin_packs(enum stdin_packs_mode mode, int rev_list_unpacked)
 
        /* avoids adding objects in excluded packs */
        ignore_packed_keep_in_core = 1;
-       read_packs_list_from_stdin(&revs);
+       stdin_packs_read_input(&revs);
        if (rev_list_unpacked)
                add_unreachable_loose_objects(&revs);