git.ipfire.org Git - thirdparty/git.git/commitdiff
path-walk: support blobless filter
author: Derrick Stolee <stolee@gmail.com>
Fri, 22 May 2026 18:24:29 +0000 (18:24 +0000)
committer: Junio C Hamano <gitster@pobox.com>
Sun, 24 May 2026 09:41:06 +0000 (18:41 +0900)
The 'git pack-objects' command can opt-in to using the path-walk API for
scanning the objects. Currently, this option is dynamically disabled if
combined with '--filter=<X>', even when using a simple filter such as
'blob:none' to signal a blobless packfile. This is a common scenario for
repos at scale, so it is worth integrating.

Also, users can opt-in to the '--path-walk' option by default through
the pack.usePathWalk=true config option. When using that in a blobless
partial clone, the following warning can appear even though the user did
not specify either option directly:

  warning: cannot use --filter with --path-walk

Teach the path-walk API to handle the 'blob:none' object filter
natively. When revs->filter.choice is LOFC_BLOB_NONE, the path-walk
sets info->blobs to 0 (skipping all blob objects) and clears the
filter from revs so that prepare_revision_walk() does not reject the
configuration.

This check is implemented in the static prepare_filters() method, which
will simultaneously check if the input filters are compatible and will
make the appropriate mutations to the path_walk_info and filters if the
path_walk_info is non-NULL. This allows us to use this logic both in the
API method path_walk_filter_compatible() for use in
builtin/pack-objects.c and as a prep step in walk_objects_by_path().

Update the test helper (test-path-walk) to accept --filter=<spec>
as a test-tool option (before '--'), applying it to revs after
setup_revisions() to avoid the --objects requirement check. We can also
revert recent GIT_TEST_PACK_PATH_WALK overrides in t5620.

Also switch test-path-walk from REV_INFO_INIT with manual repo
assignment to repo_init_revisions(), which properly initializes
the filter_spec strbuf needed for filter parsing.

Add tests for blob:none with --all and with a single branch.

The performance test p5315 shows the impact of this change when using
blobless filters:

Test                                           HEAD~1     HEAD
---------------------------------------------------------------------
5315.6: repack (blob:none)                      13.53   13.87  +2.5%
5315.7: repack size (blob:none)                137.7M  137.8M  +0.1%
5315.8: repack (blob:none, --path-walk)         13.51   23.43 +73.4%
5315.9: repack size (blob:none, --path-walk)   137.7M  115.2M -16.3%

These performance tests were run on the Git repository. The --path-walk
feature shows meaningful space savings (16% smaller for blobless packs)
at the cost of increased computation time due to the two compression
passes. This data demonstrates that the feature is engaged and provides
real compression benefits when --no-reuse-delta forces fresh deltas.

Co-Authored-by: Taylor Blau <me@ttaylorr.com>
Signed-off-by: Taylor Blau <me@ttaylorr.com>
Signed-off-by: Derrick Stolee <stolee@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
Documentation/git-pack-objects.adoc
builtin/pack-objects.c
path-walk.c
path-walk.h
t/helper/test-path-walk.c
t/t5620-backfill.sh
t/t6601-path-walk.sh

index b78175fbe1b97bff679c0078033df4fc12fbd41a..2994faf988505f42af30bb633448790a3458117f 100644 (file)
@@ -402,9 +402,9 @@ will be automatically changed to version `1`.
        of filenames that cause collisions in Git's default name-hash
        algorithm.
 +
-Incompatible with `--delta-islands`, `--shallow`, or `--filter`. The
-`--use-bitmap-index` option will be ignored in the presence of
-`--path-walk.`
+Incompatible with `--delta-islands`. The `--use-bitmap-index` option is
+ignored in the presence of `--path-walk`. The `--path-walk` option
+supports the `--filter=<spec>` form `blob:none`.
 
 
 DELTA ISLANDS
index 4338962904bc944c35c882c1ed6cd91154e6bb0a..bc9fb5b45737a32c545c51263edb522fb8e50bde 100644 (file)
@@ -5177,7 +5177,7 @@ int cmd_pack_objects(int argc,
 
        if (path_walk) {
                const char *option = NULL;
-               if (filter_options.choice)
+               if (!path_walk_filter_compatible(&filter_options))
                        option = "--filter";
                else if (use_delta_islands)
                        option = "--delta-islands";
index 05bfc1c1142ae29ba971a7fb4825a48f0f8b572c..bd81508163f97cdf43eaec1aed186df7af150895 100644 (file)
@@ -9,6 +9,7 @@
 #include "hashmap.h"
 #include "hex.h"
 #include "list-objects.h"
+#include "list-objects-filter-options.h"
 #include "object.h"
 #include "oid-array.h"
 #include "path.h"
@@ -495,6 +496,32 @@ static int setup_pending_objects(struct path_walk_info *info,
        return 0;
 }
 
+static int prepare_filters(struct path_walk_info *info,
+                          struct list_objects_filter_options *options)
+{
+       switch (options->choice) {
+       case LOFC_DISABLED:
+               return 1;
+
+       case LOFC_BLOB_NONE:
+               if (info) {
+                       info->blobs = 0;
+                       list_objects_filter_release(options);
+               }
+               return 1;
+
+       default:
+               error(_("object filter '%s' not supported by the path-walk API"),
+                     list_objects_filter_spec(options));
+               return 0;
+       }
+}
+
+int path_walk_filter_compatible(struct list_objects_filter_options *options)
+{
+       return prepare_filters(NULL, options);
+}
+
 /**
  * Given the configuration of 'info', walk the commits based on 'info->revs' and
  * call 'info->path_fn' on each discovered path.
@@ -522,6 +549,9 @@ int walk_objects_by_path(struct path_walk_info *info)
 
        trace2_region_enter("path-walk", "commit-walk", info->revs->repo);
 
+       if (!prepare_filters(info, &info->revs->filter))
+               return -1;
+
        CALLOC_ARRAY(commit_list, 1);
        commit_list->type = OBJ_COMMIT;
 
index 657eeda8ec00e7e366cee651a2932a1e68f495cb..a1736ecb2b923e41c810d78788af6b9dbab15614 100644 (file)
@@ -90,3 +90,10 @@ void path_walk_info_clear(struct path_walk_info *info);
  * Returns nonzero on an error.
  */
 int walk_objects_by_path(struct path_walk_info *info);
+
+struct list_objects_filter_options;
+/**
+ * Given a set of options for filtering objects, return 1 if the options
+ * are compatible with the path-walk API and 0 otherwise.
+ */
+int path_walk_filter_compatible(struct list_objects_filter_options *options);
index fe63002c2be27d28ab3b0180978b97f407480cf7..88f86ae0dc115705bcc6dca28f98711d90c49c87 100644 (file)
@@ -4,6 +4,7 @@
 #include "dir.h"
 #include "environment.h"
 #include "hex.h"
+#include "list-objects-filter-options.h"
 #include "object-name.h"
 #include "object.h"
 #include "pretty.h"
@@ -71,6 +72,8 @@ int cmd__path_walk(int argc, const char **argv)
        struct rev_info revs = REV_INFO_INIT;
        struct path_walk_info info = PATH_WALK_INFO_INIT;
        struct path_walk_test_data data = { 0 };
+       struct list_objects_filter_options filter_options =
+               LIST_OBJECTS_FILTER_INIT;
        struct option options[] = {
                OPT_BOOL(0, "blobs", &info.blobs,
                         N_("toggle inclusion of blob objects")),
@@ -86,11 +89,12 @@ int cmd__path_walk(int argc, const char **argv)
                         N_("toggle aggressive edge walk")),
                OPT_BOOL(0, "stdin-pl", &stdin_pl,
                         N_("read a pattern list over stdin")),
+               OPT_PARSE_LIST_OBJECTS_FILTER(&filter_options),
                OPT_END(),
        };
 
        setup_git_directory();
-       revs.repo = the_repository;
+       repo_init_revisions(the_repository, &revs, NULL);
 
        argc = parse_options(argc, argv, NULL,
                             options, path_walk_usage,
@@ -101,6 +105,10 @@ int cmd__path_walk(int argc, const char **argv)
        else
                usage(path_walk_usage[0]);
 
+       /* Apply the filter after setup_revisions to avoid the --objects check. */
+       if (filter_options.choice)
+               list_objects_filter_copy(&revs.filter, &filter_options);
+
        info.revs = &revs;
        info.path_fn = emit_block;
        info.path_fn_data = &data;
@@ -129,6 +137,7 @@ int cmd__path_walk(int argc, const char **argv)
                free(info.pl);
        }
 
+       list_objects_filter_release(&filter_options);
        release_revisions(&revs);
        return res;
 }
index e1742907871ba3e5bba7f93d3de3db43b51e7a8b..94f35ce19016716b2ca8c22faffaa8a05fb3e7a6 100755 (executable)
@@ -298,9 +298,6 @@ test_expect_success 'backfill with prefix pathspec' '
        git -C backfill-path rev-list --quiet --objects --missing=print HEAD >missing &&
        test_line_count = 48 missing &&
 
-       # If we enable --path-walk here, we will get a warning overs stderr
-       # due to incompatibilities with --filter.
-       GIT_TEST_PACK_PATH_WALK=0 \
        git -C backfill-path backfill HEAD -- d/f 2>err &&
        test_must_be_empty err &&
 
@@ -318,9 +315,6 @@ test_expect_success 'backfill with multiple pathspecs' '
        git -C backfill-path rev-list --quiet --objects --missing=print HEAD >missing &&
        test_line_count = 48 missing &&
 
-       # If we enable --path-walk here, we will get a warning overs stderr
-       # due to incompatibilities with --filter.
-       GIT_TEST_PACK_PATH_WALK=0 \
        git -C backfill-path backfill HEAD -- d/f a 2>err &&
        test_must_be_empty err &&
 
@@ -338,9 +332,6 @@ test_expect_success 'backfill with wildcard pathspec' '
        git -C backfill-path rev-list --quiet --objects --missing=print HEAD >missing &&
        test_line_count = 48 missing &&
 
-       # If we enable --path-walk here, we will get a warning overs stderr
-       # due to incompatibilities with --filter.
-       GIT_TEST_PACK_PATH_WALK=0 \
        git -C backfill-path backfill HEAD -- "d/file.*.txt" 2>err &&
        test_must_be_empty err &&
 
index 56bd1e3c5bec9759e2b7873f22fc2fdd1831be3f..b0ee31ee2dadf6e11d0cb7945452a9bfeca4c971 100755 (executable)
@@ -415,4 +415,66 @@ test_expect_success 'trees are reported exactly once' '
        test_line_count = 1 out-filtered
 '
 
+test_expect_success 'all, blob:none filter' '
+       test-tool path-walk --filter=blob:none -- --all >out &&
+
+       cat >expect <<-EOF &&
+       0:commit::$(git rev-parse topic)
+       0:commit::$(git rev-parse base)
+       0:commit::$(git rev-parse base~1)
+       0:commit::$(git rev-parse base~2)
+       1:tag:/tags:$(git rev-parse refs/tags/first)
+       1:tag:/tags:$(git rev-parse refs/tags/second.1)
+       1:tag:/tags:$(git rev-parse refs/tags/second.2)
+       1:tag:/tags:$(git rev-parse refs/tags/third)
+       1:tag:/tags:$(git rev-parse refs/tags/fourth)
+       1:tag:/tags:$(git rev-parse refs/tags/tree-tag)
+       1:tag:/tags:$(git rev-parse refs/tags/blob-tag)
+       2:blob:/tagged-blobs:$(git rev-parse refs/tags/blob-tag^{})
+       2:blob:/tagged-blobs:$(git rev-parse refs/tags/blob-tag2^{})
+       3:tree::$(git rev-parse topic^{tree})
+       3:tree::$(git rev-parse base^{tree})
+       3:tree::$(git rev-parse base~1^{tree})
+       3:tree::$(git rev-parse base~2^{tree})
+       3:tree::$(git rev-parse refs/tags/tree-tag^{})
+       3:tree::$(git rev-parse refs/tags/tree-tag2^{})
+       4:tree:a/:$(git rev-parse base:a)
+       5:tree:child/:$(git rev-parse refs/tags/tree-tag:child)
+       6:tree:left/:$(git rev-parse base:left)
+       6:tree:left/:$(git rev-parse base~2:left)
+       7:tree:right/:$(git rev-parse topic:right)
+       7:tree:right/:$(git rev-parse base~1:right)
+       7:tree:right/:$(git rev-parse base~2:right)
+       blobs:2
+       commits:4
+       tags:7
+       trees:13
+       EOF
+
+       test_cmp_sorted expect out
+'
+
+test_expect_success 'topic only, blob:none filter' '
+       test-tool path-walk --filter=blob:none -- topic >out &&
+
+       cat >expect <<-EOF &&
+       0:commit::$(git rev-parse topic)
+       0:commit::$(git rev-parse base~1)
+       0:commit::$(git rev-parse base~2)
+       1:tree::$(git rev-parse topic^{tree})
+       1:tree::$(git rev-parse base~1^{tree})
+       1:tree::$(git rev-parse base~2^{tree})
+       2:tree:left/:$(git rev-parse base~2:left)
+       3:tree:right/:$(git rev-parse topic:right)
+       3:tree:right/:$(git rev-parse base~1:right)
+       3:tree:right/:$(git rev-parse base~2:right)
+       blobs:0
+       commits:3
+       tags:0
+       trees:7
+       EOF
+
+       test_cmp_sorted expect out
+'
+
 test_done