git.ipfire.org Git - thirdparty/git.git/commitdiff
path-walk: always emit directly-requested objects
author Derrick Stolee <stolee@gmail.com>
Fri, 22 May 2026 18:24:28 +0000 (18:24 +0000)
committer Junio C Hamano <gitster@pobox.com>
Sun, 24 May 2026 09:41:06 +0000 (18:41 +0900)
We are preparing to integrate the path-walk API with some --filter options
in 'git pack-objects', but there is a subtle issue that is revealed when
those are put together and the test suite is run with
GIT_TEST_PACK_PATH_WALK=1.

When a filter reduces the set of requested objects, this results in
filtering out directly-requested objects, such as in the download of needed
blobs in a blobless partial clone.

The root cause is that the scan of pending objects in the path-walk API
respects the filters set in the path_walk_info instead of overriding them
for pending objects.

We can tell that a path is part of the directly-referenced objects if its
path name starts with '/' (other paths, including root trees, never have this
starting character). Create a path_is_for_direct_objects() helper to make this
meaning clear, especially as we add more references in the future as we
integrate the path-walk API with partial clone filter options.

Signed-off-by: Derrick Stolee <stolee@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
Documentation/technical/api-path-walk.adoc
path-walk.c
path-walk.h

index a67de1b143ab5b81c72275130e931fa63745106b..6e17b13d61b9690c9dbd13b714ec285de99c6476 100644 (file)
@@ -48,6 +48,13 @@ commits.
        applications could disable some options to make it simpler to walk
        the objects or to have fewer calls to `path_fn`.
 +
+Note that objects directly requested as pending objects (such as targets
+of lightweight tags or other ref tips) are always emitted to `path_fn`,
+even when the corresponding type flag is disabled. Only objects
+discovered during the tree walk are subject to these type filters. This
+ensures that objects specifically requested through the revision input
+are never silently dropped.
++
 While it is possible to walk only commits in this way, consumers would be
 better off using the revision walk API instead.
 
index 6e426af43308931c018fa07bee79379917d45c04..05bfc1c1142ae29ba971a7fb4825a48f0f8b572c 100644 (file)
@@ -248,6 +248,17 @@ static int add_tree_entries(struct path_walk_context *ctx,
        return 0;
 }
 
+/*
+ * Paths starting with '/' (e.g., "/tags", "/tagged-blobs") hold objects that
+ * were directly requested by 'pending' objects rather than discovered during
+ * tree traversal.
+ */
+static int path_is_for_direct_objects(const char *path)
+{
+       ASSERT(path);
+       return path[0] == '/';
+}
+
 /*
  * For each path in paths_to_explore, walk the trees another level
  * and add any found blobs to the batch (but only if they exist and
@@ -306,14 +317,19 @@ static int walk_path(struct path_walk_context *ctx,
 
        if (list->type == OBJ_BLOB &&
            ctx->revs->prune_data.nr &&
+           !path_is_for_direct_objects(path) &&
            !match_pathspec(ctx->repo->index, &ctx->revs->prune_data,
                           path, strlen(path), 0,
                           NULL, 0))
                return 0;
 
-       /* Evaluate function pointer on this data, if requested. */
-       if ((list->type == OBJ_TREE && ctx->info->trees) ||
-           (list->type == OBJ_BLOB && ctx->info->blobs) ||
+       /*
+        * Evaluate function pointer on this data, if requested.
+        * Ignore object type filters for tagged objects (path starts
+        * with `/`).
+        */
+       if ((list->type == OBJ_TREE && (ctx->info->trees || path_is_for_direct_objects(path))) ||
+           (list->type == OBJ_BLOB && (ctx->info->blobs || path_is_for_direct_objects(path))) ||
            (list->type == OBJ_TAG && ctx->info->tags))
                ret = ctx->info->path_fn(path, &list->oids, list->type,
                                        ctx->info->path_fn_data);
@@ -374,10 +390,8 @@ static int setup_pending_objects(struct path_walk_info *info,
 
        if (info->tags)
                CALLOC_ARRAY(tags, 1);
-       if (info->blobs)
-               CALLOC_ARRAY(tagged_blobs, 1);
-       if (info->trees)
-               root_tree_list = strmap_get(&ctx->paths_to_lists, root_path);
+       CALLOC_ARRAY(tagged_blobs, 1);
+       root_tree_list = strmap_get(&ctx->paths_to_lists, root_path);
 
        /*
         * Pending objects include:
@@ -421,8 +435,6 @@ static int setup_pending_objects(struct path_walk_info *info,
 
                switch (obj->type) {
                case OBJ_TREE:
-                       if (!info->trees)
-                               continue;
                        if (pending->path) {
                                char *path = *pending->path ? xstrfmt("%s/", pending->path)
                                                            : xstrdup("");
@@ -435,8 +447,6 @@ static int setup_pending_objects(struct path_walk_info *info,
                        break;
 
                case OBJ_BLOB:
-                       if (!info->blobs)
-                               continue;
                        if (pending->path)
                                add_path_to_list(ctx, pending->path, OBJ_BLOB, &obj->oid, 1);
                        else
@@ -532,15 +542,17 @@ int walk_objects_by_path(struct path_walk_info *info)
        push_to_stack(&ctx, root_path);
 
        /*
-        * Set these values before preparing the walk to catch
-        * lightweight tags pointing to non-commits and indexed objects.
+        * Ensure that prepare_revision_walk() keeps all pending objects
+        * even through an object type filter.
         */
-       info->revs->blob_objects = info->blobs;
-       info->revs->tree_objects = info->trees;
+       info->revs->blob_objects = info->revs->tree_objects = 1;
 
        if (prepare_revision_walk(info->revs))
                die(_("failed to setup revision walk"));
 
+       info->revs->blob_objects = info->blobs;
+       info->revs->tree_objects = info->trees;
+
        /*
         * Walk trees to mark them as UNINTERESTING.
         * This is particularly important when 'edge_aggressive' is set.
index 5ef5a8440e6b5e6eff24b4ec27f8741c4c708080..657eeda8ec00e7e366cee651a2932a1e68f495cb 100644 (file)
@@ -36,6 +36,11 @@ struct path_walk_info {
        /**
         * Initialize which object types the path_fn should be called on. This
         * could also limit the walk to skip blobs if not set.
+        *
+        * Note: even when 'blobs' or 'trees' is disabled, objects that are
+        * directly requested as pending objects will still be emitted to
+        * path_fn. Only objects discovered during the tree walk are filtered by
+        * these flags.
         */
        int commits;
        int trees;