From: Derrick Stolee Date: Fri, 22 May 2026 18:24:28 +0000 (+0000) Subject: path-walk: always emit directly-requested objects X-Git-Url: http://git.ipfire.org/gitweb/index.cgi?a=commitdiff_plain;h=7a7070eebce16f20548bd4685362bca7df6af431;p=thirdparty%2Fgit.git path-walk: always emit directly-requested objects We are preparing to integrate the path-walk API with some --filter options in 'git pack-objects', but there is a subtle issue that is revealed when those are put together and the test suite is run with GIT_TEST_PACK_PATH_WALK=1. When a filter reduces the set of requested objects, this results in filtering out directly-requested objects, such as in the download of needed blobs in a blobless partial clone. The root cause is that the scan of pending objects in the path-walk API respects the filters set in the path_walk_info instead of overriding them for pending objects. We can tell that a path is part of the directly-referenced objects if its path name starts with '/' (other paths, including root trees never have this starting character). Create a path_is_for_direct_objects() to make this meaning clear, especially as we add more references in the future as we integrate the path-walk API with partial clone filter options. Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- diff --git a/Documentation/technical/api-path-walk.adoc b/Documentation/technical/api-path-walk.adoc index a67de1b143..6e17b13d61 100644 --- a/Documentation/technical/api-path-walk.adoc +++ b/Documentation/technical/api-path-walk.adoc @@ -48,6 +48,13 @@ commits. applications could disable some options to make it simpler to walk the objects or to have fewer calls to `path_fn`. + +Note that objects directly requested as pending objects (such as targets +of lightweight tags or other ref tips) are always emitted to `path_fn`, +even when the corresponding type flag is disabled. Only objects +discovered during the tree walk are subject to these type filters. This +ensures that objects specifically requested through the revision input +are never silently dropped. ++ While it is possible to walk only commits in this way, consumers would be better off using the revision walk API instead. diff --git a/path-walk.c b/path-walk.c index 6e426af433..05bfc1c114 100644 --- a/path-walk.c +++ b/path-walk.c @@ -248,6 +248,17 @@ static int add_tree_entries(struct path_walk_context *ctx, return 0; } +/* + * Paths starting with '/' (e.g., "/tags", "/tagged-blobs") hold objects that + * were directly requested by 'pending' objects rather than discovered during + * tree traversal. + */ +static int path_is_for_direct_objects(const char *path) +{ + ASSERT(path); + return path[0] == '/'; +} + /* * For each path in paths_to_explore, walk the trees another level * and add any found blobs to the batch (but only if they exist and @@ -306,14 +317,19 @@ static int walk_path(struct path_walk_context *ctx, if (list->type == OBJ_BLOB && ctx->revs->prune_data.nr && + !path_is_for_direct_objects(path) && !match_pathspec(ctx->repo->index, &ctx->revs->prune_data, path, strlen(path), 0, NULL, 0)) return 0; - /* Evaluate function pointer on this data, if requested. */ - if ((list->type == OBJ_TREE && ctx->info->trees) || - (list->type == OBJ_BLOB && ctx->info->blobs) || + /* + * Evaluate function pointer on this data, if requested. + * Ignore object type filters for tagged objects (path starts + * with `/`). + */ + if ((list->type == OBJ_TREE && (ctx->info->trees || path_is_for_direct_objects(path))) || + (list->type == OBJ_BLOB && (ctx->info->blobs || path_is_for_direct_objects(path))) || (list->type == OBJ_TAG && ctx->info->tags)) ret = ctx->info->path_fn(path, &list->oids, list->type, ctx->info->path_fn_data); @@ -374,10 +390,8 @@ static int setup_pending_objects(struct path_walk_info *info, if (info->tags) CALLOC_ARRAY(tags, 1); - if (info->blobs) - CALLOC_ARRAY(tagged_blobs, 1); - if (info->trees) - root_tree_list = strmap_get(&ctx->paths_to_lists, root_path); + CALLOC_ARRAY(tagged_blobs, 1); + root_tree_list = strmap_get(&ctx->paths_to_lists, root_path); /* * Pending objects include: @@ -421,8 +435,6 @@ static int setup_pending_objects(struct path_walk_info *info, switch (obj->type) { case OBJ_TREE: - if (!info->trees) - continue; if (pending->path) { char *path = *pending->path ? xstrfmt("%s/", pending->path) : xstrdup(""); @@ -435,8 +447,6 @@ static int setup_pending_objects(struct path_walk_info *info, break; case OBJ_BLOB: - if (!info->blobs) - continue; if (pending->path) add_path_to_list(ctx, pending->path, OBJ_BLOB, &obj->oid, 1); else @@ -532,15 +542,17 @@ int walk_objects_by_path(struct path_walk_info *info) push_to_stack(&ctx, root_path); /* - * Set these values before preparing the walk to catch - * lightweight tags pointing to non-commits and indexed objects. + * Ensure that prepare_revision_walk() keeps all pending objects + * even through an object type filter. */ - info->revs->blob_objects = info->blobs; - info->revs->tree_objects = info->trees; + info->revs->blob_objects = info->revs->tree_objects = 1; if (prepare_revision_walk(info->revs)) die(_("failed to setup revision walk")); + info->revs->blob_objects = info->blobs; + info->revs->tree_objects = info->trees; + /* * Walk trees to mark them as UNINTERESTING. * This is particularly important when 'edge_aggressive' is set. diff --git a/path-walk.h b/path-walk.h index 5ef5a8440e..657eeda8ec 100644 --- a/path-walk.h +++ b/path-walk.h @@ -36,6 +36,11 @@ struct path_walk_info { /** * Initialize which object types the path_fn should be called on. This * could also limit the walk to skip blobs if not set. + * + * Note: even when 'blobs' or 'trees' is disabled, objects that are + * directly requested as pending objects will still be emitted to + * path_fn. Only objects discovered during the tree walk are filtered by + * these flags. */ int commits; int trees;