From: Derrick Stolee Date: Thu, 26 Mar 2026 15:14:52 +0000 (+0000) Subject: backfill: work with prefix pathspecs X-Git-Tag: v2.54.0-rc1~34^2~2 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=7be182045a6a113b118982fc81296d5b9746779e;p=thirdparty%2Fgit.git backfill: work with prefix pathspecs The previous change allowed specifying revision arguments on the 'git backfill' command line. This created the opportunity for restricting the initial commit set by filtering the revision walk through a pathspec. Other than filtering the commit set (and thereby the root trees), this did not restrict the path-walk implementation of 'git backfill' and did not restrict the blobs that were downloaded to only those matching the pathspec. Update the path-walk API to accept certain kinds of pathspecs and to silently ignore anything too complex, for now. We will update this in the next change to properly restrict to even complex pathspecs. The current behavior focuses on pathspecs that match paths exactly. This covers exact filenames as well as directory names used as prefixes. Pathspecs containing wildcards or magic are cleared so the path walk downloads all blobs, as before. The reason for this restriction is to allow for faster execution by pruning the path walk to only trees that could contribute towards one of those paths as a parent directory. The test directory 'd/f/' (next to 'd/file*.txt') was prepared in a previous commit to exercise the subtlety in prefix matching. 
Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- diff --git a/path-walk.c b/path-walk.c index 364e4cfa19..3750552978 100644 --- a/path-walk.c +++ b/path-walk.c @@ -11,6 +11,7 @@ #include "list-objects.h" #include "object.h" #include "oid-array.h" +#include "path.h" #include "prio-queue.h" #include "repository.h" #include "revision.h" @@ -206,6 +207,33 @@ static int add_tree_entries(struct path_walk_context *ctx, match != MATCHED) continue; } + if (ctx->revs->prune_data.nr) { + struct pathspec *pd = &ctx->revs->prune_data; + bool found = false; + int did_strip_suffix = strbuf_strip_suffix(&path, "/"); + + + for (int i = 0; i < pd->nr; i++) { + struct pathspec_item *item = &pd->items[i]; + + /* + * Continue if either is a directory prefix + * of the other. + */ + if (dir_prefix(path.buf, item->match) || + dir_prefix(item->match, path.buf)) { + found = true; + break; + } + } + + if (did_strip_suffix) + strbuf_addch(&path, '/'); + + /* Skip paths that do not match the prefix. */ + if (!found) + continue; + } add_path_to_list(ctx, path.buf, type, &entry.oid, !(o->flags & UNINTERESTING)); @@ -481,6 +509,17 @@ int walk_objects_by_path(struct path_walk_info *info) if (info->tags) info->revs->tag_objects = 1; + if (ctx.revs->prune_data.nr) { + /* + * Only exact prefix pathspecs are currently supported. + * Clear any wildcard or magic pathspecs to avoid + * incorrect prefix matching. + */ + if (ctx.revs->prune_data.has_wildcard || + ctx.revs->prune_data.magic) + clear_pathspec(&ctx.revs->prune_data); + } + /* Insert a single list for the root tree into the paths. 
*/ CALLOC_ARRAY(root_tree_list, 1); root_tree_list->type = OBJ_TREE; diff --git a/path.c b/path.c index c285357859..d7e17bf174 100644 --- a/path.c +++ b/path.c @@ -56,7 +56,7 @@ static void strbuf_cleanup_path(struct strbuf *sb) strbuf_remove(sb, 0, path - sb->buf); } -static int dir_prefix(const char *buf, const char *dir) +int dir_prefix(const char *buf, const char *dir) { size_t len = strlen(dir); return !strncmp(buf, dir, len) && diff --git a/path.h b/path.h index cbcad254a0..0434ba5e07 100644 --- a/path.h +++ b/path.h @@ -112,6 +112,12 @@ const char *repo_submodule_path_replace(struct repository *repo, const char *fmt, ...) __attribute__((format (printf, 4, 5))); +/* + * Given a directory name 'dir' (not ending with a trailing '/'), + * determine if 'buf' is equal to 'dir' or has prefix 'dir'+'/'. + */ +int dir_prefix(const char *buf, const char *dir); + void report_linked_checkout_garbage(struct repository *r); /* diff --git a/t/t5620-backfill.sh b/t/t5620-backfill.sh index db66d8b614..52f6484ca1 100755 --- a/t/t5620-backfill.sh +++ b/t/t5620-backfill.sh @@ -273,13 +273,11 @@ test_expect_success 'backfill with prefix pathspec' ' git -C backfill-path rev-list --quiet --objects --missing=print HEAD >missing && test_line_count = 48 missing && - # TODO: The pathspec should limit the downloaded blobs to - # only those matching the prefix "d/f", but currently all - # blobs are downloaded. 
- git -C backfill-path backfill HEAD -- d/f && + git -C backfill-path backfill HEAD -- d/f 2>err && + test_must_be_empty err && git -C backfill-path rev-list --quiet --objects --missing=print HEAD >missing && - test_line_count = 0 missing + test_line_count = 40 missing ' test_expect_success 'backfill with multiple pathspecs' ' @@ -292,13 +290,11 @@ test_expect_success 'backfill with multiple pathspecs' ' git -C backfill-path rev-list --quiet --objects --missing=print HEAD >missing && test_line_count = 48 missing && - # TODO: The pathspecs should limit the downloaded blobs to - # only those matching "d/f" or "a", but currently all blobs - # are downloaded. - git -C backfill-path backfill HEAD -- d/f a && + git -C backfill-path backfill HEAD -- d/f a 2>err && + test_must_be_empty err && git -C backfill-path rev-list --quiet --objects --missing=print HEAD >missing && - test_line_count = 0 missing + test_line_count = 16 missing ' test_expect_success 'backfill with wildcard pathspec' '