]> git.ipfire.org Git - thirdparty/git.git/commitdiff
fsck: report non-consecutive duplicate names in trees
authorRené Scharfe <l.s.r@web.de>
Sun, 10 May 2020 16:12:16 +0000 (18:12 +0200)
committerJunio C Hamano <gitster@pobox.com>
Mon, 11 May 2020 15:40:28 +0000 (08:40 -0700)
Tree entries are sorted in path order, meaning that directory names get
a slash ('/') appended implicitly.  Git fsck checks if trees contains
consecutive duplicates, but due to that ordering there can be
non-consecutive duplicates as well if one of them is a directory and the
other one isn't.  Such a tree cannot be fully checked out.

Find these duplicates by recording candidate file names on a stack and
check candidate directory names against that stack to find matches.

Suggested-by: Brandon Williams <bwilliamseng@gmail.com>
Original-test-by: Brandon Williams <bwilliamseng@gmail.com>
Signed-off-by: René Scharfe <l.s.r@web.de>
Reviewed-by: Luke Diamand <luke@diamand.org>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
fsck.c
t/t1450-fsck.sh

diff --git a/fsck.c b/fsck.c
index 73f30773f28acc06f4b9c04dda1e1a54b46c6930..b9b3350f762fa4d0d9c90fccfe137165b2b16339 100644 (file)
--- a/fsck.c
+++ b/fsck.c
@@ -523,6 +523,28 @@ int fsck_walk(struct object *obj, void *data, struct fsck_options *options)
        }
 }
 
+struct name_stack {
+       const char **names;
+       size_t nr, alloc;
+};
+
+static void name_stack_push(struct name_stack *stack, const char *name)
+{
+       ALLOC_GROW(stack->names, stack->nr + 1, stack->alloc);
+       stack->names[stack->nr++] = name;
+}
+
+static const char *name_stack_pop(struct name_stack *stack)
+{
+       return stack->nr ? stack->names[--stack->nr] : NULL;
+}
+
+static void name_stack_clear(struct name_stack *stack)
+{
+       FREE_AND_NULL(stack->names);
+       stack->nr = stack->alloc = 0;
+}
+
 /*
  * The entries in a tree are ordered in the _path_ order,
  * which means that a directory entry is ordered by adding
@@ -534,7 +556,14 @@ int fsck_walk(struct object *obj, void *data, struct fsck_options *options)
 #define TREE_UNORDERED (-1)
 #define TREE_HAS_DUPS  (-2)
 
-static int verify_ordered(unsigned mode1, const char *name1, unsigned mode2, const char *name2)
+static int is_less_than_slash(unsigned char c)
+{
+       return '\0' < c && c < '/';
+}
+
+static int verify_ordered(unsigned mode1, const char *name1,
+                         unsigned mode2, const char *name2,
+                         struct name_stack *candidates)
 {
        int len1 = strlen(name1);
        int len2 = strlen(name2);
@@ -566,6 +595,41 @@ static int verify_ordered(unsigned mode1, const char *name1, unsigned mode2, con
                c1 = '/';
        if (!c2 && S_ISDIR(mode2))
                c2 = '/';
+
+       /*
+        * There can be non-consecutive duplicates due to the implicitly
+        * add slash, e.g.:
+        *
+        *   foo
+        *   foo.bar
+        *   foo.bar.baz
+        *   foo.bar/
+        *   foo/
+        *
+        * Record non-directory candidates (like "foo" and "foo.bar" in
+        * the example) on a stack and check directory candidates (like
+        * foo/" and "foo.bar/") against that stack.
+        */
+       if (!c1 && is_less_than_slash(c2)) {
+               name_stack_push(candidates, name1);
+       } else if (c2 == '/' && is_less_than_slash(c1)) {
+               for (;;) {
+                       const char *p;
+                       const char *f_name = name_stack_pop(candidates);
+
+                       if (!f_name)
+                               break;
+                       if (!skip_prefix(name2, f_name, &p))
+                               break;
+                       if (!*p)
+                               return TREE_HAS_DUPS;
+                       if (is_less_than_slash(*p)) {
+                               name_stack_push(candidates, f_name);
+                               break;
+                       }
+               }
+       }
+
        return c1 < c2 ? 0 : TREE_UNORDERED;
 }
 
@@ -587,6 +651,7 @@ static int fsck_tree(const struct object_id *oid,
        struct tree_desc desc;
        unsigned o_mode;
        const char *o_name;
+       struct name_stack df_dup_candidates = { NULL };
 
        if (init_tree_desc_gently(&desc, buffer, size)) {
                retval += report(options, oid, OBJ_TREE, FSCK_MSG_BAD_TREE, "cannot be parsed as a tree");
@@ -666,7 +731,8 @@ static int fsck_tree(const struct object_id *oid,
                }
 
                if (o_name) {
-                       switch (verify_ordered(o_mode, o_name, mode, name)) {
+                       switch (verify_ordered(o_mode, o_name, mode, name,
+                                              &df_dup_candidates)) {
                        case TREE_UNORDERED:
                                not_properly_sorted = 1;
                                break;
@@ -682,6 +748,8 @@ static int fsck_tree(const struct object_id *oid,
                o_name = name;
        }
 
+       name_stack_clear(&df_dup_candidates);
+
        if (has_null_sha1)
                retval += report(options, oid, OBJ_TREE, FSCK_MSG_NULL_SHA1, "contains entries pointing to null sha1");
        if (has_full_path)
index 02478bc4ece20564a7d5ea27df91cb0695d6e5ec..c43bc6870a86808e771b40f36388be2628c24fd3 100755 (executable)
@@ -234,6 +234,22 @@ test_expect_success 'tree object with duplicate entries' '
        test_i18ngrep "error in tree .*contains duplicate file entries" out
 '
 
+test_expect_success 'tree object with dublicate names' '
+       test_when_finished "remove_object \$blob" &&
+       test_when_finished "remove_object \$tree" &&
+       test_when_finished "remove_object \$badtree" &&
+       blob=$(echo blob | git hash-object -w --stdin) &&
+       printf "100644 blob %s\t%s\n" $blob x.2 >tree &&
+       tree=$(git mktree <tree) &&
+       printf "100644 blob %s\t%s\n" $blob x.1 >badtree &&
+       printf "100644 blob %s\t%s\n" $blob x >>badtree &&
+       printf "040000 tree %s\t%s\n" $tree x >>badtree &&
+       badtree=$(git mktree <badtree) &&
+       test_must_fail git fsck 2>out &&
+       test_i18ngrep "$badtree" out &&
+       test_i18ngrep "error in tree .*contains duplicate file entries" out
+'
+
 test_expect_success 'unparseable tree object' '
        test_oid_cache <<-\EOF &&
        junk sha1:twenty-bytes-of-junk