]> git.ipfire.org Git - thirdparty/git.git/commitdiff
merge-ort: abort merge when trees have duplicate entries
authorElijah Newren <newren@gmail.com>
Tue, 21 Apr 2026 00:26:10 +0000 (00:26 +0000)
committerJunio C Hamano <gitster@pobox.com>
Wed, 22 Apr 2026 23:22:19 +0000 (16:22 -0700)
Trees with duplicate entries are malformed; fsck reports "contains
duplicate file entries" for them.  merge-ort has from the beginning
assumed that we would never hit such trees.  It was written with the
assumption that traverse_trees() calls collect_merge_info_callback() at
most once per path.  The "sanity checks" in that callback (added in
d2bc1994f363 (merge-ort: implement a very basic collect_merge_info(),
2020-12-13)) verify properties of each individual call but not that
invariant.  The strmap_put() in setup_path_info() silently overwrites
the entry from any prior call for the same path, because it assumed
there would be no other path.  Unfortunately, supplemental data
structures for various optimizations could still be tweaked before the
extra paths were overwritten, and those data structures not matching
expected state could trip various assertions.

Change the return type of setup_path_info() from void to int to allow us
to detect this case, and abort the merge with a clear error message when
it occurs.

Signed-off-by: Elijah Newren <newren@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
merge-ort.c
t/t6422-merge-rename-corner-cases.sh

index 8f911cb63979ebaf9b1683390eac9577eb87b50e..be0829bbb781ef90b19e400f75ae4cc13d27e9d5 100644 (file)
@@ -1026,18 +1026,18 @@ static int traverse_trees_wrapper(struct index_state *istate,
        return ret < 0 ? ret : 0;
 }
 
-static void setup_path_info(struct merge_options *opt,
-                           struct string_list_item *result,
-                           const char *current_dir_name,
-                           int current_dir_name_len,
-                           char *fullpath, /* we'll take over ownership */
-                           struct name_entry *names,
-                           struct name_entry *merged_version,
-                           unsigned is_null,     /* boolean */
-                           unsigned df_conflict, /* boolean */
-                           unsigned filemask,
-                           unsigned dirmask,
-                           int resolved          /* boolean */)
+static int setup_path_info(struct merge_options *opt,
+                          struct string_list_item *result,
+                          const char *current_dir_name,
+                          int current_dir_name_len,
+                          char *fullpath, /* we'll take over ownership */
+                          struct name_entry *names,
+                          struct name_entry *merged_version,
+                          unsigned is_null,     /* boolean */
+                          unsigned df_conflict, /* boolean */
+                          unsigned filemask,
+                          unsigned dirmask,
+                          int resolved          /* boolean */)
 {
        /* result->util is void*, so mi is a convenience typed variable */
        struct merged_info *mi;
@@ -1081,9 +1081,11 @@ static void setup_path_info(struct merge_options *opt,
                         */
                        mi->is_null = 1;
        }
-       strmap_put(&opt->priv->paths, fullpath, mi);
+       if (strmap_put(&opt->priv->paths, fullpath, mi))
+               return error(_("tree has duplicate entries for '%s'"), fullpath);
        result->string = fullpath;
        result->util = mi;
+       return 0;
 }
 
 static void add_pair(struct merge_options *opt,
@@ -1350,9 +1352,10 @@ static int collect_merge_info_callback(int n,
         */
        if (side1_matches_mbase && side2_matches_mbase) {
                /* mbase, side1, & side2 all match; use mbase as resolution */
-               setup_path_info(opt, &pi, dirname, info->pathlen, fullpath,
-                               names, names+0, mbase_null, 0 /* df_conflict */,
-                               filemask, dirmask, 1 /* resolved */);
+               if (setup_path_info(opt, &pi, dirname, info->pathlen, fullpath,
+                                   names, names+0, mbase_null, 0 /* df_conflict */,
+                                   filemask, dirmask, 1 /* resolved */))
+                       return -1; /* Quit traversing */
                return mask;
        }
 
@@ -1364,9 +1367,10 @@ static int collect_merge_info_callback(int n,
         */
        if (sides_match && filemask == 0x07) {
                /* use side1 (== side2) version as resolution */
-               setup_path_info(opt, &pi, dirname, info->pathlen, fullpath,
-                               names, names+1, side1_null, 0,
-                               filemask, dirmask, 1);
+               if (setup_path_info(opt, &pi, dirname, info->pathlen, fullpath,
+                                   names, names+1, side1_null, 0,
+                                   filemask, dirmask, 1))
+                       return -1; /* Quit traversing */
                return mask;
        }
 
@@ -1378,18 +1382,20 @@ static int collect_merge_info_callback(int n,
         */
        if (side1_matches_mbase && filemask == 0x07) {
                /* use side2 version as resolution */
-               setup_path_info(opt, &pi, dirname, info->pathlen, fullpath,
-                               names, names+2, side2_null, 0,
-                               filemask, dirmask, 1);
+               if (setup_path_info(opt, &pi, dirname, info->pathlen, fullpath,
+                                   names, names+2, side2_null, 0,
+                                   filemask, dirmask, 1))
+                       return -1; /* Quit traversing */
                return mask;
        }
 
        /* Similar to above but swapping sides 1 and 2 */
        if (side2_matches_mbase && filemask == 0x07) {
                /* use side1 version as resolution */
-               setup_path_info(opt, &pi, dirname, info->pathlen, fullpath,
-                               names, names+1, side1_null, 0,
-                               filemask, dirmask, 1);
+               if (setup_path_info(opt, &pi, dirname, info->pathlen, fullpath,
+                                   names, names+1, side1_null, 0,
+                                   filemask, dirmask, 1))
+                       return -1; /* Quit traversing */
                return mask;
        }
 
@@ -1413,8 +1419,9 @@ static int collect_merge_info_callback(int n,
         * unconflict some more cases, but that comes later so all we can
         * do now is record the different non-null file hashes.)
         */
-       setup_path_info(opt, &pi, dirname, info->pathlen, fullpath,
-                       names, NULL, 0, df_conflict, filemask, dirmask, 0);
+       if (setup_path_info(opt, &pi, dirname, info->pathlen, fullpath,
+                           names, NULL, 0, df_conflict, filemask, dirmask, 0))
+               return -1; /* Quit traversing */
 
        ci = pi.util;
        VERIFY_CI(ci);
index e18d5a227d54f7a5b40ff0d48b2cdc811135ebe4..81b645bb3bdc5b3098737ed698209e9bfa7931fd 100755 (executable)
@@ -1525,4 +1525,58 @@ test_expect_success 'submodule/directory preliminary conflict' '
        )
 '
 
+# Testcase: submodule/directory conflict with duplicate tree entries
+#   One side has a path as a gitlink (submodule).  The other side replaces
+#   the gitlink with a directory.  A third-party tool creates a tree on the
+#   submodule side that has *both* a gitlink and a tree entry for the same
+#   path (adding a file inside the submodule path ignoring that there's a
+#   gitlink there).  collect_merge_info_callback() should detect the
+#   duplicate and abort rather than silently corrupting its bookkeeping.
+
+test_expect_success 'duplicate tree entries trigger an error' '
+       test_when_finished "rm -rf duplicate-entry" &&
+       git init duplicate-entry &&
+       (
+               cd duplicate-entry &&
+
+               # Base commit: "docs" is a gitlink (submodule)
+               empty_tree=$(git mktree </dev/null) &&
+               fake_commit=$(git commit-tree $empty_tree </dev/null) &&
+               git update-index --add --cacheinfo 160000,$fake_commit,docs &&
+               echo base >file.txt &&
+               git add file.txt &&
+               git commit -m base &&
+
+               # side1: remove the gitlink, replace with a directory
+               git checkout -b side1 &&
+               git rm --cached docs &&
+               mkdir -p docs &&
+               echo hello >docs/requirements.txt &&
+               git add docs/requirements.txt &&
+               git commit -m "side1: submodule to directory" &&
+
+               # side2: keep the gitlink but craft a tree that also
+               # contains a tree entry for "docs" (simulating a tool
+               # that adds files inside a submodule path without
+               # removing the gitlink first).
+               git checkout main &&
+               git checkout -b side2 &&
+               blob_oid=$(echo world | git hash-object -w --stdin) &&
+               docs_tree=$(printf "100644 blob %s\trequirements.txt\n" \
+                       "$blob_oid" | git mktree) &&
+               cur_tree=$(git rev-parse HEAD^{tree}) &&
+               git cat-file -p $cur_tree >tree-listing &&
+               printf "040000 tree %s\tdocs\n" "$docs_tree" >>tree-listing &&
+               new_tree=$(git mktree <tree-listing) &&
+               side2_commit=$(git commit-tree $new_tree -p HEAD \
+                       -m "side2: add file alongside submodule") &&
+               git update-ref refs/heads/side2 $side2_commit &&
+
+               # Merging must detect the duplicate and abort
+               git checkout side1 &&
+               test_must_fail git merge side2 2>err &&
+               test_grep "duplicate entries" err
+       )
+'
+
 test_done