]> git.ipfire.org Git - thirdparty/git.git/blobdiff - merge-ort.c
Merge branch 'tk/simple-autosetupmerge'
[thirdparty/git.git] / merge-ort.c
index 92dea35e57a856571882cb250d058f03554197c9..0d3f42592fb208739840a0cb37e0e205b22d1ab8 100644 (file)
@@ -18,6 +18,7 @@
 #include "merge-ort.h"
 
 #include "alloc.h"
+#include "attr.h"
 #include "blob.h"
 #include "cache-tree.h"
 #include "commit.h"
 #include "diff.h"
 #include "diffcore.h"
 #include "dir.h"
+#include "entry.h"
 #include "ll-merge.h"
 #include "object-store.h"
+#include "promisor-remote.h"
 #include "revision.h"
 #include "strmap.h"
+#include "submodule-config.h"
 #include "submodule.h"
 #include "tree.h"
 #include "unpack-trees.h"
@@ -51,6 +55,61 @@ enum merge_side {
        MERGE_SIDE2 = 2
 };
 
+static unsigned RESULT_INITIALIZED = 0x1abe11ed; /* unlikely accidental value */
+
+struct traversal_callback_data {
+       unsigned long mask;
+       unsigned long dirmask;
+       struct name_entry names[3];
+};
+
+struct deferred_traversal_data {
+       /*
+        * possible_trivial_merges: directories to be explored only when needed
+        *
+        * possible_trivial_merges is a map of directory names to
+        * dir_rename_mask.  When we detect that a directory is unchanged on
+        * one side, we can sometimes resolve the directory without recursing
+        * into it.  Renames are the only things that can prevent such an
+        * optimization.  However, for rename sources:
+        *   - If no parent directory needed directory rename detection, then
+        *     no path under such a directory can be a relevant_source.
+        * and for rename destinations:
+        *   - If no cached rename has a target path under the directory AND
+        *   - If there are no unpaired relevant_sources elsewhere in the
+        *     repository
+        * then we don't need any path under this directory for a rename
+        * destination.  The only way to know the last item above is to defer
+        * handling such directories until the end of collect_merge_info(),
+        * in handle_deferred_entries().
+        *
+        * For each we store dir_rename_mask, since that's the only bit of
+        * information we need, other than the path, to resume the recursive
+        * traversal.
+        */
+       struct strintmap possible_trivial_merges;
+
+       /*
+        * trivial_merges_okay: if trivial directory merges are okay
+        *
+        * See possible_trivial_merges above.  The "no unpaired
+        * relevant_sources elsewhere in the repository" is a single boolean
+        * per merge side, which we store here.  Note that while 0 means no,
+        * 1 only means "maybe" rather than "yes"; we optimistically set it
+        * to 1 initially and only clear when we determine it is unsafe to
+        * do trivial directory merges.
+        */
+       unsigned trivial_merges_okay;
+
+       /*
+        * target_dirs: ancestor directories of rename targets
+        *
+        * target_dirs contains all directory names that are an ancestor of
+        * any rename destination.
+        */
+       struct strset target_dirs;
+};
+
 struct rename_info {
        /*
         * All variables that are arrays of size 3 correspond to data tracked
@@ -67,8 +126,12 @@ struct rename_info {
 
        /*
         * dirs_removed: directories removed on a given side of history.
+        *
+        * The keys of dirs_removed[side] are the directories that were removed
+        * on the given side of history.  The value of the strintmap for each
+        * directory is a value from enum dir_rename_relevance.
         */
-       struct strset dirs_removed[3];
+       struct strintmap dirs_removed[3];
 
        /*
         * dir_rename_count: tracking where parts of a directory were renamed to
@@ -88,6 +151,137 @@ struct rename_info {
         */
        struct strmap dir_renames[3];
 
+       /*
+        * relevant_sources: deleted paths wanted in rename detection, and why
+        *
+        * relevant_sources is a set of deleted paths on each side of
+        * history for which we need rename detection.  If a path is deleted
+        * on one side of history, we need to detect if it is part of a
+        * rename if either
+        *    * the file is modified/deleted on the other side of history
+        *    * we need to detect renames for an ancestor directory
+        * If neither of those are true, we can skip rename detection for
+        * that path.  The reason is stored as a value from enum
+        * file_rename_relevance, as the reason can inform the algorithm in
+        * diffcore_rename_extended().
+        */
+       struct strintmap relevant_sources[3];
+
+       struct deferred_traversal_data deferred[3];
+
+       /*
+        * dir_rename_mask:
+        *   0: optimization removing unmodified potential rename source okay
+        *   2 or 4: optimization okay, but must check for files added to dir
+        *   7: optimization forbidden; need rename source in case of dir rename
+        */
+       unsigned dir_rename_mask:3;
+
+       /*
+        * callback_data_*: supporting data structures for alternate traversal
+        *
+        * We sometimes need to be able to traverse through all the files
+        * in a given tree before all immediate subdirectories within that
+        * tree.  Since traverse_trees() doesn't do that naturally, we have
+        * a traverse_trees_wrapper() that stores any immediate
+        * subdirectories while traversing files, then traverses the
+        * immediate subdirectories later.  These callback_data* variables
+        * store the information for the subdirectories so that we can do
+        * that traversal order.
+        */
+       struct traversal_callback_data *callback_data;
+       int callback_data_nr, callback_data_alloc;
+       char *callback_data_traverse_path;
+
+       /*
+        * merge_trees: trees passed to the merge algorithm for the merge
+        *
+        * merge_trees records the trees passed to the merge algorithm.  But,
+        * this data also is stored in merge_result->priv.  If a sequence of
+        * merges are being done (such as when cherry-picking or rebasing),
+        * the next merge can look at this and re-use information from
+        * previous merges under certain circumstances.
+        *
+        * See also all the cached_* variables.
+        */
+       struct tree *merge_trees[3];
+
+       /*
+        * cached_pairs_valid_side: which side's cached info can be reused
+        *
+        * See the description for merge_trees.  For repeated merges, at most
+        * only one side's cached information can be used.  Valid values:
+        *   MERGE_SIDE2: cached data from side2 can be reused
+        *   MERGE_SIDE1: cached data from side1 can be reused
+        *   0:           no cached data can be reused
+        *   -1:          See redo_after_renames; both sides can be reused.
+        */
+       int cached_pairs_valid_side;
+
+       /*
+        * cached_pairs: Caching of renames and deletions.
+        *
+        * These are mappings recording renames and deletions of individual
+        * files (not directories).  They are thus a map from an old
+        * filename to either NULL (for deletions) or a new filename (for
+        * renames).
+        */
+       struct strmap cached_pairs[3];
+
+       /*
+        * cached_target_names: just the destinations from cached_pairs
+        *
+        * We sometimes want a fast lookup to determine if a given filename
+        * is one of the destinations in cached_pairs.  cached_target_names
+        * is thus duplicative information, but it provides a fast lookup.
+        */
+       struct strset cached_target_names[3];
+
+       /*
+        * cached_irrelevant: Caching of rename_sources that aren't relevant.
+        *
+        * If we try to detect a rename for a source path and succeed, it's
+        * part of a rename.  If we try to detect a rename for a source path
+        * and fail, then it's a delete.  If we do not try to detect a rename
+        * for a path, then we don't know if it's a rename or a delete.  If
+        * merge-ort doesn't think the path is relevant, then we just won't
+        * cache anything for that path.  But there's a slight problem in
+        * that merge-ort can think a path is RELEVANT_LOCATION, but due to
+        * commit 9bd342137e ("diffcore-rename: determine which
+        * relevant_sources are no longer relevant", 2021-03-13),
+        * diffcore-rename can downgrade the path to RELEVANT_NO_MORE.  To
+        * avoid excessive calls to diffcore_rename_extended() we still need
+        * to cache such paths, though we cannot record them as either
+        * renames or deletes.  So we cache them here as a "turned out to be
+        * irrelevant *for this commit*" as they are often also irrelevant
+        * for subsequent commits, though we will have to do some extra
+        * checking to see whether such paths become relevant for rename
+        * detection when cherry-picking/rebasing subsequent commits.
+        */
+       struct strset cached_irrelevant[3];
+
+       /*
+        * redo_after_renames: optimization flag for "restarting" the merge
+        *
+        * Sometimes it pays to detect renames, cache them, and then
+        * restart the merge operation from the beginning.  The reason for
+        * this is that when we know where all the renames are, we know
+        * whether a certain directory has any paths under it affected --
+        * and if a directory is not affected then it permits us to do
+        * trivial tree merging in more cases.  Doing trivial tree merging
+        * prevents the need to run process_entry() on every path
+        * underneath trees that can be trivially merged, and
+        * process_entry() is more expensive than collect_merge_info() --
+        * plus, the second collect_merge_info() will be much faster since
+        * it doesn't have to recurse into the relevant trees.
+        *
+        * Values for this flag:
+        *   0 = don't bother, not worth it (or conditions not yet checked)
+        *   1 = conditions for optimization met, optimization worthwhile
+        *   2 = we already did it (don't restart merge yet again)
+        */
+       unsigned redo_after_renames;
+
        /*
         * needed_limit: value needed for inexact rename detection to run
         *
@@ -110,8 +304,6 @@ struct merge_options_internal {
         *   * these keys serve to intern all the path strings, which allows
         *     us to do pointer comparison on directory names instead of
         *     strcmp; we just have to be careful to use the interned strings.
-        *     (Technically paths_to_free may track some strings that were
-        *      removed from froms paths.)
         *
         * The values of paths:
         *   * either a pointer to a merged_info, or a conflict_info struct
@@ -147,14 +339,14 @@ struct merge_options_internal {
        struct strmap conflicted;
 
        /*
-        * paths_to_free: additional list of strings to free
+        * pool: memory pool for fast allocation/deallocation
         *
-        * If keys are removed from "paths", they are added to paths_to_free
-        * to ensure they are later freed.  We avoid free'ing immediately since
-        * other places (e.g. conflict_info.pathnames[]) may still be
-        * referencing these paths.
+        * We allocate room for lots of filenames and auxiliary data
+        * structures in merge_options_internal, and it tends to all be
+        * freed together too.  Using a memory pool for these provides a
+        * nice speedup.
         */
-       struct string_list paths_to_free;
+       struct mem_pool pool;
 
        /*
         * output: special messages and conflict notices for various paths
@@ -170,6 +362,16 @@ struct merge_options_internal {
         */
        struct rename_info renames;
 
+       /*
+        * attr_index: hacky minimal index used for renormalization
+        *
+        * renormalization code _requires_ an index, though it only needs to
+        * find a .gitattributes file within the index.  So, when
+        * renormalization is important, we create a special index with just
+        * that one file.
+        */
+       struct index_state attr_index;
+
        /*
         * current_dir_name, toplevel_dir: temporary vars
         *
@@ -316,55 +518,49 @@ static void clear_or_reinit_internal_opts(struct merge_options_internal *opti,
 {
        struct rename_info *renames = &opti->renames;
        int i;
-       void (*strmap_func)(struct strmap *, int) =
+       void (*strmap_clear_func)(struct strmap *, int) =
                reinitialize ? strmap_partial_clear : strmap_clear;
-       void (*strset_func)(struct strset *) =
+       void (*strintmap_clear_func)(struct strintmap *) =
+               reinitialize ? strintmap_partial_clear : strintmap_clear;
+       void (*strset_clear_func)(struct strset *) =
                reinitialize ? strset_partial_clear : strset_clear;
 
-       /*
-        * We marked opti->paths with strdup_strings = 0, so that we
-        * wouldn't have to make another copy of the fullpath created by
-        * make_traverse_path from setup_path_info().  But, now that we've
-        * used it and have no other references to these strings, it is time
-        * to deallocate them.
-        */
-       free_strmap_strings(&opti->paths);
-       strmap_func(&opti->paths, 1);
+       strmap_clear_func(&opti->paths, 0);
 
        /*
         * All keys and values in opti->conflicted are a subset of those in
         * opti->paths.  We don't want to deallocate anything twice, so we
         * don't free the keys and we pass 0 for free_values.
         */
-       strmap_func(&opti->conflicted, 0);
+       strmap_clear_func(&opti->conflicted, 0);
 
-       /*
-        * opti->paths_to_free is similar to opti->paths; we created it with
-        * strdup_strings = 0 to avoid making _another_ copy of the fullpath
-        * but now that we've used it and have no other references to these
-        * strings, it is time to deallocate them.  We do so by temporarily
-        * setting strdup_strings to 1.
-        */
-       opti->paths_to_free.strdup_strings = 1;
-       string_list_clear(&opti->paths_to_free, 0);
-       opti->paths_to_free.strdup_strings = 0;
+       if (opti->attr_index.cache_nr) /* true iff opt->renormalize */
+               discard_index(&opti->attr_index);
 
        /* Free memory used by various renames maps */
        for (i = MERGE_SIDE1; i <= MERGE_SIDE2; ++i) {
-               struct hashmap_iter iter;
-               struct strmap_entry *entry;
-
-               strset_func(&renames->dirs_removed[i]);
-
-               strmap_for_each_entry(&renames->dir_rename_count[i],
-                                     &iter, entry) {
-                       struct strintmap *counts = entry->value;
-                       strintmap_clear(counts);
+               strintmap_clear_func(&renames->dirs_removed[i]);
+               strmap_clear_func(&renames->dir_renames[i], 0);
+               strintmap_clear_func(&renames->relevant_sources[i]);
+               if (!reinitialize)
+                       assert(renames->cached_pairs_valid_side == 0);
+               if (i != renames->cached_pairs_valid_side &&
+                   -1 != renames->cached_pairs_valid_side) {
+                       strset_clear_func(&renames->cached_target_names[i]);
+                       strmap_clear_func(&renames->cached_pairs[i], 1);
+                       strset_clear_func(&renames->cached_irrelevant[i]);
+                       partial_clear_dir_rename_count(&renames->dir_rename_count[i]);
+                       if (!reinitialize)
+                               strmap_clear(&renames->dir_rename_count[i], 1);
                }
-               strmap_func(&renames->dir_rename_count[i], 1);
-
-               strmap_func(&renames->dir_renames[i], 0);
        }
+       for (i = MERGE_SIDE1; i <= MERGE_SIDE2; ++i) {
+               strintmap_clear_func(&renames->deferred[i].possible_trivial_merges);
+               strset_clear_func(&renames->deferred[i].target_dirs);
+               renames->deferred[i].trivial_merges_okay = 1; /* 1 == maybe */
+       }
+       renames->cached_pairs_valid_side = 0;
+       renames->dir_rename_mask = 0;
 
        if (!reinitialize) {
                struct hashmap_iter iter;
@@ -386,8 +582,15 @@ static void clear_or_reinit_internal_opts(struct merge_options_internal *opti,
                }
                strmap_clear(&opti->output, 0);
        }
+
+       mem_pool_discard(&opti->pool, 0);
+
+       /* Clean out callback_data as well. */
+       FREE_AND_NULL(renames->callback_data);
+       renames->callback_data_nr = renames->callback_data_alloc = 0;
 }
 
+__attribute__((format (printf, 2, 3)))
 static int err(struct merge_options *opt, const char *err, ...)
 {
        va_list params;
@@ -406,6 +609,7 @@ static int err(struct merge_options *opt, const char *err, ...)
 
 static void format_commit(struct strbuf *sb,
                          int indent,
+                         struct repository *repo,
                          struct commit *commit)
 {
        struct merge_remote_desc *desc;
@@ -419,7 +623,7 @@ static void format_commit(struct strbuf *sb,
                return;
        }
 
-       format_commit_message(commit, "%h %s", sb, &ctx);
+       repo_format_commit_message(repo, commit, "%h %s", sb, &ctx);
        strbuf_addch(sb, '\n');
 }
 
@@ -430,20 +634,90 @@ static void path_msg(struct merge_options *opt,
                     const char *fmt, ...)
 {
        va_list ap;
-       struct strbuf *sb = strmap_get(&opt->priv->output, path);
+       struct strbuf *sb, *dest;
+       struct strbuf tmp = STRBUF_INIT;
+
+       if (opt->record_conflict_msgs_as_headers && omittable_hint)
+               return; /* Do not record mere hints in headers */
+       if (opt->priv->call_depth && opt->verbosity < 5)
+               return; /* Ignore messages from inner merges */
+
+       sb = strmap_get(&opt->priv->output, path);
        if (!sb) {
                sb = xmalloc(sizeof(*sb));
                strbuf_init(sb, 0);
                strmap_put(&opt->priv->output, path, sb);
        }
 
+       dest = (opt->record_conflict_msgs_as_headers ? &tmp : sb);
+
        va_start(ap, fmt);
-       strbuf_vaddf(sb, fmt, ap);
+       if (opt->priv->call_depth) {
+               strbuf_addchars(dest, ' ', 2);
+               strbuf_addstr(dest, "From inner merge:");
+               strbuf_addchars(dest, ' ', opt->priv->call_depth * 2);
+       }
+       strbuf_vaddf(dest, fmt, ap);
        va_end(ap);
 
+       if (opt->record_conflict_msgs_as_headers) {
+               int i_sb = 0, i_tmp = 0;
+
+               /* Start with the specified prefix */
+               if (opt->msg_header_prefix)
+                       strbuf_addf(sb, "%s ", opt->msg_header_prefix);
+
+               /* Copy tmp to sb, adding spaces after newlines */
+               strbuf_grow(sb, sb->len + 2*tmp.len); /* more than sufficient */
+               for (; i_tmp < tmp.len; i_tmp++, i_sb++) {
+                       /* Copy next character from tmp to sb */
+                       sb->buf[sb->len + i_sb] = tmp.buf[i_tmp];
+
+                       /* If we copied a newline, add a space */
+                       if (tmp.buf[i_tmp] == '\n')
+                               sb->buf[++i_sb] = ' ';
+               }
+               /* Update length and ensure it's NUL-terminated */
+               sb->len += i_sb;
+               sb->buf[sb->len] = '\0';
+
+               strbuf_release(&tmp);
+       }
+
+       /* Add final newline character to sb */
        strbuf_addch(sb, '\n');
 }
 
+static struct diff_filespec *pool_alloc_filespec(struct mem_pool *pool,
+                                                const char *path)
+{
+       /* Similar to alloc_filespec(), but allocate from pool and reuse path */
+       struct diff_filespec *spec;
+
+       spec = mem_pool_calloc(pool, 1, sizeof(*spec));
+       spec->path = (char*)path; /* spec won't modify it */
+
+       spec->count = 1;
+       spec->is_binary = -1;
+       return spec;
+}
+
+static struct diff_filepair *pool_diff_queue(struct mem_pool *pool,
+                                            struct diff_queue_struct *queue,
+                                            struct diff_filespec *one,
+                                            struct diff_filespec *two)
+{
+       /* Same code as diff_queue(), except allocate from pool */
+       struct diff_filepair *dp;
+
+       dp = mem_pool_calloc(pool, 1, sizeof(*dp));
+       dp->one = one;
+       dp->two = two;
+       if (queue)
+               diff_q(queue, dp);
+       return dp;
+}
+
 /* add a string to a strbuf, but converting "/" to "_" */
 static void add_flattened_path(struct strbuf *out, const char *s)
 {
@@ -454,13 +728,15 @@ static void add_flattened_path(struct strbuf *out, const char *s)
                        out->buf[i] = '_';
 }
 
-static char *unique_path(struct strmap *existing_paths,
+static char *unique_path(struct merge_options *opt,
                         const char *path,
                         const char *branch)
 {
+       char *ret = NULL;
        struct strbuf newpath = STRBUF_INIT;
        int suffix = 0;
        size_t base_len;
+       struct strmap *existing_paths = &opt->priv->paths;
 
        strbuf_addf(&newpath, "%s~", path);
        add_flattened_path(&newpath, branch);
@@ -471,11 +747,91 @@ static char *unique_path(struct strmap *existing_paths,
                strbuf_addf(&newpath, "_%d", suffix++);
        }
 
-       return strbuf_detach(&newpath, NULL);
+       /* Track the new path in our memory pool */
+       ret = mem_pool_alloc(&opt->priv->pool, newpath.len + 1);
+       memcpy(ret, newpath.buf, newpath.len + 1);
+       strbuf_release(&newpath);
+       return ret;
 }
 
 /*** Function Grouping: functions related to collect_merge_info() ***/
 
+static int traverse_trees_wrapper_callback(int n,
+                                          unsigned long mask,
+                                          unsigned long dirmask,
+                                          struct name_entry *names,
+                                          struct traverse_info *info)
+{
+       struct merge_options *opt = info->data;
+       struct rename_info *renames = &opt->priv->renames;
+       unsigned filemask = mask & ~dirmask;
+
+       assert(n==3);
+
+       if (!renames->callback_data_traverse_path)
+               renames->callback_data_traverse_path = xstrdup(info->traverse_path);
+
+       if (filemask && filemask == renames->dir_rename_mask)
+               renames->dir_rename_mask = 0x07;
+
+       ALLOC_GROW(renames->callback_data, renames->callback_data_nr + 1,
+                  renames->callback_data_alloc);
+       renames->callback_data[renames->callback_data_nr].mask = mask;
+       renames->callback_data[renames->callback_data_nr].dirmask = dirmask;
+       COPY_ARRAY(renames->callback_data[renames->callback_data_nr].names,
+                  names, 3);
+       renames->callback_data_nr++;
+
+       return mask;
+}
+
+/*
+ * Much like traverse_trees(), BUT:
+ *   - read all the tree entries FIRST, saving them
+ *   - note that the above step provides an opportunity to compute necessary
+ *     additional details before the "real" traversal
+ *   - loop through the saved entries and call the original callback on them
+ */
+static int traverse_trees_wrapper(struct index_state *istate,
+                                 int n,
+                                 struct tree_desc *t,
+                                 struct traverse_info *info)
+{
+       int ret, i, old_offset;
+       traverse_callback_t old_fn;
+       char *old_callback_data_traverse_path;
+       struct merge_options *opt = info->data;
+       struct rename_info *renames = &opt->priv->renames;
+
+       assert(renames->dir_rename_mask == 2 || renames->dir_rename_mask == 4);
+
+       old_callback_data_traverse_path = renames->callback_data_traverse_path;
+       old_fn = info->fn;
+       old_offset = renames->callback_data_nr;
+
+       renames->callback_data_traverse_path = NULL;
+       info->fn = traverse_trees_wrapper_callback;
+       ret = traverse_trees(istate, n, t, info);
+       if (ret < 0)
+               return ret;
+
+       info->traverse_path = renames->callback_data_traverse_path;
+       info->fn = old_fn;
+       for (i = old_offset; i < renames->callback_data_nr; ++i) {
+               info->fn(n,
+                        renames->callback_data[i].mask,
+                        renames->callback_data[i].dirmask,
+                        renames->callback_data[i].names,
+                        info);
+       }
+
+       renames->callback_data_nr = old_offset;
+       free(renames->callback_data_traverse_path);
+       renames->callback_data_traverse_path = old_callback_data_traverse_path;
+       info->traverse_path = NULL;
+       return 0;
+}
+
 static void setup_path_info(struct merge_options *opt,
                            struct string_list_item *result,
                            const char *current_dir_name,
@@ -496,8 +852,9 @@ static void setup_path_info(struct merge_options *opt,
        assert(!df_conflict || !resolved); /* df_conflict implies !resolved */
        assert(resolved == (merged_version != NULL));
 
-       mi = xcalloc(1, resolved ? sizeof(struct merged_info) :
-                                  sizeof(struct conflict_info));
+       mi = mem_pool_calloc(&opt->priv->pool, 1,
+                            resolved ? sizeof(struct merged_info) :
+                                       sizeof(struct conflict_info));
        mi->directory_name = current_dir_name;
        mi->basename_offset = current_dir_name_len;
        mi->clean = !!resolved;
@@ -539,17 +896,66 @@ static void add_pair(struct merge_options *opt,
                     struct name_entry *names,
                     const char *pathname,
                     unsigned side,
-                    unsigned is_add /* if false, is_delete */)
+                    unsigned is_add /* if false, is_delete */,
+                    unsigned match_mask,
+                    unsigned dir_rename_mask)
 {
        struct diff_filespec *one, *two;
        struct rename_info *renames = &opt->priv->renames;
        int names_idx = is_add ? side : 0;
 
-       one = alloc_filespec(pathname);
-       two = alloc_filespec(pathname);
+       if (is_add) {
+               assert(match_mask == 0 || match_mask == 6);
+               if (strset_contains(&renames->cached_target_names[side],
+                                   pathname))
+                       return;
+       } else {
+               unsigned content_relevant = (match_mask == 0);
+               unsigned location_relevant = (dir_rename_mask == 0x07);
+
+               assert(match_mask == 0 || match_mask == 3 || match_mask == 5);
+
+               /*
+                * If pathname is found in cached_irrelevant[side] due to
+                * previous pick but for this commit content is relevant,
+                * then we need to remove it from cached_irrelevant.
+                */
+               if (content_relevant)
+                       /* strset_remove is no-op if strset doesn't have key */
+                       strset_remove(&renames->cached_irrelevant[side],
+                                     pathname);
+
+               /*
+                * We do not need to re-detect renames for paths that we already
+                * know the pairing, i.e. for cached_pairs (or
+                * cached_irrelevant).  However, handle_deferred_entries() needs
+                * to loop over the union of keys from relevant_sources[side] and
+                * cached_pairs[side], so for simplicity we set relevant_sources
+                * for all the cached_pairs too and then strip them back out in
+                * prune_cached_from_relevant() at the beginning of
+                * detect_regular_renames().
+                */
+               if (content_relevant || location_relevant) {
+                       /* content_relevant trumps location_relevant */
+                       strintmap_set(&renames->relevant_sources[side], pathname,
+                                     content_relevant ? RELEVANT_CONTENT : RELEVANT_LOCATION);
+               }
+
+               /*
+                * Avoid creating pair if we've already cached rename results.
+                * Note that we do this after setting relevant_sources[side]
+                * as noted in the comment above.
+                */
+               if (strmap_contains(&renames->cached_pairs[side], pathname) ||
+                   strset_contains(&renames->cached_irrelevant[side], pathname))
+                       return;
+       }
+
+       one = pool_alloc_filespec(&opt->priv->pool, pathname);
+       two = pool_alloc_filespec(&opt->priv->pool, pathname);
        fill_filespec(is_add ? two : one,
                      &names[names_idx].oid, 1, names[names_idx].mode);
-       diff_queue(&renames->pairs[side], one, two);
+       pool_diff_queue(&opt->priv->pool, &renames->pairs[side], one, two);
 }
 
 static void collect_rename_info(struct merge_options *opt,
@@ -563,14 +969,75 @@ static void collect_rename_info(struct merge_options *opt,
        struct rename_info *renames = &opt->priv->renames;
        unsigned side;
 
+       /*
+        * Update dir_rename_mask (determines ignore-rename-source validity)
+        *
+        * dir_rename_mask helps us keep track of when directory rename
+        * detection may be relevant.  Basically, whenver a directory is
+        * removed on one side of history, and a file is added to that
+        * directory on the other side of history, directory rename
+        * detection is relevant (meaning we have to detect renames for all
+        * files within that directory to deduce where the directory
+        * moved).  Also, whenever a directory needs directory rename
+        * detection, due to the "majority rules" choice for where to move
+        * it (see t6423 testcase 1f), we also need to detect renames for
+        * all files within subdirectories of that directory as well.
+        *
+        * Here we haven't looked at files within the directory yet, we are
+        * just looking at the directory itself.  So, if we aren't yet in
+        * a case where a parent directory needed directory rename detection
+        * (i.e. dir_rename_mask != 0x07), and if the directory was removed
+        * on one side of history, record the mask of the other side of
+        * history in dir_rename_mask.
+        */
+       if (renames->dir_rename_mask != 0x07 &&
+           (dirmask == 3 || dirmask == 5)) {
+               /* simple sanity check */
+               assert(renames->dir_rename_mask == 0 ||
+                      renames->dir_rename_mask == (dirmask & ~1));
+               /* update dir_rename_mask; have it record mask of new side */
+               renames->dir_rename_mask = (dirmask & ~1);
+       }
+
        /* Update dirs_removed, as needed */
        if (dirmask == 1 || dirmask == 3 || dirmask == 5) {
                /* absent_mask = 0x07 - dirmask; sides = absent_mask/2 */
                unsigned sides = (0x07 - dirmask)/2;
+               unsigned relevance = (renames->dir_rename_mask == 0x07) ?
+                                       RELEVANT_FOR_ANCESTOR : NOT_RELEVANT;
+               /*
+                * Record relevance of this directory.  However, note that
+                * when collect_merge_info_callback() recurses into this
+                * directory and calls collect_rename_info() on paths
+                * within that directory, if we find a path that was added
+                * to this directory on the other side of history, we will
+                * upgrade this value to RELEVANT_FOR_SELF; see below.
+                */
                if (sides & 1)
-                       strset_add(&renames->dirs_removed[1], fullname);
+                       strintmap_set(&renames->dirs_removed[1], fullname,
+                                     relevance);
                if (sides & 2)
-                       strset_add(&renames->dirs_removed[2], fullname);
+                       strintmap_set(&renames->dirs_removed[2], fullname,
+                                     relevance);
+       }
+
+       /*
+        * Here's the block that potentially upgrades to RELEVANT_FOR_SELF.
+        * When we run across a file added to a directory.  In such a case,
+        * find the directory of the file and upgrade its relevance.
+        */
+       if (renames->dir_rename_mask == 0x07 &&
+           (filemask == 2 || filemask == 4)) {
+               /*
+                * Need directory rename for parent directory on other side
+                * of history from added file.  Thus
+                *    side = (~filemask & 0x06) >> 1
+                * or
+                *    side = 3 - (filemask/2).
+                */
+               unsigned side = 3 - (filemask >> 1);
+               strintmap_set(&renames->dirs_removed[side], dirname,
+                             RELEVANT_FOR_SELF);
        }
 
        if (filemask == 0 || filemask == 7)
@@ -581,11 +1048,15 @@ static void collect_rename_info(struct merge_options *opt,
 
                /* Check for deletion on side */
                if ((filemask & 1) && !(filemask & side_mask))
-                       add_pair(opt, names, fullname, side, 0 /* delete */);
+                       add_pair(opt, names, fullname, side, 0 /* delete */,
+                                match_mask & filemask,
+                                renames->dir_rename_mask);
 
                /* Check for addition on side */
                if (!(filemask & 1) && (filemask & side_mask))
-                       add_pair(opt, names, fullname, side, 1 /* add */);
+                       add_pair(opt, names, fullname, side, 1 /* add */,
+                                match_mask & filemask,
+                                renames->dir_rename_mask);
        }
 }
 
@@ -603,12 +1074,14 @@ static int collect_merge_info_callback(int n,
         */
        struct merge_options *opt = info->data;
        struct merge_options_internal *opti = opt->priv;
+       struct rename_info *renames = &opt->priv->renames;
        struct string_list_item pi;  /* Path Info */
        struct conflict_info *ci; /* typed alias to pi.util (which is void*) */
        struct name_entry *p;
        size_t len;
        char *fullpath;
        const char *dirname = opti->current_dir_name;
+       unsigned prev_dir_rename_mask = renames->dir_rename_mask;
        unsigned filemask = mask & ~dirmask;
        unsigned match_mask = 0; /* will be updated below */
        unsigned mbase_null = !(mask & 1);
@@ -673,7 +1146,7 @@ static int collect_merge_info_callback(int n,
        len = traverse_path_len(info, p->pathlen);
 
        /* +1 in both of the following lines to include the NUL byte */
-       fullpath = xmalloc(len + 1);
+       fullpath = mem_pool_alloc(&opt->priv->pool, len + 1);
        make_traverse_path(fullpath, len + 1, info, p->path, p->pathlen);
 
        /*
@@ -684,20 +1157,67 @@ static int collect_merge_info_callback(int n,
        if (side1_matches_mbase && side2_matches_mbase) {
                /* mbase, side1, & side2 all match; use mbase as resolution */
                setup_path_info(opt, &pi, dirname, info->pathlen, fullpath,
-                               names, names+0, mbase_null, 0,
+                               names, names+0, mbase_null, 0 /* df_conflict */,
+                               filemask, dirmask, 1 /* resolved */);
+               return mask;
+       }
+
+       /*
+        * If the sides match, and all three paths are present and are
+        * files, then we can take either as the resolution.  We can't do
+        * this with trees, because there may be rename sources from the
+        * merge_base.
+        */
+       if (sides_match && filemask == 0x07) {
+               /* use side1 (== side2) version as resolution */
+               setup_path_info(opt, &pi, dirname, info->pathlen, fullpath,
+                               names, names+1, side1_null, 0,
                                filemask, dirmask, 1);
                return mask;
        }
 
        /*
-        * Gather additional information used in rename detection.
+        * If side1 matches mbase and all three paths are present and are
+        * files, then we can use side2 as the resolution.  We cannot
+        * necessarily do so this for trees, because there may be rename
+        * destinations within side2.
+        */
+       if (side1_matches_mbase && filemask == 0x07) {
+               /* use side2 version as resolution */
+               setup_path_info(opt, &pi, dirname, info->pathlen, fullpath,
+                               names, names+2, side2_null, 0,
+                               filemask, dirmask, 1);
+               return mask;
+       }
+
+       /* Similar to above but swapping sides 1 and 2 */
+       if (side2_matches_mbase && filemask == 0x07) {
+               /* use side1 version as resolution */
+               setup_path_info(opt, &pi, dirname, info->pathlen, fullpath,
+                               names, names+1, side1_null, 0,
+                               filemask, dirmask, 1);
+               return mask;
+       }
+
+       /*
+        * Sometimes we can tell that a source path need not be included in
+        * rename detection -- namely, whenever either
+        *    side1_matches_mbase && side2_null
+        * or
+        *    side2_matches_mbase && side1_null
+        * However, we call collect_rename_info() even in those cases,
+        * because exact renames are cheap and would let us remove both a
+        * source and destination path.  We'll cull the unneeded sources
+        * later.
         */
        collect_rename_info(opt, names, dirname, fullpath,
                            filemask, dirmask, match_mask);
 
        /*
-        * Record information about the path so we can resolve later in
-        * process_entries.
+        * None of the special cases above matched, so we have a
+        * provisional conflict.  (Rename detection might allow us to
+        * unconflict some more cases, but that comes later so all we can
+        * do now is record the different non-null file hashes.)
         */
        setup_path_info(opt, &pi, dirname, info->pathlen, fullpath,
                        names, NULL, 0, df_conflict, filemask, dirmask, 0);
@@ -712,8 +1232,36 @@ static int collect_merge_info_callback(int n,
                struct tree_desc t[3];
                void *buf[3] = {NULL, NULL, NULL};
                const char *original_dir_name;
-               int i, ret;
+               int i, ret, side;
+
+               /*
+                * Check for whether we can avoid recursing due to one side
+                * matching the merge base.  The side that does NOT match is
+                * the one that might have a rename destination we need.
+                */
+               assert(!side1_matches_mbase || !side2_matches_mbase);
+               side = side1_matches_mbase ? MERGE_SIDE2 :
+                       side2_matches_mbase ? MERGE_SIDE1 : MERGE_BASE;
+               if (filemask == 0 && (dirmask == 2 || dirmask == 4)) {
+                       /*
+                        * Also defer recursing into new directories; set up a
+                        * few variables to let us do so.
+                        */
+                       ci->match_mask = (7 - dirmask);
+                       side = dirmask / 2;
+               }
+               if (renames->dir_rename_mask != 0x07 &&
+                   side != MERGE_BASE &&
+                   renames->deferred[side].trivial_merges_okay &&
+                   !strset_contains(&renames->deferred[side].target_dirs,
+                                    pi.string)) {
+                       strintmap_set(&renames->deferred[side].possible_trivial_merges,
+                                     pi.string, renames->dir_rename_mask);
+                       renames->dir_rename_mask = prev_dir_rename_mask;
+                       return mask;
+               }
 
+               /* We need to recurse */
                ci->match_mask &= filemask;
                newinfo = *info;
                newinfo.prev = info;
@@ -749,8 +1297,13 @@ static int collect_merge_info_callback(int n,
 
                original_dir_name = opti->current_dir_name;
                opti->current_dir_name = pi.string;
-               ret = traverse_trees(NULL, 3, t, &newinfo);
+               if (renames->dir_rename_mask == 0 ||
+                   renames->dir_rename_mask == 0x07)
+                       ret = traverse_trees(NULL, 3, t, &newinfo);
+               else
+                       ret = traverse_trees_wrapper(NULL, 3, t, &newinfo);
                opti->current_dir_name = original_dir_name;
+               renames->dir_rename_mask = prev_dir_rename_mask;
 
                for (i = MERGE_BASE; i <= MERGE_SIDE2; i++)
                        free(buf[i]);
@@ -762,6 +1315,192 @@ static int collect_merge_info_callback(int n,
        return mask;
 }
 
+static void resolve_trivial_directory_merge(struct conflict_info *ci, int side)
+{
+       VERIFY_CI(ci);
+       assert((side == 1 && ci->match_mask == 5) ||
+              (side == 2 && ci->match_mask == 3));
+       oidcpy(&ci->merged.result.oid, &ci->stages[side].oid);
+       ci->merged.result.mode = ci->stages[side].mode;
+       ci->merged.is_null = is_null_oid(&ci->stages[side].oid);
+       ci->match_mask = 0;
+       ci->merged.clean = 1; /* (ci->filemask == 0); */
+}
+
+static int handle_deferred_entries(struct merge_options *opt,
+                                  struct traverse_info *info)
+{
+       struct rename_info *renames = &opt->priv->renames;
+       struct hashmap_iter iter;
+       struct strmap_entry *entry;
+       int side, ret = 0;
+       int path_count_before, path_count_after = 0;
+
+       path_count_before = strmap_get_size(&opt->priv->paths);
+       for (side = MERGE_SIDE1; side <= MERGE_SIDE2; side++) {
+               unsigned optimization_okay = 1;
+               struct strintmap copy;
+
+               /* Loop over the set of paths we need to know rename info for */
+               strset_for_each_entry(&renames->relevant_sources[side],
+                                     &iter, entry) {
+                       char *rename_target, *dir, *dir_marker;
+                       struct strmap_entry *e;
+
+                       /*
+                        * If we don't know delete/rename info for this path,
+                        * then we need to recurse into all trees to get all
+                        * adds to make sure we have it.
+                        */
+                       if (strset_contains(&renames->cached_irrelevant[side],
+                                           entry->key))
+                               continue;
+                       e = strmap_get_entry(&renames->cached_pairs[side],
+                                            entry->key);
+                       if (!e) {
+                               optimization_okay = 0;
+                               break;
+                       }
+
+                       /* If this is a delete, we have enough info already */
+                       rename_target = e->value;
+                       if (!rename_target)
+                               continue;
+
+                       /* If we already walked the rename target, we're good */
+                       if (strmap_contains(&opt->priv->paths, rename_target))
+                               continue;
+
+                       /*
+                        * Otherwise, we need to get a list of directories that
+                        * will need to be recursed into to get this
+                        * rename_target.
+                        */
+                       dir = xstrdup(rename_target);
+                       while ((dir_marker = strrchr(dir, '/'))) {
+                               *dir_marker = '\0';
+                               if (strset_contains(&renames->deferred[side].target_dirs,
+                                                   dir))
+                                       break;
+                               strset_add(&renames->deferred[side].target_dirs,
+                                          dir);
+                       }
+                       free(dir);
+               }
+               renames->deferred[side].trivial_merges_okay = optimization_okay;
+               /*
+                * We need to recurse into any directories in
+                * possible_trivial_merges[side] found in target_dirs[side].
+                * But when we recurse, we may need to queue up some of the
+                * subdirectories for possible_trivial_merges[side].  Since
+                * we can't safely iterate through a hashmap while also adding
+                * entries, move the entries into 'copy', iterate over 'copy',
+                * and then we'll also iterate anything added into
+                * possible_trivial_merges[side] once this loop is done.
+                */
+               copy = renames->deferred[side].possible_trivial_merges;
+               strintmap_init_with_options(&renames->deferred[side].possible_trivial_merges,
+                                           0,
+                                           &opt->priv->pool,
+                                           0);
+               strintmap_for_each_entry(&copy, &iter, entry) {
+                       const char *path = entry->key;
+                       unsigned dir_rename_mask = (intptr_t)entry->value;
+                       struct conflict_info *ci;
+                       unsigned dirmask;
+                       struct tree_desc t[3];
+                       void *buf[3] = {NULL,};
+                       int i;
+
+                       ci = strmap_get(&opt->priv->paths, path);
+                       VERIFY_CI(ci);
+                       dirmask = ci->dirmask;
+
+                       if (optimization_okay &&
+                           !strset_contains(&renames->deferred[side].target_dirs,
+                                            path)) {
+                               resolve_trivial_directory_merge(ci, side);
+                               continue;
+                       }
+
+                       info->name = path;
+                       info->namelen = strlen(path);
+                       info->pathlen = info->namelen + 1;
+
+                       for (i = 0; i < 3; i++, dirmask >>= 1) {
+                               if (i == 1 && ci->match_mask == 3)
+                                       t[1] = t[0];
+                               else if (i == 2 && ci->match_mask == 5)
+                                       t[2] = t[0];
+                               else if (i == 2 && ci->match_mask == 6)
+                                       t[2] = t[1];
+                               else {
+                                       const struct object_id *oid = NULL;
+                                       if (dirmask & 1)
+                                               oid = &ci->stages[i].oid;
+                                       buf[i] = fill_tree_descriptor(opt->repo,
+                                                                     t+i, oid);
+                               }
+                       }
+
+                       ci->match_mask &= ci->filemask;
+                       opt->priv->current_dir_name = path;
+                       renames->dir_rename_mask = dir_rename_mask;
+                       if (renames->dir_rename_mask == 0 ||
+                           renames->dir_rename_mask == 0x07)
+                               ret = traverse_trees(NULL, 3, t, info);
+                       else
+                               ret = traverse_trees_wrapper(NULL, 3, t, info);
+
+                       for (i = MERGE_BASE; i <= MERGE_SIDE2; i++)
+                               free(buf[i]);
+
+                       if (ret < 0)
+                               return ret;
+               }
+               strintmap_clear(&copy);
+               strintmap_for_each_entry(&renames->deferred[side].possible_trivial_merges,
+                                        &iter, entry) {
+                       const char *path = entry->key;
+                       struct conflict_info *ci;
+
+                       ci = strmap_get(&opt->priv->paths, path);
+                       VERIFY_CI(ci);
+
+                       assert(renames->deferred[side].trivial_merges_okay &&
+                              !strset_contains(&renames->deferred[side].target_dirs,
+                                               path));
+                       resolve_trivial_directory_merge(ci, side);
+               }
+               if (!optimization_okay || path_count_after)
+                       path_count_after = strmap_get_size(&opt->priv->paths);
+       }
+       if (path_count_after) {
+               /*
+                * The choice of wanted_factor here does not affect
+                * correctness, only performance.  When the
+                *    path_count_after / path_count_before
+                * ratio is high, redoing after renames is a big
+                * performance boost.  I suspect that redoing is a wash
+                * somewhere near a value of 2, and below that redoing will
+                * slow things down.  I applied a fudge factor and picked
+                * 3; see the commit message when this was introduced for
+                * back of the envelope calculations for this ratio.
+                */
+               const int wanted_factor = 3;
+
+               /* We should only redo collect_merge_info one time */
+               assert(renames->redo_after_renames == 0);
+
+               if (path_count_after / path_count_before >= wanted_factor) {
+                       renames->redo_after_renames = 1;
+                       renames->cached_pairs_valid_side = -1;
+               }
+       } else if (renames->redo_after_renames == 2)
+               renames->redo_after_renames = 0;
+       return ret;
+}
+
 static int collect_merge_info(struct merge_options *opt,
                              struct tree *merge_base,
                              struct tree *side1,
@@ -787,6 +1526,8 @@ static int collect_merge_info(struct merge_options *opt,
 
        trace2_region_enter("merge", "traverse_trees", opt->repo);
        ret = traverse_trees(NULL, 3, t, &info);
+       if (ret == 0)
+               ret = handle_deferred_entries(opt, &info);
        trace2_region_leave("merge", "traverse_trees", opt->repo);
 
        return ret;
@@ -818,7 +1559,6 @@ static int find_first_merges(struct repository *repo,
        xsnprintf(merged_revision, sizeof(merged_revision), "^%s",
                  oid_to_hex(&a->object.oid));
        repo_init_revisions(repo, &revs, NULL);
-       rev_opts.submodule = path;
        /* FIXME: can't handle linked worktrees in submodules yet */
        revs.single_worktree = path != NULL;
        setup_revisions(ARRAY_SIZE(rev_args)-1, rev_args, &revs, &rev_opts);
@@ -828,7 +1568,7 @@ static int find_first_merges(struct repository *repo,
                die("revision walk setup failed");
        while ((commit = get_revision(&revs)) != NULL) {
                struct object *o = &(commit->object);
-               if (in_merge_bases(b, commit))
+               if (repo_in_merge_bases(repo, b, commit))
                        add_object_array(o, NULL, &merges);
        }
        reset_revision_walk();
@@ -843,7 +1583,7 @@ static int find_first_merges(struct repository *repo,
                contains_another = 0;
                for (j = 0; j < merges.nr; j++) {
                        struct commit *m2 = (struct commit *) merges.objects[j].item;
-                       if (i != j && in_merge_bases(m2, m1)) {
+                       if (i != j && repo_in_merge_bases(repo, m2, m1)) {
                                contains_another = 1;
                                break;
                        }
@@ -864,10 +1604,12 @@ static int merge_submodule(struct merge_options *opt,
                           const struct object_id *b,
                           struct object_id *result)
 {
+       struct repository subrepo;
+       struct strbuf sb = STRBUF_INIT;
+       int ret = 0;
        struct commit *commit_o, *commit_a, *commit_b;
        int parent_count;
        struct object_array merges;
-       struct strbuf sb = STRBUF_INIT;
 
        int i;
        int search = !opt->priv->call_depth;
@@ -883,46 +1625,48 @@ static int merge_submodule(struct merge_options *opt,
        if (is_null_oid(b))
                return 0;
 
-       if (add_submodule_odb(path)) {
+       if (repo_submodule_init(&subrepo, opt->repo, path, null_oid())) {
                path_msg(opt, path, 0,
-                        _("Failed to merge submodule %s (not checked out)"),
-                        path);
+                               _("Failed to merge submodule %s (not checked out)"),
+                               path);
                return 0;
        }
 
-       if (!(commit_o = lookup_commit_reference(opt->repo, o)) ||
-           !(commit_a = lookup_commit_reference(opt->repo, a)) ||
-           !(commit_b = lookup_commit_reference(opt->repo, b))) {
+       if (!(commit_o = lookup_commit_reference(&subrepo, o)) ||
+           !(commit_a = lookup_commit_reference(&subrepo, a)) ||
+           !(commit_b = lookup_commit_reference(&subrepo, b))) {
                path_msg(opt, path, 0,
                         _("Failed to merge submodule %s (commits not present)"),
                         path);
-               return 0;
+               goto cleanup;
        }
 
        /* check whether both changes are forward */
-       if (!in_merge_bases(commit_o, commit_a) ||
-           !in_merge_bases(commit_o, commit_b)) {
+       if (!repo_in_merge_bases(&subrepo, commit_o, commit_a) ||
+           !repo_in_merge_bases(&subrepo, commit_o, commit_b)) {
                path_msg(opt, path, 0,
                         _("Failed to merge submodule %s "
                           "(commits don't follow merge-base)"),
                         path);
-               return 0;
+               goto cleanup;
        }
 
        /* Case #1: a is contained in b or vice versa */
-       if (in_merge_bases(commit_a, commit_b)) {
+       if (repo_in_merge_bases(&subrepo, commit_a, commit_b)) {
                oidcpy(result, b);
                path_msg(opt, path, 1,
                         _("Note: Fast-forwarding submodule %s to %s"),
                         path, oid_to_hex(b));
-               return 1;
+               ret = 1;
+               goto cleanup;
        }
-       if (in_merge_bases(commit_b, commit_a)) {
+       if (repo_in_merge_bases(&subrepo, commit_b, commit_a)) {
                oidcpy(result, a);
                path_msg(opt, path, 1,
                         _("Note: Fast-forwarding submodule %s to %s"),
                         path, oid_to_hex(a));
-               return 1;
+               ret = 1;
+               goto cleanup;
        }
 
        /*
@@ -934,10 +1678,10 @@ static int merge_submodule(struct merge_options *opt,
 
        /* Skip the search if makes no sense to the calling context.  */
        if (!search)
-               return 0;
+               goto cleanup;
 
        /* find commit which merges them */
-       parent_count = find_first_merges(opt->repo, path, commit_a, commit_b,
+       parent_count = find_first_merges(&subrepo, path, commit_a, commit_b,
                                         &merges);
        switch (parent_count) {
        case 0:
@@ -945,7 +1689,7 @@ static int merge_submodule(struct merge_options *opt,
                break;
 
        case 1:
-               format_commit(&sb, 4,
+               format_commit(&sb, 4, &subrepo,
                              (struct commit *)merges.objects[0].item);
                path_msg(opt, path, 0,
                         _("Failed to merge submodule %s, but a possible merge "
@@ -962,7 +1706,7 @@ static int merge_submodule(struct merge_options *opt,
                break;
        default:
                for (i = 0; i < merges.nr; i++)
-                       format_commit(&sb, 4,
+                       format_commit(&sb, 4, &subrepo,
                                      (struct commit *)merges.objects[i].item);
                path_msg(opt, path, 0,
                         _("Failed to merge submodule %s, but multiple "
@@ -971,7 +1715,66 @@ static int merge_submodule(struct merge_options *opt,
        }
 
        object_array_clear(&merges);
-       return 0;
+cleanup:
+       repo_clear(&subrepo);
+       return ret;
+}
+
+static void initialize_attr_index(struct merge_options *opt)
+{
+       /*
+        * The renormalize_buffer() functions require attributes, and
+        * annoyingly those can only be read from the working tree or from
+        * an index_state.  merge-ort doesn't have an index_state, so we
+        * generate a fake one containing only attribute information.
+        */
+       struct merged_info *mi;
+       struct index_state *attr_index = &opt->priv->attr_index;
+       struct cache_entry *ce;
+
+       attr_index->initialized = 1;
+
+       if (!opt->renormalize)
+               return;
+
+       mi = strmap_get(&opt->priv->paths, GITATTRIBUTES_FILE);
+       if (!mi)
+               return;
+
+       if (mi->clean) {
+               int len = strlen(GITATTRIBUTES_FILE);
+               ce = make_empty_cache_entry(attr_index, len);
+               ce->ce_mode = create_ce_mode(mi->result.mode);
+               ce->ce_flags = create_ce_flags(0);
+               ce->ce_namelen = len;
+               oidcpy(&ce->oid, &mi->result.oid);
+               memcpy(ce->name, GITATTRIBUTES_FILE, len);
+               add_index_entry(attr_index, ce,
+                               ADD_CACHE_OK_TO_ADD | ADD_CACHE_OK_TO_REPLACE);
+               get_stream_filter(attr_index, GITATTRIBUTES_FILE, &ce->oid);
+       } else {
+               int stage, len;
+               struct conflict_info *ci;
+
+               ASSIGN_AND_VERIFY_CI(ci, mi);
+               for (stage = 0; stage < 3; stage++) {
+                       unsigned stage_mask = (1 << stage);
+
+                       if (!(ci->filemask & stage_mask))
+                               continue;
+                       len = strlen(GITATTRIBUTES_FILE);
+                       ce = make_empty_cache_entry(attr_index, len);
+                       ce->ce_mode = create_ce_mode(ci->stages[stage].mode);
+                       ce->ce_flags = create_ce_flags(stage);
+                       ce->ce_namelen = len;
+                       oidcpy(&ce->oid, &ci->stages[stage].oid);
+                       memcpy(ce->name, GITATTRIBUTES_FILE, len);
+                       add_index_entry(attr_index, ce,
+                                       ADD_CACHE_OK_TO_ADD | ADD_CACHE_OK_TO_REPLACE);
+                       get_stream_filter(attr_index, GITATTRIBUTES_FILE,
+                                         &ce->oid);
+               }
+       }
 }
 
 static int merge_3way(struct merge_options *opt,
@@ -986,7 +1789,10 @@ static int merge_3way(struct merge_options *opt,
        mmfile_t orig, src1, src2;
        struct ll_merge_options ll_opts = {0};
        char *base, *name1, *name2;
-       int merge_status;
+       enum ll_merge_result merge_status;
+
+       if (!opt->priv->attr_index.initialized)
+               initialize_attr_index(opt);
 
        ll_opts.renormalize = opt->renormalize;
        ll_opts.extra_marker_size = extra_marker_size;
@@ -1026,7 +1832,11 @@ static int merge_3way(struct merge_options *opt,
 
        merge_status = ll_merge(result_buf, path, &orig, base,
                                &src1, name1, &src2, name2,
-                               opt->repo->index, &ll_opts);
+                               &opt->priv->attr_index, &ll_opts);
+       if (merge_status == LL_MERGE_BINARY_CONFLICT)
+               path_msg(opt, path, 0,
+                        "warning: Cannot merge binary files: %s (%s vs. %s)",
+                        path, name1, name2);
 
        free(base);
        free(name1);
@@ -1118,7 +1928,7 @@ static int handle_content_merge(struct merge_options *opt,
                two_way = ((S_IFMT & o->mode) != (S_IFMT & a->mode));
 
                merge_status = merge_3way(opt, path,
-                                         two_way ? &null_oid : &o->oid,
+                                         two_way ? null_oid() : &o->oid,
                                          &a->oid, &b->oid,
                                          pathnames, extra_marker_size,
                                          &result_buf);
@@ -1128,7 +1938,7 @@ static int handle_content_merge(struct merge_options *opt,
 
                if (!ret &&
                    write_object_file(result_buf.ptr, result_buf.size,
-                                     blob_type, &result->oid))
+                                     OBJ_BLOB, &result->oid))
                        ret = err(opt, _("Unable to add %s to database"),
                                  path);
 
@@ -1140,7 +1950,7 @@ static int handle_content_merge(struct merge_options *opt,
        } else if (S_ISGITLINK(a->mode)) {
                int two_way = ((S_IFMT & o->mode) != (S_IFMT & a->mode));
                clean = merge_submodule(opt, pathnames[0],
-                                       two_way ? &null_oid : &o->oid,
+                                       two_way ? null_oid() : &o->oid,
                                        &a->oid, &b->oid, &result->oid);
                if (opt->priv->call_depth && two_way && !clean) {
                        result->mode = o->mode;
@@ -1258,7 +2068,7 @@ static char *handle_path_level_conflicts(struct merge_options *opt,
         * to ensure that's the case.
         */
        c_info = strmap_get(collisions, new_path);
-       if (c_info == NULL)
+       if (!c_info)
                BUG("c_info is NULL");
 
        /*
@@ -1302,131 +2112,6 @@ static char *handle_path_level_conflicts(struct merge_options *opt,
        return new_path;
 }
 
-static void dirname_munge(char *filename)
-{
-       char *slash = strrchr(filename, '/');
-       if (!slash)
-               slash = filename;
-       *slash = '\0';
-}
-
-static void increment_count(struct strmap *dir_rename_count,
-                           char *old_dir,
-                           char *new_dir)
-{
-       struct strintmap *counts;
-       struct strmap_entry *e;
-
-       /* Get the {new_dirs -> counts} mapping using old_dir */
-       e = strmap_get_entry(dir_rename_count, old_dir);
-       if (e) {
-               counts = e->value;
-       } else {
-               counts = xmalloc(sizeof(*counts));
-               strintmap_init_with_options(counts, 0, NULL, 1);
-               strmap_put(dir_rename_count, old_dir, counts);
-       }
-
-       /* Increment the count for new_dir */
-       strintmap_incr(counts, new_dir, 1);
-}
-
-static void update_dir_rename_counts(struct strmap *dir_rename_count,
-                                    struct strset *dirs_removed,
-                                    const char *oldname,
-                                    const char *newname)
-{
-       char *old_dir = xstrdup(oldname);
-       char *new_dir = xstrdup(newname);
-       char new_dir_first_char = new_dir[0];
-       int first_time_in_loop = 1;
-
-       while (1) {
-               dirname_munge(old_dir);
-               dirname_munge(new_dir);
-
-               /*
-                * When renaming
-                *   "a/b/c/d/e/foo.c" -> "a/b/some/thing/else/e/foo.c"
-                * then this suggests that both
-                *   a/b/c/d/e/ => a/b/some/thing/else/e/
-                *   a/b/c/d/   => a/b/some/thing/else/
-                * so we want to increment counters for both.  We do NOT,
-                * however, also want to suggest that there was the following
-                * rename:
-                *   a/b/c/ => a/b/some/thing/
-                * so we need to quit at that point.
-                *
-                * Note the when first_time_in_loop, we only strip off the
-                * basename, and we don't care if that's different.
-                */
-               if (!first_time_in_loop) {
-                       char *old_sub_dir = strchr(old_dir, '\0')+1;
-                       char *new_sub_dir = strchr(new_dir, '\0')+1;
-                       if (!*new_dir) {
-                               /*
-                                * Special case when renaming to root directory,
-                                * i.e. when new_dir == "".  In this case, we had
-                                * something like
-                                *    a/b/subdir => subdir
-                                * and so dirname_munge() sets things up so that
-                                *    old_dir = "a/b\0subdir\0"
-                                *    new_dir = "\0ubdir\0"
-                                * We didn't have a '/' to overwrite a '\0' onto
-                                * in new_dir, so we have to compare differently.
-                                */
-                               if (new_dir_first_char != old_sub_dir[0] ||
-                                   strcmp(old_sub_dir+1, new_sub_dir))
-                                       break;
-                       } else {
-                               if (strcmp(old_sub_dir, new_sub_dir))
-                                       break;
-                       }
-               }
-
-               if (strset_contains(dirs_removed, old_dir))
-                       increment_count(dir_rename_count, old_dir, new_dir);
-               else
-                       break;
-
-               /* If we hit toplevel directory ("") for old or new dir, quit */
-               if (!*old_dir || !*new_dir)
-                       break;
-
-               first_time_in_loop = 0;
-       }
-
-       /* Free resources we don't need anymore */
-       free(old_dir);
-       free(new_dir);
-}
-
-static void compute_rename_counts(struct diff_queue_struct *pairs,
-                                 struct strmap *dir_rename_count,
-                                 struct strset *dirs_removed)
-{
-       int i;
-
-       for (i = 0; i < pairs->nr; ++i) {
-               struct diff_filepair *pair = pairs->queue[i];
-
-               /* File not part of directory rename if it wasn't renamed */
-               if (pair->status != 'R')
-                       continue;
-
-               /*
-                * Make dir_rename_count contain a map of a map:
-                *   old_directory -> {new_directory -> count}
-                * In other words, for every pair look at the directories for
-                * the old filename and the new filename and count how many
-                * times that pairing occurs.
-                */
-               update_dir_rename_counts(dir_rename_count, dirs_removed,
-                                        pair->one->path,
-                                        pair->two->path);
-       }
-}
-
 static void get_provisional_directory_renames(struct merge_options *opt,
                                              unsigned side,
                                              int *clean)
@@ -1435,9 +2120,6 @@ static void get_provisional_directory_renames(struct merge_options *opt,
        struct strmap_entry *entry;
        struct rename_info *renames = &opt->priv->renames;
 
-       compute_rename_counts(&renames->pairs[side],
-                             &renames->dir_rename_count[side],
-                             &renames->dirs_removed[side]);
        /*
         * Collapse
         *    dir_rename_count: old_directory -> {new_directory -> count}
@@ -1466,6 +2148,9 @@ static void get_provisional_directory_renames(struct merge_options *opt,
                        }
                }
 
+               if (max == 0)
+                       continue;
+
                if (bad_max == max) {
                        path_msg(opt, source_dir, 0,
                               _("CONFLICT (directory rename split): "
@@ -1474,18 +2159,7 @@ static void get_provisional_directory_renames(struct merge_options *opt,
                                 "no destination getting a majority of the "
                                 "files."),
                               source_dir);
-                       /*
-                        * We should mark this as unclean IF something attempts
-                        * to use this rename.  We do not yet have the logic
-                        * in place to detect if this directory rename is being
-                        * used, and optimizations that reduce the number of
-                        * renames cause this to falsely trigger.  For now,
-                        * just disable it, causing t6423 testcase 2a to break.
-                        * We'll later fix the detection, and when we do we
-                        * will re-enable setting *clean to 0 (and thereby fix
-                        * t6423 testcase 2a).
-                        */
-                       /*   *clean = 0;   */
+                       *clean = 0;
                } else {
                        strmap_put(&renames->dir_renames[side],
                                   source_dir, (void*)best);
@@ -1577,7 +2251,7 @@ static void compute_collisions(struct strmap *collisions,
                        free(new_path);
                } else {
                        CALLOC_ARRAY(collision_info, 1);
-                       string_list_init(&collision_info->source_files, 0);
+                       string_list_init_nodup(&collision_info->source_files);
                        strmap_put(collisions, new_path, collision_info);
                }
                string_list_insert(&collision_info->source_files,
@@ -1688,12 +2362,17 @@ static void apply_directory_rename_modifications(struct merge_options *opt,
        VERIFY_CI(ci);
 
        /* Find parent directories missing from opt->priv->paths */
-       cur_path = new_path;
+       cur_path = mem_pool_strdup(&opt->priv->pool, new_path);
+       free((char*)new_path);
+       new_path = (char *)cur_path;
+
        while (1) {
                /* Find the parent directory of cur_path */
                char *last_slash = strrchr(cur_path, '/');
                if (last_slash) {
-                       parent_name = xstrndup(cur_path, last_slash - cur_path);
+                       parent_name = mem_pool_strndup(&opt->priv->pool,
+                                                      cur_path,
+                                                      last_slash - cur_path);
                } else {
                        parent_name = opt->priv->toplevel_dir;
                        break;
@@ -1702,7 +2381,6 @@ static void apply_directory_rename_modifications(struct merge_options *opt,
                /* Look it up in opt->priv->paths */
                entry = strmap_get_entry(&opt->priv->paths, parent_name);
                if (entry) {
-                       free((char*)parent_name);
                        parent_name = entry->key; /* reuse known pointer */
                        break;
                }
@@ -1729,13 +2407,6 @@ static void apply_directory_rename_modifications(struct merge_options *opt,
                parent_name = cur_dir;
        }
 
-       /*
-        * We are removing old_path from opt->priv->paths.  old_path also will
-        * eventually need to be freed, but it may still be used by e.g.
-        * ci->pathnames.  So, store it in another string-list for now.
-        */
-       string_list_append(&opt->priv->paths_to_free, old_path);
-
        assert(ci->filemask == 2 || ci->filemask == 4);
        assert(ci->dirmask == 0);
        strmap_remove(&opt->priv->paths, old_path, 0);
@@ -1769,7 +2440,6 @@ static void apply_directory_rename_modifications(struct merge_options *opt,
                new_ci->stages[index].mode = ci->stages[index].mode;
                oidcpy(&new_ci->stages[index].oid, &ci->stages[index].oid);
 
-               free(ci);
                ci = new_ci;
        }
 
@@ -1796,7 +2466,7 @@ static void apply_directory_rename_modifications(struct merge_options *opt,
                 */
                ci->path_conflict = 1;
                if (pair->status == 'A')
-                       path_msg(opt, new_path, 0,
+                       path_msg(opt, new_path, 1,
                                 _("CONFLICT (file location): %s added in %s "
                                   "inside a directory that was renamed in %s, "
                                   "suggesting it should perhaps be moved to "
@@ -1804,7 +2474,7 @@ static void apply_directory_rename_modifications(struct merge_options *opt,
                                 old_path, branch_with_new_path,
                                 branch_with_dir_rename, new_path);
                else
-                       path_msg(opt, new_path, 0,
+                       path_msg(opt, new_path, 1,
                                 _("CONFLICT (file location): %s renamed to %s "
                                   "in %s, inside a directory that was renamed "
                                   "in %s, suggesting it should perhaps be "
@@ -1885,6 +2555,9 @@ static int process_renames(struct merge_options *opt,
                        VERIFY_CI(side2);
 
                        if (!strcmp(pathnames[1], pathnames[2])) {
+                               struct rename_info *ri = &opt->priv->renames;
+                               int j;
+
                                /* Both sides renamed the same way */
                                assert(side1 == side2);
                                memcpy(&side1->stages[0], &base->stages[0],
@@ -1894,6 +2567,16 @@ static int process_renames(struct merge_options *opt,
                                base->merged.is_null = 1;
                                base->merged.clean = 1;
 
+                               /*
+                                * Disable remembering renames optimization;
+                                * rename/rename(1to1) is incredibly rare, and
+                                * just disabling the optimization is easier
+                                * than purging cached_pairs,
+                                * cached_target_names, and dir_rename_counts.
+                                */
+                               for (j = 0; j < 3; j++)
+                                       ri->merge_trees[j] = NULL;
+
                                /* We handled both renames, i.e. i+1 handled */
                                i++;
                                /* Move to next rename */
@@ -2078,7 +2761,7 @@ static int process_renames(struct merge_options *opt,
                        if (type_changed) {
                                /* rename vs. typechange */
                                /* Mark the original as resolved by removal */
-                               memcpy(&oldinfo->stages[0].oid, &null_oid,
+                               memcpy(&oldinfo->stages[0].oid, null_oid(),
                                       sizeof(oldinfo->stages[0].oid));
                                oldinfo->stages[0].mode = 0;
                                oldinfo->filemask &= 0x06;
@@ -2111,6 +2794,21 @@ static int process_renames(struct merge_options *opt,
        return clean_merge;
 }
 
+static inline int possible_side_renames(struct rename_info *renames,
+                                       unsigned side_index)
+{
+       return renames->pairs[side_index].nr > 0 &&
+              !strintmap_empty(&renames->relevant_sources[side_index]);
+}
+
+static inline int possible_renames(struct rename_info *renames)
+{
+       return possible_side_renames(renames, 1) ||
+              possible_side_renames(renames, 2) ||
+              !strmap_empty(&renames->cached_pairs[1]) ||
+              !strmap_empty(&renames->cached_pairs[2]);
+}
+
 static void resolve_diffpair_statuses(struct diff_queue_struct *q)
 {
        /*
@@ -2132,6 +2830,125 @@ static void resolve_diffpair_statuses(struct diff_queue_struct *q)
        }
 }
 
+static void prune_cached_from_relevant(struct rename_info *renames,
+                                      unsigned side)
+{
+       /* Reason for this function described in add_pair() */
+       struct hashmap_iter iter;
+       struct strmap_entry *entry;
+
+       /* Remove from relevant_sources all entries in cached_pairs[side] */
+       strmap_for_each_entry(&renames->cached_pairs[side], &iter, entry) {
+               strintmap_remove(&renames->relevant_sources[side],
+                                entry->key);
+       }
+       /* Remove from relevant_sources all entries in cached_irrelevant[side] */
+       strset_for_each_entry(&renames->cached_irrelevant[side], &iter, entry) {
+               strintmap_remove(&renames->relevant_sources[side],
+                                entry->key);
+       }
+}
+
+static void use_cached_pairs(struct merge_options *opt,
+                            struct strmap *cached_pairs,
+                            struct diff_queue_struct *pairs)
+{
+       struct hashmap_iter iter;
+       struct strmap_entry *entry;
+
+       /*
+        * Add to side_pairs all entries from renames->cached_pairs[side_index].
+        * (Info in cached_irrelevant[side_index] is not relevant here.)
+        */
+       strmap_for_each_entry(cached_pairs, &iter, entry) {
+               struct diff_filespec *one, *two;
+               const char *old_name = entry->key;
+               const char *new_name = entry->value;
+               if (!new_name)
+                       new_name = old_name;
+
+               /*
+                * cached_pairs has *copies* of old_name and new_name,
+                * because it has to persist across merges.  Since
+                * pool_alloc_filespec() will just re-use the existing
+                * filenames, which will also get re-used by
+                * opt->priv->paths if they become renames, and then
+                * get freed at the end of the merge, that would leave
+                * the copy in cached_pairs dangling.  Avoid this by
+                * making a copy here.
+                */
+               old_name = mem_pool_strdup(&opt->priv->pool, old_name);
+               new_name = mem_pool_strdup(&opt->priv->pool, new_name);
+
+               /* We don't care about oid/mode, only filenames and status */
+               one = pool_alloc_filespec(&opt->priv->pool, old_name);
+               two = pool_alloc_filespec(&opt->priv->pool, new_name);
+               pool_diff_queue(&opt->priv->pool, pairs, one, two);
+               pairs->queue[pairs->nr-1]->status = entry->value ? 'R' : 'D';
+       }
+}
+
+static void cache_new_pair(struct rename_info *renames,
+                          int side,
+                          char *old_path,
+                          char *new_path,
+                          int free_old_value)
+{
+       char *old_value;
+       new_path = xstrdup(new_path);
+       old_value = strmap_put(&renames->cached_pairs[side],
+                              old_path, new_path);
+       strset_add(&renames->cached_target_names[side], new_path);
+       if (free_old_value)
+               free(old_value);
+       else
+               assert(!old_value);
+}
+
+static void possibly_cache_new_pair(struct rename_info *renames,
+                                   struct diff_filepair *p,
+                                   unsigned side,
+                                   char *new_path)
+{
+       int dir_renamed_side = 0;
+
+       if (new_path) {
+               /*
+                * Directory renames happen on the other side of history from
+                * the side that adds new files to the old directory.
+                */
+               dir_renamed_side = 3 - side;
+       } else {
+               int val = strintmap_get(&renames->relevant_sources[side],
+                                       p->one->path);
+               if (val == RELEVANT_NO_MORE) {
+                       assert(p->status == 'D');
+                       strset_add(&renames->cached_irrelevant[side],
+                                  p->one->path);
+               }
+               if (val <= 0)
+                       return;
+       }
+
+       if (p->status == 'D') {
+               /*
+                * If we already had this delete, we'll just set it's value
+                * to NULL again, so no harm.
+                */
+               strmap_put(&renames->cached_pairs[side], p->one->path, NULL);
+       } else if (p->status == 'R') {
+               if (!new_path)
+                       new_path = p->two->path;
+               else
+                       cache_new_pair(renames, dir_renamed_side,
+                                      p->two->path, new_path, 0);
+               cache_new_pair(renames, side, p->one->path, new_path, 1);
+       } else if (p->status == 'A' && new_path) {
+               cache_new_pair(renames, dir_renamed_side,
+                              p->two->path, new_path, 0);
+       }
+}
+
 static int compare_pairs(const void *a_, const void *b_)
 {
        const struct diff_filepair *a = *((const struct diff_filepair **)a_);
@@ -2140,20 +2957,32 @@ static int compare_pairs(const void *a_, const void *b_)
        return strcmp(a->one->path, b->one->path);
 }
 
-/* Call diffcore_rename() to compute which files have changed on given side */
-static void detect_regular_renames(struct merge_options *opt,
-                                  unsigned side_index)
+/* Call diffcore_rename() to update deleted/added pairs into rename pairs */
+static int detect_regular_renames(struct merge_options *opt,
+                                 unsigned side_index)
 {
        struct diff_options diff_opts;
        struct rename_info *renames = &opt->priv->renames;
 
+       prune_cached_from_relevant(renames, side_index);
+       if (!possible_side_renames(renames, side_index)) {
+               /*
+                * No rename detection needed for this side, but we still need
+                * to make sure 'adds' are marked correctly in case the other
+                * side had directory renames.
+                */
+               resolve_diffpair_statuses(&renames->pairs[side_index]);
+               return 0;
+       }
+
+       partial_clear_dir_rename_count(&renames->dir_rename_count[side_index]);
        repo_diff_setup(opt->repo, &diff_opts);
        diff_opts.flags.recursive = 1;
        diff_opts.flags.rename_empty = 0;
        diff_opts.detect_rename = DIFF_DETECT_RENAME;
        diff_opts.rename_limit = opt->rename_limit;
        if (opt->rename_limit <= 0)
-               diff_opts.rename_limit = 1000;
+               diff_opts.rename_limit = 7000;
        diff_opts.rename_score = opt->rename_score;
        diff_opts.show_rename_progress = opt->show_rename_progress;
        diff_opts.output_format = DIFF_FORMAT_NO_OUTPUT;
@@ -2161,10 +2990,17 @@ static void detect_regular_renames(struct merge_options *opt,
 
        diff_queued_diff = renames->pairs[side_index];
        trace2_region_enter("diff", "diffcore_rename", opt->repo);
-       diffcore_rename(&diff_opts);
+       diffcore_rename_extended(&diff_opts,
+                                &opt->priv->pool,
+                                &renames->relevant_sources[side_index],
+                                &renames->dirs_removed[side_index],
+                                &renames->dir_rename_count[side_index],
+                                &renames->cached_pairs[side_index]);
        trace2_region_leave("diff", "diffcore_rename", opt->repo);
        resolve_diffpair_statuses(&diff_queued_diff);
 
+       if (diff_opts.needed_rename_limit > 0)
+               renames->redo_after_renames = 0;
        if (diff_opts.needed_rename_limit > renames->needed_limit)
                renames->needed_limit = diff_opts.needed_rename_limit;
 
@@ -2174,11 +3010,15 @@ static void detect_regular_renames(struct merge_options *opt,
        diff_queued_diff.nr = 0;
        diff_queued_diff.queue = NULL;
        diff_flush(&diff_opts);
+
+       return 1;
 }
 
 /*
- * Get information of all renames which occurred in 'side_pairs', discarding
- * non-renames.
+ * Get information of all renames which occurred in 'side_pairs', making use
+ * of any implicit directory renames in side_dir_renames (also making use of
+ * implicit directory renames rename_exclusions as needed by
+ * check_for_directory_rename()).  Add all (updated) renames into result.
  */
 static int collect_renames(struct merge_options *opt,
                           struct diff_queue_struct *result,
@@ -2201,7 +3041,8 @@ static int collect_renames(struct merge_options *opt,
                char *new_path; /* non-NULL only with directory renames */
 
                if (p->status != 'A' && p->status != 'R') {
-                       diff_free_filepair(p);
+                       possibly_cache_new_pair(renames, p, side_index, NULL);
+                       pool_diff_free_filepair(&opt->priv->pool, p);
                        continue;
                }
 
@@ -2212,8 +3053,9 @@ static int collect_renames(struct merge_options *opt,
                                                      &collisions,
                                                      &clean);
 
+               possibly_cache_new_pair(renames, p, side_index, new_path);
                if (p->status != 'R' && !new_path) {
-                       diff_free_filepair(p);
+                       pool_diff_free_filepair(&opt->priv->pool, p);
                        continue;
                }
 
@@ -2256,15 +3098,40 @@ static int detect_and_process_renames(struct merge_options *opt,
                                      struct tree *side1,
                                      struct tree *side2)
 {
-       struct diff_queue_struct combined;
+       struct diff_queue_struct combined = { 0 };
        struct rename_info *renames = &opt->priv->renames;
-       int need_dir_renames, s, clean = 1;
+       int need_dir_renames, s, i, clean = 1;
+       unsigned detection_run = 0;
 
-       memset(&combined, 0, sizeof(combined));
+       if (!possible_renames(renames))
+               goto cleanup;
 
        trace2_region_enter("merge", "regular renames", opt->repo);
-       detect_regular_renames(opt, MERGE_SIDE1);
-       detect_regular_renames(opt, MERGE_SIDE2);
+       detection_run |= detect_regular_renames(opt, MERGE_SIDE1);
+       detection_run |= detect_regular_renames(opt, MERGE_SIDE2);
+       if (renames->needed_limit) {
+               renames->cached_pairs_valid_side = 0;
+               renames->redo_after_renames = 0;
+       }
+       if (renames->redo_after_renames && detection_run) {
+               int i, side;
+               struct diff_filepair *p;
+
+               /* Cache the renames, we found */
+               for (side = MERGE_SIDE1; side <= MERGE_SIDE2; side++) {
+                       for (i = 0; i < renames->pairs[side].nr; ++i) {
+                               p = renames->pairs[side].queue[i];
+                               possibly_cache_new_pair(renames, p, side, NULL);
+                       }
+               }
+
+               /* Restart the merge with the cached renames */
+               renames->redo_after_renames = 2;
+               trace2_region_leave("merge", "regular renames", opt->repo);
+               goto cleanup;
+       }
+       use_cached_pairs(opt, &renames->cached_pairs[1], &renames->pairs[1]);
+       use_cached_pairs(opt, &renames->cached_pairs[2], &renames->pairs[2]);
        trace2_region_leave("merge", "regular renames", opt->repo);
 
        trace2_region_enter("merge", "directory renames", opt->repo);
@@ -2288,55 +3155,153 @@ static int detect_and_process_renames(struct merge_options *opt,
        clean &= collect_renames(opt, &combined, MERGE_SIDE2,
                                 &renames->dir_renames[1],
                                 &renames->dir_renames[2]);
-       QSORT(combined.queue, combined.nr, compare_pairs);
+       STABLE_QSORT(combined.queue, combined.nr, compare_pairs);
        trace2_region_leave("merge", "directory renames", opt->repo);
 
        trace2_region_enter("merge", "process renames", opt->repo);
        clean &= process_renames(opt, &combined);
        trace2_region_leave("merge", "process renames", opt->repo);
 
+       goto simple_cleanup; /* collect_renames() handles some of cleanup */
+
+cleanup:
+       /*
+        * Free now unneeded filepairs, which would have been handled
+        * in collect_renames() normally but we skipped that code.
+        */
+       for (s = MERGE_SIDE1; s <= MERGE_SIDE2; s++) {
+               struct diff_queue_struct *side_pairs;
+               int i;
+
+               side_pairs = &renames->pairs[s];
+               for (i = 0; i < side_pairs->nr; ++i) {
+                       struct diff_filepair *p = side_pairs->queue[i];
+                       pool_diff_free_filepair(&opt->priv->pool, p);
+               }
+       }
+
+simple_cleanup:
        /* Free memory for renames->pairs[] and combined */
        for (s = MERGE_SIDE1; s <= MERGE_SIDE2; s++) {
                free(renames->pairs[s].queue);
                DIFF_QUEUE_CLEAR(&renames->pairs[s]);
        }
-       if (combined.nr) {
-               int i;
-               for (i = 0; i < combined.nr; i++)
-                       diff_free_filepair(combined.queue[i]);
-               free(combined.queue);
-       }
+       for (i = 0; i < combined.nr; i++)
+               pool_diff_free_filepair(&opt->priv->pool, combined.queue[i]);
+       free(combined.queue);
 
        return clean;
 }
 
 /*** Function Grouping: functions related to process_entries() ***/
 
-static int string_list_df_name_compare(const char *one, const char *two)
+static int sort_dirs_next_to_their_children(const char *one, const char *two)
 {
-       int onelen = strlen(one);
-       int twolen = strlen(two);
+       unsigned char c1, c2;
+
        /*
-        * Here we only care that entries for D/F conflicts are
-        * adjacent, in particular with the file of the D/F conflict
-        * appearing before files below the corresponding directory.
-        * The order of the rest of the list is irrelevant for us.
+        * Here we only care that entries for directories appear adjacent
+        * to and before files underneath the directory.  We can achieve
+        * that by pretending to add a trailing slash to every file and
+        * then sorting.  In other words, we do not want the natural
+        * sorting of
+        *     foo
+        *     foo.txt
+        *     foo/bar
+        * Instead, we want "foo" to sort as though it were "foo/", so that
+        * we instead get
+        *     foo.txt
+        *     foo
+        *     foo/bar
+        * To achieve this, we basically implement our own strcmp, except that
+        * if we get to the end of either string instead of comparing NUL to
+        * another character, we compare '/' to it.
+        *
+        * If this unusual "sort as though '/' were appended" perplexes
+        * you, perhaps it will help to note that this is not the final
+        * sort.  write_tree() will sort again without the trailing slash
+        * magic, but just on paths immediately under a given tree.
         *
-        * To achieve this, we sort with df_name_compare and provide
-        * the mode S_IFDIR so that D/F conflicts will sort correctly.
-        * We use the mode S_IFDIR for everything else for simplicity,
-        * since in other cases any changes in their order due to
-        * sorting cause no problems for us.
+        * The reason to not use df_name_compare directly was that it was
+        * just too expensive (we don't have the string lengths handy), so
+        * it was reimplemented.
+        */
+
+       /*
+        * NOTE: This function will never be called with two equal strings,
+        * because it is used to sort the keys of a strmap, and strmaps have
+        * unique keys by construction.  That simplifies our c1==c2 handling
+        * below.
         */
-       int cmp = df_name_compare(one, onelen, S_IFDIR,
-                                 two, twolen, S_IFDIR);
+
+       while (*one && (*one == *two)) {
+               one++;
+               two++;
+       }
+
+       c1 = *one ? *one : '/';
+       c2 = *two ? *two : '/';
+
+       if (c1 == c2) {
+               /* Getting here means one is a leading directory of the other */
+               return (*one) ? 1 : -1;
+       } else
+               return c1 - c2;
+}
+
+static int read_oid_strbuf(struct merge_options *opt,
+                          const struct object_id *oid,
+                          struct strbuf *dst)
+{
+       void *buf;
+       enum object_type type;
+       unsigned long size;
+       buf = read_object_file(oid, &type, &size);
+       if (!buf)
+               return err(opt, _("cannot read object %s"), oid_to_hex(oid));
+       if (type != OBJ_BLOB) {
+               free(buf);
+               return err(opt, _("object %s is not a blob"), oid_to_hex(oid));
+       }
+       strbuf_attach(dst, buf, size, size + 1);
+       return 0;
+}
+
+static int blob_unchanged(struct merge_options *opt,
+                         const struct version_info *base,
+                         const struct version_info *side,
+                         const char *path)
+{
+       struct strbuf basebuf = STRBUF_INIT;
+       struct strbuf sidebuf = STRBUF_INIT;
+       int ret = 0; /* assume changed for safety */
+       struct index_state *idx = &opt->priv->attr_index;
+
+       if (!idx->initialized)
+               initialize_attr_index(opt);
+
+       if (base->mode != side->mode)
+               return 0;
+       if (oideq(&base->oid, &side->oid))
+               return 1;
+
+       if (read_oid_strbuf(opt, &base->oid, &basebuf) ||
+           read_oid_strbuf(opt, &side->oid, &sidebuf))
+               goto error_return;
        /*
-        * Now that 'foo' and 'foo/bar' compare equal, we have to make sure
-        * that 'foo' comes before 'foo/bar'.
+        * Note: binary | is used so that both renormalizations are
+        * performed.  Comparison can be skipped if both files are
+        * unchanged since their sha1s have already been compared.
         */
-       if (cmp)
-               return cmp;
-       return onelen - twolen;
+       if (renormalize_buffer(idx, path, basebuf.buf, basebuf.len, &basebuf) |
+           renormalize_buffer(idx, path, sidebuf.buf, sidebuf.len, &sidebuf))
+               ret = (basebuf.len == sidebuf.len &&
+                      !memcmp(basebuf.buf, sidebuf.buf, basebuf.len));
+
+error_return:
+       strbuf_release(&basebuf);
+       strbuf_release(&sidebuf);
+       return ret;
 }
 
 struct directory_versions {
@@ -2399,22 +3364,15 @@ static void write_tree(struct object_id *result_oid,
                       size_t hash_size)
 {
        size_t maxlen = 0, extra;
-       unsigned int nr = versions->nr - offset;
+       unsigned int nr;
        struct strbuf buf = STRBUF_INIT;
-       struct string_list relevant_entries = STRING_LIST_INIT_NODUP;
        int i;
 
-       /*
-        * We want to sort the last (versions->nr-offset) entries in versions.
-        * Do so by abusing the string_list API a bit: make another string_list
-        * that contains just those entries and then sort them.
-        *
-        * We won't use relevant_entries again and will let it just pop off the
-        * stack, so there won't be allocation worries or anything.
-        */
-       relevant_entries.items = versions->items + offset;
-       relevant_entries.nr = versions->nr - offset;
-       QSORT(relevant_entries.items, relevant_entries.nr, tree_entry_order);
+       assert(offset <= versions->nr);
+       nr = versions->nr - offset;
+       if (versions->nr)
+               /* No need for STABLE_QSORT -- filenames must be unique */
+               QSORT(versions->items + offset, nr, tree_entry_order);
 
        /* Pre-allocate some space in buf */
        extra = hash_size + 8; /* 8: 6 for mode, 1 for space, 1 for NUL char */
@@ -2434,7 +3392,7 @@ static void write_tree(struct object_id *result_oid,
        }
 
        /* Write this object file out, and record in result_oid */
-       write_object_file(buf.buf, buf.len, tree_type, result_oid);
+       write_object_file(buf.buf, buf.len, OBJ_TREE, result_oid);
        strbuf_release(&buf);
 }
 
@@ -2670,7 +3628,7 @@ static void process_entry(struct merge_options *opt,
                        if (ci->filemask & (1 << i))
                                continue;
                        ci->stages[i].mode = 0;
-                       oidcpy(&ci->stages[i].oid, &null_oid);
+                       oidcpy(&ci->stages[i].oid, null_oid());
                }
        } else if (ci->df_conflict && ci->merged.result.mode != 0) {
                /*
@@ -2702,7 +3660,8 @@ static void process_entry(struct merge_options *opt,
                 * the directory to remain here, so we need to move this
                 * path to some new location.
                 */
-               CALLOC_ARRAY(new_ci, 1);
+               new_ci = mem_pool_calloc(&opt->priv->pool, 1, sizeof(*new_ci));
+
                /* We don't really want new_ci->merged.result copied, but it'll
                 * be overwritten below so it doesn't matter.  We also don't
                 * want any directory mode/oid values copied, but we'll zero
@@ -2716,7 +3675,7 @@ static void process_entry(struct merge_options *opt,
                                continue;
                        /* zero out any entries related to directories */
                        new_ci->stages[i].mode = 0;
-                       oidcpy(&new_ci->stages[i].oid, &null_oid);
+                       oidcpy(&new_ci->stages[i].oid, null_oid());
                }
 
                /*
@@ -2727,7 +3686,7 @@ static void process_entry(struct merge_options *opt,
                 */
                df_file_index = (ci->dirmask & (1 << 1)) ? 2 : 1;
                branch = (df_file_index == 1) ? opt->branch1 : opt->branch2;
-               path = unique_path(&opt->priv->paths, path, branch);
+               path = unique_path(opt, path, branch);
                strmap_put(&opt->priv->paths, path, new_ci);
 
                path_msg(opt, path, 0,
@@ -2755,7 +3714,7 @@ static void process_entry(struct merge_options *opt,
         *       above.
         */
        if (ci->match_mask) {
-               ci->merged.clean = 1;
+               ci->merged.clean = !ci->df_conflict && !ci->path_conflict;
                if (ci->match_mask == 6) {
                        /* stages[1] == stages[2] */
                        ci->merged.result.mode = ci->stages[1].mode;
@@ -2767,6 +3726,8 @@ static void process_entry(struct merge_options *opt,
 
                        ci->merged.result.mode = ci->stages[side].mode;
                        ci->merged.is_null = !ci->merged.result.mode;
+                       if (ci->merged.is_null)
+                               ci->merged.clean = 1;
                        oidcpy(&ci->merged.result.oid, &ci->stages[side].oid);
 
                        assert(othermask == 2 || othermask == 4);
@@ -2792,7 +3753,8 @@ static void process_entry(struct merge_options *opt,
                        const char *a_path = NULL, *b_path = NULL;
                        int rename_a = 0, rename_b = 0;
 
-                       new_ci = xmalloc(sizeof(*new_ci));
+                       new_ci = mem_pool_alloc(&opt->priv->pool,
+                                               sizeof(*new_ci));
 
                        if (S_ISREG(a_mode))
                                rename_a = 1;
@@ -2803,12 +3765,21 @@ static void process_entry(struct merge_options *opt,
                                rename_b = 1;
                        }
 
-                       path_msg(opt, path, 0,
-                                _("CONFLICT (distinct types): %s had different "
-                                  "types on each side; renamed %s of them so "
-                                  "each can be recorded somewhere."),
-                                path,
-                                (rename_a && rename_b) ? _("both") : _("one"));
+                       if (rename_a && rename_b) {
+                               path_msg(opt, path, 0,
+                                        _("CONFLICT (distinct types): %s had "
+                                          "different types on each side; "
+                                          "renamed both of them so each can "
+                                          "be recorded somewhere."),
+                                        path);
+                       } else {
+                               path_msg(opt, path, 0,
+                                        _("CONFLICT (distinct types): %s had "
+                                          "different types on each side; "
+                                          "renamed one of them so each can be "
+                                          "recorded somewhere."),
+                                        path);
+                       }
 
                        ci->merged.clean = 0;
                        memcpy(new_ci, ci, sizeof(*new_ci));
@@ -2817,11 +3788,11 @@ static void process_entry(struct merge_options *opt,
                        new_ci->merged.result.mode = ci->stages[2].mode;
                        oidcpy(&new_ci->merged.result.oid, &ci->stages[2].oid);
                        new_ci->stages[1].mode = 0;
-                       oidcpy(&new_ci->stages[1].oid, &null_oid);
+                       oidcpy(&new_ci->stages[1].oid, null_oid());
                        new_ci->filemask = 5;
                        if ((S_IFMT & b_mode) != (S_IFMT & o_mode)) {
                                new_ci->stages[0].mode = 0;
-                               oidcpy(&new_ci->stages[0].oid, &null_oid);
+                               oidcpy(&new_ci->stages[0].oid, null_oid());
                                new_ci->filemask = 4;
                        }
 
@@ -2829,40 +3800,29 @@ static void process_entry(struct merge_options *opt,
                        ci->merged.result.mode = ci->stages[1].mode;
                        oidcpy(&ci->merged.result.oid, &ci->stages[1].oid);
                        ci->stages[2].mode = 0;
-                       oidcpy(&ci->stages[2].oid, &null_oid);
+                       oidcpy(&ci->stages[2].oid, null_oid());
                        ci->filemask = 3;
                        if ((S_IFMT & a_mode) != (S_IFMT & o_mode)) {
                                ci->stages[0].mode = 0;
-                               oidcpy(&ci->stages[0].oid, &null_oid);
+                               oidcpy(&ci->stages[0].oid, null_oid());
                                ci->filemask = 2;
                        }
 
                        /* Insert entries into opt->priv_paths */
                        assert(rename_a || rename_b);
                        if (rename_a) {
-                               a_path = unique_path(&opt->priv->paths,
-                                                    path, opt->branch1);
+                               a_path = unique_path(opt, path, opt->branch1);
                                strmap_put(&opt->priv->paths, a_path, ci);
                        }
 
                        if (rename_b)
-                               b_path = unique_path(&opt->priv->paths,
-                                                    path, opt->branch2);
+                               b_path = unique_path(opt, path, opt->branch2);
                        else
                                b_path = path;
                        strmap_put(&opt->priv->paths, b_path, new_ci);
 
-                       if (rename_a && rename_b) {
+                       if (rename_a && rename_b)
                                strmap_remove(&opt->priv->paths, path, 0);
-                               /*
-                                * We removed path from opt->priv->paths.  path
-                                * will also eventually need to be freed, but
-                                * it may still be used by e.g.  ci->pathnames.
-                                * So, store it in another string-list for now.
-                                */
-                               string_list_append(&opt->priv->paths_to_free,
-                                                  path);
-                       }
 
                        /*
                         * Do special handling for b_path since process_entry()
@@ -2925,8 +3885,27 @@ static void process_entry(struct merge_options *opt,
                modify_branch = (side == 1) ? opt->branch1 : opt->branch2;
                delete_branch = (side == 1) ? opt->branch2 : opt->branch1;
 
-               if (ci->path_conflict &&
-                   oideq(&ci->stages[0].oid, &ci->stages[side].oid)) {
+               if (opt->renormalize &&
+                   blob_unchanged(opt, &ci->stages[0], &ci->stages[side],
+                                  path)) {
+                       if (!ci->path_conflict) {
+                               /*
+                                * Blob unchanged after renormalization, so
+                                * there's no modify/delete conflict after all;
+                                * we can just remove the file.
+                                */
+                               ci->merged.is_null = 1;
+                               ci->merged.clean = 1;
+                                /*
+                                 * file goes away => even if there was a
+                                 * directory/file conflict there isn't one now.
+                                 */
+                               ci->df_conflict = 0;
+                       } else {
+                               /* rename/delete, so conflict remains */
+                       }
+               } else if (ci->path_conflict &&
+                          oideq(&ci->stages[0].oid, &ci->stages[side].oid)) {
                        /*
                         * This came from a rename/delete; no action to take,
                         * but avoid printing "modify/delete" conflict notice
@@ -2950,7 +3929,8 @@ static void process_entry(struct merge_options *opt,
                /* Deleted on both sides */
                ci->merged.is_null = 1;
                ci->merged.result.mode = 0;
-               oidcpy(&ci->merged.result.oid, &null_oid);
+               oidcpy(&ci->merged.result.oid, null_oid());
+               assert(!ci->df_conflict);
                ci->merged.clean = !ci->path_conflict;
        }
 
@@ -2961,9 +3941,59 @@ static void process_entry(struct merge_options *opt,
         */
        if (!ci->merged.clean)
                strmap_put(&opt->priv->conflicted, path, ci);
+
+       /* Record metadata for ci->merged in dir_metadata */
        record_entry_for_tree(dir_metadata, path, &ci->merged);
 }
 
+static void prefetch_for_content_merges(struct merge_options *opt,
+                                       struct string_list *plist)
+{
+       struct string_list_item *e;
+       struct oid_array to_fetch = OID_ARRAY_INIT;
+
+       if (opt->repo != the_repository || !has_promisor_remote())
+               return;
+
+       for (e = &plist->items[plist->nr-1]; e >= plist->items; --e) {
+               /* char *path = e->string; */
+               struct conflict_info *ci = e->util;
+               int i;
+
+               /* Ignore clean entries */
+               if (ci->merged.clean)
+                       continue;
+
+               /* Ignore entries that don't need a content merge */
+               if (ci->match_mask || ci->filemask < 6 ||
+                   !S_ISREG(ci->stages[1].mode) ||
+                   !S_ISREG(ci->stages[2].mode) ||
+                   oideq(&ci->stages[1].oid, &ci->stages[2].oid))
+                       continue;
+
+               /* Also don't need content merge if base matches either side */
+               if (ci->filemask == 7 &&
+                   S_ISREG(ci->stages[0].mode) &&
+                   (oideq(&ci->stages[0].oid, &ci->stages[1].oid) ||
+                    oideq(&ci->stages[0].oid, &ci->stages[2].oid)))
+                       continue;
+
+               for (i = 0; i < 3; i++) {
+                       unsigned side_mask = (1 << i);
+                       struct version_info *vi = &ci->stages[i];
+
+                       if ((ci->filemask & side_mask) &&
+                           S_ISREG(vi->mode) &&
+                           oid_object_info_extended(opt->repo, &vi->oid, NULL,
+                                                    OBJECT_INFO_FOR_PREFETCH))
+                               oid_array_append(&to_fetch, &vi->oid);
+               }
+       }
+
+       promisor_remote_get_direct(opt->repo, to_fetch.oid, to_fetch.nr);
+       oid_array_clear(&to_fetch);
+}
+
 static void process_entries(struct merge_options *opt,
                            struct object_id *result_oid)
 {
@@ -2994,7 +4024,7 @@ static void process_entries(struct merge_options *opt,
        trace2_region_leave("merge", "plist copy", opt->repo);
 
        trace2_region_enter("merge", "plist special sort", opt->repo);
-       plist.cmp = string_list_df_name_compare;
+       plist.cmp = sort_dirs_next_to_their_children;
        string_list_sort(&plist);
        trace2_region_leave("merge", "plist special sort", opt->repo);
 
@@ -3010,6 +4040,7 @@ static void process_entries(struct merge_options *opt,
         * the way when it is time to process the file at the same path).
         */
        trace2_region_enter("merge", "processing", opt->repo);
+       prefetch_for_content_merges(opt, &plist);
        for (entry = &plist.items[plist.nr-1]; entry >= plist.items; --entry) {
                char *path = entry->string;
                /*
@@ -3033,8 +4064,8 @@ static void process_entries(struct merge_options *opt,
        trace2_region_enter("merge", "process_entries cleanup", opt->repo);
        if (dir_metadata.offsets.nr != 1 ||
            (uintptr_t)dir_metadata.offsets.items[0].util != 0) {
-               printf("dir_metadata.offsets.nr = %d (should be 1)\n",
-                      dir_metadata.offsets.nr);
+               printf("dir_metadata.offsets.nr = %"PRIuMAX" (should be 1)\n",
+                      (uintmax_t)dir_metadata.offsets.nr);
                printf("dir_metadata.offsets.items[0].util = %u (should be 0)\n",
                       (unsigned)(uintptr_t)dir_metadata.offsets.items[0].util);
                fflush(stdout);
@@ -3081,11 +4112,7 @@ static int checkout(struct merge_options *opt,
        unpack_opts.quiet = 0; /* FIXME: sequencer might want quiet? */
        unpack_opts.verbose_update = (opt->verbosity > 2);
        unpack_opts.fn = twoway_merge;
-       if (1/* FIXME: opts->overwrite_ignore*/) {
-               CALLOC_ARRAY(unpack_opts.dir, 1);
-               unpack_opts.dir->flags |= DIR_SHOW_IGNORED;
-               setup_standard_excludes(unpack_opts.dir);
-       }
+       unpack_opts.preserve_ignored = 0; /* FIXME: !opts->overwrite_ignore */
        parse_tree(prev);
        init_tree_desc(&trees[0], prev->buffer, prev->size);
        parse_tree(next);
@@ -3093,28 +4120,45 @@ static int checkout(struct merge_options *opt,
 
        ret = unpack_trees(2, trees, &unpack_opts);
        clear_unpack_trees_porcelain(&unpack_opts);
-       dir_clear(unpack_opts.dir);
-       FREE_AND_NULL(unpack_opts.dir);
        return ret;
 }
 
-static int record_conflicted_index_entries(struct merge_options *opt,
-                                          struct index_state *index,
-                                          struct strmap *paths,
-                                          struct strmap *conflicted)
+static int record_conflicted_index_entries(struct merge_options *opt)
 {
        struct hashmap_iter iter;
        struct strmap_entry *e;
+       struct index_state *index = opt->repo->index;
+       struct checkout state = CHECKOUT_INIT;
        int errs = 0;
        int original_cache_nr;
 
-       if (strmap_empty(conflicted))
+       if (strmap_empty(&opt->priv->conflicted))
                return 0;
 
+       /*
+        * We are in a conflicted state. These conflicts might be inside
+        * sparse-directory entries, so check if any entries are outside
+        * of the sparse-checkout cone preemptively.
+        *
+        * We set original_cache_nr below, but that might change if
+        * index_name_pos() calls ask for paths within sparse directories.
+        */
+       strmap_for_each_entry(&opt->priv->conflicted, &iter, e) {
+               if (!path_in_sparse_checkout(e->key, index)) {
+                       ensure_full_index(index);
+                       break;
+               }
+       }
+
+       /* If any entries have skip_worktree set, we'll have to check 'em out */
+       state.force = 1;
+       state.quiet = 1;
+       state.refresh_cache = 1;
+       state.istate = index;
        original_cache_nr = index->cache_nr;
 
-       /* Put every entry from paths into plist, then sort */
-       strmap_for_each_entry(conflicted, &iter, e) {
+       /* Append every entry from conflicted into index, then sort */
+       strmap_for_each_entry(&opt->priv->conflicted, &iter, e) {
                const char *path = e->key;
                struct conflict_info *ci = e->value;
                int pos;
@@ -3155,9 +4199,22 @@ static int record_conflicted_index_entries(struct merge_options *opt,
                         * the higher order stages.  Thus, we need override
                         * the CE_SKIP_WORKTREE bit and manually write those
                         * files to the working disk here.
-                        *
-                        * TODO: Implement this CE_SKIP_WORKTREE fixup.
                         */
+                       if (ce_skip_worktree(ce)) {
+                               struct stat st;
+
+                               if (!lstat(path, &st)) {
+                                       char *new_name = unique_path(opt,
+                                                                    path,
+                                                                    "cruft");
+
+                                       path_msg(opt, path, 1,
+                                                _("Note: %s not up to date and in way of checking out conflicted version; old copy renamed to %s"),
+                                                path, new_name);
+                                       errs |= rename(path, new_name);
+                               }
+                               errs |= checkout_entry(ce, &state, NULL, NULL);
+                       }
 
                        /*
                         * Mark this cache entry for removal and instead add
@@ -3189,6 +4246,11 @@ static int record_conflicted_index_entries(struct merge_options *opt,
         * entries we added to the end into their right locations.
         */
        remove_marked_cache_entries(index, 1);
+       /*
+        * No need for STABLE_QSORT -- cmp_cache_name_compare sorts primarily
+        * on filename and secondarily on stage, and (name, stage #) are a
+        * unique tuple.
+        */
        QSORT(index->cache, index->cache_nr, cmp_cache_name_compare);
 
        return errs;
@@ -3202,7 +4264,8 @@ void merge_switch_to_result(struct merge_options *opt,
 {
        assert(opt->priv == NULL);
        if (result->clean >= 0 && update_worktree_and_index) {
-               struct merge_options_internal *opti = result->priv;
+               const char *filename;
+               FILE *fp;
 
                trace2_region_enter("merge", "checkout", opt->repo);
                if (checkout(opt, head, result->tree)) {
@@ -3213,14 +4276,22 @@ void merge_switch_to_result(struct merge_options *opt,
                trace2_region_leave("merge", "checkout", opt->repo);
 
                trace2_region_enter("merge", "record_conflicted", opt->repo);
-               if (record_conflicted_index_entries(opt, opt->repo->index,
-                                                   &opti->paths,
-                                                   &opti->conflicted)) {
+               opt->priv = result->priv;
+               if (record_conflicted_index_entries(opt)) {
                        /* failure to function */
+                       opt->priv = NULL;
                        result->clean = -1;
                        return;
                }
+               opt->priv = NULL;
                trace2_region_leave("merge", "record_conflicted", opt->repo);
+
+               trace2_region_enter("merge", "write_auto_merge", opt->repo);
+               filename = git_path_auto_merge(opt->repo);
+               fp = xfopen(filename, "w");
+               fprintf(fp, "%s\n", oid_to_hex(&result->tree->object.oid));
+               fclose(fp);
+               trace2_region_leave("merge", "write_auto_merge", opt->repo);
        }
 
        if (display_update_msgs) {
@@ -3230,6 +4301,9 @@ void merge_switch_to_result(struct merge_options *opt,
                struct string_list olist = STRING_LIST_INIT_NODUP;
                int i;
 
+               if (opt->record_conflict_msgs_as_headers)
+                       BUG("Either display conflict messages or record them as headers, not both");
+
                trace2_region_enter("merge", "display messages", opt->repo);
 
                /* Hack to pre-allocate olist to the desired size */
@@ -3265,6 +4339,8 @@ void merge_finalize(struct merge_options *opt,
 {
        struct merge_options_internal *opti = result->priv;
 
+       if (opt->renormalize)
+               git_attr_set_direction(GIT_ATTR_CHECKIN);
        assert(opt->priv == NULL);
 
        clear_or_reinit_internal_opts(opti, 0);
@@ -3273,6 +4349,23 @@ void merge_finalize(struct merge_options *opt,
 
 /*** Function Grouping: helper functions for merge_incore_*() ***/
 
+static struct tree *shift_tree_object(struct repository *repo,
+                                     struct tree *one, struct tree *two,
+                                     const char *subtree_shift)
+{
+       struct object_id shifted;
+
+       if (!*subtree_shift) {
+               shift_tree(repo, &one->object.oid, &two->object.oid, &shifted, 0);
+       } else {
+               shift_tree_by(repo, &one->object.oid, &two->object.oid, &shifted,
+                             subtree_shift);
+       }
+       if (oideq(&two->object.oid, &shifted))
+               return two;
+       return lookup_tree(repo, &shifted);
+}
+
 static inline void set_commit_tree(struct commit *c, struct tree *t)
 {
        c->maybe_tree = t;
@@ -3294,6 +4387,7 @@ static void merge_start(struct merge_options *opt, struct merge_result *result)
 {
        struct rename_info *renames;
        int i;
+       struct mem_pool *pool = NULL;
 
        /* Sanity checks on opt */
        trace2_region_enter("merge", "sanity checks", opt->repo);
@@ -3311,6 +4405,9 @@ static void merge_start(struct merge_options *opt, struct merge_result *result)
        assert(opt->recursive_variant >= MERGE_VARIANT_NORMAL &&
               opt->recursive_variant <= MERGE_VARIANT_THEIRS);
 
+       if (opt->msg_header_prefix)
+               assert(opt->record_conflict_msgs_as_headers);
+
        /*
         * detect_renames, verbosity, buffer_output, and obuf are ignored
         * fields that were used by "recursive" rather than "ort" -- but
@@ -3323,6 +4420,10 @@ static void merge_start(struct merge_options *opt, struct merge_result *result)
        assert(opt->obuf.len == 0);
 
        assert(opt->priv == NULL);
+       if (result->_properly_initialized != 0 &&
+           result->_properly_initialized != RESULT_INITIALIZED)
+               BUG("struct merge_result passed to merge_incore_*recursive() must be zeroed or filled with values from a previous run");
+       assert(!!result->priv == !!result->_properly_initialized);
        if (result->priv) {
                opt->priv = result->priv;
                result->priv = NULL;
@@ -3340,6 +4441,10 @@ static void merge_start(struct merge_options *opt, struct merge_result *result)
        /* Default to histogram diff.  Actually, just hardcode it...for now. */
        opt->xdl_opts = DIFF_WITH_ALG(opt, HISTOGRAM_DIFF);
 
+       /* Handle attr direction stuff for renormalization */
+       if (opt->renormalize)
+               git_attr_set_direction(GIT_ATTR_CHECKOUT);
+
        /* Initialization of opt->priv, our internal merge data */
        trace2_region_enter("merge", "allocate/init", opt->repo);
        if (opt->priv) {
@@ -3351,27 +4456,51 @@ static void merge_start(struct merge_options *opt, struct merge_result *result)
 
        /* Initialization of various renames fields */
        renames = &opt->priv->renames;
+       mem_pool_init(&opt->priv->pool, 0);
+       pool = &opt->priv->pool;
        for (i = MERGE_SIDE1; i <= MERGE_SIDE2; i++) {
-               strset_init_with_options(&renames->dirs_removed[i],
-                                        NULL, 0);
+               strintmap_init_with_options(&renames->dirs_removed[i],
+                                           NOT_RELEVANT, pool, 0);
                strmap_init_with_options(&renames->dir_rename_count[i],
                                         NULL, 1);
                strmap_init_with_options(&renames->dir_renames[i],
                                         NULL, 0);
+               /*
+                * relevant_sources uses -1 for the default, because we need
+                * to be able to distinguish not-in-strintmap from valid
+                * relevant_source values from enum file_rename_relevance.
+                * In particular, possibly_cache_new_pair() expects a negative
+                * value for not-found entries.
+                */
+               strintmap_init_with_options(&renames->relevant_sources[i],
+                                           -1 /* explicitly invalid */,
+                                           pool, 0);
+               strmap_init_with_options(&renames->cached_pairs[i],
+                                        NULL, 1);
+               strset_init_with_options(&renames->cached_irrelevant[i],
+                                        NULL, 1);
+               strset_init_with_options(&renames->cached_target_names[i],
+                                        NULL, 0);
+       }
+       for (i = MERGE_SIDE1; i <= MERGE_SIDE2; i++) {
+               strintmap_init_with_options(&renames->deferred[i].possible_trivial_merges,
+                                           0, pool, 0);
+               strset_init_with_options(&renames->deferred[i].target_dirs,
+                                        pool, 1);
+               renames->deferred[i].trivial_merges_okay = 1; /* 1 == maybe */
        }
 
        /*
         * Although we initialize opt->priv->paths with strdup_strings=0,
         * that's just to avoid making yet another copy of an allocated
         * string.  Putting the entry into paths means we are taking
-        * ownership, so we will later free it.  paths_to_free is similar.
+        * ownership, so we will later free it.
         *
         * In contrast, conflicted just has a subset of keys from paths, so
         * we don't want to free those (it'd be a duplicate free).
         */
-       strmap_init_with_options(&opt->priv->paths, NULL, 0);
-       strmap_init_with_options(&opt->priv->conflicted, NULL, 0);
-       string_list_init(&opt->priv->paths_to_free, 0);
+       strmap_init_with_options(&opt->priv->paths, pool, 0);
+       strmap_init_with_options(&opt->priv->conflicted, pool, 0);
 
        /*
         * keys & strbufs in output will sometimes need to outlive "paths",
@@ -3383,6 +4512,50 @@ static void merge_start(struct merge_options *opt, struct merge_result *result)
        trace2_region_leave("merge", "allocate/init", opt->repo);
 }
 
+static void merge_check_renames_reusable(struct merge_options *opt,
+                                        struct merge_result *result,
+                                        struct tree *merge_base,
+                                        struct tree *side1,
+                                        struct tree *side2)
+{
+       struct rename_info *renames;
+       struct tree **merge_trees;
+       struct merge_options_internal *opti = result->priv;
+
+       if (!opti)
+               return;
+
+       renames = &opti->renames;
+       merge_trees = renames->merge_trees;
+
+       /*
+        * Handle case where previous merge operation did not want cache to
+        * take effect, e.g. because rename/rename(1to1) makes it invalid.
+        */
+       if (!merge_trees[0]) {
+               assert(!merge_trees[0] && !merge_trees[1] && !merge_trees[2]);
+               renames->cached_pairs_valid_side = 0; /* neither side valid */
+               return;
+       }
+
+       /*
+        * Handle other cases; note that merge_trees[0..2] will only
+        * be NULL if opti is, or if all three were manually set to
+        * NULL by e.g. rename/rename(1to1) handling.
+        */
+       assert(merge_trees[0] && merge_trees[1] && merge_trees[2]);
+
+       /* Check if we meet a condition for re-using cached_pairs */
+       if (oideq(&merge_base->object.oid, &merge_trees[2]->object.oid) &&
+           oideq(&side1->object.oid, &result->tree->object.oid))
+               renames->cached_pairs_valid_side = MERGE_SIDE1;
+       else if (oideq(&merge_base->object.oid, &merge_trees[1]->object.oid) &&
+                oideq(&side2->object.oid, &result->tree->object.oid))
+               renames->cached_pairs_valid_side = MERGE_SIDE2;
+       else
+               renames->cached_pairs_valid_side = 0; /* neither side valid */
+}
+
 /*** Function Grouping: merge_incore_*() and their internal variants ***/
 
 /*
@@ -3396,6 +4569,14 @@ static void merge_ort_nonrecursive_internal(struct merge_options *opt,
 {
        struct object_id working_tree_oid;
 
+       if (opt->subtree_shift) {
+               side2 = shift_tree_object(opt->repo, side1, side2,
+                                         opt->subtree_shift);
+               merge_base = shift_tree_object(opt->repo, side1, merge_base,
+                                              opt->subtree_shift);
+       }
+
+redo:
        trace2_region_enter("merge", "collect_merge_info", opt->repo);
        if (collect_merge_info(opt, merge_base, side1, side2) != 0) {
                /*
@@ -3415,17 +4596,25 @@ static void merge_ort_nonrecursive_internal(struct merge_options *opt,
        result->clean = detect_and_process_renames(opt, merge_base,
                                                   side1, side2);
        trace2_region_leave("merge", "renames", opt->repo);
+       if (opt->priv->renames.redo_after_renames == 2) {
+               trace2_region_enter("merge", "reset_maps", opt->repo);
+               clear_or_reinit_internal_opts(opt->priv, 1);
+               trace2_region_leave("merge", "reset_maps", opt->repo);
+               goto redo;
+       }
 
        trace2_region_enter("merge", "process_entries", opt->repo);
        process_entries(opt, &working_tree_oid);
        trace2_region_leave("merge", "process_entries", opt->repo);
 
        /* Set return values */
+       result->path_messages = &opt->priv->output;
        result->tree = parse_tree_indirect(&working_tree_oid);
        /* existence of conflicted entries implies unclean */
        result->clean &= strmap_empty(&opt->priv->conflicted);
        if (!opt->priv->call_depth) {
                result->priv = opt->priv;
+               result->_properly_initialized = RESULT_INITIALIZED;
                opt->priv = NULL;
        }
 }
@@ -3439,7 +4628,7 @@ static void merge_ort_internal(struct merge_options *opt,
                               struct commit *h2,
                               struct merge_result *result)
 {
-       struct commit_list *iter;
+       struct commit *next;
        struct commit *merged_merge_bases;
        const char *ancestor_name;
        struct strbuf merge_base_abbrev = STRBUF_INIT;
@@ -3451,7 +4640,7 @@ static void merge_ort_internal(struct merge_options *opt,
        }
 
        merged_merge_bases = pop_commit(&merge_bases);
-       if (merged_merge_bases == NULL) {
+       if (!merged_merge_bases) {
                /* if there is no common ancestor, use an empty tree */
                struct tree *tree;
 
@@ -3468,7 +4657,8 @@ static void merge_ort_internal(struct merge_options *opt,
                ancestor_name = merge_base_abbrev.buf;
        }
 
-       for (iter = merge_bases; iter; iter = iter->next) {
+       for (next = pop_commit(&merge_bases); next;
+            next = pop_commit(&merge_bases)) {
                const char *saved_b1, *saved_b2;
                struct commit *prev = merged_merge_bases;
 
@@ -3485,7 +4675,7 @@ static void merge_ort_internal(struct merge_options *opt,
                saved_b2 = opt->branch2;
                opt->branch1 = "Temporary merge branch 1";
                opt->branch2 = "Temporary merge branch 2";
-               merge_ort_internal(opt, NULL, prev, iter->item, result);
+               merge_ort_internal(opt, NULL, prev, next, result);
                if (result->clean < 0)
                        return;
                opt->branch1 = saved_b1;
@@ -3496,8 +4686,7 @@ static void merge_ort_internal(struct merge_options *opt,
                                                         result->tree,
                                                         "merged tree");
                commit_list_insert(prev, &merged_merge_bases->parents);
-               commit_list_insert(iter->item,
-                                  &merged_merge_bases->parents->next);
+               commit_list_insert(next, &merged_merge_bases->parents->next);
 
                clear_or_reinit_internal_opts(opt->priv, 1);
        }
@@ -3523,7 +4712,16 @@ void merge_incore_nonrecursive(struct merge_options *opt,
 
        trace2_region_enter("merge", "merge_start", opt->repo);
        assert(opt->ancestor != NULL);
+       merge_check_renames_reusable(opt, result, merge_base, side1, side2);
        merge_start(opt, result);
+       /*
+        * Record the trees used in this merge, so if there's a next merge in
+        * a cherry-pick or rebase sequence it might be able to take advantage
+        * of the cached_pairs in that next merge.
+        */
+       opt->priv->renames.merge_trees[0] = merge_base;
+       opt->priv->renames.merge_trees[1] = side1;
+       opt->priv->renames.merge_trees[2] = side2;
        trace2_region_leave("merge", "merge_start", opt->repo);
 
        merge_ort_nonrecursive_internal(opt, merge_base, side1, side2, result);