]> git.ipfire.org Git - thirdparty/git.git/blobdiff - merge-ort.c
Merge branch 'jk/apply-binary-hunk-parsing-fix'
[thirdparty/git.git] / merge-ort.c
index ec0c590421141a83f907039fbb97160598e36fe3..515dc39b7f69a95acf4b8c7a051b4382f538ee5d 100644 (file)
@@ -62,6 +62,53 @@ struct traversal_callback_data {
        struct name_entry names[3];
 };
 
+struct deferred_traversal_data {
+       /*
+        * possible_trivial_merges: directories to be explored only when needed
+        *
+        * possible_trivial_merges is a map of directory names to
+        * dir_rename_mask.  When we detect that a directory is unchanged on
+        * one side, we can sometimes resolve the directory without recursing
+        * into it.  Renames are the only things that can prevent such an
+        * optimization.  However, for rename sources:
+        *   - If no parent directory needed directory rename detection, then
+        *     no path under such a directory can be a relevant_source.
+        * and for rename destinations:
+        *   - If no cached rename has a target path under the directory AND
+        *   - If there are no unpaired relevant_sources elsewhere in the
+        *     repository
+        * then we don't need any path under this directory for a rename
+        * destination.  The only way to know the last item above is to defer
+        * handling such directories until the end of collect_merge_info(),
+        * in handle_deferred_entries().
+        *
+        * For each we store dir_rename_mask, since that's the only bit of
+        * information we need, other than the path, to resume the recursive
+        * traversal.
+        */
+       struct strintmap possible_trivial_merges;
+
+       /*
+        * trivial_merges_okay: if trivial directory merges are okay
+        *
+        * See possible_trivial_merges above.  The "no unpaired
+        * relevant_sources elsewhere in the repository" is a single boolean
+        * per merge side, which we store here.  Note that while 0 means no,
+        * 1 only means "maybe" rather than "yes"; we optimistically set it
+        * to 1 initially and only clear when we determine it is unsafe to
+        * do trivial directory merges.
+        */
+       unsigned trivial_merges_okay;
+
+       /*
+        * target_dirs: ancestor directories of rename targets
+        *
+        * target_dirs contains all directory names that are an ancestor of
+        * any rename destination.
+        */
+       struct strset target_dirs;
+};
+
 struct rename_info {
        /*
         * All variables that are arrays of size 3 correspond to data tracked
@@ -119,6 +166,8 @@ struct rename_info {
         */
        struct strintmap relevant_sources[3];
 
+       struct deferred_traversal_data deferred[3];
+
        /*
         * dir_rename_mask:
         *   0: optimization removing unmodified potential rename source okay
@@ -164,6 +213,7 @@ struct rename_info {
         *   MERGE_SIDE2: cached data from side2 can be reused
         *   MERGE_SIDE1: cached data from side1 can be reused
         *   0:           no cached data can be reused
+        *   -1:          See redo_after_renames; both sides can be reused.
         */
        int cached_pairs_valid_side;
 
@@ -209,6 +259,28 @@ struct rename_info {
         */
        struct strset cached_irrelevant[3];
 
+       /*
+        * redo_after_renames: optimization flag for "restarting" the merge
+        *
+        * Sometimes it pays to detect renames, cache them, and then
+        * restart the merge operation from the beginning.  The reason for
+        * this is that when we know where all the renames are, we know
+        * whether a certain directory has any paths under it affected --
+        * and if a directory is not affected then it permits us to do
+        * trivial tree merging in more cases.  Doing trivial tree merging
+        * prevents the need to run process_entry() on every path
+        * underneath trees that can be trivially merged, and
+        * process_entry() is more expensive than collect_merge_info() --
+        * plus, the second collect_merge_info() will be much faster since
+        * it doesn't have to recurse into the relevant trees.
+        *
+        * Values for this flag:
+        *   0 = don't bother, not worth it (or conditions not yet checked)
+        *   1 = conditions for optimization met, optimization worthwhile
+        *   2 = we already did it (don't restart merge yet again)
+        */
+       unsigned redo_after_renames;
+
        /*
         * needed_limit: value needed for inexact rename detection to run
         *
@@ -231,8 +303,6 @@ struct merge_options_internal {
         *   * these keys serve to intern all the path strings, which allows
         *     us to do pointer comparison on directory names instead of
         *     strcmp; we just have to be careful to use the interned strings.
-        *     (Technically paths_to_free may track some strings that were
-        *      removed from froms paths.)
         *
         * The values of paths:
         *   * either a pointer to a merged_info, or a conflict_info struct
@@ -268,14 +338,14 @@ struct merge_options_internal {
        struct strmap conflicted;
 
        /*
-        * paths_to_free: additional list of strings to free
+        * pool: memory pool for fast allocation/deallocation
         *
-        * If keys are removed from "paths", they are added to paths_to_free
-        * to ensure they are later freed.  We avoid free'ing immediately since
-        * other places (e.g. conflict_info.pathnames[]) may still be
-        * referencing these paths.
+        * We allocate room for lots of filenames and auxiliary data
+        * structures in merge_options_internal, and it tends to all be
+        * freed together too.  Using a memory pool for these provides a
+        * nice speedup.
         */
-       struct string_list paths_to_free;
+       struct mem_pool pool;
 
        /*
         * output: special messages and conflict notices for various paths
@@ -447,60 +517,47 @@ static void clear_or_reinit_internal_opts(struct merge_options_internal *opti,
 {
        struct rename_info *renames = &opti->renames;
        int i;
-       void (*strmap_func)(struct strmap *, int) =
+       void (*strmap_clear_func)(struct strmap *, int) =
                reinitialize ? strmap_partial_clear : strmap_clear;
-       void (*strintmap_func)(struct strintmap *) =
+       void (*strintmap_clear_func)(struct strintmap *) =
                reinitialize ? strintmap_partial_clear : strintmap_clear;
-       void (*strset_func)(struct strset *) =
+       void (*strset_clear_func)(struct strset *) =
                reinitialize ? strset_partial_clear : strset_clear;
 
-       /*
-        * We marked opti->paths with strdup_strings = 0, so that we
-        * wouldn't have to make another copy of the fullpath created by
-        * make_traverse_path from setup_path_info().  But, now that we've
-        * used it and have no other references to these strings, it is time
-        * to deallocate them.
-        */
-       free_strmap_strings(&opti->paths);
-       strmap_func(&opti->paths, 1);
+       strmap_clear_func(&opti->paths, 0);
 
        /*
         * All keys and values in opti->conflicted are a subset of those in
         * opti->paths.  We don't want to deallocate anything twice, so we
         * don't free the keys and we pass 0 for free_values.
         */
-       strmap_func(&opti->conflicted, 0);
-
-       /*
-        * opti->paths_to_free is similar to opti->paths; we created it with
-        * strdup_strings = 0 to avoid making _another_ copy of the fullpath
-        * but now that we've used it and have no other references to these
-        * strings, it is time to deallocate them.  We do so by temporarily
-        * setting strdup_strings to 1.
-        */
-       opti->paths_to_free.strdup_strings = 1;
-       string_list_clear(&opti->paths_to_free, 0);
-       opti->paths_to_free.strdup_strings = 0;
+       strmap_clear_func(&opti->conflicted, 0);
 
        if (opti->attr_index.cache_nr) /* true iff opt->renormalize */
                discard_index(&opti->attr_index);
 
        /* Free memory used by various renames maps */
        for (i = MERGE_SIDE1; i <= MERGE_SIDE2; ++i) {
-               strintmap_func(&renames->dirs_removed[i]);
-               strmap_func(&renames->dir_renames[i], 0);
-               strintmap_func(&renames->relevant_sources[i]);
+               strintmap_clear_func(&renames->dirs_removed[i]);
+               strmap_clear_func(&renames->dir_renames[i], 0);
+               strintmap_clear_func(&renames->relevant_sources[i]);
                if (!reinitialize)
                        assert(renames->cached_pairs_valid_side == 0);
-               if (i != renames->cached_pairs_valid_side) {
-                       strset_func(&renames->cached_target_names[i]);
-                       strmap_func(&renames->cached_pairs[i], 1);
-                       strset_func(&renames->cached_irrelevant[i]);
+               if (i != renames->cached_pairs_valid_side &&
+                   -1 != renames->cached_pairs_valid_side) {
+                       strset_clear_func(&renames->cached_target_names[i]);
+                       strmap_clear_func(&renames->cached_pairs[i], 1);
+                       strset_clear_func(&renames->cached_irrelevant[i]);
                        partial_clear_dir_rename_count(&renames->dir_rename_count[i]);
                        if (!reinitialize)
                                strmap_clear(&renames->dir_rename_count[i], 1);
                }
        }
+       for (i = MERGE_SIDE1; i <= MERGE_SIDE2; ++i) {
+               strintmap_clear_func(&renames->deferred[i].possible_trivial_merges);
+               strset_clear_func(&renames->deferred[i].target_dirs);
+               renames->deferred[i].trivial_merges_okay = 1; /* 1 == maybe */
+       }
        renames->cached_pairs_valid_side = 0;
        renames->dir_rename_mask = 0;
 
@@ -525,6 +582,8 @@ static void clear_or_reinit_internal_opts(struct merge_options_internal *opti,
                strmap_clear(&opti->output, 0);
        }
 
+       mem_pool_discard(&opti->pool, 0);
+
        /* Clean out callback_data as well. */
        FREE_AND_NULL(renames->callback_data);
        renames->callback_data_nr = renames->callback_data_alloc = 0;
@@ -587,6 +646,36 @@ static void path_msg(struct merge_options *opt,
        strbuf_addch(sb, '\n');
 }
 
+static struct diff_filespec *pool_alloc_filespec(struct mem_pool *pool,
+                                                const char *path)
+{
+       /* Similar to alloc_filespec(), but allocate from pool and reuse path */
+       struct diff_filespec *spec;
+
+       spec = mem_pool_calloc(pool, 1, sizeof(*spec));
+       spec->path = (char*)path; /* spec won't modify it */
+
+       spec->count = 1;
+       spec->is_binary = -1;
+       return spec;
+}
+
+static struct diff_filepair *pool_diff_queue(struct mem_pool *pool,
+                                            struct diff_queue_struct *queue,
+                                            struct diff_filespec *one,
+                                            struct diff_filespec *two)
+{
+       /* Same code as diff_queue(), except allocate from pool */
+       struct diff_filepair *dp;
+
+       dp = mem_pool_calloc(pool, 1, sizeof(*dp));
+       dp->one = one;
+       dp->two = two;
+       if (queue)
+               diff_q(queue, dp);
+       return dp;
+}
+
 /* add a string to a strbuf, but converting "/" to "_" */
 static void add_flattened_path(struct strbuf *out, const char *s)
 {
@@ -715,8 +804,9 @@ static void setup_path_info(struct merge_options *opt,
        assert(!df_conflict || !resolved); /* df_conflict implies !resolved */
        assert(resolved == (merged_version != NULL));
 
-       mi = xcalloc(1, resolved ? sizeof(struct merged_info) :
-                                  sizeof(struct conflict_info));
+       mi = mem_pool_calloc(&opt->priv->pool, 1,
+                            resolved ? sizeof(struct merged_info) :
+                                       sizeof(struct conflict_info));
        mi->directory_name = current_dir_name;
        mi->basename_offset = current_dir_name_len;
        mi->clean = !!resolved;
@@ -813,11 +903,11 @@ static void add_pair(struct merge_options *opt,
                        return;
        }
 
-       one = alloc_filespec(pathname);
-       two = alloc_filespec(pathname);
+       one = pool_alloc_filespec(&opt->priv->pool, pathname);
+       two = pool_alloc_filespec(&opt->priv->pool, pathname);
        fill_filespec(is_add ? two : one,
                      &names[names_idx].oid, 1, names[names_idx].mode);
-       diff_queue(&renames->pairs[side], one, two);
+       pool_diff_queue(&opt->priv->pool, &renames->pairs[side], one, two);
 }
 
 static void collect_rename_info(struct merge_options *opt,
@@ -1008,7 +1098,7 @@ static int collect_merge_info_callback(int n,
        len = traverse_path_len(info, p->pathlen);
 
        /* +1 in both of the following lines to include the NUL byte */
-       fullpath = xmalloc(len + 1);
+       fullpath = mem_pool_alloc(&opt->priv->pool, len + 1);
        make_traverse_path(fullpath, len + 1, info, p->path, p->pathlen);
 
        /*
@@ -1019,20 +1109,67 @@ static int collect_merge_info_callback(int n,
        if (side1_matches_mbase && side2_matches_mbase) {
                /* mbase, side1, & side2 all match; use mbase as resolution */
                setup_path_info(opt, &pi, dirname, info->pathlen, fullpath,
-                               names, names+0, mbase_null, 0,
+                               names, names+0, mbase_null, 0 /* df_conflict */,
+                               filemask, dirmask, 1 /* resolved */);
+               return mask;
+       }
+
+       /*
+        * If the sides match, and all three paths are present and are
+        * files, then we can take either as the resolution.  We can't do
+        * this with trees, because there may be rename sources from the
+        * merge_base.
+        */
+       if (sides_match && filemask == 0x07) {
+               /* use side1 (== side2) version as resolution */
+               setup_path_info(opt, &pi, dirname, info->pathlen, fullpath,
+                               names, names+1, side1_null, 0,
                                filemask, dirmask, 1);
                return mask;
        }
 
        /*
-        * Gather additional information used in rename detection.
+        * If side1 matches mbase and all three paths are present and are
+        * files, then we can use side2 as the resolution.  We cannot
+        * necessarily do so this for trees, because there may be rename
+        * destinations within side2.
+        */
+       if (side1_matches_mbase && filemask == 0x07) {
+               /* use side2 version as resolution */
+               setup_path_info(opt, &pi, dirname, info->pathlen, fullpath,
+                               names, names+2, side2_null, 0,
+                               filemask, dirmask, 1);
+               return mask;
+       }
+
+       /* Similar to above but swapping sides 1 and 2 */
+       if (side2_matches_mbase && filemask == 0x07) {
+               /* use side1 version as resolution */
+               setup_path_info(opt, &pi, dirname, info->pathlen, fullpath,
+                               names, names+1, side1_null, 0,
+                               filemask, dirmask, 1);
+               return mask;
+       }
+
+       /*
+        * Sometimes we can tell that a source path need not be included in
+        * rename detection -- namely, whenever either
+        *    side1_matches_mbase && side2_null
+        * or
+        *    side2_matches_mbase && side1_null
+        * However, we call collect_rename_info() even in those cases,
+        * because exact renames are cheap and would let us remove both a
+        * source and destination path.  We'll cull the unneeded sources
+        * later.
         */
        collect_rename_info(opt, names, dirname, fullpath,
                            filemask, dirmask, match_mask);
 
        /*
-        * Record information about the path so we can resolve later in
-        * process_entries.
+        * None of the special cases above matched, so we have a
+        * provisional conflict.  (Rename detection might allow us to
+        * unconflict some more cases, but that comes later so all we can
+        * do now is record the different non-null file hashes.)
         */
        setup_path_info(opt, &pi, dirname, info->pathlen, fullpath,
                        names, NULL, 0, df_conflict, filemask, dirmask, 0);
@@ -1047,8 +1184,36 @@ static int collect_merge_info_callback(int n,
                struct tree_desc t[3];
                void *buf[3] = {NULL, NULL, NULL};
                const char *original_dir_name;
-               int i, ret;
+               int i, ret, side;
+
+               /*
+                * Check for whether we can avoid recursing due to one side
+                * matching the merge base.  The side that does NOT match is
+                * the one that might have a rename destination we need.
+                */
+               assert(!side1_matches_mbase || !side2_matches_mbase);
+               side = side1_matches_mbase ? MERGE_SIDE2 :
+                       side2_matches_mbase ? MERGE_SIDE1 : MERGE_BASE;
+               if (filemask == 0 && (dirmask == 2 || dirmask == 4)) {
+                       /*
+                        * Also defer recursing into new directories; set up a
+                        * few variables to let us do so.
+                        */
+                       ci->match_mask = (7 - dirmask);
+                       side = dirmask / 2;
+               }
+               if (renames->dir_rename_mask != 0x07 &&
+                   side != MERGE_BASE &&
+                   renames->deferred[side].trivial_merges_okay &&
+                   !strset_contains(&renames->deferred[side].target_dirs,
+                                    pi.string)) {
+                       strintmap_set(&renames->deferred[side].possible_trivial_merges,
+                                     pi.string, renames->dir_rename_mask);
+                       renames->dir_rename_mask = prev_dir_rename_mask;
+                       return mask;
+               }
 
+               /* We need to recurse */
                ci->match_mask &= filemask;
                newinfo = *info;
                newinfo.prev = info;
@@ -1102,6 +1267,192 @@ static int collect_merge_info_callback(int n,
        return mask;
 }
 
+static void resolve_trivial_directory_merge(struct conflict_info *ci, int side)
+{
+       VERIFY_CI(ci);
+       assert((side == 1 && ci->match_mask == 5) ||
+              (side == 2 && ci->match_mask == 3));
+       oidcpy(&ci->merged.result.oid, &ci->stages[side].oid);
+       ci->merged.result.mode = ci->stages[side].mode;
+       ci->merged.is_null = is_null_oid(&ci->stages[side].oid);
+       ci->match_mask = 0;
+       ci->merged.clean = 1; /* (ci->filemask == 0); */
+}
+
+static int handle_deferred_entries(struct merge_options *opt,
+                                  struct traverse_info *info)
+{
+       struct rename_info *renames = &opt->priv->renames;
+       struct hashmap_iter iter;
+       struct strmap_entry *entry;
+       int side, ret = 0;
+       int path_count_before, path_count_after = 0;
+
+       path_count_before = strmap_get_size(&opt->priv->paths);
+       for (side = MERGE_SIDE1; side <= MERGE_SIDE2; side++) {
+               unsigned optimization_okay = 1;
+               struct strintmap copy;
+
+               /* Loop over the set of paths we need to know rename info for */
+               strset_for_each_entry(&renames->relevant_sources[side],
+                                     &iter, entry) {
+                       char *rename_target, *dir, *dir_marker;
+                       struct strmap_entry *e;
+
+                       /*
+                        * If we don't know delete/rename info for this path,
+                        * then we need to recurse into all trees to get all
+                        * adds to make sure we have it.
+                        */
+                       if (strset_contains(&renames->cached_irrelevant[side],
+                                           entry->key))
+                               continue;
+                       e = strmap_get_entry(&renames->cached_pairs[side],
+                                            entry->key);
+                       if (!e) {
+                               optimization_okay = 0;
+                               break;
+                       }
+
+                       /* If this is a delete, we have enough info already */
+                       rename_target = e->value;
+                       if (!rename_target)
+                               continue;
+
+                       /* If we already walked the rename target, we're good */
+                       if (strmap_contains(&opt->priv->paths, rename_target))
+                               continue;
+
+                       /*
+                        * Otherwise, we need to get a list of directories that
+                        * will need to be recursed into to get this
+                        * rename_target.
+                        */
+                       dir = xstrdup(rename_target);
+                       while ((dir_marker = strrchr(dir, '/'))) {
+                               *dir_marker = '\0';
+                               if (strset_contains(&renames->deferred[side].target_dirs,
+                                                   dir))
+                                       break;
+                               strset_add(&renames->deferred[side].target_dirs,
+                                          dir);
+                       }
+                       free(dir);
+               }
+               renames->deferred[side].trivial_merges_okay = optimization_okay;
+               /*
+                * We need to recurse into any directories in
+                * possible_trivial_merges[side] found in target_dirs[side].
+                * But when we recurse, we may need to queue up some of the
+                * subdirectories for possible_trivial_merges[side].  Since
+                * we can't safely iterate through a hashmap while also adding
+                * entries, move the entries into 'copy', iterate over 'copy',
+                * and then we'll also iterate anything added into
+                * possible_trivial_merges[side] once this loop is done.
+                */
+               copy = renames->deferred[side].possible_trivial_merges;
+               strintmap_init_with_options(&renames->deferred[side].possible_trivial_merges,
+                                           0,
+                                           &opt->priv->pool,
+                                           0);
+               strintmap_for_each_entry(&copy, &iter, entry) {
+                       const char *path = entry->key;
+                       unsigned dir_rename_mask = (intptr_t)entry->value;
+                       struct conflict_info *ci;
+                       unsigned dirmask;
+                       struct tree_desc t[3];
+                       void *buf[3] = {NULL,};
+                       int i;
+
+                       ci = strmap_get(&opt->priv->paths, path);
+                       VERIFY_CI(ci);
+                       dirmask = ci->dirmask;
+
+                       if (optimization_okay &&
+                           !strset_contains(&renames->deferred[side].target_dirs,
+                                            path)) {
+                               resolve_trivial_directory_merge(ci, side);
+                               continue;
+                       }
+
+                       info->name = path;
+                       info->namelen = strlen(path);
+                       info->pathlen = info->namelen + 1;
+
+                       for (i = 0; i < 3; i++, dirmask >>= 1) {
+                               if (i == 1 && ci->match_mask == 3)
+                                       t[1] = t[0];
+                               else if (i == 2 && ci->match_mask == 5)
+                                       t[2] = t[0];
+                               else if (i == 2 && ci->match_mask == 6)
+                                       t[2] = t[1];
+                               else {
+                                       const struct object_id *oid = NULL;
+                                       if (dirmask & 1)
+                                               oid = &ci->stages[i].oid;
+                                       buf[i] = fill_tree_descriptor(opt->repo,
+                                                                     t+i, oid);
+                               }
+                       }
+
+                       ci->match_mask &= ci->filemask;
+                       opt->priv->current_dir_name = path;
+                       renames->dir_rename_mask = dir_rename_mask;
+                       if (renames->dir_rename_mask == 0 ||
+                           renames->dir_rename_mask == 0x07)
+                               ret = traverse_trees(NULL, 3, t, info);
+                       else
+                               ret = traverse_trees_wrapper(NULL, 3, t, info);
+
+                       for (i = MERGE_BASE; i <= MERGE_SIDE2; i++)
+                               free(buf[i]);
+
+                       if (ret < 0)
+                               return ret;
+               }
+               strintmap_clear(&copy);
+               strintmap_for_each_entry(&renames->deferred[side].possible_trivial_merges,
+                                        &iter, entry) {
+                       const char *path = entry->key;
+                       struct conflict_info *ci;
+
+                       ci = strmap_get(&opt->priv->paths, path);
+                       VERIFY_CI(ci);
+
+                       assert(renames->deferred[side].trivial_merges_okay &&
+                              !strset_contains(&renames->deferred[side].target_dirs,
+                                               path));
+                       resolve_trivial_directory_merge(ci, side);
+               }
+               if (!optimization_okay || path_count_after)
+                       path_count_after = strmap_get_size(&opt->priv->paths);
+       }
+       if (path_count_after) {
+               /*
+                * The choice of wanted_factor here does not affect
+                * correctness, only performance.  When the
+                *    path_count_after / path_count_before
+                * ratio is high, redoing after renames is a big
+                * performance boost.  I suspect that redoing is a wash
+                * somewhere near a value of 2, and below that redoing will
+                * slow things down.  I applied a fudge factor and picked
+                * 3; see the commit message when this was introduced for
+                * back of the envelope calculations for this ratio.
+                */
+               const int wanted_factor = 3;
+
+               /* We should only redo collect_merge_info one time */
+               assert(renames->redo_after_renames == 0);
+
+               if (path_count_after / path_count_before >= wanted_factor) {
+                       renames->redo_after_renames = 1;
+                       renames->cached_pairs_valid_side = -1;
+               }
+       } else if (renames->redo_after_renames == 2)
+               renames->redo_after_renames = 0;
+       return ret;
+}
+
 static int collect_merge_info(struct merge_options *opt,
                              struct tree *merge_base,
                              struct tree *side1,
@@ -1127,6 +1478,8 @@ static int collect_merge_info(struct merge_options *opt,
 
        trace2_region_enter("merge", "traverse_trees", opt->repo);
        ret = traverse_trees(NULL, 3, t, &info);
+       if (ret == 0)
+               ret = handle_deferred_entries(opt, &info);
        trace2_region_leave("merge", "traverse_trees", opt->repo);
 
        return ret;
@@ -1952,12 +2305,17 @@ static void apply_directory_rename_modifications(struct merge_options *opt,
        VERIFY_CI(ci);
 
        /* Find parent directories missing from opt->priv->paths */
-       cur_path = new_path;
+       cur_path = mem_pool_strdup(&opt->priv->pool, new_path);
+       free((char*)new_path);
+       new_path = (char *)cur_path;
+
        while (1) {
                /* Find the parent directory of cur_path */
                char *last_slash = strrchr(cur_path, '/');
                if (last_slash) {
-                       parent_name = xstrndup(cur_path, last_slash - cur_path);
+                       parent_name = mem_pool_strndup(&opt->priv->pool,
+                                                      cur_path,
+                                                      last_slash - cur_path);
                } else {
                        parent_name = opt->priv->toplevel_dir;
                        break;
@@ -1966,7 +2324,6 @@ static void apply_directory_rename_modifications(struct merge_options *opt,
                /* Look it up in opt->priv->paths */
                entry = strmap_get_entry(&opt->priv->paths, parent_name);
                if (entry) {
-                       free((char*)parent_name);
                        parent_name = entry->key; /* reuse known pointer */
                        break;
                }
@@ -1993,13 +2350,6 @@ static void apply_directory_rename_modifications(struct merge_options *opt,
                parent_name = cur_dir;
        }
 
-       /*
-        * We are removing old_path from opt->priv->paths.  old_path also will
-        * eventually need to be freed, but it may still be used by e.g.
-        * ci->pathnames.  So, store it in another string-list for now.
-        */
-       string_list_append(&opt->priv->paths_to_free, old_path);
-
        assert(ci->filemask == 2 || ci->filemask == 4);
        assert(ci->dirmask == 0);
        strmap_remove(&opt->priv->paths, old_path, 0);
@@ -2033,7 +2383,6 @@ static void apply_directory_rename_modifications(struct merge_options *opt,
                new_ci->stages[index].mode = ci->stages[index].mode;
                oidcpy(&new_ci->stages[index].oid, &ci->stages[index].oid);
 
-               free(ci);
                ci = new_ci;
        }
 
@@ -2461,10 +2810,23 @@ static void use_cached_pairs(struct merge_options *opt,
                if (!new_name)
                        new_name = old_name;
 
+               /*
+                * cached_pairs has *copies* of old_name and new_name,
+                * because it has to persist across merges.  Since
+                * pool_alloc_filespec() will just re-use the existing
+                * filenames, which will also get re-used by
+                * opt->priv->paths if they become renames, and then
+                * get freed at the end of the merge, that would leave
+                * the copy in cached_pairs dangling.  Avoid this by
+                * making a copy here.
+                */
+               old_name = mem_pool_strdup(&opt->priv->pool, old_name);
+               new_name = mem_pool_strdup(&opt->priv->pool, new_name);
+
                /* We don't care about oid/mode, only filenames and status */
-               one = alloc_filespec(old_name);
-               two = alloc_filespec(new_name);
-               diff_queue(pairs, one, two);
+               one = pool_alloc_filespec(&opt->priv->pool, old_name);
+               two = pool_alloc_filespec(&opt->priv->pool, new_name);
+               pool_diff_queue(&opt->priv->pool, pairs, one, two);
                pairs->queue[pairs->nr-1]->status = entry->value ? 'R' : 'D';
        }
 }
@@ -2539,8 +2901,8 @@ static int compare_pairs(const void *a_, const void *b_)
 }
 
 /* Call diffcore_rename() to update deleted/added pairs into rename pairs */
-static void detect_regular_renames(struct merge_options *opt,
-                                  unsigned side_index)
+static int detect_regular_renames(struct merge_options *opt,
+                                 unsigned side_index)
 {
        struct diff_options diff_opts;
        struct rename_info *renames = &opt->priv->renames;
@@ -2553,7 +2915,7 @@ static void detect_regular_renames(struct merge_options *opt,
                 * side had directory renames.
                 */
                resolve_diffpair_statuses(&renames->pairs[side_index]);
-               return;
+               return 0;
        }
 
        partial_clear_dir_rename_count(&renames->dir_rename_count[side_index]);
@@ -2572,6 +2934,7 @@ static void detect_regular_renames(struct merge_options *opt,
        diff_queued_diff = renames->pairs[side_index];
        trace2_region_enter("diff", "diffcore_rename", opt->repo);
        diffcore_rename_extended(&diff_opts,
+                                &opt->priv->pool,
                                 &renames->relevant_sources[side_index],
                                 &renames->dirs_removed[side_index],
                                 &renames->dir_rename_count[side_index],
@@ -2579,6 +2942,8 @@ static void detect_regular_renames(struct merge_options *opt,
        trace2_region_leave("diff", "diffcore_rename", opt->repo);
        resolve_diffpair_statuses(&diff_queued_diff);
 
+       if (diff_opts.needed_rename_limit > 0)
+               renames->redo_after_renames = 0;
        if (diff_opts.needed_rename_limit > renames->needed_limit)
                renames->needed_limit = diff_opts.needed_rename_limit;
 
@@ -2588,6 +2953,8 @@ static void detect_regular_renames(struct merge_options *opt,
        diff_queued_diff.nr = 0;
        diff_queued_diff.queue = NULL;
        diff_flush(&diff_opts);
+
+       return 1;
 }
 
 /*
@@ -2618,7 +2985,7 @@ static int collect_renames(struct merge_options *opt,
 
                if (p->status != 'A' && p->status != 'R') {
                        possibly_cache_new_pair(renames, p, side_index, NULL);
-                       diff_free_filepair(p);
+                       pool_diff_free_filepair(&opt->priv->pool, p);
                        continue;
                }
 
@@ -2631,7 +2998,7 @@ static int collect_renames(struct merge_options *opt,
 
                possibly_cache_new_pair(renames, p, side_index, new_path);
                if (p->status != 'R' && !new_path) {
-                       diff_free_filepair(p);
+                       pool_diff_free_filepair(&opt->priv->pool, p);
                        continue;
                }
 
@@ -2677,14 +3044,32 @@ static int detect_and_process_renames(struct merge_options *opt,
        struct diff_queue_struct combined;
        struct rename_info *renames = &opt->priv->renames;
        int need_dir_renames, s, clean = 1;
+       unsigned detection_run = 0;
 
        memset(&combined, 0, sizeof(combined));
        if (!possible_renames(renames))
                goto cleanup;
 
        trace2_region_enter("merge", "regular renames", opt->repo);
-       detect_regular_renames(opt, MERGE_SIDE1);
-       detect_regular_renames(opt, MERGE_SIDE2);
+       detection_run |= detect_regular_renames(opt, MERGE_SIDE1);
+       detection_run |= detect_regular_renames(opt, MERGE_SIDE2);
+       if (renames->redo_after_renames && detection_run) {
+               int i, side;
+               struct diff_filepair *p;
+
+               /* Cache the renames, we found */
+               for (side = MERGE_SIDE1; side <= MERGE_SIDE2; side++) {
+                       for (i = 0; i < renames->pairs[side].nr; ++i) {
+                               p = renames->pairs[side].queue[i];
+                               possibly_cache_new_pair(renames, p, side, NULL);
+                       }
+               }
+
+               /* Restart the merge with the cached renames */
+               renames->redo_after_renames = 2;
+               trace2_region_leave("merge", "regular renames", opt->repo);
+               goto cleanup;
+       }
        use_cached_pairs(opt, &renames->cached_pairs[1], &renames->pairs[1]);
        use_cached_pairs(opt, &renames->cached_pairs[2], &renames->pairs[2]);
        trace2_region_leave("merge", "regular renames", opt->repo);
@@ -2731,7 +3116,7 @@ cleanup:
                side_pairs = &renames->pairs[s];
                for (i = 0; i < side_pairs->nr; ++i) {
                        struct diff_filepair *p = side_pairs->queue[i];
-                       diff_free_filepair(p);
+                       pool_diff_free_filepair(&opt->priv->pool, p);
                }
        }
 
@@ -2744,7 +3129,8 @@ simple_cleanup:
        if (combined.nr) {
                int i;
                for (i = 0; i < combined.nr; i++)
-                       diff_free_filepair(combined.queue[i]);
+                       pool_diff_free_filepair(&opt->priv->pool,
+                                               combined.queue[i]);
                free(combined.queue);
        }
 
@@ -3218,7 +3604,8 @@ static void process_entry(struct merge_options *opt,
                 * the directory to remain here, so we need to move this
                 * path to some new location.
                 */
-               CALLOC_ARRAY(new_ci, 1);
+               new_ci = mem_pool_calloc(&opt->priv->pool, 1, sizeof(*new_ci));
+
                /* We don't really want new_ci->merged.result copied, but it'll
                 * be overwritten below so it doesn't matter.  We also don't
                 * want any directory mode/oid values copied, but we'll zero
@@ -3310,7 +3697,8 @@ static void process_entry(struct merge_options *opt,
                        const char *a_path = NULL, *b_path = NULL;
                        int rename_a = 0, rename_b = 0;
 
-                       new_ci = xmalloc(sizeof(*new_ci));
+                       new_ci = mem_pool_alloc(&opt->priv->pool,
+                                               sizeof(*new_ci));
 
                        if (S_ISREG(a_mode))
                                rename_a = 1;
@@ -3379,17 +3767,8 @@ static void process_entry(struct merge_options *opt,
                                b_path = path;
                        strmap_put(&opt->priv->paths, b_path, new_ci);
 
-                       if (rename_a && rename_b) {
+                       if (rename_a && rename_b)
                                strmap_remove(&opt->priv->paths, path, 0);
-                               /*
-                                * We removed path from opt->priv->paths.  path
-                                * will also eventually need to be freed, but
-                                * it may still be used by e.g.  ci->pathnames.
-                                * So, store it in another string-list for now.
-                                */
-                               string_list_append(&opt->priv->paths_to_free,
-                                                  path);
-                       }
 
                        /*
                         * Do special handling for b_path since process_entry()
@@ -3930,6 +4309,7 @@ static void merge_start(struct merge_options *opt, struct merge_result *result)
 {
        struct rename_info *renames;
        int i;
+       struct mem_pool *pool = NULL;
 
        /* Sanity checks on opt */
        trace2_region_enter("merge", "sanity checks", opt->repo);
@@ -3995,9 +4375,11 @@ static void merge_start(struct merge_options *opt, struct merge_result *result)
 
        /* Initialization of various renames fields */
        renames = &opt->priv->renames;
+       mem_pool_init(&opt->priv->pool, 0);
+       pool = &opt->priv->pool;
        for (i = MERGE_SIDE1; i <= MERGE_SIDE2; i++) {
                strintmap_init_with_options(&renames->dirs_removed[i],
-                                           NOT_RELEVANT, NULL, 0);
+                                           NOT_RELEVANT, pool, 0);
                strmap_init_with_options(&renames->dir_rename_count[i],
                                         NULL, 1);
                strmap_init_with_options(&renames->dir_renames[i],
@@ -4011,7 +4393,7 @@ static void merge_start(struct merge_options *opt, struct merge_result *result)
                 */
                strintmap_init_with_options(&renames->relevant_sources[i],
                                            -1 /* explicitly invalid */,
-                                           NULL, 0);
+                                           pool, 0);
                strmap_init_with_options(&renames->cached_pairs[i],
                                         NULL, 1);
                strset_init_with_options(&renames->cached_irrelevant[i],
@@ -4019,19 +4401,25 @@ static void merge_start(struct merge_options *opt, struct merge_result *result)
                strset_init_with_options(&renames->cached_target_names[i],
                                         NULL, 0);
        }
+       for (i = MERGE_SIDE1; i <= MERGE_SIDE2; i++) {
+               strintmap_init_with_options(&renames->deferred[i].possible_trivial_merges,
+                                           0, pool, 0);
+               strset_init_with_options(&renames->deferred[i].target_dirs,
+                                        pool, 1);
+               renames->deferred[i].trivial_merges_okay = 1; /* 1 == maybe */
+       }
 
        /*
         * Although we initialize opt->priv->paths with strdup_strings=0,
         * that's just to avoid making yet another copy of an allocated
         * string.  Putting the entry into paths means we are taking
-        * ownership, so we will later free it.  paths_to_free is similar.
+        * ownership, so we will later free it.
         *
         * In contrast, conflicted just has a subset of keys from paths, so
         * we don't want to free those (it'd be a duplicate free).
         */
-       strmap_init_with_options(&opt->priv->paths, NULL, 0);
-       strmap_init_with_options(&opt->priv->conflicted, NULL, 0);
-       string_list_init_nodup(&opt->priv->paths_to_free);
+       strmap_init_with_options(&opt->priv->paths, pool, 0);
+       strmap_init_with_options(&opt->priv->conflicted, pool, 0);
 
        /*
         * keys & strbufs in output will sometimes need to outlive "paths",
@@ -4107,6 +4495,7 @@ static void merge_ort_nonrecursive_internal(struct merge_options *opt,
                                               opt->subtree_shift);
        }
 
+redo:
        trace2_region_enter("merge", "collect_merge_info", opt->repo);
        if (collect_merge_info(opt, merge_base, side1, side2) != 0) {
                /*
@@ -4126,6 +4515,12 @@ static void merge_ort_nonrecursive_internal(struct merge_options *opt,
        result->clean = detect_and_process_renames(opt, merge_base,
                                                   side1, side2);
        trace2_region_leave("merge", "renames", opt->repo);
+       if (opt->priv->renames.redo_after_renames == 2) {
+               trace2_region_enter("merge", "reset_maps", opt->repo);
+               clear_or_reinit_internal_opts(opt->priv, 1);
+               trace2_region_leave("merge", "reset_maps", opt->repo);
+               goto redo;
+       }
 
        trace2_region_enter("merge", "process_entries", opt->repo);
        process_entries(opt, &working_tree_oid);