Merge branch 'jk/apply-binary-hunk-parsing-fix'

[thirdparty/git.git] / merge-ort.c
diff --git a/merge-ort.c b/merge-ort.c

index ec0c590421141a83f907039fbb97160598e36fe3..515dc39b7f69a95acf4b8c7a051b4382f538ee5d 100644 (file)
--- a/merge-ort.c
+++ b/merge-ort.c
@@ -62,6 +62,53 @@ struct traversal_callback_data {
         struct name_entry names[3];
  };
  
+struct deferred_traversal_data {
+       /*
+        * possible_trivial_merges: directories to be explored only when needed
+        *
+        * possible_trivial_merges is a map of directory names to
+        * dir_rename_mask.  When we detect that a directory is unchanged on
+        * one side, we can sometimes resolve the directory without recursing
+        * into it.  Renames are the only things that can prevent such an
+        * optimization.  However, for rename sources:
+        *   - If no parent directory needed directory rename detection, then
+        *     no path under such a directory can be a relevant_source.
+        * and for rename destinations:
+        *   - If no cached rename has a target path under the directory AND
+        *   - If there are no unpaired relevant_sources elsewhere in the
+        *     repository
+        * then we don't need any path under this directory for a rename
+        * destination.  The only way to know the last item above is to defer
+        * handling such directories until the end of collect_merge_info(),
+        * in handle_deferred_entries().
+        *
+        * For each we store dir_rename_mask, since that's the only bit of
+        * information we need, other than the path, to resume the recursive
+        * traversal.
+        */
+       struct strintmap possible_trivial_merges;
+
+       /*
+        * trivial_merges_okay: if trivial directory merges are okay
+        *
+        * See possible_trivial_merges above.  The "no unpaired
+        * relevant_sources elsewhere in the repository" is a single boolean
+        * per merge side, which we store here.  Note that while 0 means no,
+        * 1 only means "maybe" rather than "yes"; we optimistically set it
+        * to 1 initially and only clear when we determine it is unsafe to
+        * do trivial directory merges.
+        */
+       unsigned trivial_merges_okay;
+
+       /*
+        * target_dirs: ancestor directories of rename targets
+        *
+        * target_dirs contains all directory names that are an ancestor of
+        * any rename destination.
+        */
+       struct strset target_dirs;
+};
+
  struct rename_info {
         /*
          * All variables that are arrays of size 3 correspond to data tracked
@@ -119,6 +166,8 @@ struct rename_info {
          */
         struct strintmap relevant_sources[3];
  
+       struct deferred_traversal_data deferred[3];
+
         /*
          * dir_rename_mask:
          *   0: optimization removing unmodified potential rename source okay
@@ -164,6 +213,7 @@ struct rename_info {
          *   MERGE_SIDE2: cached data from side2 can be reused
          *   MERGE_SIDE1: cached data from side1 can be reused
          *   0:           no cached data can be reused
+        *   -1:          See redo_after_renames; both sides can be reused.
          */
         int cached_pairs_valid_side;
  
@@ -209,6 +259,28 @@ struct rename_info {
          */
         struct strset cached_irrelevant[3];
  
+       /*
+        * redo_after_renames: optimization flag for "restarting" the merge
+        *
+        * Sometimes it pays to detect renames, cache them, and then
+        * restart the merge operation from the beginning.  The reason for
+        * this is that when we know where all the renames are, we know
+        * whether a certain directory has any paths under it affected --
+        * and if a directory is not affected then it permits us to do
+        * trivial tree merging in more cases.  Doing trivial tree merging
+        * prevents the need to run process_entry() on every path
+        * underneath trees that can be trivially merged, and
+        * process_entry() is more expensive than collect_merge_info() --
+        * plus, the second collect_merge_info() will be much faster since
+        * it doesn't have to recurse into the relevant trees.
+        *
+        * Values for this flag:
+        *   0 = don't bother, not worth it (or conditions not yet checked)
+        *   1 = conditions for optimization met, optimization worthwhile
+        *   2 = we already did it (don't restart merge yet again)
+        */
+       unsigned redo_after_renames;
+
         /*
          * needed_limit: value needed for inexact rename detection to run
          *
@@ -231,8 +303,6 @@ struct merge_options_internal {
          *   * these keys serve to intern all the path strings, which allows
          *     us to do pointer comparison on directory names instead of
          *     strcmp; we just have to be careful to use the interned strings.
-        *     (Technically paths_to_free may track some strings that were
-        *      removed from froms paths.)
          *
          * The values of paths:
          *   * either a pointer to a merged_info, or a conflict_info struct
@@ -268,14 +338,14 @@ struct merge_options_internal {
         struct strmap conflicted;
  
         /*
-        * paths_to_free: additional list of strings to free
+        * pool: memory pool for fast allocation/deallocation
          *
-        * If keys are removed from "paths", they are added to paths_to_free
-        * to ensure they are later freed.  We avoid free'ing immediately since
-        * other places (e.g. conflict_info.pathnames[]) may still be
-        * referencing these paths.
+        * We allocate room for lots of filenames and auxiliary data
+        * structures in merge_options_internal, and it tends to all be
+        * freed together too.  Using a memory pool for these provides a
+        * nice speedup.
          */
-       struct string_list paths_to_free;
+       struct mem_pool pool;
  
         /*
          * output: special messages and conflict notices for various paths
@@ -447,60 +517,47 @@ static void clear_or_reinit_internal_opts(struct merge_options_internal *opti,
  {
         struct rename_info *renames = &opti->renames;
         int i;
-       void (*strmap_func)(struct strmap *, int) =
+       void (*strmap_clear_func)(struct strmap *, int) =
                 reinitialize ? strmap_partial_clear : strmap_clear;
-       void (*strintmap_func)(struct strintmap *) =
+       void (*strintmap_clear_func)(struct strintmap *) =
                 reinitialize ? strintmap_partial_clear : strintmap_clear;
-       void (*strset_func)(struct strset *) =
+       void (*strset_clear_func)(struct strset *) =
                 reinitialize ? strset_partial_clear : strset_clear;
  
-       /*
-        * We marked opti->paths with strdup_strings = 0, so that we
-        * wouldn't have to make another copy of the fullpath created by
-        * make_traverse_path from setup_path_info().  But, now that we've
-        * used it and have no other references to these strings, it is time
-        * to deallocate them.
-        */
-       free_strmap_strings(&opti->paths);
-       strmap_func(&opti->paths, 1);
+       strmap_clear_func(&opti->paths, 0);
  
         /*
          * All keys and values in opti->conflicted are a subset of those in
          * opti->paths.  We don't want to deallocate anything twice, so we
          * don't free the keys and we pass 0 for free_values.
          */
-       strmap_func(&opti->conflicted, 0);
-
-       /*
-        * opti->paths_to_free is similar to opti->paths; we created it with
-        * strdup_strings = 0 to avoid making _another_ copy of the fullpath
-        * but now that we've used it and have no other references to these
-        * strings, it is time to deallocate them.  We do so by temporarily
-        * setting strdup_strings to 1.
-        */
-       opti->paths_to_free.strdup_strings = 1;
-       string_list_clear(&opti->paths_to_free, 0);
-       opti->paths_to_free.strdup_strings = 0;
+       strmap_clear_func(&opti->conflicted, 0);
  
         if (opti->attr_index.cache_nr) /* true iff opt->renormalize */
                 discard_index(&opti->attr_index);
  
         /* Free memory used by various renames maps */
         for (i = MERGE_SIDE1; i <= MERGE_SIDE2; ++i) {
-               strintmap_func(&renames->dirs_removed[i]);
-               strmap_func(&renames->dir_renames[i], 0);
-               strintmap_func(&renames->relevant_sources[i]);
+               strintmap_clear_func(&renames->dirs_removed[i]);
+               strmap_clear_func(&renames->dir_renames[i], 0);
+               strintmap_clear_func(&renames->relevant_sources[i]);
                 if (!reinitialize)
                         assert(renames->cached_pairs_valid_side == 0);
-               if (i != renames->cached_pairs_valid_side) {
-                       strset_func(&renames->cached_target_names[i]);
-                       strmap_func(&renames->cached_pairs[i], 1);
-                       strset_func(&renames->cached_irrelevant[i]);
+               if (i != renames->cached_pairs_valid_side &&
+                   -1 != renames->cached_pairs_valid_side) {
+                       strset_clear_func(&renames->cached_target_names[i]);
+                       strmap_clear_func(&renames->cached_pairs[i], 1);
+                       strset_clear_func(&renames->cached_irrelevant[i]);
                         partial_clear_dir_rename_count(&renames->dir_rename_count[i]);
                         if (!reinitialize)
                                 strmap_clear(&renames->dir_rename_count[i], 1);
                 }
         }
+       for (i = MERGE_SIDE1; i <= MERGE_SIDE2; ++i) {
+               strintmap_clear_func(&renames->deferred[i].possible_trivial_merges);
+               strset_clear_func(&renames->deferred[i].target_dirs);
+               renames->deferred[i].trivial_merges_okay = 1; /* 1 == maybe */
+       }
         renames->cached_pairs_valid_side = 0;
         renames->dir_rename_mask = 0;
  
@@ -525,6 +582,8 @@ static void clear_or_reinit_internal_opts(struct merge_options_internal *opti,
                 strmap_clear(&opti->output, 0);
         }
  
+       mem_pool_discard(&opti->pool, 0);
+
         /* Clean out callback_data as well. */
         FREE_AND_NULL(renames->callback_data);
         renames->callback_data_nr = renames->callback_data_alloc = 0;
@@ -587,6 +646,36 @@ static void path_msg(struct merge_options *opt,
         strbuf_addch(sb, '\n');
  }
  
+static struct diff_filespec *pool_alloc_filespec(struct mem_pool *pool,
+                                                const char *path)
+{
+       /* Similar to alloc_filespec(), but allocate from pool and reuse path */
+       struct diff_filespec *spec;
+
+       spec = mem_pool_calloc(pool, 1, sizeof(*spec));
+       spec->path = (char*)path; /* spec won't modify it */
+
+       spec->count = 1;
+       spec->is_binary = -1;
+       return spec;
+}
+
+static struct diff_filepair *pool_diff_queue(struct mem_pool *pool,
+                                            struct diff_queue_struct *queue,
+                                            struct diff_filespec *one,
+                                            struct diff_filespec *two)
+{
+       /* Same code as diff_queue(), except allocate from pool */
+       struct diff_filepair *dp;
+
+       dp = mem_pool_calloc(pool, 1, sizeof(*dp));
+       dp->one = one;
+       dp->two = two;
+       if (queue)
+               diff_q(queue, dp);
+       return dp;
+}
+
  /* add a string to a strbuf, but converting "/" to "_" */
  static void add_flattened_path(struct strbuf *out, const char *s)
  {
@@ -715,8 +804,9 @@ static void setup_path_info(struct merge_options *opt,
         assert(!df_conflict || !resolved); /* df_conflict implies !resolved */
         assert(resolved == (merged_version != NULL));
  
-       mi = xcalloc(1, resolved ? sizeof(struct merged_info) :
-                                  sizeof(struct conflict_info));
+       mi = mem_pool_calloc(&opt->priv->pool, 1,
+                            resolved ? sizeof(struct merged_info) :
+                                       sizeof(struct conflict_info));
         mi->directory_name = current_dir_name;
         mi->basename_offset = current_dir_name_len;
         mi->clean = !!resolved;
@@ -813,11 +903,11 @@ static void add_pair(struct merge_options *opt,
                         return;
         }
  
-       one = alloc_filespec(pathname);
-       two = alloc_filespec(pathname);
+       one = pool_alloc_filespec(&opt->priv->pool, pathname);
+       two = pool_alloc_filespec(&opt->priv->pool, pathname);
         fill_filespec(is_add ? two : one,
                       &names[names_idx].oid, 1, names[names_idx].mode);
-       diff_queue(&renames->pairs[side], one, two);
+       pool_diff_queue(&opt->priv->pool, &renames->pairs[side], one, two);
  }
  
  static void collect_rename_info(struct merge_options *opt,
@@ -1008,7 +1098,7 @@ static int collect_merge_info_callback(int n,
         len = traverse_path_len(info, p->pathlen);
  
         /* +1 in both of the following lines to include the NUL byte */
-       fullpath = xmalloc(len + 1);
+       fullpath = mem_pool_alloc(&opt->priv->pool, len + 1);
         make_traverse_path(fullpath, len + 1, info, p->path, p->pathlen);
  
         /*
@@ -1019,20 +1109,67 @@ static int collect_merge_info_callback(int n,
         if (side1_matches_mbase && side2_matches_mbase) {
                 /* mbase, side1, & side2 all match; use mbase as resolution */
                 setup_path_info(opt, &pi, dirname, info->pathlen, fullpath,
-                               names, names+0, mbase_null, 0,
+                               names, names+0, mbase_null, 0 /* df_conflict */,
+                               filemask, dirmask, 1 /* resolved */);
+               return mask;
+       }
+
+       /*
+        * If the sides match, and all three paths are present and are
+        * files, then we can take either as the resolution.  We can't do
+        * this with trees, because there may be rename sources from the
+        * merge_base.
+        */
+       if (sides_match && filemask == 0x07) {
+               /* use side1 (== side2) version as resolution */
+               setup_path_info(opt, &pi, dirname, info->pathlen, fullpath,
+                               names, names+1, side1_null, 0,
                                 filemask, dirmask, 1);
                 return mask;
         }
  
         /*
-        * Gather additional information used in rename detection.
+        * If side1 matches mbase and all three paths are present and are
+        * files, then we can use side2 as the resolution.  We cannot
+        * necessarily do so this for trees, because there may be rename
+        * destinations within side2.
+        */
+       if (side1_matches_mbase && filemask == 0x07) {
+               /* use side2 version as resolution */
+               setup_path_info(opt, &pi, dirname, info->pathlen, fullpath,
+                               names, names+2, side2_null, 0,
+                               filemask, dirmask, 1);
+               return mask;
+       }
+
+       /* Similar to above but swapping sides 1 and 2 */
+       if (side2_matches_mbase && filemask == 0x07) {
+               /* use side1 version as resolution */
+               setup_path_info(opt, &pi, dirname, info->pathlen, fullpath,
+                               names, names+1, side1_null, 0,
+                               filemask, dirmask, 1);
+               return mask;
+       }
+
+       /*
+        * Sometimes we can tell that a source path need not be included in
+        * rename detection -- namely, whenever either
+        *    side1_matches_mbase && side2_null
+        * or
+        *    side2_matches_mbase && side1_null
+        * However, we call collect_rename_info() even in those cases,
+        * because exact renames are cheap and would let us remove both a
+        * source and destination path.  We'll cull the unneeded sources
+        * later.
          */
         collect_rename_info(opt, names, dirname, fullpath,
                             filemask, dirmask, match_mask);
  
         /*
-        * Record information about the path so we can resolve later in
-        * process_entries.
+        * None of the special cases above matched, so we have a
+        * provisional conflict.  (Rename detection might allow us to
+        * unconflict some more cases, but that comes later so all we can
+        * do now is record the different non-null file hashes.)
          */
         setup_path_info(opt, &pi, dirname, info->pathlen, fullpath,
                         names, NULL, 0, df_conflict, filemask, dirmask, 0);
@@ -1047,8 +1184,36 @@ static int collect_merge_info_callback(int n,
                 struct tree_desc t[3];
                 void *buf[3] = {NULL, NULL, NULL};
                 const char *original_dir_name;
-               int i, ret;
+               int i, ret, side;
+
+               /*
+                * Check for whether we can avoid recursing due to one side
+                * matching the merge base.  The side that does NOT match is
+                * the one that might have a rename destination we need.
+                */
+               assert(!side1_matches_mbase || !side2_matches_mbase);
+               side = side1_matches_mbase ? MERGE_SIDE2 :
+                       side2_matches_mbase ? MERGE_SIDE1 : MERGE_BASE;
+               if (filemask == 0 && (dirmask == 2 || dirmask == 4)) {
+                       /*
+                        * Also defer recursing into new directories; set up a
+                        * few variables to let us do so.
+                        */
+                       ci->match_mask = (7 - dirmask);
+                       side = dirmask / 2;
+               }
+               if (renames->dir_rename_mask != 0x07 &&
+                   side != MERGE_BASE &&
+                   renames->deferred[side].trivial_merges_okay &&
+                   !strset_contains(&renames->deferred[side].target_dirs,
+                                    pi.string)) {
+                       strintmap_set(&renames->deferred[side].possible_trivial_merges,
+                                     pi.string, renames->dir_rename_mask);
+                       renames->dir_rename_mask = prev_dir_rename_mask;
+                       return mask;
+               }
  
+               /* We need to recurse */
                 ci->match_mask &= filemask;
                 newinfo = *info;
                 newinfo.prev = info;
@@ -1102,6 +1267,192 @@ static int collect_merge_info_callback(int n,
         return mask;
  }
  
+static void resolve_trivial_directory_merge(struct conflict_info *ci, int side)
+{
+       VERIFY_CI(ci);
+       assert((side == 1 && ci->match_mask == 5) ||
+              (side == 2 && ci->match_mask == 3));
+       oidcpy(&ci->merged.result.oid, &ci->stages[side].oid);
+       ci->merged.result.mode = ci->stages[side].mode;
+       ci->merged.is_null = is_null_oid(&ci->stages[side].oid);
+       ci->match_mask = 0;
+       ci->merged.clean = 1; /* (ci->filemask == 0); */
+}
+
+static int handle_deferred_entries(struct merge_options *opt,
+                                  struct traverse_info *info)
+{
+       struct rename_info *renames = &opt->priv->renames;
+       struct hashmap_iter iter;
+       struct strmap_entry *entry;
+       int side, ret = 0;
+       int path_count_before, path_count_after = 0;
+
+       path_count_before = strmap_get_size(&opt->priv->paths);
+       for (side = MERGE_SIDE1; side <= MERGE_SIDE2; side++) {
+               unsigned optimization_okay = 1;
+               struct strintmap copy;
+
+               /* Loop over the set of paths we need to know rename info for */
+               strset_for_each_entry(&renames->relevant_sources[side],
+                                     &iter, entry) {
+                       char *rename_target, *dir, *dir_marker;
+                       struct strmap_entry *e;
+
+                       /*
+                        * If we don't know delete/rename info for this path,
+                        * then we need to recurse into all trees to get all
+                        * adds to make sure we have it.
+                        */
+                       if (strset_contains(&renames->cached_irrelevant[side],
+                                           entry->key))
+                               continue;
+                       e = strmap_get_entry(&renames->cached_pairs[side],
+                                            entry->key);
+                       if (!e) {
+                               optimization_okay = 0;
+                               break;
+                       }
+
+                       /* If this is a delete, we have enough info already */
+                       rename_target = e->value;
+                       if (!rename_target)
+                               continue;
+
+                       /* If we already walked the rename target, we're good */
+                       if (strmap_contains(&opt->priv->paths, rename_target))
+                               continue;
+
+                       /*
+                        * Otherwise, we need to get a list of directories that
+                        * will need to be recursed into to get this
+                        * rename_target.
+                        */
+                       dir = xstrdup(rename_target);
+                       while ((dir_marker = strrchr(dir, '/'))) {
+                               *dir_marker = '\0';
+                               if (strset_contains(&renames->deferred[side].target_dirs,
+                                                   dir))
+                                       break;
+                               strset_add(&renames->deferred[side].target_dirs,
+                                          dir);
+                       }
+                       free(dir);
+               }
+               renames->deferred[side].trivial_merges_okay = optimization_okay;
+               /*
+                * We need to recurse into any directories in
+                * possible_trivial_merges[side] found in target_dirs[side].
+                * But when we recurse, we may need to queue up some of the
+                * subdirectories for possible_trivial_merges[side].  Since
+                * we can't safely iterate through a hashmap while also adding
+                * entries, move the entries into 'copy', iterate over 'copy',
+                * and then we'll also iterate anything added into
+                * possible_trivial_merges[side] once this loop is done.
+                */
+               copy = renames->deferred[side].possible_trivial_merges;
+               strintmap_init_with_options(&renames->deferred[side].possible_trivial_merges,
+                                           0,
+                                           &opt->priv->pool,
+                                           0);
+               strintmap_for_each_entry(&copy, &iter, entry) {
+                       const char *path = entry->key;
+                       unsigned dir_rename_mask = (intptr_t)entry->value;
+                       struct conflict_info *ci;
+                       unsigned dirmask;
+                       struct tree_desc t[3];
+                       void *buf[3] = {NULL,};
+                       int i;
+
+                       ci = strmap_get(&opt->priv->paths, path);
+                       VERIFY_CI(ci);
+                       dirmask = ci->dirmask;
+
+                       if (optimization_okay &&
+                           !strset_contains(&renames->deferred[side].target_dirs,
+                                            path)) {
+                               resolve_trivial_directory_merge(ci, side);
+                               continue;
+                       }
+
+                       info->name = path;
+                       info->namelen = strlen(path);
+                       info->pathlen = info->namelen + 1;
+
+                       for (i = 0; i < 3; i++, dirmask >>= 1) {
+                               if (i == 1 && ci->match_mask == 3)
+                                       t[1] = t[0];
+                               else if (i == 2 && ci->match_mask == 5)
+                                       t[2] = t[0];
+                               else if (i == 2 && ci->match_mask == 6)
+                                       t[2] = t[1];
+                               else {
+                                       const struct object_id *oid = NULL;
+                                       if (dirmask & 1)
+                                               oid = &ci->stages[i].oid;
+                                       buf[i] = fill_tree_descriptor(opt->repo,
+                                                                     t+i, oid);
+                               }
+                       }
+
+                       ci->match_mask &= ci->filemask;
+                       opt->priv->current_dir_name = path;
+                       renames->dir_rename_mask = dir_rename_mask;
+                       if (renames->dir_rename_mask == 0 ||
+                           renames->dir_rename_mask == 0x07)
+                               ret = traverse_trees(NULL, 3, t, info);
+                       else
+                               ret = traverse_trees_wrapper(NULL, 3, t, info);
+
+                       for (i = MERGE_BASE; i <= MERGE_SIDE2; i++)
+                               free(buf[i]);
+
+                       if (ret < 0)
+                               return ret;
+               }
+               strintmap_clear(&copy);
+               strintmap_for_each_entry(&renames->deferred[side].possible_trivial_merges,
+                                        &iter, entry) {
+                       const char *path = entry->key;
+                       struct conflict_info *ci;
+
+                       ci = strmap_get(&opt->priv->paths, path);
+                       VERIFY_CI(ci);
+
+                       assert(renames->deferred[side].trivial_merges_okay &&
+                              !strset_contains(&renames->deferred[side].target_dirs,
+                                               path));
+                       resolve_trivial_directory_merge(ci, side);
+               }
+               if (!optimization_okay || path_count_after)
+                       path_count_after = strmap_get_size(&opt->priv->paths);
+       }
+       if (path_count_after) {
+               /*
+                * The choice of wanted_factor here does not affect
+                * correctness, only performance.  When the
+                *    path_count_after / path_count_before
+                * ratio is high, redoing after renames is a big
+                * performance boost.  I suspect that redoing is a wash
+                * somewhere near a value of 2, and below that redoing will
+                * slow things down.  I applied a fudge factor and picked
+                * 3; see the commit message when this was introduced for
+                * back of the envelope calculations for this ratio.
+                */
+               const int wanted_factor = 3;
+
+               /* We should only redo collect_merge_info one time */
+               assert(renames->redo_after_renames == 0);
+
+               if (path_count_after / path_count_before >= wanted_factor) {
+                       renames->redo_after_renames = 1;
+                       renames->cached_pairs_valid_side = -1;
+               }
+       } else if (renames->redo_after_renames == 2)
+               renames->redo_after_renames = 0;
+       return ret;
+}
+
  static int collect_merge_info(struct merge_options *opt,
                               struct tree *merge_base,
                               struct tree *side1,
@@ -1127,6 +1478,8 @@ static int collect_merge_info(struct merge_options *opt,
  
         trace2_region_enter("merge", "traverse_trees", opt->repo);
         ret = traverse_trees(NULL, 3, t, &info);
+       if (ret == 0)
+               ret = handle_deferred_entries(opt, &info);
         trace2_region_leave("merge", "traverse_trees", opt->repo);
  
         return ret;
@@ -1952,12 +2305,17 @@ static void apply_directory_rename_modifications(struct merge_options *opt,
         VERIFY_CI(ci);
  
         /* Find parent directories missing from opt->priv->paths */
-       cur_path = new_path;
+       cur_path = mem_pool_strdup(&opt->priv->pool, new_path);
+       free((char*)new_path);
+       new_path = (char *)cur_path;
+
         while (1) {
                 /* Find the parent directory of cur_path */
                 char *last_slash = strrchr(cur_path, '/');
                 if (last_slash) {
-                       parent_name = xstrndup(cur_path, last_slash - cur_path);
+                       parent_name = mem_pool_strndup(&opt->priv->pool,
+                                                      cur_path,
+                                                      last_slash - cur_path);
                 } else {
                         parent_name = opt->priv->toplevel_dir;
                         break;
@@ -1966,7 +2324,6 @@ static void apply_directory_rename_modifications(struct merge_options *opt,
                 /* Look it up in opt->priv->paths */
                 entry = strmap_get_entry(&opt->priv->paths, parent_name);
                 if (entry) {
-                       free((char*)parent_name);
                         parent_name = entry->key; /* reuse known pointer */
                         break;
                 }
@@ -1993,13 +2350,6 @@ static void apply_directory_rename_modifications(struct merge_options *opt,
                 parent_name = cur_dir;
         }
  
-       /*
-        * We are removing old_path from opt->priv->paths.  old_path also will
-        * eventually need to be freed, but it may still be used by e.g.
-        * ci->pathnames.  So, store it in another string-list for now.
-        */
-       string_list_append(&opt->priv->paths_to_free, old_path);
-
         assert(ci->filemask == 2 || ci->filemask == 4);
         assert(ci->dirmask == 0);
         strmap_remove(&opt->priv->paths, old_path, 0);
@@ -2033,7 +2383,6 @@ static void apply_directory_rename_modifications(struct merge_options *opt,
                 new_ci->stages[index].mode = ci->stages[index].mode;
                 oidcpy(&new_ci->stages[index].oid, &ci->stages[index].oid);
  
-               free(ci);
                 ci = new_ci;
         }
  
@@ -2461,10 +2810,23 @@ static void use_cached_pairs(struct merge_options *opt,
                 if (!new_name)
                         new_name = old_name;
  
+               /*
+                * cached_pairs has *copies* of old_name and new_name,
+                * because it has to persist across merges.  Since
+                * pool_alloc_filespec() will just re-use the existing
+                * filenames, which will also get re-used by
+                * opt->priv->paths if they become renames, and then
+                * get freed at the end of the merge, that would leave
+                * the copy in cached_pairs dangling.  Avoid this by
+                * making a copy here.
+                */
+               old_name = mem_pool_strdup(&opt->priv->pool, old_name);
+               new_name = mem_pool_strdup(&opt->priv->pool, new_name);
+
                 /* We don't care about oid/mode, only filenames and status */
-               one = alloc_filespec(old_name);
-               two = alloc_filespec(new_name);
-               diff_queue(pairs, one, two);
+               one = pool_alloc_filespec(&opt->priv->pool, old_name);
+               two = pool_alloc_filespec(&opt->priv->pool, new_name);
+               pool_diff_queue(&opt->priv->pool, pairs, one, two);
                 pairs->queue[pairs->nr-1]->status = entry->value ? 'R' : 'D';
         }
  }
@@ -2539,8 +2901,8 @@ static int compare_pairs(const void *a_, const void *b_)
  }
  
  /* Call diffcore_rename() to update deleted/added pairs into rename pairs */
-static void detect_regular_renames(struct merge_options *opt,
-                                  unsigned side_index)
+static int detect_regular_renames(struct merge_options *opt,
+                                 unsigned side_index)
  {
         struct diff_options diff_opts;
         struct rename_info *renames = &opt->priv->renames;
@@ -2553,7 +2915,7 @@ static void detect_regular_renames(struct merge_options *opt,
                  * side had directory renames.
                  */
                 resolve_diffpair_statuses(&renames->pairs[side_index]);
-               return;
+               return 0;
         }
  
         partial_clear_dir_rename_count(&renames->dir_rename_count[side_index]);
@@ -2572,6 +2934,7 @@ static void detect_regular_renames(struct merge_options *opt,
         diff_queued_diff = renames->pairs[side_index];
         trace2_region_enter("diff", "diffcore_rename", opt->repo);
         diffcore_rename_extended(&diff_opts,
+                                &opt->priv->pool,
                                  &renames->relevant_sources[side_index],
                                  &renames->dirs_removed[side_index],
                                  &renames->dir_rename_count[side_index],
@@ -2579,6 +2942,8 @@ static void detect_regular_renames(struct merge_options *opt,
         trace2_region_leave("diff", "diffcore_rename", opt->repo);
         resolve_diffpair_statuses(&diff_queued_diff);
  
+       if (diff_opts.needed_rename_limit > 0)
+               renames->redo_after_renames = 0;
         if (diff_opts.needed_rename_limit > renames->needed_limit)
                 renames->needed_limit = diff_opts.needed_rename_limit;
  
@@ -2588,6 +2953,8 @@ static void detect_regular_renames(struct merge_options *opt,
         diff_queued_diff.nr = 0;
         diff_queued_diff.queue = NULL;
         diff_flush(&diff_opts);
+
+       return 1;
  }
  
  /*
@@ -2618,7 +2985,7 @@ static int collect_renames(struct merge_options *opt,
  
                 if (p->status != 'A' && p->status != 'R') {
                         possibly_cache_new_pair(renames, p, side_index, NULL);
-                       diff_free_filepair(p);
+                       pool_diff_free_filepair(&opt->priv->pool, p);
                         continue;
                 }
  
@@ -2631,7 +2998,7 @@ static int collect_renames(struct merge_options *opt,
  
                 possibly_cache_new_pair(renames, p, side_index, new_path);
                 if (p->status != 'R' && !new_path) {
-                       diff_free_filepair(p);
+                       pool_diff_free_filepair(&opt->priv->pool, p);
                         continue;
                 }
  
@@ -2677,14 +3044,32 @@ static int detect_and_process_renames(struct merge_options *opt,
         struct diff_queue_struct combined;
         struct rename_info *renames = &opt->priv->renames;
         int need_dir_renames, s, clean = 1;
+       unsigned detection_run = 0;
  
         memset(&combined, 0, sizeof(combined));
         if (!possible_renames(renames))
                 goto cleanup;
  
         trace2_region_enter("merge", "regular renames", opt->repo);
-       detect_regular_renames(opt, MERGE_SIDE1);
-       detect_regular_renames(opt, MERGE_SIDE2);
+       detection_run |= detect_regular_renames(opt, MERGE_SIDE1);
+       detection_run |= detect_regular_renames(opt, MERGE_SIDE2);
+       if (renames->redo_after_renames && detection_run) {
+               int i, side;
+               struct diff_filepair *p;
+
+               /* Cache the renames, we found */
+               for (side = MERGE_SIDE1; side <= MERGE_SIDE2; side++) {
+                       for (i = 0; i < renames->pairs[side].nr; ++i) {
+                               p = renames->pairs[side].queue[i];
+                               possibly_cache_new_pair(renames, p, side, NULL);
+                       }
+               }
+
+               /* Restart the merge with the cached renames */
+               renames->redo_after_renames = 2;
+               trace2_region_leave("merge", "regular renames", opt->repo);
+               goto cleanup;
+       }
         use_cached_pairs(opt, &renames->cached_pairs[1], &renames->pairs[1]);
         use_cached_pairs(opt, &renames->cached_pairs[2], &renames->pairs[2]);
         trace2_region_leave("merge", "regular renames", opt->repo);
@@ -2731,7 +3116,7 @@ cleanup:
                 side_pairs = &renames->pairs[s];
                 for (i = 0; i < side_pairs->nr; ++i) {
                         struct diff_filepair *p = side_pairs->queue[i];
-                       diff_free_filepair(p);
+                       pool_diff_free_filepair(&opt->priv->pool, p);
                 }
         }
  
@@ -2744,7 +3129,8 @@ simple_cleanup:
         if (combined.nr) {
                 int i;
                 for (i = 0; i < combined.nr; i++)
-                       diff_free_filepair(combined.queue[i]);
+                       pool_diff_free_filepair(&opt->priv->pool,
+                                               combined.queue[i]);
                 free(combined.queue);
         }
  
@@ -3218,7 +3604,8 @@ static void process_entry(struct merge_options *opt,
                  * the directory to remain here, so we need to move this
                  * path to some new location.
                  */
-               CALLOC_ARRAY(new_ci, 1);
+               new_ci = mem_pool_calloc(&opt->priv->pool, 1, sizeof(*new_ci));
+
                 /* We don't really want new_ci->merged.result copied, but it'll
                  * be overwritten below so it doesn't matter.  We also don't
                  * want any directory mode/oid values copied, but we'll zero
@@ -3310,7 +3697,8 @@ static void process_entry(struct merge_options *opt,
                         const char *a_path = NULL, *b_path = NULL;
                         int rename_a = 0, rename_b = 0;
  
-                       new_ci = xmalloc(sizeof(*new_ci));
+                       new_ci = mem_pool_alloc(&opt->priv->pool,
+                                               sizeof(*new_ci));
  
                         if (S_ISREG(a_mode))
                                 rename_a = 1;
@@ -3379,17 +3767,8 @@ static void process_entry(struct merge_options *opt,
                                 b_path = path;
                         strmap_put(&opt->priv->paths, b_path, new_ci);
  
-                       if (rename_a && rename_b) {
+                       if (rename_a && rename_b)
                                 strmap_remove(&opt->priv->paths, path, 0);
-                               /*
-                                * We removed path from opt->priv->paths.  path
-                                * will also eventually need to be freed, but
-                                * it may still be used by e.g.  ci->pathnames.
-                                * So, store it in another string-list for now.
-                                */
-                               string_list_append(&opt->priv->paths_to_free,
-                                                  path);
-                       }
  
                         /*
                          * Do special handling for b_path since process_entry()
@@ -3930,6 +4309,7 @@ static void merge_start(struct merge_options *opt, struct merge_result *result)
  {
         struct rename_info *renames;
         int i;
+       struct mem_pool *pool = NULL;
  
         /* Sanity checks on opt */
         trace2_region_enter("merge", "sanity checks", opt->repo);
@@ -3995,9 +4375,11 @@ static void merge_start(struct merge_options *opt, struct merge_result *result)
  
         /* Initialization of various renames fields */
         renames = &opt->priv->renames;
+       mem_pool_init(&opt->priv->pool, 0);
+       pool = &opt->priv->pool;
         for (i = MERGE_SIDE1; i <= MERGE_SIDE2; i++) {
                 strintmap_init_with_options(&renames->dirs_removed[i],
-                                           NOT_RELEVANT, NULL, 0);
+                                           NOT_RELEVANT, pool, 0);
                 strmap_init_with_options(&renames->dir_rename_count[i],
                                          NULL, 1);
                 strmap_init_with_options(&renames->dir_renames[i],
@@ -4011,7 +4393,7 @@ static void merge_start(struct merge_options *opt, struct merge_result *result)
                  */
                 strintmap_init_with_options(&renames->relevant_sources[i],
                                             -1 /* explicitly invalid */,
-                                           NULL, 0);
+                                           pool, 0);
                 strmap_init_with_options(&renames->cached_pairs[i],
                                          NULL, 1);
                 strset_init_with_options(&renames->cached_irrelevant[i],
@@ -4019,19 +4401,25 @@ static void merge_start(struct merge_options *opt, struct merge_result *result)
                 strset_init_with_options(&renames->cached_target_names[i],
                                          NULL, 0);
         }
+       for (i = MERGE_SIDE1; i <= MERGE_SIDE2; i++) {
+               strintmap_init_with_options(&renames->deferred[i].possible_trivial_merges,
+                                           0, pool, 0);
+               strset_init_with_options(&renames->deferred[i].target_dirs,
+                                        pool, 1);
+               renames->deferred[i].trivial_merges_okay = 1; /* 1 == maybe */
+       }
  
         /*
          * Although we initialize opt->priv->paths with strdup_strings=0,
          * that's just to avoid making yet another copy of an allocated
          * string.  Putting the entry into paths means we are taking
-        * ownership, so we will later free it.  paths_to_free is similar.
+        * ownership, so we will later free it.
          *
          * In contrast, conflicted just has a subset of keys from paths, so
          * we don't want to free those (it'd be a duplicate free).
          */
-       strmap_init_with_options(&opt->priv->paths, NULL, 0);
-       strmap_init_with_options(&opt->priv->conflicted, NULL, 0);
-       string_list_init_nodup(&opt->priv->paths_to_free);
+       strmap_init_with_options(&opt->priv->paths, pool, 0);
+       strmap_init_with_options(&opt->priv->conflicted, pool, 0);
  
         /*
          * keys & strbufs in output will sometimes need to outlive "paths",
@@ -4107,6 +4495,7 @@ static void merge_ort_nonrecursive_internal(struct merge_options *opt,
                                                opt->subtree_shift);
         }
  
+redo:
         trace2_region_enter("merge", "collect_merge_info", opt->repo);
         if (collect_merge_info(opt, merge_base, side1, side2) != 0) {
                 /*
@@ -4126,6 +4515,12 @@ static void merge_ort_nonrecursive_internal(struct merge_options *opt,
         result->clean = detect_and_process_renames(opt, merge_base,
                                                    side1, side2);
         trace2_region_leave("merge", "renames", opt->repo);
+       if (opt->priv->renames.redo_after_renames == 2) {
+               trace2_region_enter("merge", "reset_maps", opt->repo);
+               clear_or_reinit_internal_opts(opt->priv, 1);
+               trace2_region_leave("merge", "reset_maps", opt->repo);
+               goto redo;
+       }
  
         trace2_region_enter("merge", "process_entries", opt->repo);
         process_entries(opt, &working_tree_oid);