merge-ort.c

   1 /*
   2  * "Ostensibly Recursive's Twin" merge strategy, or "ort" for short.  Meant
   3  * as a drop-in replacement for the "recursive" merge strategy, allowing one
   4  * to replace
   5  *
   6  *   git merge [-s recursive]
   7  *
   8  * with
   9  *
  10  *   git merge -s ort
  11  *
  12  * Note: git's parser allows the space between '-s' and its argument to be
  13  * missing.  (Should I have backronymed "ham", "alsa", "kip", "nap", "alvo",
  14  * "cale", "peedy", or "ins" instead of "ort"?)
  15  */
  16
  17 #include "cache.h"
  18 #include "merge-ort.h"
  19
  20 #include "alloc.h"
  21 #include "blob.h"
  22 #include "cache-tree.h"
  23 #include "commit.h"
  24 #include "commit-reach.h"
  25 #include "diff.h"
  26 #include "diffcore.h"
  27 #include "dir.h"
  28 #include "object-store.h"
  29 #include "strmap.h"
  30 #include "tree.h"
  31 #include "unpack-trees.h"
  32 #include "xdiff-interface.h"
  33
/*
 * We have many arrays of size 3.  Whenever we have such an array, the
 * indices refer to one of the sides of the three-way merge.  This is so
 * pervasive that the constants 0, 1, and 2 are used in many places in the
 * code (especially in arithmetic operations to find the other side's index
 * or to compute a relevant mask), but sometimes these enum names are used
 * to aid code clarity.
 *
 * Because the raw numbers are used interchangeably with these names, the
 * values below must not be renumbered: side i is array index i and is
 * represented by bit (1 << i) in the masks passed around in this file.
 *
 * See also 'filemask' and 'dirmask' in struct conflict_info; the "ith side"
 * referred to there is one of these three sides.
 */
enum merge_side {
        MERGE_BASE = 0,  /* common ancestor; mask bit 1 */
        MERGE_SIDE1 = 1, /* head of side 1;  mask bit 2 */
        MERGE_SIDE2 = 2  /* head of side 2;  mask bit 4 */
};
  50
/*
 * State used for rename detection.  One instance is embedded in
 * struct merge_options_internal as its 'renames' member.
 */
struct rename_info {
        /*
         * All variables that are arrays of size 3 correspond to data tracked
         * for the sides in enum merge_side.  Index 0 is almost always unused
         * because we often only need to track information for MERGE_SIDE1 and
         * MERGE_SIDE2 (MERGE_BASE can't have rename information since renames
         * are determined relative to what changed since the MERGE_BASE).
         */

        /*
         * pairs: pairing of filenames from diffcore_rename()
         */
        struct diff_queue_struct pairs[3];

        /*
         * dirs_removed: directories removed on a given side of history.
         *
         * Filled in by collect_rename_info() with the full path of each
         * directory that exists in the merge base but not on that side.
         */
        struct strset dirs_removed[3];

        /*
         * dir_rename_count: tracking where parts of a directory were renamed to
         *
         * When files in a directory are renamed, they may not all go to the
         * same location.  Each strmap here tracks:
         *      old_dir => {new_dir => int}
         * That is, dir_rename_count[side] is a strmap to a strintmap.
         */
        struct strmap dir_rename_count[3];

        /*
         * dir_renames: computed directory renames
         *
         * This is a map of old_dir => new_dir and is derived in part from
         * dir_rename_count.
         */
        struct strmap dir_renames[3];

        /*
         * needed_limit: value needed for inexact rename detection to run
         *
         * If the current rename limit wasn't high enough for inexact
         * rename detection to run, this records the limit needed.  Otherwise,
         * this value remains 0.
         */
        int needed_limit;
};
  97
/*
 * Private working state of a merge; reached through opt->priv on a
 * struct merge_options.
 */
struct merge_options_internal {
        /*
         * paths: primary data structure in all of merge ort.
         *
         * The keys of paths:
         *   * are full relative paths from the toplevel of the repository
         *     (e.g. "drivers/firmware/raspberrypi.c").
         *   * store all relevant paths in the repo, both directories and
         *     files (e.g. drivers, drivers/firmware would also be included)
         *   * these keys serve to intern all the path strings, which allows
         *     us to do pointer comparison on directory names instead of
         *     strcmp; we just have to be careful to use the interned strings.
         *     (Technically paths_to_free may track some strings that were
         *      removed from paths.)
         *
         * The values of paths:
         *   * either a pointer to a merged_info, or a conflict_info struct
         *   * merged_info contains all relevant information for a
         *     non-conflicted entry.
         *   * conflict_info contains a merged_info, plus any additional
         *     information about a conflict such as the higher orders stages
         *     involved and the names of the paths those came from (handy
         *     once renames get involved).
         *   * a path may start "conflicted" (i.e. point to a conflict_info)
         *     and then a later step (e.g. three-way content merge) determines
         *     it can be cleanly merged, at which point it'll be marked clean
         *     and the algorithm will ignore any data outside the contained
         *     merged_info for that entry
         *   * If an entry remains conflicted, the merged_info portion of a
         *     conflict_info will later be filled with whatever version of
         *     the file should be placed in the working directory (e.g. an
         *     as-merged-as-possible variation that contains conflict markers).
         */
        struct strmap paths;

        /*
         * conflicted: a subset of keys->values from "paths"
         *
         * conflicted is basically an optimization between process_entries()
         * and record_conflicted_index_entries(); the latter could loop over
         * ALL the entries in paths AGAIN and look for the ones that are
         * still conflicted, but since process_entries() has to loop over
         * all of them, it saves the ones it couldn't resolve in this strmap
         * so that record_conflicted_index_entries() can iterate just the
         * relevant entries.
         *
         * Neither keys nor values are owned by this map; they belong to
         * "paths" and must not be freed a second time from here.
         */
        struct strmap conflicted;

        /*
         * paths_to_free: additional list of strings to free
         *
         * If keys are removed from "paths", they are added to paths_to_free
         * to ensure they are later freed.  We avoid free'ing immediately since
         * other places (e.g. conflict_info.pathnames[]) may still be
         * referencing these paths.
         */
        struct string_list paths_to_free;

        /*
         * output: special messages and conflict notices for various paths
         *
         * This is a map of pathnames (a subset of the keys in "paths" above)
         * to strbufs.  It gathers various warning/conflict/notice messages
         * for later processing.  Each strbuf is heap-allocated by path_msg().
         */
        struct strmap output;

        /*
         * renames: various data relating to rename detection
         */
        struct rename_info renames;

        /*
         * current_dir_name: temporary var used in collect_merge_info_callback()
         *
         * Used to set merged_info.directory_name; see documentation for that
         * variable and the requirements placed on that field.
         */
        const char *current_dir_name;

        /* call_depth: recursion level counter for merging merge bases */
        int call_depth;
};
 181
/*
 * One version of a path: the object id of its content together with its
 * mode.  setup_path_info() fills these from struct name_entry values.
 */
struct version_info {
        struct object_id oid;
        unsigned short mode;
};
 186
/*
 * Everything needed to describe a path whose merge result is known.  Also
 * embedded as the 'merged' member of struct conflict_info for paths that are
 * not (yet) resolved.
 */
struct merged_info {
        /* if is_null, ignore result.  otherwise result has oid & mode */
        struct version_info result;
        unsigned is_null:1;

        /*
         * clean: whether the path in question is cleanly merged.
         *
         * see conflict_info.merged for more details.
         */
        unsigned clean:1;

        /*
         * basename_offset: offset of basename of path.
         *
         * perf optimization to avoid recomputing offset of final '/'
         * character in pathname (0 if no '/' in pathname).
         */
        size_t basename_offset;

        /*
         * directory_name: containing directory name.
         *
         * Note that we assume directory_name is constructed such that
         *    strcmp(dir1_name, dir2_name) == 0 iff dir1_name == dir2_name,
         * i.e. string equality is equivalent to pointer equality.  For this
         * to hold, we have to be careful setting directory_name.
         */
        const char *directory_name;
};
 217
/*
 * Extended record for a path that is not (yet) known to merge cleanly.
 */
struct conflict_info {
        /*
         * merged: the version of the path that will be written to working tree
         *
         * This must remain the first member: the INITIALIZE_CI and
         * ASSIGN_AND_VERIFY_CI macros cast a struct merged_info pointer
         * directly to a struct conflict_info pointer.
         *
         * WARNING: It is critical to check merged.clean and ensure it is 0
         * before reading any conflict_info fields outside of merged.
         * Allocated merged_info structs will always have clean set to 1.
         * Allocated conflict_info structs will have merged.clean set to 0
         * initially.  The merged.clean field is how we know if it is safe
         * to access other parts of conflict_info besides merged; if a
         * conflict_info's merged.clean is changed to 1, the rest of the
         * algorithm is not allowed to look at anything outside of the
         * merged member anymore.
         */
        struct merged_info merged;

        /* oids & modes from each of the three trees for this path */
        struct version_info stages[3];

        /* pathnames for each stage; may differ due to rename detection */
        const char *pathnames[3];

        /* Whether this path is/was involved in a directory/file conflict */
        unsigned df_conflict:1;

        /*
         * Whether this path is/was involved in a non-content conflict other
         * than a directory/file conflict (e.g. rename/rename, rename/delete,
         * file location based on possible directory rename).
         */
        unsigned path_conflict:1;

        /*
         * For filemask and dirmask, the ith bit corresponds to whether the
         * ith entry is a file (filemask) or a directory (dirmask).  Thus,
         * filemask & dirmask is always zero, and filemask | dirmask is at
         * most 7 but can be less when a path does not appear as either a
         * file or a directory on at least one side of history.
         *
         * Note that these masks are related to enum merge_side, as the ith
         * entry corresponds to side i.
         *
         * These values come from a traverse_trees() call; more info may be
         * found looking at tree-walk.h's struct traverse_info,
         * particularly the documentation above the "fn" member (note that
         * filemask = mask & ~dirmask from that documentation).
         */
        unsigned filemask:3;
        unsigned dirmask:3;

        /*
         * Optimization to track which stages match, to avoid the need to
         * recompute it in multiple steps. Either 0 or at least 2 bits are
         * set; if at least 2 bits are set, their corresponding stages match.
         */
        unsigned match_mask:3;
};
 275
 276 /*** Function Grouping: various utility functions ***/
 277
 278 /*
 279  * For the next three macros, see warning for conflict_info.merged.
 280  *
 281  * In each of the below, mi is a struct merged_info*, and ci was defined
 282  * as a struct conflict_info* (but we need to verify ci isn't actually
 283  * pointed at a struct merged_info*).
 284  *
 285  * INITIALIZE_CI: Assign ci to mi but only if it's safe; set to NULL otherwise.
 286  * VERIFY_CI: Ensure that something we assigned to a conflict_info* is one.
 287  * ASSIGN_AND_VERIFY_CI: Similar to VERIFY_CI but do assignment first.
 288  */
 289 #define INITIALIZE_CI(ci, mi) do {                                           \
 290         (ci) = (!(mi) || (mi)->clean) ? NULL : (struct conflict_info *)(mi); \
 291 } while (0)
 292 #define VERIFY_CI(ci) assert(ci && !ci->merged.clean);
 293 #define ASSIGN_AND_VERIFY_CI(ci, mi) do {    \
 294         (ci) = (struct conflict_info *)(mi);  \
 295         assert((ci) && !(mi)->clean);        \
 296 } while (0)
 297
 298 static void free_strmap_strings(struct strmap *map)
 299 {
 300         struct hashmap_iter iter;
 301         struct strmap_entry *entry;
 302
 303         strmap_for_each_entry(map, &iter, entry) {
 304                 free((char*)entry->key);
 305         }
 306 }
 307
 308 static void clear_or_reinit_internal_opts(struct merge_options_internal *opti,
 309                                           int reinitialize)
 310 {
 311         struct rename_info *renames = &opti->renames;
 312         int i;
 313         void (*strmap_func)(struct strmap *, int) =
 314                 reinitialize ? strmap_partial_clear : strmap_clear;
 315         void (*strset_func)(struct strset *) =
 316                 reinitialize ? strset_partial_clear : strset_clear;
 317
 318         /*
 319          * We marked opti->paths with strdup_strings = 0, so that we
 320          * wouldn't have to make another copy of the fullpath created by
 321          * make_traverse_path from setup_path_info().  But, now that we've
 322          * used it and have no other references to these strings, it is time
 323          * to deallocate them.
 324          */
 325         free_strmap_strings(&opti->paths);
 326         strmap_func(&opti->paths, 1);
 327
 328         /*
 329          * All keys and values in opti->conflicted are a subset of those in
 330          * opti->paths.  We don't want to deallocate anything twice, so we
 331          * don't free the keys and we pass 0 for free_values.
 332          */
 333         strmap_func(&opti->conflicted, 0);
 334
 335         /*
 336          * opti->paths_to_free is similar to opti->paths; we created it with
 337          * strdup_strings = 0 to avoid making _another_ copy of the fullpath
 338          * but now that we've used it and have no other references to these
 339          * strings, it is time to deallocate them.  We do so by temporarily
 340          * setting strdup_strings to 1.
 341          */
 342         opti->paths_to_free.strdup_strings = 1;
 343         string_list_clear(&opti->paths_to_free, 0);
 344         opti->paths_to_free.strdup_strings = 0;
 345
 346         /* Free memory used by various renames maps */
 347         for (i = MERGE_SIDE1; i <= MERGE_SIDE2; ++i) {
 348                 struct hashmap_iter iter;
 349                 struct strmap_entry *entry;
 350
 351                 strset_func(&renames->dirs_removed[i]);
 352
 353                 strmap_for_each_entry(&renames->dir_rename_count[i],
 354                                       &iter, entry) {
 355                         struct strintmap *counts = entry->value;
 356                         strintmap_clear(counts);
 357                 }
 358                 strmap_func(&renames->dir_rename_count[i], 1);
 359
 360                 strmap_func(&renames->dir_renames[i], 0);
 361         }
 362
 363         if (!reinitialize) {
 364                 struct hashmap_iter iter;
 365                 struct strmap_entry *e;
 366
 367                 /* Release and free each strbuf found in output */
 368                 strmap_for_each_entry(&opti->output, &iter, e) {
 369                         struct strbuf *sb = e->value;
 370                         strbuf_release(sb);
 371                         /*
 372                          * While strictly speaking we don't need to free(sb)
 373                          * here because we could pass free_values=1 when
 374                          * calling strmap_clear() on opti->output, that would
 375                          * require strmap_clear to do another
 376                          * strmap_for_each_entry() loop, so we just free it
 377                          * while we're iterating anyway.
 378                          */
 379                         free(sb);
 380                 }
 381                 strmap_clear(&opti->output, 0);
 382         }
 383 }
 384
 385 static int err(struct merge_options *opt, const char *err, ...)
 386 {
 387         va_list params;
 388         struct strbuf sb = STRBUF_INIT;
 389
 390         strbuf_addstr(&sb, "error: ");
 391         va_start(params, err);
 392         strbuf_vaddf(&sb, err, params);
 393         va_end(params);
 394
 395         error("%s", sb.buf);
 396         strbuf_release(&sb);
 397
 398         return -1;
 399 }
 400
 401 __attribute__((format (printf, 4, 5)))
 402 static void path_msg(struct merge_options *opt,
 403                      const char *path,
 404                      int omittable_hint, /* skippable under --remerge-diff */
 405                      const char *fmt, ...)
 406 {
 407         va_list ap;
 408         struct strbuf *sb = strmap_get(&opt->priv->output, path);
 409         if (!sb) {
 410                 sb = xmalloc(sizeof(*sb));
 411                 strbuf_init(sb, 0);
 412                 strmap_put(&opt->priv->output, path, sb);
 413         }
 414
 415         va_start(ap, fmt);
 416         strbuf_vaddf(sb, fmt, ap);
 417         va_end(ap);
 418
 419         strbuf_addch(sb, '\n');
 420 }
 421
 422 /*** Function Grouping: functions related to collect_merge_info() ***/
 423
 424 static void setup_path_info(struct merge_options *opt,
 425                             struct string_list_item *result,
 426                             const char *current_dir_name,
 427                             int current_dir_name_len,
 428                             char *fullpath, /* we'll take over ownership */
 429                             struct name_entry *names,
 430                             struct name_entry *merged_version,
 431                             unsigned is_null,     /* boolean */
 432                             unsigned df_conflict, /* boolean */
 433                             unsigned filemask,
 434                             unsigned dirmask,
 435                             int resolved          /* boolean */)
 436 {
 437         /* result->util is void*, so mi is a convenience typed variable */
 438         struct merged_info *mi;
 439
 440         assert(!is_null || resolved);
 441         assert(!df_conflict || !resolved); /* df_conflict implies !resolved */
 442         assert(resolved == (merged_version != NULL));
 443
 444         mi = xcalloc(1, resolved ? sizeof(struct merged_info) :
 445                                    sizeof(struct conflict_info));
 446         mi->directory_name = current_dir_name;
 447         mi->basename_offset = current_dir_name_len;
 448         mi->clean = !!resolved;
 449         if (resolved) {
 450                 mi->result.mode = merged_version->mode;
 451                 oidcpy(&mi->result.oid, &merged_version->oid);
 452                 mi->is_null = !!is_null;
 453         } else {
 454                 int i;
 455                 struct conflict_info *ci;
 456
 457                 ASSIGN_AND_VERIFY_CI(ci, mi);
 458                 for (i = MERGE_BASE; i <= MERGE_SIDE2; i++) {
 459                         ci->pathnames[i] = fullpath;
 460                         ci->stages[i].mode = names[i].mode;
 461                         oidcpy(&ci->stages[i].oid, &names[i].oid);
 462                 }
 463                 ci->filemask = filemask;
 464                 ci->dirmask = dirmask;
 465                 ci->df_conflict = !!df_conflict;
 466                 if (dirmask)
 467                         /*
 468                          * Assume is_null for now, but if we have entries
 469                          * under the directory then when it is complete in
 470                          * write_completed_directory() it'll update this.
 471                          * Also, for D/F conflicts, we have to handle the
 472                          * directory first, then clear this bit and process
 473                          * the file to see how it is handled -- that occurs
 474                          * near the top of process_entry().
 475                          */
 476                         mi->is_null = 1;
 477         }
 478         strmap_put(&opt->priv->paths, fullpath, mi);
 479         result->string = fullpath;
 480         result->util = mi;
 481 }
 482
 483 static void collect_rename_info(struct merge_options *opt,
 484                                 struct name_entry *names,
 485                                 const char *dirname,
 486                                 const char *fullname,
 487                                 unsigned filemask,
 488                                 unsigned dirmask,
 489                                 unsigned match_mask)
 490 {
 491         struct rename_info *renames = &opt->priv->renames;
 492
 493         /* Update dirs_removed, as needed */
 494         if (dirmask == 1 || dirmask == 3 || dirmask == 5) {
 495                 /* absent_mask = 0x07 - dirmask; sides = absent_mask/2 */
 496                 unsigned sides = (0x07 - dirmask)/2;
 497                 if (sides & 1)
 498                         strset_add(&renames->dirs_removed[1], fullname);
 499                 if (sides & 2)
 500                         strset_add(&renames->dirs_removed[2], fullname);
 501         }
 502 }
 503
 504 static int collect_merge_info_callback(int n,
 505                                        unsigned long mask,
 506                                        unsigned long dirmask,
 507                                        struct name_entry *names,
 508                                        struct traverse_info *info)
 509 {
 510         /*
 511          * n is 3.  Always.
 512          * common ancestor (mbase) has mask 1, and stored in index 0 of names
 513          * head of side 1  (side1) has mask 2, and stored in index 1 of names
 514          * head of side 2  (side2) has mask 4, and stored in index 2 of names
 515          */
 516         struct merge_options *opt = info->data;
 517         struct merge_options_internal *opti = opt->priv;
 518         struct string_list_item pi;  /* Path Info */
 519         struct conflict_info *ci; /* typed alias to pi.util (which is void*) */
 520         struct name_entry *p;
 521         size_t len;
 522         char *fullpath;
 523         const char *dirname = opti->current_dir_name;
 524         unsigned filemask = mask & ~dirmask;
 525         unsigned match_mask = 0; /* will be updated below */
 526         unsigned mbase_null = !(mask & 1);
 527         unsigned side1_null = !(mask & 2);
 528         unsigned side2_null = !(mask & 4);
 529         unsigned side1_matches_mbase = (!side1_null && !mbase_null &&
 530                                         names[0].mode == names[1].mode &&
 531                                         oideq(&names[0].oid, &names[1].oid));
 532         unsigned side2_matches_mbase = (!side2_null && !mbase_null &&
 533                                         names[0].mode == names[2].mode &&
 534                                         oideq(&names[0].oid, &names[2].oid));
 535         unsigned sides_match = (!side1_null && !side2_null &&
 536                                 names[1].mode == names[2].mode &&
 537                                 oideq(&names[1].oid, &names[2].oid));
 538
 539         /*
 540          * Note: When a path is a file on one side of history and a directory
 541          * in another, we have a directory/file conflict.  In such cases, if
 542          * the conflict doesn't resolve from renames and deletions, then we
 543          * always leave directories where they are and move files out of the
 544          * way.  Thus, while struct conflict_info has a df_conflict field to
 545          * track such conflicts, we ignore that field for any directories at
 546          * a path and only pay attention to it for files at the given path.
 547          * The fact that we leave directories were they are also means that
 548          * we do not need to worry about getting additional df_conflict
 549          * information propagated from parent directories down to children
 550          * (unlike, say traverse_trees_recursive() in unpack-trees.c, which
 551          * sets a newinfo.df_conflicts field specifically to propagate it).
 552          */
 553         unsigned df_conflict = (filemask != 0) && (dirmask != 0);
 554
 555         /* n = 3 is a fundamental assumption. */
 556         if (n != 3)
 557                 BUG("Called collect_merge_info_callback wrong");
 558
 559         /*
 560          * A bunch of sanity checks verifying that traverse_trees() calls
 561          * us the way I expect.  Could just remove these at some point,
 562          * though maybe they are helpful to future code readers.
 563          */
 564         assert(mbase_null == is_null_oid(&names[0].oid));
 565         assert(side1_null == is_null_oid(&names[1].oid));
 566         assert(side2_null == is_null_oid(&names[2].oid));
 567         assert(!mbase_null || !side1_null || !side2_null);
 568         assert(mask > 0 && mask < 8);
 569
 570         /* Determine match_mask */
 571         if (side1_matches_mbase)
 572                 match_mask = (side2_matches_mbase ? 7 : 3);
 573         else if (side2_matches_mbase)
 574                 match_mask = 5;
 575         else if (sides_match)
 576                 match_mask = 6;
 577
 578         /*
 579          * Get the name of the relevant filepath, which we'll pass to
 580          * setup_path_info() for tracking.
 581          */
 582         p = names;
 583         while (!p->mode)
 584                 p++;
 585         len = traverse_path_len(info, p->pathlen);
 586
 587         /* +1 in both of the following lines to include the NUL byte */
 588         fullpath = xmalloc(len + 1);
 589         make_traverse_path(fullpath, len + 1, info, p->path, p->pathlen);
 590
 591         /*
 592          * If mbase, side1, and side2 all match, we can resolve early.  Even
 593          * if these are trees, there will be no renames or anything
 594          * underneath.
 595          */
 596         if (side1_matches_mbase && side2_matches_mbase) {
 597                 /* mbase, side1, & side2 all match; use mbase as resolution */
 598                 setup_path_info(opt, &pi, dirname, info->pathlen, fullpath,
 599                                 names, names+0, mbase_null, 0,
 600                                 filemask, dirmask, 1);
 601                 return mask;
 602         }
 603
 604         /*
 605          * Gather additional information used in rename detection.
 606          */
 607         collect_rename_info(opt, names, dirname, fullpath,
 608                             filemask, dirmask, match_mask);
 609
 610         /*
 611          * Record information about the path so we can resolve later in
 612          * process_entries.
 613          */
 614         setup_path_info(opt, &pi, dirname, info->pathlen, fullpath,
 615                         names, NULL, 0, df_conflict, filemask, dirmask, 0);
 616
 617         ci = pi.util;
 618         VERIFY_CI(ci);
 619         ci->match_mask = match_mask;
 620
 621         /* If dirmask, recurse into subdirectories */
 622         if (dirmask) {
 623                 struct traverse_info newinfo;
 624                 struct tree_desc t[3];
 625                 void *buf[3] = {NULL, NULL, NULL};
 626                 const char *original_dir_name;
 627                 int i, ret;
 628
 629                 ci->match_mask &= filemask;
 630                 newinfo = *info;
 631                 newinfo.prev = info;
 632                 newinfo.name = p->path;
 633                 newinfo.namelen = p->pathlen;
 634                 newinfo.pathlen = st_add3(newinfo.pathlen, p->pathlen, 1);
 635                 /*
 636                  * If this directory we are about to recurse into cared about
 637                  * its parent directory (the current directory) having a D/F
 638                  * conflict, then we'd propagate the masks in this way:
 639                  *    newinfo.df_conflicts |= (mask & ~dirmask);
 640                  * But we don't worry about propagating D/F conflicts.  (See
 641                  * comment near setting of local df_conflict variable near
 642                  * the beginning of this function).
 643                  */
 644
 645                 for (i = MERGE_BASE; i <= MERGE_SIDE2; i++) {
 646                         if (i == 1 && side1_matches_mbase)
 647                                 t[1] = t[0];
 648                         else if (i == 2 && side2_matches_mbase)
 649                                 t[2] = t[0];
 650                         else if (i == 2 && sides_match)
 651                                 t[2] = t[1];
 652                         else {
 653                                 const struct object_id *oid = NULL;
 654                                 if (dirmask & 1)
 655                                         oid = &names[i].oid;
 656                                 buf[i] = fill_tree_descriptor(opt->repo,
 657                                                               t + i, oid);
 658                         }
 659                         dirmask >>= 1;
 660                 }
 661
 662                 original_dir_name = opti->current_dir_name;
 663                 opti->current_dir_name = pi.string;
 664                 ret = traverse_trees(NULL, 3, t, &newinfo);
 665                 opti->current_dir_name = original_dir_name;
 666
 667                 for (i = MERGE_BASE; i <= MERGE_SIDE2; i++)
 668                         free(buf[i]);
 669
 670                 if (ret < 0)
 671                         return -1;
 672         }
 673
 674         return mask;
 675 }
 676
 677 static int collect_merge_info(struct merge_options *opt,
 678                               struct tree *merge_base,
 679                               struct tree *side1,
 680                               struct tree *side2)
 681 {
 682         int ret;
 683         struct tree_desc t[3];
 684         struct traverse_info info;
 685         const char *toplevel_dir_placeholder = "";
 686
 687         opt->priv->current_dir_name = toplevel_dir_placeholder;
 688         setup_traverse_info(&info, toplevel_dir_placeholder);
 689         info.fn = collect_merge_info_callback;
 690         info.data = opt;
 691         info.show_all_errors = 1;
 692
 693         parse_tree(merge_base);
 694         parse_tree(side1);
 695         parse_tree(side2);
 696         init_tree_desc(t + 0, merge_base->buffer, merge_base->size);
 697         init_tree_desc(t + 1, side1->buffer, side1->size);
 698         init_tree_desc(t + 2, side2->buffer, side2->size);
 699
 700         ret = traverse_trees(NULL, 3, t, &info);
 701
 702         return ret;
 703 }
 704
 705 /*** Function Grouping: functions related to threeway content merges ***/
 706
/*
 * Placeholder for the three-way content merge of a single path.
 *
 * Not implemented yet: the body unconditionally calls die(), so no value is
 * ever returned and none of the parameters are examined.
 * NOTE(review): presumably o is the merge-base version, a and b are the two
 * sides, and the outcome is to be stored in *result -- confirm once the
 * real implementation lands.
 */
static int handle_content_merge(struct merge_options *opt,
                                const char *path,
                                const struct version_info *o,
                                const struct version_info *a,
                                const struct version_info *b,
                                const char *pathnames[3],
                                const int extra_marker_size,
                                struct version_info *result)
{
        die("Not yet implemented");
}
 718
 719 /*** Function Grouping: functions related to detect_and_process_renames(), ***
 720  *** which are split into directory and regular rename detection sections. ***/
 721
 722 /*** Function Grouping: functions related to directory rename detection ***/
 723
 724 static void get_provisional_directory_renames(struct merge_options *opt,
 725                                               unsigned side,
 726                                               int *clean)
 727 {
 728         die("Not yet implemented!");
 729 }
 730
 731 static void handle_directory_level_conflicts(struct merge_options *opt)
 732 {
 733         die("Not yet implemented!");
 734 }
 735
 736 /*** Function Grouping: functions related to regular rename detection ***/
 737
     /*
      * Fold every rename in 'renames' into the conflict_info records stored in
      * opt->priv->paths.
      *
      * detect_and_process_renames() sorts the queue by source path before
      * calling this function, so when both sides renamed the same source the
      * two pairs are adjacent; they are then handled together as either
      * rename/rename(1to1) or rename/rename(1to2).  Every other pair is
      * classified using pair->score, which collect_renames() overwrote with
      * the side (1 or 2) that performed the rename.
      *
      * Returns clean_merge, which starts at 1 and is only ever reassigned from
      * the result of handle_content_merge() in the rename/rename(1to2) case.
      * In the collision case the result of handle_content_merge() only decides
      * whether a conflict message is emitted; it does not affect the return
      * value.
      */
 738 static int process_renames(struct merge_options *opt,
 739                            struct diff_queue_struct *renames)
 740 {
 741         int clean_merge = 1, i;
 742
 743         for (i = 0; i < renames->nr; ++i) {
 744                 const char *oldpath = NULL, *newpath;
 745                 struct diff_filepair *pair = renames->queue[i];
 746                 struct conflict_info *oldinfo = NULL, *newinfo = NULL;
 747                 struct strmap_entry *old_ent, *new_ent;
 748                 unsigned int old_sidemask;
 749                 int target_index, other_source_index;
 750                 int source_deleted, collision, type_changed;
 751                 const char *rename_branch = NULL, *delete_branch = NULL;
 752
                     /*
                      * Fetch the map entries (not just the values) so that
                      * oldpath/newpath point at the key strings held by
                      * opt->priv->paths rather than the filepair's copies.
                      */
 753                 old_ent = strmap_get_entry(&opt->priv->paths, pair->one->path);
 754                 oldpath = old_ent->key;
 755                 oldinfo = old_ent->value;
 756
 757                 new_ent = strmap_get_entry(&opt->priv->paths, pair->two->path);
 758                 newpath = new_ent->key;
 759                 newinfo = new_ent->value;
 760
 761                 /*
 762                  * diff_filepairs have copies of pathnames, thus we have to
 763                  * use standard 'strcmp()' (negated) instead of '=='.
 764                  */
 765                 if (i + 1 < renames->nr &&
 766                     !strcmp(oldpath, renames->queue[i+1]->one->path)) {
 767                         /* Handle rename/rename(1to2) or rename/rename(1to1) */
 768                         const char *pathnames[3];
 769                         struct version_info merged;
 770                         struct conflict_info *base, *side1, *side2;
 771                         unsigned was_binary_blob = 0;
 772
 773                         pathnames[0] = oldpath;
 774                         pathnames[1] = newpath;
 775                         pathnames[2] = renames->queue[i+1]->two->path;
 776
 777                         base = strmap_get(&opt->priv->paths, pathnames[0]);
 778                         side1 = strmap_get(&opt->priv->paths, pathnames[1]);
 779                         side2 = strmap_get(&opt->priv->paths, pathnames[2]);
 780
 781                         VERIFY_CI(base);
 782                         VERIFY_CI(side1);
 783                         VERIFY_CI(side2);
 784
 785                         if (!strcmp(pathnames[1], pathnames[2])) {
 786                                 /* Both sides renamed the same way */
 787                                 assert(side1 == side2);
                                     /*
                                      * 'merged' only supplies the size here;
                                      * nothing has been merged at this point.
                                      */
 788                                 memcpy(&side1->stages[0], &base->stages[0],
 789                                        sizeof(merged));
 790                                 side1->filemask |= (1 << MERGE_BASE);
 791                                 /* Mark base as resolved by removal */
 792                                 base->merged.is_null = 1;
 793                                 base->merged.clean = 1;
 794
 795                                 /* We handled both renames, i.e. i+1 handled */
 796                                 i++;
 797                                 /* Move to next rename */
 798                                 continue;
 799                         }
 800
 801                         /* This is a rename/rename(1to2) */
 802                         clean_merge = handle_content_merge(opt,
 803                                                            pair->one->path,
 804                                                            &base->stages[0],
 805                                                            &side1->stages[1],
 806                                                            &side2->stages[2],
 807                                                            pathnames,
 808                                                            1 + 2 * opt->priv->call_depth,
 809                                                            &merged);
 810                         if (!clean_merge &&
 811                             merged.mode == side1->stages[1].mode &&
 812                             oideq(&merged.oid, &side1->stages[1].oid))
 813                                 was_binary_blob = 1;
 814                         memcpy(&side1->stages[1], &merged, sizeof(merged));
 815                         if (was_binary_blob) {
 816                                 /*
 817                                  * Getting here means we were attempting to
 818                                  * merge a binary blob.
 819                                  *
 820                                  * Since we can't merge binaries,
 821                                  * handle_content_merge() just takes one
 822                                  * side.  But we don't want to copy the
 823                                  * contents of one side to both paths.  We
 824                                  * used the contents of side1 above for
 825                                  * side1->stages, let's use the contents of
 826                                  * side2 for side2->stages below.
 827                                  */
 828                                 oidcpy(&merged.oid, &side2->stages[2].oid);
 829                                 merged.mode = side2->stages[2].mode;
 830                         }
 831                         memcpy(&side2->stages[2], &merged, sizeof(merged));
 832
 833                         side1->path_conflict = 1;
 834                         side2->path_conflict = 1;
 835                         /*
 836                          * TODO: For renames we normally remove the path at the
 837                          * old name.  It would thus seem consistent to do the
 838                          * same for rename/rename(1to2) cases, but we haven't
 839                          * done so traditionally and a number of the regression
 840                          * tests now encode an expectation that the file is
 841                          * left there at stage 1.  If we ever decide to change
 842                          * this, add the following two lines here:
 843                          *    base->merged.is_null = 1;
 844                          *    base->merged.clean = 1;
 845                          * and remove the setting of base->path_conflict to 1.
 846                          */
 847                         base->path_conflict = 1;
 848                         path_msg(opt, oldpath, 0,
 849                                  _("CONFLICT (rename/rename): %s renamed to "
 850                                    "%s in %s and to %s in %s."),
 851                                  pathnames[0],
 852                                  pathnames[1], opt->branch1,
 853                                  pathnames[2], opt->branch2);
 854
 855                         i++; /* We handled both renames, i.e. i+1 handled */
 856                         continue;
 857                 }
 858
 859                 VERIFY_CI(oldinfo);
 860                 VERIFY_CI(newinfo);
 861                 target_index = pair->score; /* from collect_renames() */
 862                 assert(target_index == 1 || target_index == 2);
                     /* the sides are 1 and 2, so this is the non-renaming side */
 863                 other_source_index = 3 - target_index;
 864                 old_sidemask = (1 << other_source_index); /* 2 or 4 */
                     /* only the MERGE_BASE bit is set in oldpath's filemask */
 865                 source_deleted = (oldinfo->filemask == 1);
                     /* newpath's filemask has the non-renaming side's bit set */
 866                 collision = ((newinfo->filemask & old_sidemask) != 0);
 867                 type_changed = !source_deleted &&
 868                         (S_ISREG(oldinfo->stages[other_source_index].mode) !=
 869                          S_ISREG(newinfo->stages[target_index].mode));
 870                 if (type_changed && collision) {
 871                         /*
 872                          * special handling so later blocks can handle this...
 873                          *
 874                          * if type_changed && collision are both true, then this
 875                          * was really a double rename, but one side wasn't
 876                          * detected due to lack of break detection.  I.e.
 877                          * something like
 878                          *    orig: has normal file 'foo'
 879                          *    side1: renames 'foo' to 'bar', adds 'foo' symlink
 880                          *    side2: renames 'foo' to 'bar'
 881                          * In this case, the foo->bar rename on side1 won't be
 882                          * detected because the new symlink named 'foo' is
 883                          * there and we don't do break detection.  But we detect
 884                          * this here because we don't want to merge the content
 885                          * of the foo symlink with the foo->bar file, so we
 886                          * have some logic to handle this special case.  The
 887                          * easiest way to do that is make 'bar' on side1 not
 888                          * be considered a colliding file but the other part
 889                          * of a normal rename.  If the file is very different,
 890                          * well we're going to get content merge conflicts
 891                          * anyway so it doesn't hurt.  And if the colliding
 892                          * file also has a different type, that'll be handled
 893                          * by the content merge logic in process_entry() too.
 894                          *
 895                          * See also t6430, 'rename vs. rename/symlink'
 896                          */
 897                         collision = 0;
 898                 }
 899                 if (source_deleted) {
 900                         if (target_index == 1) {
 901                                 rename_branch = opt->branch1;
 902                                 delete_branch = opt->branch2;
 903                         } else {
 904                                 rename_branch = opt->branch2;
 905                                 delete_branch = opt->branch1;
 906                         }
 907                 }
 908
 909                 assert(source_deleted || oldinfo->filemask & old_sidemask);
 910
 911                 /* Need to check for special types of rename conflicts... */
 912                 if (collision && !source_deleted) {
 913                         /* collision: rename/add or rename/rename(2to1) */
 914                         const char *pathnames[3];
 915                         struct version_info merged;
 916
 917                         struct conflict_info *base, *side1, *side2;
 918                         unsigned clean;
 919
 920                         pathnames[0] = oldpath;
 921                         pathnames[other_source_index] = oldpath;
 922                         pathnames[target_index] = newpath;
 923
 924                         base = strmap_get(&opt->priv->paths, pathnames[0]);
 925                         side1 = strmap_get(&opt->priv->paths, pathnames[1]);
 926                         side2 = strmap_get(&opt->priv->paths, pathnames[2]);
 927
 928                         VERIFY_CI(base);
 929                         VERIFY_CI(side1);
 930                         VERIFY_CI(side2);
 931
 932                         clean = handle_content_merge(opt, pair->one->path,
 933                                                      &base->stages[0],
 934                                                      &side1->stages[1],
 935                                                      &side2->stages[2],
 936                                                      pathnames,
 937                                                      1 + 2 * opt->priv->call_depth,
 938                                                      &merged);
 939
 940                         memcpy(&newinfo->stages[target_index], &merged,
 941                                sizeof(merged));
 942                         if (!clean) {
 943                                 path_msg(opt, newpath, 0,
 944                                          _("CONFLICT (rename involved in "
 945                                            "collision): rename of %s -> %s has "
 946                                            "content conflicts AND collides "
 947                                            "with another path; this may result "
 948                                            "in nested conflict markers."),
 949                                          oldpath, newpath);
 950                         }
 951                 } else if (collision && source_deleted) {
 952                         /*
 953                          * rename/add/delete or rename/rename(2to1)/delete:
 954                          * since oldpath was deleted on the side that didn't
 955                          * do the rename, there's not much of a content merge
 956                          * we can do for the rename.  oldinfo->merged.is_null
 957                          * was already set, so we just leave things as-is so
 958                          * they look like an add/add conflict.
 959                          */
 960
 961                         newinfo->path_conflict = 1;
 962                         path_msg(opt, newpath, 0,
 963                                  _("CONFLICT (rename/delete): %s renamed "
 964                                    "to %s in %s, but deleted in %s."),
 965                                  oldpath, newpath, rename_branch, delete_branch);
 966                 } else {
 967                         /*
 968                          * a few different cases...start by copying the
 969                          * existing stage(s) from oldinfo over the newinfo
 970                          * and update the pathname(s).
 971                          */
 972                         memcpy(&newinfo->stages[0], &oldinfo->stages[0],
 973                                sizeof(newinfo->stages[0]));
 974                         newinfo->filemask |= (1 << MERGE_BASE);
 975                         newinfo->pathnames[0] = oldpath;
 976                         if (type_changed) {
 977                                 /* rename vs. typechange */
 978                                 /* Mark the original as resolved by removal */
 979                                 memcpy(&oldinfo->stages[0].oid, &null_oid,
 980                                        sizeof(oldinfo->stages[0].oid));
 981                                 oldinfo->stages[0].mode = 0;
                                     /* 0x06 keeps the side 1 and side 2 bits, dropping MERGE_BASE */
 982                                 oldinfo->filemask &= 0x06;
 983                         } else if (source_deleted) {
 984                                 /* rename/delete */
 985                                 newinfo->path_conflict = 1;
 986                                 path_msg(opt, newpath, 0,
 987                                          _("CONFLICT (rename/delete): %s renamed"
 988                                            " to %s in %s, but deleted in %s."),
 989                                          oldpath, newpath,
 990                                          rename_branch, delete_branch);
 991                         } else {
 992                                 /* normal rename */
 993                                 memcpy(&newinfo->stages[other_source_index],
 994                                        &oldinfo->stages[other_source_index],
 995                                        sizeof(newinfo->stages[0]));
 996                                 newinfo->filemask |= (1 << other_source_index);
 997                                 newinfo->pathnames[other_source_index] = oldpath;
 998                         }
 999                 }
1000
                     /*
                      * With a typechange, oldinfo keeps its side bits in
                      * filemask (see the 0x06 mask above), so only sources
                      * without a typechange are marked as resolved here.
                      */
1001                 if (!type_changed) {
1002                         /* Mark the original as resolved by removal */
1003                         oldinfo->merged.is_null = 1;
1004                         oldinfo->merged.clean = 1;
1005                 }
1006
1007         }
1008
1009         return clean_merge;
1010 }
1011
1012 static int compare_pairs(const void *a_, const void *b_)
1013 {
1014         const struct diff_filepair *a = *((const struct diff_filepair **)a_);
1015         const struct diff_filepair *b = *((const struct diff_filepair **)b_);
1016
1017         return strcmp(a->one->path, b->one->path);
1018 }
1019
1020 /* Call diffcore_rename() to compute which files have changed on given side */
1021 static void detect_regular_renames(struct merge_options *opt,
1022                                    struct tree *merge_base,
1023                                    struct tree *side,
1024                                    unsigned side_index)
1025 {
1026         struct diff_options diff_opts;
1027         struct rename_info *renames = &opt->priv->renames;
1028
1029         repo_diff_setup(opt->repo, &diff_opts);
1030         diff_opts.flags.recursive = 1;
1031         diff_opts.flags.rename_empty = 0;
1032         diff_opts.detect_rename = DIFF_DETECT_RENAME;
1033         diff_opts.rename_limit = opt->rename_limit;
1034         if (opt->rename_limit <= 0)
1035                 diff_opts.rename_limit = 1000;
1036         diff_opts.rename_score = opt->rename_score;
1037         diff_opts.show_rename_progress = opt->show_rename_progress;
1038         diff_opts.output_format = DIFF_FORMAT_NO_OUTPUT;
1039         diff_setup_done(&diff_opts);
1040         diff_tree_oid(&merge_base->object.oid, &side->object.oid, "",
1041                       &diff_opts);
1042         diffcore_std(&diff_opts);
1043
1044         if (diff_opts.needed_rename_limit > renames->needed_limit)
1045                 renames->needed_limit = diff_opts.needed_rename_limit;
1046
1047         renames->pairs[side_index] = diff_queued_diff;
1048
1049         diff_opts.output_format = DIFF_FORMAT_NO_OUTPUT;
1050         diff_queued_diff.nr = 0;
1051         diff_queued_diff.queue = NULL;
1052         diff_flush(&diff_opts);
1053 }
1054
1055 /*
1056  * Get information of all renames which occurred in 'side_pairs', discarding
1057  * non-renames.
1058  */
1059 static int collect_renames(struct merge_options *opt,
1060                            struct diff_queue_struct *result,
1061                            unsigned side_index)
1062 {
1063         int i, clean = 1;
1064         struct diff_queue_struct *side_pairs;
1065         struct rename_info *renames = &opt->priv->renames;
1066
1067         side_pairs = &renames->pairs[side_index];
1068
1069         for (i = 0; i < side_pairs->nr; ++i) {
1070                 struct diff_filepair *p = side_pairs->queue[i];
1071
1072                 if (p->status != 'R') {
1073                         diff_free_filepair(p);
1074                         continue;
1075                 }
1076
1077                 /*
1078                  * p->score comes back from diffcore_rename_extended() with
1079                  * the similarity of the renamed file.  The similarity is
1080                  * was used to determine that the two files were related
1081                  * and are a rename, which we have already used, but beyond
1082                  * that we have no use for the similarity.  So p->score is
1083                  * now irrelevant.  However, process_renames() will need to
1084                  * know which side of the merge this rename was associated
1085                  * with, so overwrite p->score with that value.
1086                  */
1087                 p->score = side_index;
1088                 result->queue[result->nr++] = p;
1089         }
1090
1091         return clean;
1092 }
1093
1094 static int detect_and_process_renames(struct merge_options *opt,
1095                                       struct tree *merge_base,
1096                                       struct tree *side1,
1097                                       struct tree *side2)
1098 {
1099         struct diff_queue_struct combined;
1100         struct rename_info *renames = &opt->priv->renames;
1101         int need_dir_renames, s, clean = 1;
1102
1103         memset(&combined, 0, sizeof(combined));
1104
1105         detect_regular_renames(opt, merge_base, side1, MERGE_SIDE1);
1106         detect_regular_renames(opt, merge_base, side2, MERGE_SIDE2);
1107
1108         need_dir_renames =
1109           !opt->priv->call_depth &&
1110           (opt->detect_directory_renames == MERGE_DIRECTORY_RENAMES_TRUE ||
1111            opt->detect_directory_renames == MERGE_DIRECTORY_RENAMES_CONFLICT);
1112
1113         if (need_dir_renames) {
1114                 get_provisional_directory_renames(opt, MERGE_SIDE1, &clean);
1115                 get_provisional_directory_renames(opt, MERGE_SIDE2, &clean);
1116                 handle_directory_level_conflicts(opt);
1117         }
1118
1119         ALLOC_GROW(combined.queue,
1120                    renames->pairs[1].nr + renames->pairs[2].nr,
1121                    combined.alloc);
1122         clean &= collect_renames(opt, &combined, MERGE_SIDE1);
1123         clean &= collect_renames(opt, &combined, MERGE_SIDE2);
1124         QSORT(combined.queue, combined.nr, compare_pairs);
1125
1126         clean &= process_renames(opt, &combined);
1127
1128         /* Free memory for renames->pairs[] and combined */
1129         for (s = MERGE_SIDE1; s <= MERGE_SIDE2; s++) {
1130                 free(renames->pairs[s].queue);
1131                 DIFF_QUEUE_CLEAR(&renames->pairs[s]);
1132         }
1133         if (combined.nr) {
1134                 int i;
1135                 for (i = 0; i < combined.nr; i++)
1136                         diff_free_filepair(combined.queue[i]);
1137                 free(combined.queue);
1138         }
1139
1140         return clean;
1141 }
1142
1143 /*** Function Grouping: functions related to process_entries() ***/
1144
1145 static int string_list_df_name_compare(const char *one, const char *two)
1146 {
1147         int onelen = strlen(one);
1148         int twolen = strlen(two);
1149         /*
1150          * Here we only care that entries for D/F conflicts are
1151          * adjacent, in particular with the file of the D/F conflict
1152          * appearing before files below the corresponding directory.
1153          * The order of the rest of the list is irrelevant for us.
1154          *
1155          * To achieve this, we sort with df_name_compare and provide
1156          * the mode S_IFDIR so that D/F conflicts will sort correctly.
1157          * We use the mode S_IFDIR for everything else for simplicity,
1158          * since in other cases any changes in their order due to
1159          * sorting cause no problems for us.
1160          */
1161         int cmp = df_name_compare(one, onelen, S_IFDIR,
1162                                   two, twolen, S_IFDIR);
1163         /*
1164          * Now that 'foo' and 'foo/bar' compare equal, we have to make sure
1165          * that 'foo' comes before 'foo/bar'.
1166          */
1167         if (cmp)
1168                 return cmp;
1169         return onelen - twolen;
1170 }
1171
     /*
      * Bookkeeping for writing trees: record_entry_for_tree() appends entries
      * to 'versions', and write_completed_directory() uses 'versions' and
      * 'offsets' to decide when the entries of a directory can be written out
      * as a tree.
      */
1172 struct directory_versions {
1173         /*
1174          * versions: list of (basename -> version_info)
1175          *
1176          * The basenames are in reverse lexicographic order of full pathnames,
1177          * as processed in process_entries().  This puts all entries within
1178          * a directory together, and covers the directory itself after
1179          * everything within it, allowing us to write subtrees before needing
1180          * to record information for the tree itself.
1181          */
1182         struct string_list versions;
1183
1184         /*
1185          * offsets: list of (full relative path directories -> integer offsets)
1186          *
1187          * Since versions contains basenames from files in multiple different
1188          * directories, we need to know which entries in versions correspond
1189          * to which directories.  Values of e.g.
1190          *     ""             0
1191          *     src            2
1192          *     src/moduleA    5
1193          * would mean that entries 0-1 of versions are files in the toplevel
1194          * directory, entries 2-4 are files under src/, and the remaining
1195          * entries starting at index 5 are files under src/moduleA/.
1196          */
1197         struct string_list offsets;
1198
1199         /*
1200          * last_directory: directory in which the previously processed file was found
1201          *
1202          * last_directory starts NULL, but records the directory in which the
1203          * previous file was found.  As soon as
1204          *    directory(current_file) != last_directory
1205          * then we need to start updating accounting in versions & offsets.
1206          * Note that last_directory is always the last path in "offsets" (or
1207          * NULL if "offsets" is empty) so this exists just for quick access.
1208          */
1209         const char *last_directory;
1210
1211         /* last_directory_len: cached computation of strlen(last_directory) */
1212         unsigned last_directory_len;
1213 };
1214
1215 static int tree_entry_order(const void *a_, const void *b_)
1216 {
1217         const struct string_list_item *a = a_;
1218         const struct string_list_item *b = b_;
1219
1220         const struct merged_info *ami = a->util;
1221         const struct merged_info *bmi = b->util;
1222         return base_name_compare(a->string, strlen(a->string), ami->result.mode,
1223                                  b->string, strlen(b->string), bmi->result.mode);
1224 }
1225
1226 static void write_tree(struct object_id *result_oid,
1227                        struct string_list *versions,
1228                        unsigned int offset,
1229                        size_t hash_size)
1230 {
1231         size_t maxlen = 0, extra;
1232         unsigned int nr = versions->nr - offset;
1233         struct strbuf buf = STRBUF_INIT;
1234         struct string_list relevant_entries = STRING_LIST_INIT_NODUP;
1235         int i;
1236
1237         /*
1238          * We want to sort the last (versions->nr-offset) entries in versions.
1239          * Do so by abusing the string_list API a bit: make another string_list
1240          * that contains just those entries and then sort them.
1241          *
1242          * We won't use relevant_entries again and will let it just pop off the
1243          * stack, so there won't be allocation worries or anything.
1244          */
1245         relevant_entries.items = versions->items + offset;
1246         relevant_entries.nr = versions->nr - offset;
1247         QSORT(relevant_entries.items, relevant_entries.nr, tree_entry_order);
1248
1249         /* Pre-allocate some space in buf */
1250         extra = hash_size + 8; /* 8: 6 for mode, 1 for space, 1 for NUL char */
1251         for (i = 0; i < nr; i++) {
1252                 maxlen += strlen(versions->items[offset+i].string) + extra;
1253         }
1254         strbuf_grow(&buf, maxlen);
1255
1256         /* Write each entry out to buf */
1257         for (i = 0; i < nr; i++) {
1258                 struct merged_info *mi = versions->items[offset+i].util;
1259                 struct version_info *ri = &mi->result;
1260                 strbuf_addf(&buf, "%o %s%c",
1261                             ri->mode,
1262                             versions->items[offset+i].string, '\0');
1263                 strbuf_add(&buf, ri->oid.hash, hash_size);
1264         }
1265
1266         /* Write this object file out, and record in result_oid */
1267         write_object_file(buf.buf, buf.len, tree_type, result_oid);
1268         strbuf_release(&buf);
1269 }
1270
1271 static void record_entry_for_tree(struct directory_versions *dir_metadata,
1272                                   const char *path,
1273                                   struct merged_info *mi)
1274 {
1275         const char *basename;
1276
1277         if (mi->is_null)
1278                 /* nothing to record */
1279                 return;
1280
1281         basename = path + mi->basename_offset;
1282         assert(strchr(basename, '/') == NULL);
1283         string_list_append(&dir_metadata->versions,
1284                            basename)->util = &mi->result;
1285 }
1286
1287 static void write_completed_directory(struct merge_options *opt,
1288                                       const char *new_directory_name,
1289                                       struct directory_versions *info)
1290 {
1291         const char *prev_dir;
1292         struct merged_info *dir_info = NULL;
1293         unsigned int offset;
1294
1295         /*
1296          * Some explanation of info->versions and info->offsets...
1297          *
1298          * process_entries() iterates over all relevant files AND
1299          * directories in reverse lexicographic order, and calls this
1300          * function.  Thus, an example of the paths that process_entries()
1301          * could operate on (along with the directories for those paths
1302          * being shown) is:
1303          *
1304          *     xtract.c             ""
1305          *     tokens.txt           ""
1306          *     src/moduleB/umm.c    src/moduleB
1307          *     src/moduleB/stuff.h  src/moduleB
1308          *     src/moduleB/baz.c    src/moduleB
1309          *     src/moduleB          src
1310          *     src/moduleA/foo.c    src/moduleA
1311          *     src/moduleA/bar.c    src/moduleA
1312          *     src/moduleA          src
1313          *     src                  ""
1314          *     Makefile             ""
1315          *
1316          * info->versions:
1317          *
1318          *     always contains the unprocessed entries and their
1319          *     version_info information.  For example, after the first five
1320          *     entries above, info->versions would be:
1321          *
1322          *         xtract.c     <xtract.c's version_info>
1323          *         tokens.txt   <tokens.txt's version_info>
1324          *         umm.c        <src/moduleB/umm.c's version_info>
1325          *         stuff.h      <src/moduleB/stuff.h's version_info>
1326          *         baz.c        <src/moduleB/baz.c's version_info>
1327          *
1328          *     Once a subdirectory is completed we remove the entries in
1329          *     that subdirectory from info->versions, writing it as a tree
1330          *     (write_tree()).  Thus, as soon as we get to src/moduleB,
1331          *     info->versions would be updated to
1332          *
1333          *         xtract.c     <xtract.c's version_info>
1334          *         tokens.txt   <tokens.txt's version_info>
1335          *         moduleB      <src/moduleB's version_info>
1336          *
1337          * info->offsets:
1338          *
1339          *     helps us track which entries in info->versions correspond to
1340          *     which directories.  When we are N directories deep (e.g. 4
1341          *     for src/modA/submod/subdir/), we have up to N+1 unprocessed
1342          *     directories (+1 because of toplevel dir).  Corresponding to
1343          *     the info->versions example above, after processing five entries
1344          *     info->offsets will be:
1345          *
1346          *         ""           0
1347          *         src/moduleB  2
1348          *
1349          *     which is used to know that xtract.c & tokens.txt are from the
1350          *     toplevel directory, while umm.c & stuff.h & baz.c are from the
1351          *     src/moduleB directory.  Again, following the example above,
1352          *     once we need to process src/moduleB, then info->offsets is
1353          *     updated to
1354          *
1355          *         ""           0
1356          *         src          2
1357          *
1358          *     which says that moduleB (and only moduleB so far) is in the
1359          *     src directory.
1360          *
1361          *     One unique thing to note about info->offsets here is that
1362          *     "src" was not added to info->offsets until there was a path
1363          *     (a file OR directory) immediately below src/ that got
1364          *     processed.
1365          *
1366          * Since process_entry() just appends new entries to info->versions,
1367          * write_completed_directory() only needs to do work if the next path
1368          * is in a directory that is different than the last directory found
1369          * in info->offsets.
1370          */
1371
1372         /*
1373          * If we are working with the same directory as the last entry, there
1374          * is no work to do.  (See comments above the directory_name member of
1375          * struct merged_info for why we can use pointer comparison instead of
1376          * strcmp here.)
1377          */
1378         if (new_directory_name == info->last_directory)
1379                 return;
1380
1381         /*
1382          * If we are just starting (last_directory is NULL), or last_directory
1383          * is a prefix of the current directory, then we can just update
1384          * info->offsets to record the offset where we started this directory
1385          * and update last_directory to have quick access to it.
1386          */
1387         if (info->last_directory == NULL ||
1388             !strncmp(new_directory_name, info->last_directory,
1389                      info->last_directory_len)) {
1390                 uintptr_t offset = info->versions.nr;
1391
1392                 info->last_directory = new_directory_name;
1393                 info->last_directory_len = strlen(info->last_directory);
1394                 /*
1395                  * Record the offset into info->versions where we will
1396                  * start recording basenames of paths found within
1397                  * new_directory_name.
1398                  */
1399                 string_list_append(&info->offsets,
1400                                    info->last_directory)->util = (void*)offset;
1401                 return;
1402         }
1403
1404         /*
1405          * The next entry that will be processed will be within
1406          * new_directory_name.  Since at this point we know that
1407          * new_directory_name is within a different directory than
1408          * info->last_directory, we have all entries for info->last_directory
1409          * in info->versions and we need to create a tree object for them.
1410          */
1411         dir_info = strmap_get(&opt->priv->paths, info->last_directory);
1412         assert(dir_info);
1413         offset = (uintptr_t)info->offsets.items[info->offsets.nr-1].util;
1414         if (offset == info->versions.nr) {
1415                 /*
1416                  * Actually, we don't need to create a tree object in this
1417                  * case.  Whenever all files within a directory disappear
1418                  * during the merge (e.g. unmodified on one side and
1419                  * deleted on the other, or files were renamed elsewhere),
1420                  * then we get here and the directory itself needs to be
1421                  * omitted from its parent tree as well.
1422                  */
1423                 dir_info->is_null = 1;
1424         } else {
1425                 /*
1426                  * Write out the tree to the git object directory, and also
1427                  * record the mode and oid in dir_info->result.
1428                  */
1429                 dir_info->is_null = 0;
1430                 dir_info->result.mode = S_IFDIR;
1431                 write_tree(&dir_info->result.oid, &info->versions, offset,
1432                            opt->repo->hash_algo->rawsz);
1433         }
1434
1435         /*
1436          * We've now used several entries from info->versions and one entry
1437          * from info->offsets, so we get rid of those values.
1438          */
1439         info->offsets.nr--;
1440         info->versions.nr = offset;
1441
1442         /*
1443          * Now we've taken care of the completed directory, but we need to
1444          * prepare things since future entries will be in
1445          * new_directory_name.  (In particular, process_entry() will be
1446          * appending new entries to info->versions.)  So, we need to make
1447          * sure new_directory_name is the last entry in info->offsets.
1448          */
1449         prev_dir = info->offsets.nr == 0 ? NULL :
1450                    info->offsets.items[info->offsets.nr-1].string;
1451         if (new_directory_name != prev_dir) {
1452                 uintptr_t c = info->versions.nr;
1453                 string_list_append(&info->offsets,
1454                                    new_directory_name)->util = (void*)c;
1455         }
1456
1457         /* And, of course, we need to update last_directory to match. */
1458         info->last_directory = new_directory_name;
1459         info->last_directory_len = strlen(info->last_directory);
1460 }
1461
1462 /* Per entry merge function */
1463 static void process_entry(struct merge_options *opt,
1464                           const char *path,
1465                           struct conflict_info *ci,
1466                           struct directory_versions *dir_metadata)
1467 {
1468         VERIFY_CI(ci);
1469         assert(ci->filemask >= 0 && ci->filemask <= 7);
1470         /* ci->match_mask == 7 was handled in collect_merge_info_callback() */
1471         assert(ci->match_mask == 0 || ci->match_mask == 3 ||
1472                ci->match_mask == 5 || ci->match_mask == 6);
1473
1474         if (ci->dirmask) {
1475                 record_entry_for_tree(dir_metadata, path, &ci->merged);
1476                 if (ci->filemask == 0)
1477                         /* nothing else to handle */
1478                         return;
1479                 assert(ci->df_conflict);
1480         }
1481
1482         if (ci->df_conflict) {
1483                 die("Not yet implemented.");
1484         }
1485
1486         /*
1487          * NOTE: Below there is a long switch-like if-elseif-elseif... block
1488          *       which the code goes through even for the df_conflict cases
1489          *       above.  Well, it will once we don't die-not-implemented above.
1490          */
1491         if (ci->match_mask) {
1492                 ci->merged.clean = 1;
1493                 if (ci->match_mask == 6) {
1494                         /* stages[1] == stages[2] */
1495                         ci->merged.result.mode = ci->stages[1].mode;
1496                         oidcpy(&ci->merged.result.oid, &ci->stages[1].oid);
1497                 } else {
1498                         /* determine the mask of the side that didn't match */
1499                         unsigned int othermask = 7 & ~ci->match_mask;
1500                         int side = (othermask == 4) ? 2 : 1;
1501
1502                         ci->merged.result.mode = ci->stages[side].mode;
1503                         ci->merged.is_null = !ci->merged.result.mode;
1504                         oidcpy(&ci->merged.result.oid, &ci->stages[side].oid);
1505
1506                         assert(othermask == 2 || othermask == 4);
1507                         assert(ci->merged.is_null ==
1508                                (ci->filemask == ci->match_mask));
1509                 }
1510         } else if (ci->filemask >= 6 &&
1511                    (S_IFMT & ci->stages[1].mode) !=
1512                    (S_IFMT & ci->stages[2].mode)) {
1513                 /*
1514                  * Two different items from (file/submodule/symlink)
1515                  */
1516                 die("Not yet implemented.");
1517         } else if (ci->filemask >= 6) {
1518                 /*
1519                  * TODO: Needs a two-way or three-way content merge, but we're
1520                  * just being lazy and copying the version from HEAD and
1521                  * leaving it as conflicted.
1522                  */
1523                 ci->merged.clean = 0;
1524                 ci->merged.result.mode = ci->stages[1].mode;
1525                 oidcpy(&ci->merged.result.oid, &ci->stages[1].oid);
1526                 /* When we fix above, we'll call handle_content_merge() */
1527                 (void)handle_content_merge;
1528         } else if (ci->filemask == 3 || ci->filemask == 5) {
1529                 /* Modify/delete */
1530                 const char *modify_branch, *delete_branch;
1531                 int side = (ci->filemask == 5) ? 2 : 1;
1532                 int index = opt->priv->call_depth ? 0 : side;
1533
1534                 ci->merged.result.mode = ci->stages[index].mode;
1535                 oidcpy(&ci->merged.result.oid, &ci->stages[index].oid);
1536                 ci->merged.clean = 0;
1537
1538                 modify_branch = (side == 1) ? opt->branch1 : opt->branch2;
1539                 delete_branch = (side == 1) ? opt->branch2 : opt->branch1;
1540
1541                 if (ci->path_conflict &&
1542                     oideq(&ci->stages[0].oid, &ci->stages[side].oid)) {
1543                         /*
1544                          * This came from a rename/delete; no action to take,
1545                          * but avoid printing "modify/delete" conflict notice
1546                          * since the contents were not modified.
1547                          */
1548                 } else {
1549                         path_msg(opt, path, 0,
1550                                  _("CONFLICT (modify/delete): %s deleted in %s "
1551                                    "and modified in %s.  Version %s of %s left "
1552                                    "in tree."),
1553                                  path, delete_branch, modify_branch,
1554                                  modify_branch, path);
1555                 }
1556         } else if (ci->filemask == 2 || ci->filemask == 4) {
1557                 /* Added on one side */
1558                 int side = (ci->filemask == 4) ? 2 : 1;
1559                 ci->merged.result.mode = ci->stages[side].mode;
1560                 oidcpy(&ci->merged.result.oid, &ci->stages[side].oid);
1561                 ci->merged.clean = !ci->df_conflict && !ci->path_conflict;
1562         } else if (ci->filemask == 1) {
1563                 /* Deleted on both sides */
1564                 ci->merged.is_null = 1;
1565                 ci->merged.result.mode = 0;
1566                 oidcpy(&ci->merged.result.oid, &null_oid);
1567                 ci->merged.clean = !ci->path_conflict;
1568         }
1569
1570         /*
1571          * If still conflicted, record it separately.  This allows us to later
1572          * iterate over just conflicted entries when updating the index instead
1573          * of iterating over all entries.
1574          */
1575         if (!ci->merged.clean)
1576                 strmap_put(&opt->priv->conflicted, path, ci);
1577         record_entry_for_tree(dir_metadata, path, &ci->merged);
1578 }
1579
1580 static void process_entries(struct merge_options *opt,
1581                             struct object_id *result_oid)
1582 {
1583         struct hashmap_iter iter;
1584         struct strmap_entry *e;
1585         struct string_list plist = STRING_LIST_INIT_NODUP;
1586         struct string_list_item *entry;
1587         struct directory_versions dir_metadata = { STRING_LIST_INIT_NODUP,
1588                                                    STRING_LIST_INIT_NODUP,
1589                                                    NULL, 0 };
1590
1591         if (strmap_empty(&opt->priv->paths)) {
1592                 oidcpy(result_oid, opt->repo->hash_algo->empty_tree);
1593                 return;
1594         }
1595
1596         /* Hack to pre-allocate plist to the desired size */
1597         ALLOC_GROW(plist.items, strmap_get_size(&opt->priv->paths), plist.alloc);
1598
1599         /* Put every entry from paths into plist, then sort */
1600         strmap_for_each_entry(&opt->priv->paths, &iter, e) {
1601                 string_list_append(&plist, e->key)->util = e->value;
1602         }
1603         plist.cmp = string_list_df_name_compare;
1604         string_list_sort(&plist);
1605
1606         /*
1607          * Iterate over the items in reverse order, so we can handle paths
1608          * below a directory before needing to handle the directory itself.
1609          *
1610          * This allows us to write subtrees before we need to write trees,
1611          * and it also enables sane handling of directory/file conflicts
1612          * (because it allows us to know whether the directory is still in
1613          * the way when it is time to process the file at the same path).
1614          */
1615         for (entry = &plist.items[plist.nr-1]; entry >= plist.items; --entry) {
1616                 char *path = entry->string;
1617                 /*
1618                  * NOTE: mi may actually be a pointer to a conflict_info, but
1619                  * we have to check mi->clean first to see if it's safe to
1620                  * reassign to such a pointer type.
1621                  */
1622                 struct merged_info *mi = entry->util;
1623
1624                 write_completed_directory(opt, mi->directory_name,
1625                                           &dir_metadata);
1626                 if (mi->clean)
1627                         record_entry_for_tree(&dir_metadata, path, mi);
1628                 else {
1629                         struct conflict_info *ci = (struct conflict_info *)mi;
1630                         process_entry(opt, path, ci, &dir_metadata);
1631                 }
1632         }
1633
1634         if (dir_metadata.offsets.nr != 1 ||
1635             (uintptr_t)dir_metadata.offsets.items[0].util != 0) {
1636                 printf("dir_metadata.offsets.nr = %d (should be 1)\n",
1637                        dir_metadata.offsets.nr);
1638                 printf("dir_metadata.offsets.items[0].util = %u (should be 0)\n",
1639                        (unsigned)(uintptr_t)dir_metadata.offsets.items[0].util);
1640                 fflush(stdout);
1641                 BUG("dir_metadata accounting completely off; shouldn't happen");
1642         }
1643         write_tree(result_oid, &dir_metadata.versions, 0,
1644                    opt->repo->hash_algo->rawsz);
1645         string_list_clear(&plist, 0);
1646         string_list_clear(&dir_metadata.versions, 0);
1647         string_list_clear(&dir_metadata.offsets, 0);
1648 }
1649
1650 /*** Function Grouping: functions related to merge_switch_to_result() ***/
1651
1652 static int checkout(struct merge_options *opt,
1653                     struct tree *prev,
1654                     struct tree *next)
1655 {
1656         /* Switch the index/working copy from old to new */
1657         int ret;
1658         struct tree_desc trees[2];
1659         struct unpack_trees_options unpack_opts;
1660
1661         memset(&unpack_opts, 0, sizeof(unpack_opts));
1662         unpack_opts.head_idx = -1;
1663         unpack_opts.src_index = opt->repo->index;
1664         unpack_opts.dst_index = opt->repo->index;
1665
1666         setup_unpack_trees_porcelain(&unpack_opts, "merge");
1667
1668         /*
1669          * NOTE: if this were just "git checkout" code, we would probably
1670          * read or refresh the cache and check for a conflicted index, but
1671          * builtin/merge.c or sequencer.c really needs to read the index
1672          * and check for conflicted entries before starting merging for a
1673          * good user experience (no sense waiting for merges/rebases before
1674          * erroring out), so there's no reason to duplicate that work here.
1675          */
1676
1677         /* 2-way merge to the new branch */
1678         unpack_opts.update = 1;
1679         unpack_opts.merge = 1;
1680         unpack_opts.quiet = 0; /* FIXME: sequencer might want quiet? */
1681         unpack_opts.verbose_update = (opt->verbosity > 2);
1682         unpack_opts.fn = twoway_merge;
1683         if (1/* FIXME: opts->overwrite_ignore*/) {
1684                 unpack_opts.dir = xcalloc(1, sizeof(*unpack_opts.dir));
1685                 unpack_opts.dir->flags |= DIR_SHOW_IGNORED;
1686                 setup_standard_excludes(unpack_opts.dir);
1687         }
1688         parse_tree(prev);
1689         init_tree_desc(&trees[0], prev->buffer, prev->size);
1690         parse_tree(next);
1691         init_tree_desc(&trees[1], next->buffer, next->size);
1692
1693         ret = unpack_trees(2, trees, &unpack_opts);
1694         clear_unpack_trees_porcelain(&unpack_opts);
1695         dir_clear(unpack_opts.dir);
1696         FREE_AND_NULL(unpack_opts.dir);
1697         return ret;
1698 }
1699
1700 static int record_conflicted_index_entries(struct merge_options *opt,
1701                                            struct index_state *index,
1702                                            struct strmap *paths,
1703                                            struct strmap *conflicted)
1704 {
1705         struct hashmap_iter iter;
1706         struct strmap_entry *e;
1707         int errs = 0;
1708         int original_cache_nr;
1709
1710         if (strmap_empty(conflicted))
1711                 return 0;
1712
1713         original_cache_nr = index->cache_nr;
1714
1715         /* Put every entry from paths into plist, then sort */
1716         strmap_for_each_entry(conflicted, &iter, e) {
1717                 const char *path = e->key;
1718                 struct conflict_info *ci = e->value;
1719                 int pos;
1720                 struct cache_entry *ce;
1721                 int i;
1722
1723                 VERIFY_CI(ci);
1724
1725                 /*
1726                  * The index will already have a stage=0 entry for this path,
1727                  * because we created an as-merged-as-possible version of the
1728                  * file and checkout() moved the working copy and index over
1729                  * to that version.
1730                  *
1731                  * However, previous iterations through this loop will have
1732                  * added unstaged entries to the end of the cache which
1733                  * ignore the standard alphabetical ordering of cache
1734                  * entries and break invariants needed for index_name_pos()
1735                  * to work.  However, we know the entry we want is before
1736                  * those appended cache entries, so do a temporary swap on
1737                  * cache_nr to only look through entries of interest.
1738                  */
1739                 SWAP(index->cache_nr, original_cache_nr);
1740                 pos = index_name_pos(index, path, strlen(path));
1741                 SWAP(index->cache_nr, original_cache_nr);
1742                 if (pos < 0) {
1743                         if (ci->filemask != 1)
1744                                 BUG("Conflicted %s but nothing in basic working tree or index; this shouldn't happen", path);
1745                         cache_tree_invalidate_path(index, path);
1746                 } else {
1747                         ce = index->cache[pos];
1748
1749                         /*
1750                          * Clean paths with CE_SKIP_WORKTREE set will not be
1751                          * written to the working tree by the unpack_trees()
1752                          * call in checkout().  Our conflicted entries would
1753                          * have appeared clean to that code since we ignored
1754                          * the higher order stages.  Thus, we need override
1755                          * the CE_SKIP_WORKTREE bit and manually write those
1756                          * files to the working disk here.
1757                          *
1758                          * TODO: Implement this CE_SKIP_WORKTREE fixup.
1759                          */
1760
1761                         /*
1762                          * Mark this cache entry for removal and instead add
1763                          * new stage>0 entries corresponding to the
1764                          * conflicts.  If there are many conflicted entries, we
1765                          * want to avoid memmove'ing O(NM) entries by
1766                          * inserting the new entries one at a time.  So,
1767                          * instead, we just add the new cache entries to the
1768                          * end (ignoring normal index requirements on sort
1769                          * order) and sort the index once we're all done.
1770                          */
1771                         ce->ce_flags |= CE_REMOVE;
1772                 }
1773
1774                 for (i = MERGE_BASE; i <= MERGE_SIDE2; i++) {
1775                         struct version_info *vi;
1776                         if (!(ci->filemask & (1ul << i)))
1777                                 continue;
1778                         vi = &ci->stages[i];
1779                         ce = make_cache_entry(index, vi->mode, &vi->oid,
1780                                               path, i+1, 0);
1781                         add_index_entry(index, ce, ADD_CACHE_JUST_APPEND);
1782                 }
1783         }
1784
1785         /*
1786          * Remove the unused cache entries (and invalidate the relevant
1787          * cache-trees), then sort the index entries to get the conflicted
1788          * entries we added to the end into their right locations.
1789          */
1790         remove_marked_cache_entries(index, 1);
1791         QSORT(index->cache, index->cache_nr, cmp_cache_name_compare);
1792
1793         return errs;
1794 }
1795
1796 void merge_switch_to_result(struct merge_options *opt,
1797                             struct tree *head,
1798                             struct merge_result *result,
1799                             int update_worktree_and_index,
1800                             int display_update_msgs)
1801 {
1802         assert(opt->priv == NULL);
1803         if (result->clean >= 0 && update_worktree_and_index) {
1804                 struct merge_options_internal *opti = result->priv;
1805
1806                 if (checkout(opt, head, result->tree)) {
1807                         /* failure to function */
1808                         result->clean = -1;
1809                         return;
1810                 }
1811
1812                 if (record_conflicted_index_entries(opt, opt->repo->index,
1813                                                     &opti->paths,
1814                                                     &opti->conflicted)) {
1815                         /* failure to function */
1816                         result->clean = -1;
1817                         return;
1818                 }
1819         }
1820
1821         if (display_update_msgs) {
1822                 struct merge_options_internal *opti = result->priv;
1823                 struct hashmap_iter iter;
1824                 struct strmap_entry *e;
1825                 struct string_list olist = STRING_LIST_INIT_NODUP;
1826                 int i;
1827
1828                 /* Hack to pre-allocate olist to the desired size */
1829                 ALLOC_GROW(olist.items, strmap_get_size(&opti->output),
1830                            olist.alloc);
1831
1832                 /* Put every entry from output into olist, then sort */
1833                 strmap_for_each_entry(&opti->output, &iter, e) {
1834                         string_list_append(&olist, e->key)->util = e->value;
1835                 }
1836                 string_list_sort(&olist);
1837
1838                 /* Iterate over the items, printing them */
1839                 for (i = 0; i < olist.nr; ++i) {
1840                         struct strbuf *sb = olist.items[i].util;
1841
1842                         printf("%s", sb->buf);
1843                 }
1844                 string_list_clear(&olist, 0);
1845
1846                 /* Also include needed rename limit adjustment now */
1847                 diff_warn_rename_limit("merge.renamelimit",
1848                                        opti->renames.needed_limit, 0);
1849         }
1850
1851         merge_finalize(opt, result);
1852 }
1853
1854 void merge_finalize(struct merge_options *opt,
1855                     struct merge_result *result)
1856 {
1857         struct merge_options_internal *opti = result->priv;
1858
1859         assert(opt->priv == NULL);
1860
1861         clear_or_reinit_internal_opts(opti, 0);
1862         FREE_AND_NULL(opti);
1863 }
1864
1865 /*** Function Grouping: helper functions for merge_incore_*() ***/
1866
/* Store tree 't' in commit 'c' by assigning its maybe_tree member. */
static inline void set_commit_tree(struct commit *c, struct tree *t)
{
        c->maybe_tree = t;
}
1871
1872 static struct commit *make_virtual_commit(struct repository *repo,
1873                                           struct tree *tree,
1874                                           const char *comment)
1875 {
1876         struct commit *commit = alloc_commit_node(repo);
1877
1878         set_merge_remote_desc(commit, comment, (struct object *)commit);
1879         set_commit_tree(commit, tree);
1880         commit->object.parsed = 1;
1881         return commit;
1882 }
1883
1884 static void merge_start(struct merge_options *opt, struct merge_result *result)
1885 {
1886         struct rename_info *renames;
1887         int i;
1888
1889         /* Sanity checks on opt */
1890         assert(opt->repo);
1891
1892         assert(opt->branch1 && opt->branch2);
1893
1894         assert(opt->detect_directory_renames >= MERGE_DIRECTORY_RENAMES_NONE &&
1895                opt->detect_directory_renames <= MERGE_DIRECTORY_RENAMES_TRUE);
1896         assert(opt->rename_limit >= -1);
1897         assert(opt->rename_score >= 0 && opt->rename_score <= MAX_SCORE);
1898         assert(opt->show_rename_progress >= 0 && opt->show_rename_progress <= 1);
1899
1900         assert(opt->xdl_opts >= 0);
1901         assert(opt->recursive_variant >= MERGE_VARIANT_NORMAL &&
1902                opt->recursive_variant <= MERGE_VARIANT_THEIRS);
1903
1904         /*
1905          * detect_renames, verbosity, buffer_output, and obuf are ignored
1906          * fields that were used by "recursive" rather than "ort" -- but
1907          * sanity check them anyway.
1908          */
1909         assert(opt->detect_renames >= -1 &&
1910                opt->detect_renames <= DIFF_DETECT_COPY);
1911         assert(opt->verbosity >= 0 && opt->verbosity <= 5);
1912         assert(opt->buffer_output <= 2);
1913         assert(opt->obuf.len == 0);
1914
1915         assert(opt->priv == NULL);
1916
1917         /* Default to histogram diff.  Actually, just hardcode it...for now. */
1918         opt->xdl_opts = DIFF_WITH_ALG(opt, HISTOGRAM_DIFF);
1919
1920         /* Initialization of opt->priv, our internal merge data */
1921         opt->priv = xcalloc(1, sizeof(*opt->priv));
1922
1923         /* Initialization of various renames fields */
1924         renames = &opt->priv->renames;
1925         for (i = MERGE_SIDE1; i <= MERGE_SIDE2; i++) {
1926                 strset_init_with_options(&renames->dirs_removed[i],
1927                                          NULL, 0);
1928                 strmap_init_with_options(&renames->dir_rename_count[i],
1929                                          NULL, 1);
1930                 strmap_init_with_options(&renames->dir_renames[i],
1931                                          NULL, 0);
1932         }
1933
1934         /*
1935          * Although we initialize opt->priv->paths with strdup_strings=0,
1936          * that's just to avoid making yet another copy of an allocated
1937          * string.  Putting the entry into paths means we are taking
1938          * ownership, so we will later free it.  paths_to_free is similar.
1939          *
1940          * In contrast, conflicted just has a subset of keys from paths, so
1941          * we don't want to free those (it'd be a duplicate free).
1942          */
1943         strmap_init_with_options(&opt->priv->paths, NULL, 0);
1944         strmap_init_with_options(&opt->priv->conflicted, NULL, 0);
1945         string_list_init(&opt->priv->paths_to_free, 0);
1946
1947         /*
1948          * keys & strbufs in output will sometimes need to outlive "paths",
1949          * so it will have a copy of relevant keys.  It's probably a small
1950          * subset of the overall paths that have special output.
1951          */
1952         strmap_init(&opt->priv->output);
1953 }
1954
1955 /*** Function Grouping: merge_incore_*() and their internal variants ***/
1956
1957 /*
1958  * Originally from merge_trees_internal(); heavily adapted, though.
1959  */
1960 static void merge_ort_nonrecursive_internal(struct merge_options *opt,
1961                                             struct tree *merge_base,
1962                                             struct tree *side1,
1963                                             struct tree *side2,
1964                                             struct merge_result *result)
1965 {
1966         struct object_id working_tree_oid;
1967
1968         if (collect_merge_info(opt, merge_base, side1, side2) != 0) {
1969                 /*
1970                  * TRANSLATORS: The %s arguments are: 1) tree hash of a merge
1971                  * base, and 2-3) the trees for the two trees we're merging.
1972                  */
1973                 err(opt, _("collecting merge info failed for trees %s, %s, %s"),
1974                     oid_to_hex(&merge_base->object.oid),
1975                     oid_to_hex(&side1->object.oid),
1976                     oid_to_hex(&side2->object.oid));
1977                 result->clean = -1;
1978                 return;
1979         }
1980
1981         result->clean = detect_and_process_renames(opt, merge_base,
1982                                                    side1, side2);
1983         process_entries(opt, &working_tree_oid);
1984
1985         /* Set return values */
1986         result->tree = parse_tree_indirect(&working_tree_oid);
1987         /* existence of conflicted entries implies unclean */
1988         result->clean &= strmap_empty(&opt->priv->conflicted);
1989         if (!opt->priv->call_depth) {
1990                 result->priv = opt->priv;
1991                 opt->priv = NULL;
1992         }
1993 }
1994
1995 /*
1996  * Originally from merge_recursive_internal(); somewhat adapted, though.
1997  */
1998 static void merge_ort_internal(struct merge_options *opt,
1999                                struct commit_list *merge_bases,
2000                                struct commit *h1,
2001                                struct commit *h2,
2002                                struct merge_result *result)
2003 {
2004         struct commit_list *iter;
2005         struct commit *merged_merge_bases;
2006         const char *ancestor_name;
2007         struct strbuf merge_base_abbrev = STRBUF_INIT;
2008
2009         if (!merge_bases) {
2010                 merge_bases = get_merge_bases(h1, h2);
2011                 /* See merge-ort.h:merge_incore_recursive() declaration NOTE */
2012                 merge_bases = reverse_commit_list(merge_bases);
2013         }
2014
2015         merged_merge_bases = pop_commit(&merge_bases);
2016         if (merged_merge_bases == NULL) {
2017                 /* if there is no common ancestor, use an empty tree */
2018                 struct tree *tree;
2019
2020                 tree = lookup_tree(opt->repo, opt->repo->hash_algo->empty_tree);
2021                 merged_merge_bases = make_virtual_commit(opt->repo, tree,
2022                                                          "ancestor");
2023                 ancestor_name = "empty tree";
2024         } else if (merge_bases) {
2025                 ancestor_name = "merged common ancestors";
2026         } else {
2027                 strbuf_add_unique_abbrev(&merge_base_abbrev,
2028                                          &merged_merge_bases->object.oid,
2029                                          DEFAULT_ABBREV);
2030                 ancestor_name = merge_base_abbrev.buf;
2031         }
2032
2033         for (iter = merge_bases; iter; iter = iter->next) {
2034                 const char *saved_b1, *saved_b2;
2035                 struct commit *prev = merged_merge_bases;
2036
2037                 opt->priv->call_depth++;
2038                 /*
2039                  * When the merge fails, the result contains files
2040                  * with conflict markers. The cleanness flag is
2041                  * ignored (unless indicating an error), it was never
2042                  * actually used, as result of merge_trees has always
2043                  * overwritten it: the committed "conflicts" were
2044                  * already resolved.
2045                  */
2046                 saved_b1 = opt->branch1;
2047                 saved_b2 = opt->branch2;
2048                 opt->branch1 = "Temporary merge branch 1";
2049                 opt->branch2 = "Temporary merge branch 2";
2050                 merge_ort_internal(opt, NULL, prev, iter->item, result);
2051                 if (result->clean < 0)
2052                         return;
2053                 opt->branch1 = saved_b1;
2054                 opt->branch2 = saved_b2;
2055                 opt->priv->call_depth--;
2056
2057                 merged_merge_bases = make_virtual_commit(opt->repo,
2058                                                          result->tree,
2059                                                          "merged tree");
2060                 commit_list_insert(prev, &merged_merge_bases->parents);
2061                 commit_list_insert(iter->item,
2062                                    &merged_merge_bases->parents->next);
2063
2064                 clear_or_reinit_internal_opts(opt->priv, 1);
2065         }
2066
2067         opt->ancestor = ancestor_name;
2068         merge_ort_nonrecursive_internal(opt,
2069                                         repo_get_commit_tree(opt->repo,
2070                                                              merged_merge_bases),
2071                                         repo_get_commit_tree(opt->repo, h1),
2072                                         repo_get_commit_tree(opt->repo, h2),
2073                                         result);
2074         strbuf_release(&merge_base_abbrev);
2075         opt->ancestor = NULL;  /* avoid accidental re-use of opt->ancestor */
2076 }
2077
2078 void merge_incore_nonrecursive(struct merge_options *opt,
2079                                struct tree *merge_base,
2080                                struct tree *side1,
2081                                struct tree *side2,
2082                                struct merge_result *result)
2083 {
2084         assert(opt->ancestor != NULL);
2085         merge_start(opt, result);
2086         merge_ort_nonrecursive_internal(opt, merge_base, side1, side2, result);
2087 }
2088
2089 void merge_incore_recursive(struct merge_options *opt,
2090                             struct commit_list *merge_bases,
2091                             struct commit *side1,
2092                             struct commit *side2,
2093                             struct merge_result *result)
2094 {
2095         /* We set the ancestor label based on the merge_bases */
2096         assert(opt->ancestor == NULL);
2097
2098         merge_start(opt, result);
2099         merge_ort_internal(opt, merge_bases, side1, side2, result);
2100 }