]> git.ipfire.org Git - thirdparty/git.git/blame - merge-ort.c
merge-ort: preserve cached renames for the appropriate side
[thirdparty/git.git] / merge-ort.c
CommitLineData
17e5574b
EN
1/*
2 * "Ostensibly Recursive's Twin" merge strategy, or "ort" for short. Meant
3 * as a drop-in replacement for the "recursive" merge strategy, allowing one
4 * to replace
5 *
6 * git merge [-s recursive]
7 *
8 * with
9 *
10 * git merge -s ort
11 *
12 * Note: git's parser allows the space between '-s' and its argument to be
13 * missing. (Should I have backronymed "ham", "alsa", "kip", "nap, "alvo",
14 * "cale", "peedy", or "ins" instead of "ort"?)
15 */
16
17#include "cache.h"
18#include "merge-ort.h"
19
4296d8f1 20#include "alloc.h"
ea305a68 21#include "attr.h"
67845745 22#include "blob.h"
ef2b3693 23#include "cache-tree.h"
4296d8f1 24#include "commit.h"
67845745 25#include "commit-reach.h"
e4171b1b
EN
26#include "diff.h"
27#include "diffcore.h"
6681ce5c 28#include "dir.h"
f591c472 29#include "ll-merge.h"
ee4012dc 30#include "object-store.h"
4204cd59 31#include "revision.h"
5b59c3db 32#include "strmap.h"
c73cda76 33#include "submodule.h"
231e2dd4 34#include "tree.h"
6681ce5c 35#include "unpack-trees.h"
c8017176 36#include "xdiff-interface.h"
5b59c3db 37
d2bc1994
EN
38/*
39 * We have many arrays of size 3. Whenever we have such an array, the
40 * indices refer to one of the sides of the three-way merge. This is so
41 * pervasive that the constants 0, 1, and 2 are used in many places in the
42 * code (especially in arithmetic operations to find the other side's index
43 * or to compute a relevant mask), but sometimes these enum names are used
44 * to aid code clarity.
45 *
46 * See also 'filemask' and 'dirmask' in struct conflict_info; the "ith side"
47 * referred to there is one of these three sides.
48 */
49enum merge_side {
50 MERGE_BASE = 0,
51 MERGE_SIDE1 = 1,
52 MERGE_SIDE2 = 2
53};
54
19ceb486
EN
55static unsigned RESULT_INITIALIZED = 0x1abe11ed; /* unlikely accidental value */
56
beb06145
EN
57struct traversal_callback_data {
58 unsigned long mask;
59 unsigned long dirmask;
60 struct name_entry names[3];
61};
62
864075ec 63struct rename_info {
c09376d5
EN
64 /*
65 * All variables that are arrays of size 3 correspond to data tracked
66 * for the sides in enum merge_side. Index 0 is almost always unused
67 * because we often only need to track information for MERGE_SIDE1 and
68 * MERGE_SIDE2 (MERGE_BASE can't have rename information since renames
69 * are determined relative to what changed since the MERGE_BASE).
70 */
71
864075ec
EN
72 /*
73 * pairs: pairing of filenames from diffcore_rename()
864075ec
EN
74 */
75 struct diff_queue_struct pairs[3];
76
c09376d5
EN
77 /*
78 * dirs_removed: directories removed on a given side of history.
fb52938e
EN
79 *
80 * The keys of dirs_removed[side] are the directories that were removed
81 * on the given side of history. The value of the strintmap for each
82 * directory is a value from enum dir_rename_relevance.
c09376d5 83 */
a49b55d5 84 struct strintmap dirs_removed[3];
c09376d5
EN
85
86 /*
87 * dir_rename_count: tracking where parts of a directory were renamed to
88 *
89 * When files in a directory are renamed, they may not all go to the
90 * same location. Each strmap here tracks:
91 * old_dir => {new_dir => int}
92 * That is, dir_rename_count[side] is a strmap to a strintmap.
93 */
94 struct strmap dir_rename_count[3];
95
96 /*
97 * dir_renames: computed directory renames
98 *
99 * This is a map of old_dir => new_dir and is derived in part from
100 * dir_rename_count.
101 */
102 struct strmap dir_renames[3];
103
32a56dfb 104 /*
ec59da60 105 * relevant_sources: deleted paths wanted in rename detection, and why
32a56dfb
EN
106 *
107 * relevant_sources is a set of deleted paths on each side of
108 * history for which we need rename detection. If a path is deleted
109 * on one side of history, we need to detect if it is part of a
110 * rename if either
32a56dfb 111 * * the file is modified/deleted on the other side of history
ec59da60 112 * * we need to detect renames for an ancestor directory
32a56dfb 113 * If neither of those are true, we can skip rename detection for
ec59da60
EN
114 * that path. The reason is stored as a value from enum
115 * file_rename_relevance, as the reason can inform the algorithm in
116 * diffcore_rename_extended().
32a56dfb 117 */
a49b55d5 118 struct strintmap relevant_sources[3];
32a56dfb 119
2fd9eda4
EN
120 /*
121 * dir_rename_mask:
122 * 0: optimization removing unmodified potential rename source okay
123 * 2 or 4: optimization okay, but must check for files added to dir
124 * 7: optimization forbidden; need rename source in case of dir rename
125 */
126 unsigned dir_rename_mask:3;
127
beb06145
EN
128 /*
129 * callback_data_*: supporting data structures for alternate traversal
130 *
131 * We sometimes need to be able to traverse through all the files
132 * in a given tree before all immediate subdirectories within that
133 * tree. Since traverse_trees() doesn't do that naturally, we have
134 * a traverse_trees_wrapper() that stores any immediate
135 * subdirectories while traversing files, then traverses the
136 * immediate subdirectories later. These callback_data* variables
137 * store the information for the subdirectories so that we can do
138 * that traversal order.
139 */
140 struct traversal_callback_data *callback_data;
141 int callback_data_nr, callback_data_alloc;
142 char *callback_data_traverse_path;
143
64aceb6d
EN
144 /*
145 * merge_trees: trees passed to the merge algorithm for the merge
146 *
147 * merge_trees records the trees passed to the merge algorithm. But,
148 * this data also is stored in merge_result->priv. If a sequence of
149 * merges are being done (such as when cherry-picking or rebasing),
150 * the next merge can look at this and re-use information from
151 * previous merges under certain circumstances.
152 *
153 * See also all the cached_* variables.
154 */
155 struct tree *merge_trees[3];
156
157 /*
158 * cached_pairs_valid_side: which side's cached info can be reused
159 *
160 * See the description for merge_trees. For repeated merges, at most
161 * only one side's cached information can be used. Valid values:
162 * MERGE_SIDE2: cached data from side2 can be reused
163 * MERGE_SIDE1: cached data from side1 can be reused
164 * 0: no cached data can be reused
165 */
166 int cached_pairs_valid_side;
167
d29bd6d7
EN
168 /*
169 * cached_pairs: Caching of renames and deletions.
170 *
171 * These are mappings recording renames and deletions of individual
172 * files (not directories). They are thus a map from an old
173 * filename to either NULL (for deletions) or a new filename (for
174 * renames).
175 */
176 struct strmap cached_pairs[3];
177
178 /*
179 * cached_target_names: just the destinations from cached_pairs
180 *
181 * We sometimes want a fast lookup to determine if a given filename
182 * is one of the destinations in cached_pairs. cached_target_names
183 * is thus duplicative information, but it provides a fast lookup.
184 */
185 struct strset cached_target_names[3];
186
187 /*
188 * cached_irrelevant: Caching of rename_sources that aren't relevant.
189 *
190 * If we try to detect a rename for a source path and succeed, it's
191 * part of a rename. If we try to detect a rename for a source path
192 * and fail, then it's a delete. If we do not try to detect a rename
193 * for a path, then we don't know if it's a rename or a delete. If
194 * merge-ort doesn't think the path is relevant, then we just won't
195 * cache anything for that path. But there's a slight problem in
196 * that merge-ort can think a path is RELEVANT_LOCATION, but due to
197 * commit 9bd342137e ("diffcore-rename: determine which
198 * relevant_sources are no longer relevant", 2021-03-13),
199 * diffcore-rename can downgrade the path to RELEVANT_NO_MORE. To
200 * avoid excessive calls to diffcore_rename_extended() we still need
201 * to cache such paths, though we cannot record them as either
202 * renames or deletes. So we cache them here as a "turned out to be
203 * irrelevant *for this commit*" as they are often also irrelevant
204 * for subsequent commits, though we will have to do some extra
205 * checking to see whether such paths become relevant for rename
206 * detection when cherry-picking/rebasing subsequent commits.
207 */
208 struct strset cached_irrelevant[3];
209
864075ec
EN
210 /*
211 * needed_limit: value needed for inexact rename detection to run
212 *
213 * If the current rename limit wasn't high enough for inexact
214 * rename detection to run, this records the limit needed. Otherwise,
215 * this value remains 0.
216 */
217 int needed_limit;
218};
219
5b59c3db
EN
220struct merge_options_internal {
221 /*
222 * paths: primary data structure in all of merge ort.
223 *
224 * The keys of paths:
225 * * are full relative paths from the toplevel of the repository
226 * (e.g. "drivers/firmware/raspberrypi.c").
227 * * store all relevant paths in the repo, both directories and
228 * files (e.g. drivers, drivers/firmware would also be included)
229 * * these keys serve to intern all the path strings, which allows
230 * us to do pointer comparison on directory names instead of
231 * strcmp; we just have to be careful to use the interned strings.
43c1dccb
EN
232 * (Technically paths_to_free may track some strings that were
233 * removed from froms paths.)
5b59c3db
EN
234 *
235 * The values of paths:
236 * * either a pointer to a merged_info, or a conflict_info struct
237 * * merged_info contains all relevant information for a
238 * non-conflicted entry.
239 * * conflict_info contains a merged_info, plus any additional
240 * information about a conflict such as the higher orders stages
241 * involved and the names of the paths those came from (handy
242 * once renames get involved).
243 * * a path may start "conflicted" (i.e. point to a conflict_info)
244 * and then a later step (e.g. three-way content merge) determines
245 * it can be cleanly merged, at which point it'll be marked clean
246 * and the algorithm will ignore any data outside the contained
247 * merged_info for that entry
248 * * If an entry remains conflicted, the merged_info portion of a
249 * conflict_info will later be filled with whatever version of
250 * the file should be placed in the working directory (e.g. an
251 * as-merged-as-possible variation that contains conflict markers).
252 */
253 struct strmap paths;
254
255 /*
256 * conflicted: a subset of keys->values from "paths"
257 *
258 * conflicted is basically an optimization between process_entries()
259 * and record_conflicted_index_entries(); the latter could loop over
260 * ALL the entries in paths AGAIN and look for the ones that are
261 * still conflicted, but since process_entries() has to loop over
262 * all of them, it saves the ones it couldn't resolve in this strmap
263 * so that record_conflicted_index_entries() can iterate just the
264 * relevant entries.
265 */
266 struct strmap conflicted;
267
43c1dccb
EN
268 /*
269 * paths_to_free: additional list of strings to free
270 *
271 * If keys are removed from "paths", they are added to paths_to_free
272 * to ensure they are later freed. We avoid free'ing immediately since
273 * other places (e.g. conflict_info.pathnames[]) may still be
274 * referencing these paths.
275 */
276 struct string_list paths_to_free;
277
c5a6f655
EN
278 /*
279 * output: special messages and conflict notices for various paths
280 *
281 * This is a map of pathnames (a subset of the keys in "paths" above)
282 * to strbufs. It gathers various warning/conflict/notice messages
283 * for later processing.
284 */
285 struct strmap output;
286
5b59c3db 287 /*
864075ec
EN
288 * renames: various data relating to rename detection
289 */
290 struct rename_info renames;
291
ea305a68
EN
292 /*
293 * attr_index: hacky minimal index used for renormalization
294 *
295 * renormalization code _requires_ an index, though it only needs to
296 * find a .gitattributes file within the index. So, when
297 * renormalization is important, we create a special index with just
298 * that one file.
299 */
300 struct index_state attr_index;
301
5b59c3db 302 /*
05b85c6e 303 * current_dir_name, toplevel_dir: temporary vars
5b59c3db 304 *
05b85c6e
EN
305 * These are used in collect_merge_info_callback(), and will set the
306 * various merged_info.directory_name for the various paths we get;
307 * see documentation for that variable and the requirements placed on
308 * that field.
5b59c3db
EN
309 */
310 const char *current_dir_name;
05b85c6e 311 const char *toplevel_dir;
5b59c3db
EN
312
313 /* call_depth: recursion level counter for merging merge bases */
314 int call_depth;
315};
316
317struct version_info {
318 struct object_id oid;
319 unsigned short mode;
320};
321
322struct merged_info {
323 /* if is_null, ignore result. otherwise result has oid & mode */
324 struct version_info result;
325 unsigned is_null:1;
326
327 /*
328 * clean: whether the path in question is cleanly merged.
329 *
330 * see conflict_info.merged for more details.
331 */
332 unsigned clean:1;
333
334 /*
335 * basename_offset: offset of basename of path.
336 *
337 * perf optimization to avoid recomputing offset of final '/'
338 * character in pathname (0 if no '/' in pathname).
339 */
340 size_t basename_offset;
341
342 /*
343 * directory_name: containing directory name.
344 *
345 * Note that we assume directory_name is constructed such that
346 * strcmp(dir1_name, dir2_name) == 0 iff dir1_name == dir2_name,
347 * i.e. string equality is equivalent to pointer equality. For this
348 * to hold, we have to be careful setting directory_name.
349 */
350 const char *directory_name;
351};
352
353struct conflict_info {
354 /*
355 * merged: the version of the path that will be written to working tree
356 *
357 * WARNING: It is critical to check merged.clean and ensure it is 0
358 * before reading any conflict_info fields outside of merged.
359 * Allocated merge_info structs will always have clean set to 1.
360 * Allocated conflict_info structs will have merged.clean set to 0
361 * initially. The merged.clean field is how we know if it is safe
362 * to access other parts of conflict_info besides merged; if a
363 * conflict_info's merged.clean is changed to 1, the rest of the
364 * algorithm is not allowed to look at anything outside of the
365 * merged member anymore.
366 */
367 struct merged_info merged;
368
369 /* oids & modes from each of the three trees for this path */
370 struct version_info stages[3];
371
372 /* pathnames for each stage; may differ due to rename detection */
373 const char *pathnames[3];
374
375 /* Whether this path is/was involved in a directory/file conflict */
376 unsigned df_conflict:1;
377
1c7873cd
EN
378 /*
379 * Whether this path is/was involved in a non-content conflict other
380 * than a directory/file conflict (e.g. rename/rename, rename/delete,
381 * file location based on possible directory rename).
382 */
383 unsigned path_conflict:1;
384
5b59c3db
EN
385 /*
386 * For filemask and dirmask, the ith bit corresponds to whether the
387 * ith entry is a file (filemask) or a directory (dirmask). Thus,
388 * filemask & dirmask is always zero, and filemask | dirmask is at
389 * most 7 but can be less when a path does not appear as either a
390 * file or a directory on at least one side of history.
391 *
392 * Note that these masks are related to enum merge_side, as the ith
393 * entry corresponds to side i.
394 *
395 * These values come from a traverse_trees() call; more info may be
396 * found looking at tree-walk.h's struct traverse_info,
397 * particularly the documentation above the "fn" member (note that
398 * filemask = mask & ~dirmask from that documentation).
399 */
400 unsigned filemask:3;
401 unsigned dirmask:3;
402
403 /*
404 * Optimization to track which stages match, to avoid the need to
405 * recompute it in multiple steps. Either 0 or at least 2 bits are
406 * set; if at least 2 bits are set, their corresponding stages match.
407 */
408 unsigned match_mask:3;
409};
410
04af1879
EN
411/*** Function Grouping: various utility functions ***/
412
98bf9841
EN
413/*
414 * For the next three macros, see warning for conflict_info.merged.
415 *
416 * In each of the below, mi is a struct merged_info*, and ci was defined
417 * as a struct conflict_info* (but we need to verify ci isn't actually
418 * pointed at a struct merged_info*).
419 *
420 * INITIALIZE_CI: Assign ci to mi but only if it's safe; set to NULL otherwise.
421 * VERIFY_CI: Ensure that something we assigned to a conflict_info* is one.
422 * ASSIGN_AND_VERIFY_CI: Similar to VERIFY_CI but do assignment first.
423 */
424#define INITIALIZE_CI(ci, mi) do { \
425 (ci) = (!(mi) || (mi)->clean) ? NULL : (struct conflict_info *)(mi); \
426} while (0)
427#define VERIFY_CI(ci) assert(ci && !ci->merged.clean);
428#define ASSIGN_AND_VERIFY_CI(ci, mi) do { \
429 (ci) = (struct conflict_info *)(mi); \
430 assert((ci) && !(mi)->clean); \
431} while (0)
432
89422d29
EN
433static void free_strmap_strings(struct strmap *map)
434{
435 struct hashmap_iter iter;
436 struct strmap_entry *entry;
437
438 strmap_for_each_entry(map, &iter, entry) {
439 free((char*)entry->key);
440 }
441}
442
43e9c4ee
EN
443static void clear_or_reinit_internal_opts(struct merge_options_internal *opti,
444 int reinitialize)
101bc5bc 445{
f5d9fbc2
EN
446 struct rename_info *renames = &opti->renames;
447 int i;
43e9c4ee
EN
448 void (*strmap_func)(struct strmap *, int) =
449 reinitialize ? strmap_partial_clear : strmap_clear;
a49b55d5
EN
450 void (*strintmap_func)(struct strintmap *) =
451 reinitialize ? strintmap_partial_clear : strintmap_clear;
d29bd6d7
EN
452 void (*strset_func)(struct strset *) =
453 reinitialize ? strset_partial_clear : strset_clear;
101bc5bc
EN
454
455 /*
456 * We marked opti->paths with strdup_strings = 0, so that we
457 * wouldn't have to make another copy of the fullpath created by
458 * make_traverse_path from setup_path_info(). But, now that we've
459 * used it and have no other references to these strings, it is time
460 * to deallocate them.
461 */
462 free_strmap_strings(&opti->paths);
43e9c4ee 463 strmap_func(&opti->paths, 1);
101bc5bc
EN
464
465 /*
466 * All keys and values in opti->conflicted are a subset of those in
467 * opti->paths. We don't want to deallocate anything twice, so we
468 * don't free the keys and we pass 0 for free_values.
469 */
43e9c4ee 470 strmap_func(&opti->conflicted, 0);
43c1dccb
EN
471
472 /*
473 * opti->paths_to_free is similar to opti->paths; we created it with
474 * strdup_strings = 0 to avoid making _another_ copy of the fullpath
475 * but now that we've used it and have no other references to these
476 * strings, it is time to deallocate them. We do so by temporarily
477 * setting strdup_strings to 1.
478 */
479 opti->paths_to_free.strdup_strings = 1;
480 string_list_clear(&opti->paths_to_free, 0);
481 opti->paths_to_free.strdup_strings = 0;
c5a6f655 482
1218b3ab 483 if (opti->attr_index.cache_nr) /* true iff opt->renormalize */
ea305a68
EN
484 discard_index(&opti->attr_index);
485
f5d9fbc2
EN
486 /* Free memory used by various renames maps */
487 for (i = MERGE_SIDE1; i <= MERGE_SIDE2; ++i) {
a49b55d5 488 strintmap_func(&renames->dirs_removed[i]);
f5d9fbc2 489 strmap_func(&renames->dir_renames[i], 0);
a49b55d5 490 strintmap_func(&renames->relevant_sources[i]);
d5098029
EN
491 if (!reinitialize)
492 assert(renames->cached_pairs_valid_side == 0);
493 if (i != renames->cached_pairs_valid_side) {
494 strset_func(&renames->cached_target_names[i]);
495 strmap_func(&renames->cached_pairs[i], 1);
496 strset_func(&renames->cached_irrelevant[i]);
497 partial_clear_dir_rename_count(&renames->dir_rename_count[i]);
498 if (!reinitialize)
499 strmap_clear(&renames->dir_rename_count[i], 1);
500 }
f5d9fbc2 501 }
64aceb6d
EN
502 renames->cached_pairs_valid_side = 0;
503 renames->dir_rename_mask = 0;
f5d9fbc2 504
c5a6f655
EN
505 if (!reinitialize) {
506 struct hashmap_iter iter;
507 struct strmap_entry *e;
508
509 /* Release and free each strbuf found in output */
510 strmap_for_each_entry(&opti->output, &iter, e) {
511 struct strbuf *sb = e->value;
512 strbuf_release(sb);
513 /*
514 * While strictly speaking we don't need to free(sb)
515 * here because we could pass free_values=1 when
516 * calling strmap_clear() on opti->output, that would
517 * require strmap_clear to do another
518 * strmap_for_each_entry() loop, so we just free it
519 * while we're iterating anyway.
520 */
521 free(sb);
522 }
523 strmap_clear(&opti->output, 0);
524 }
beb06145
EN
525
526 /* Clean out callback_data as well. */
527 FREE_AND_NULL(renames->callback_data);
528 renames->callback_data_nr = renames->callback_data_alloc = 0;
101bc5bc
EN
529}
530
0c0d705b
EN
531static int err(struct merge_options *opt, const char *err, ...)
532{
533 va_list params;
534 struct strbuf sb = STRBUF_INIT;
535
536 strbuf_addstr(&sb, "error: ");
537 va_start(params, err);
538 strbuf_vaddf(&sb, err, params);
539 va_end(params);
540
541 error("%s", sb.buf);
542 strbuf_release(&sb);
543
544 return -1;
545}
546
c73cda76
EN
547static void format_commit(struct strbuf *sb,
548 int indent,
549 struct commit *commit)
550{
70f19c7f
EN
551 struct merge_remote_desc *desc;
552 struct pretty_print_context ctx = {0};
553 ctx.abbrev = DEFAULT_ABBREV;
554
555 strbuf_addchars(sb, ' ', indent);
556 desc = merge_remote_util(commit);
557 if (desc) {
558 strbuf_addf(sb, "virtual %s\n", desc->name);
559 return;
560 }
561
562 format_commit_message(commit, "%h %s", sb, &ctx);
563 strbuf_addch(sb, '\n');
c73cda76
EN
564}
565
c5a6f655
EN
566__attribute__((format (printf, 4, 5)))
567static void path_msg(struct merge_options *opt,
568 const char *path,
569 int omittable_hint, /* skippable under --remerge-diff */
570 const char *fmt, ...)
571{
572 va_list ap;
573 struct strbuf *sb = strmap_get(&opt->priv->output, path);
574 if (!sb) {
575 sb = xmalloc(sizeof(*sb));
576 strbuf_init(sb, 0);
577 strmap_put(&opt->priv->output, path, sb);
578 }
579
580 va_start(ap, fmt);
581 strbuf_vaddf(sb, fmt, ap);
582 va_end(ap);
583
584 strbuf_addch(sb, '\n');
585}
586
5a1a1e8e
EN
587/* add a string to a strbuf, but converting "/" to "_" */
588static void add_flattened_path(struct strbuf *out, const char *s)
589{
590 size_t i = out->len;
591 strbuf_addstr(out, s);
592 for (; i < out->len; i++)
593 if (out->buf[i] == '/')
594 out->buf[i] = '_';
595}
596
23366d2a
EN
597static char *unique_path(struct strmap *existing_paths,
598 const char *path,
599 const char *branch)
600{
5a1a1e8e
EN
601 struct strbuf newpath = STRBUF_INIT;
602 int suffix = 0;
603 size_t base_len;
604
605 strbuf_addf(&newpath, "%s~", path);
606 add_flattened_path(&newpath, branch);
607
608 base_len = newpath.len;
609 while (strmap_contains(existing_paths, newpath.buf)) {
610 strbuf_setlen(&newpath, base_len);
611 strbuf_addf(&newpath, "_%d", suffix++);
612 }
613
614 return strbuf_detach(&newpath, NULL);
23366d2a
EN
615}
616
04af1879
EN
617/*** Function Grouping: functions related to collect_merge_info() ***/
618
a68e6cea
EN
619static int traverse_trees_wrapper_callback(int n,
620 unsigned long mask,
621 unsigned long dirmask,
622 struct name_entry *names,
623 struct traverse_info *info)
624{
625 struct merge_options *opt = info->data;
626 struct rename_info *renames = &opt->priv->renames;
2fd9eda4 627 unsigned filemask = mask & ~dirmask;
a68e6cea
EN
628
629 assert(n==3);
630
631 if (!renames->callback_data_traverse_path)
632 renames->callback_data_traverse_path = xstrdup(info->traverse_path);
633
2fd9eda4
EN
634 if (filemask && filemask == renames->dir_rename_mask)
635 renames->dir_rename_mask = 0x07;
636
a68e6cea
EN
637 ALLOC_GROW(renames->callback_data, renames->callback_data_nr + 1,
638 renames->callback_data_alloc);
639 renames->callback_data[renames->callback_data_nr].mask = mask;
640 renames->callback_data[renames->callback_data_nr].dirmask = dirmask;
641 COPY_ARRAY(renames->callback_data[renames->callback_data_nr].names,
642 names, 3);
643 renames->callback_data_nr++;
644
645 return mask;
646}
647
648/*
649 * Much like traverse_trees(), BUT:
650 * - read all the tree entries FIRST, saving them
651 * - note that the above step provides an opportunity to compute necessary
652 * additional details before the "real" traversal
653 * - loop through the saved entries and call the original callback on them
654 */
a68e6cea
EN
655static int traverse_trees_wrapper(struct index_state *istate,
656 int n,
657 struct tree_desc *t,
658 struct traverse_info *info)
659{
660 int ret, i, old_offset;
661 traverse_callback_t old_fn;
662 char *old_callback_data_traverse_path;
663 struct merge_options *opt = info->data;
664 struct rename_info *renames = &opt->priv->renames;
665
2fd9eda4
EN
666 assert(renames->dir_rename_mask == 2 || renames->dir_rename_mask == 4);
667
a68e6cea
EN
668 old_callback_data_traverse_path = renames->callback_data_traverse_path;
669 old_fn = info->fn;
670 old_offset = renames->callback_data_nr;
671
672 renames->callback_data_traverse_path = NULL;
673 info->fn = traverse_trees_wrapper_callback;
674 ret = traverse_trees(istate, n, t, info);
675 if (ret < 0)
676 return ret;
677
678 info->traverse_path = renames->callback_data_traverse_path;
679 info->fn = old_fn;
680 for (i = old_offset; i < renames->callback_data_nr; ++i) {
681 info->fn(n,
682 renames->callback_data[i].mask,
683 renames->callback_data[i].dirmask,
684 renames->callback_data[i].names,
685 info);
686 }
687
688 renames->callback_data_nr = old_offset;
689 free(renames->callback_data_traverse_path);
690 renames->callback_data_traverse_path = old_callback_data_traverse_path;
691 info->traverse_path = NULL;
692 return 0;
693}
694
98bf9841
EN
695static void setup_path_info(struct merge_options *opt,
696 struct string_list_item *result,
697 const char *current_dir_name,
698 int current_dir_name_len,
699 char *fullpath, /* we'll take over ownership */
700 struct name_entry *names,
701 struct name_entry *merged_version,
702 unsigned is_null, /* boolean */
703 unsigned df_conflict, /* boolean */
704 unsigned filemask,
705 unsigned dirmask,
706 int resolved /* boolean */)
707{
708 /* result->util is void*, so mi is a convenience typed variable */
709 struct merged_info *mi;
710
711 assert(!is_null || resolved);
712 assert(!df_conflict || !resolved); /* df_conflict implies !resolved */
713 assert(resolved == (merged_version != NULL));
714
715 mi = xcalloc(1, resolved ? sizeof(struct merged_info) :
716 sizeof(struct conflict_info));
717 mi->directory_name = current_dir_name;
718 mi->basename_offset = current_dir_name_len;
719 mi->clean = !!resolved;
720 if (resolved) {
721 mi->result.mode = merged_version->mode;
722 oidcpy(&mi->result.oid, &merged_version->oid);
723 mi->is_null = !!is_null;
724 } else {
725 int i;
726 struct conflict_info *ci;
727
728 ASSIGN_AND_VERIFY_CI(ci, mi);
729 for (i = MERGE_BASE; i <= MERGE_SIDE2; i++) {
730 ci->pathnames[i] = fullpath;
731 ci->stages[i].mode = names[i].mode;
732 oidcpy(&ci->stages[i].oid, &names[i].oid);
733 }
734 ci->filemask = filemask;
735 ci->dirmask = dirmask;
736 ci->df_conflict = !!df_conflict;
737 if (dirmask)
738 /*
739 * Assume is_null for now, but if we have entries
740 * under the directory then when it is complete in
741 * write_completed_directory() it'll update this.
742 * Also, for D/F conflicts, we have to handle the
743 * directory first, then clear this bit and process
744 * the file to see how it is handled -- that occurs
745 * near the top of process_entry().
746 */
747 mi->is_null = 1;
748 }
749 strmap_put(&opt->priv->paths, fullpath, mi);
750 result->string = fullpath;
751 result->util = mi;
752}
753
f78cf976
EN
754static void add_pair(struct merge_options *opt,
755 struct name_entry *names,
756 const char *pathname,
757 unsigned side,
32a56dfb 758 unsigned is_add /* if false, is_delete */,
2fd9eda4
EN
759 unsigned match_mask,
760 unsigned dir_rename_mask)
f78cf976
EN
761{
762 struct diff_filespec *one, *two;
763 struct rename_info *renames = &opt->priv->renames;
764 int names_idx = is_add ? side : 0;
765
32a56dfb
EN
766 if (!is_add) {
767 unsigned content_relevant = (match_mask == 0);
2fd9eda4 768 unsigned location_relevant = (dir_rename_mask == 0x07);
32a56dfb 769
ec59da60
EN
770 if (content_relevant || location_relevant) {
771 /* content_relevant trumps location_relevant */
772 strintmap_set(&renames->relevant_sources[side], pathname,
773 content_relevant ? RELEVANT_CONTENT : RELEVANT_LOCATION);
774 }
32a56dfb
EN
775 }
776
f78cf976
EN
777 one = alloc_filespec(pathname);
778 two = alloc_filespec(pathname);
779 fill_filespec(is_add ? two : one,
780 &names[names_idx].oid, 1, names[names_idx].mode);
781 diff_queue(&renames->pairs[side], one, two);
782}
783
eb3e3e1d
EN
784static void collect_rename_info(struct merge_options *opt,
785 struct name_entry *names,
786 const char *dirname,
787 const char *fullname,
788 unsigned filemask,
789 unsigned dirmask,
790 unsigned match_mask)
791{
792 struct rename_info *renames = &opt->priv->renames;
f78cf976 793 unsigned side;
eb3e3e1d 794
2fd9eda4
EN
795 /*
796 * Update dir_rename_mask (determines ignore-rename-source validity)
797 *
798 * dir_rename_mask helps us keep track of when directory rename
799 * detection may be relevant. Basically, whenver a directory is
800 * removed on one side of history, and a file is added to that
801 * directory on the other side of history, directory rename
802 * detection is relevant (meaning we have to detect renames for all
803 * files within that directory to deduce where the directory
804 * moved). Also, whenever a directory needs directory rename
805 * detection, due to the "majority rules" choice for where to move
806 * it (see t6423 testcase 1f), we also need to detect renames for
807 * all files within subdirectories of that directory as well.
808 *
809 * Here we haven't looked at files within the directory yet, we are
810 * just looking at the directory itself. So, if we aren't yet in
811 * a case where a parent directory needed directory rename detection
812 * (i.e. dir_rename_mask != 0x07), and if the directory was removed
813 * on one side of history, record the mask of the other side of
814 * history in dir_rename_mask.
815 */
816 if (renames->dir_rename_mask != 0x07 &&
817 (dirmask == 3 || dirmask == 5)) {
818 /* simple sanity check */
819 assert(renames->dir_rename_mask == 0 ||
820 renames->dir_rename_mask == (dirmask & ~1));
821 /* update dir_rename_mask; have it record mask of new side */
822 renames->dir_rename_mask = (dirmask & ~1);
823 }
824
eb3e3e1d
EN
825 /* Update dirs_removed, as needed */
826 if (dirmask == 1 || dirmask == 3 || dirmask == 5) {
827 /* absent_mask = 0x07 - dirmask; sides = absent_mask/2 */
828 unsigned sides = (0x07 - dirmask)/2;
fb52938e
EN
829 unsigned relevance = (renames->dir_rename_mask == 0x07) ?
830 RELEVANT_FOR_ANCESTOR : NOT_RELEVANT;
831 /*
832 * Record relevance of this directory. However, note that
833 * when collect_merge_info_callback() recurses into this
834 * directory and calls collect_rename_info() on paths
835 * within that directory, if we find a path that was added
836 * to this directory on the other side of history, we will
837 * upgrade this value to RELEVANT_FOR_SELF; see below.
838 */
eb3e3e1d 839 if (sides & 1)
fb52938e
EN
840 strintmap_set(&renames->dirs_removed[1], fullname,
841 relevance);
eb3e3e1d 842 if (sides & 2)
fb52938e
EN
843 strintmap_set(&renames->dirs_removed[2], fullname,
844 relevance);
845 }
846
847 /*
848 * Here's the block that potentially upgrades to RELEVANT_FOR_SELF.
849 * When we run across a file added to a directory. In such a case,
850 * find the directory of the file and upgrade its relevance.
851 */
852 if (renames->dir_rename_mask == 0x07 &&
853 (filemask == 2 || filemask == 4)) {
854 /*
855 * Need directory rename for parent directory on other side
856 * of history from added file. Thus
857 * side = (~filemask & 0x06) >> 1
858 * or
859 * side = 3 - (filemask/2).
860 */
861 unsigned side = 3 - (filemask >> 1);
862 strintmap_set(&renames->dirs_removed[side], dirname,
863 RELEVANT_FOR_SELF);
eb3e3e1d 864 }
f78cf976
EN
865
866 if (filemask == 0 || filemask == 7)
867 return;
868
869 for (side = MERGE_SIDE1; side <= MERGE_SIDE2; ++side) {
870 unsigned side_mask = (1 << side);
871
872 /* Check for deletion on side */
873 if ((filemask & 1) && !(filemask & side_mask))
32a56dfb 874 add_pair(opt, names, fullname, side, 0 /* delete */,
2fd9eda4
EN
875 match_mask & filemask,
876 renames->dir_rename_mask);
f78cf976
EN
877
878 /* Check for addition on side */
879 if (!(filemask & 1) && (filemask & side_mask))
32a56dfb 880 add_pair(opt, names, fullname, side, 1 /* add */,
2fd9eda4
EN
881 match_mask & filemask,
882 renames->dir_rename_mask);
f78cf976 883 }
eb3e3e1d
EN
884}
885
d2bc1994
EN
886static int collect_merge_info_callback(int n,
887 unsigned long mask,
888 unsigned long dirmask,
889 struct name_entry *names,
890 struct traverse_info *info)
891{
892 /*
893 * n is 3. Always.
894 * common ancestor (mbase) has mask 1, and stored in index 0 of names
895 * head of side 1 (side1) has mask 2, and stored in index 1 of names
896 * head of side 2 (side2) has mask 4, and stored in index 2 of names
897 */
898 struct merge_options *opt = info->data;
899 struct merge_options_internal *opti = opt->priv;
2fd9eda4 900 struct rename_info *renames = &opt->priv->renames;
98bf9841
EN
901 struct string_list_item pi; /* Path Info */
902 struct conflict_info *ci; /* typed alias to pi.util (which is void*) */
d2bc1994
EN
903 struct name_entry *p;
904 size_t len;
905 char *fullpath;
98bf9841 906 const char *dirname = opti->current_dir_name;
2fd9eda4 907 unsigned prev_dir_rename_mask = renames->dir_rename_mask;
d2bc1994 908 unsigned filemask = mask & ~dirmask;
34e557af 909 unsigned match_mask = 0; /* will be updated below */
d2bc1994
EN
910 unsigned mbase_null = !(mask & 1);
911 unsigned side1_null = !(mask & 2);
912 unsigned side2_null = !(mask & 4);
885f0063
EN
913 unsigned side1_matches_mbase = (!side1_null && !mbase_null &&
914 names[0].mode == names[1].mode &&
915 oideq(&names[0].oid, &names[1].oid));
916 unsigned side2_matches_mbase = (!side2_null && !mbase_null &&
917 names[0].mode == names[2].mode &&
918 oideq(&names[0].oid, &names[2].oid));
919 unsigned sides_match = (!side1_null && !side2_null &&
920 names[1].mode == names[2].mode &&
921 oideq(&names[1].oid, &names[2].oid));
d2bc1994 922
34e557af
EN
923 /*
924 * Note: When a path is a file on one side of history and a directory
925 * in another, we have a directory/file conflict. In such cases, if
926 * the conflict doesn't resolve from renames and deletions, then we
927 * always leave directories where they are and move files out of the
928 * way. Thus, while struct conflict_info has a df_conflict field to
929 * track such conflicts, we ignore that field for any directories at
930 * a path and only pay attention to it for files at the given path.
931 * The fact that we leave directories were they are also means that
932 * we do not need to worry about getting additional df_conflict
933 * information propagated from parent directories down to children
934 * (unlike, say traverse_trees_recursive() in unpack-trees.c, which
935 * sets a newinfo.df_conflicts field specifically to propagate it).
936 */
937 unsigned df_conflict = (filemask != 0) && (dirmask != 0);
938
d2bc1994
EN
939 /* n = 3 is a fundamental assumption. */
940 if (n != 3)
941 BUG("Called collect_merge_info_callback wrong");
942
943 /*
944 * A bunch of sanity checks verifying that traverse_trees() calls
945 * us the way I expect. Could just remove these at some point,
946 * though maybe they are helpful to future code readers.
947 */
948 assert(mbase_null == is_null_oid(&names[0].oid));
949 assert(side1_null == is_null_oid(&names[1].oid));
950 assert(side2_null == is_null_oid(&names[2].oid));
951 assert(!mbase_null || !side1_null || !side2_null);
952 assert(mask > 0 && mask < 8);
953
34e557af
EN
954 /* Determine match_mask */
955 if (side1_matches_mbase)
956 match_mask = (side2_matches_mbase ? 7 : 3);
957 else if (side2_matches_mbase)
958 match_mask = 5;
959 else if (sides_match)
960 match_mask = 6;
961
d2bc1994
EN
962 /*
963 * Get the name of the relevant filepath, which we'll pass to
964 * setup_path_info() for tracking.
965 */
966 p = names;
967 while (!p->mode)
968 p++;
969 len = traverse_path_len(info, p->pathlen);
970
971 /* +1 in both of the following lines to include the NUL byte */
972 fullpath = xmalloc(len + 1);
973 make_traverse_path(fullpath, len + 1, info, p->path, p->pathlen);
974
291f29ca
EN
975 /*
976 * If mbase, side1, and side2 all match, we can resolve early. Even
977 * if these are trees, there will be no renames or anything
978 * underneath.
979 */
980 if (side1_matches_mbase && side2_matches_mbase) {
981 /* mbase, side1, & side2 all match; use mbase as resolution */
982 setup_path_info(opt, &pi, dirname, info->pathlen, fullpath,
983 names, names+0, mbase_null, 0,
984 filemask, dirmask, 1);
985 return mask;
986 }
987
eb3e3e1d
EN
988 /*
989 * Gather additional information used in rename detection.
990 */
991 collect_rename_info(opt, names, dirname, fullpath,
992 filemask, dirmask, match_mask);
993
d2bc1994 994 /*
98bf9841
EN
995 * Record information about the path so we can resolve later in
996 * process_entries.
d2bc1994 997 */
98bf9841
EN
998 setup_path_info(opt, &pi, dirname, info->pathlen, fullpath,
999 names, NULL, 0, df_conflict, filemask, dirmask, 0);
1000
1001 ci = pi.util;
1002 VERIFY_CI(ci);
34e557af 1003 ci->match_mask = match_mask;
d2bc1994
EN
1004
1005 /* If dirmask, recurse into subdirectories */
1006 if (dirmask) {
1007 struct traverse_info newinfo;
1008 struct tree_desc t[3];
1009 void *buf[3] = {NULL, NULL, NULL};
1010 const char *original_dir_name;
1011 int i, ret;
1012
1013 ci->match_mask &= filemask;
1014 newinfo = *info;
1015 newinfo.prev = info;
1016 newinfo.name = p->path;
1017 newinfo.namelen = p->pathlen;
1018 newinfo.pathlen = st_add3(newinfo.pathlen, p->pathlen, 1);
34e557af
EN
1019 /*
1020 * If this directory we are about to recurse into cared about
1021 * its parent directory (the current directory) having a D/F
1022 * conflict, then we'd propagate the masks in this way:
1023 * newinfo.df_conflicts |= (mask & ~dirmask);
1024 * But we don't worry about propagating D/F conflicts. (See
1025 * comment near setting of local df_conflict variable near
1026 * the beginning of this function).
1027 */
d2bc1994
EN
1028
1029 for (i = MERGE_BASE; i <= MERGE_SIDE2; i++) {
885f0063
EN
1030 if (i == 1 && side1_matches_mbase)
1031 t[1] = t[0];
1032 else if (i == 2 && side2_matches_mbase)
1033 t[2] = t[0];
1034 else if (i == 2 && sides_match)
1035 t[2] = t[1];
1036 else {
1037 const struct object_id *oid = NULL;
1038 if (dirmask & 1)
1039 oid = &names[i].oid;
1040 buf[i] = fill_tree_descriptor(opt->repo,
1041 t + i, oid);
1042 }
d2bc1994
EN
1043 dirmask >>= 1;
1044 }
1045
1046 original_dir_name = opti->current_dir_name;
98bf9841 1047 opti->current_dir_name = pi.string;
2fd9eda4
EN
1048 if (renames->dir_rename_mask == 0 ||
1049 renames->dir_rename_mask == 0x07)
1050 ret = traverse_trees(NULL, 3, t, &newinfo);
1051 else
1052 ret = traverse_trees_wrapper(NULL, 3, t, &newinfo);
d2bc1994 1053 opti->current_dir_name = original_dir_name;
2fd9eda4 1054 renames->dir_rename_mask = prev_dir_rename_mask;
d2bc1994
EN
1055
1056 for (i = MERGE_BASE; i <= MERGE_SIDE2; i++)
1057 free(buf[i]);
1058
1059 if (ret < 0)
1060 return -1;
1061 }
1062
1063 return mask;
1064}
1065
231e2dd4
EN
1066static int collect_merge_info(struct merge_options *opt,
1067 struct tree *merge_base,
1068 struct tree *side1,
1069 struct tree *side2)
1070{
d2bc1994
EN
1071 int ret;
1072 struct tree_desc t[3];
1073 struct traverse_info info;
d2bc1994 1074
05b85c6e
EN
1075 opt->priv->toplevel_dir = "";
1076 opt->priv->current_dir_name = opt->priv->toplevel_dir;
1077 setup_traverse_info(&info, opt->priv->toplevel_dir);
d2bc1994
EN
1078 info.fn = collect_merge_info_callback;
1079 info.data = opt;
1080 info.show_all_errors = 1;
1081
1082 parse_tree(merge_base);
1083 parse_tree(side1);
1084 parse_tree(side2);
1085 init_tree_desc(t + 0, merge_base->buffer, merge_base->size);
1086 init_tree_desc(t + 1, side1->buffer, side1->size);
1087 init_tree_desc(t + 2, side2->buffer, side2->size);
1088
557ac035 1089 trace2_region_enter("merge", "traverse_trees", opt->repo);
d2bc1994 1090 ret = traverse_trees(NULL, 3, t, &info);
557ac035 1091 trace2_region_leave("merge", "traverse_trees", opt->repo);
d2bc1994
EN
1092
1093 return ret;
231e2dd4
EN
1094}
1095
04af1879
EN
1096/*** Function Grouping: functions related to threeway content merges ***/
1097
c73cda76
EN
1098static int find_first_merges(struct repository *repo,
1099 const char *path,
1100 struct commit *a,
1101 struct commit *b,
1102 struct object_array *result)
1103{
4204cd59
EN
1104 int i, j;
1105 struct object_array merges = OBJECT_ARRAY_INIT;
1106 struct commit *commit;
1107 int contains_another;
1108
1109 char merged_revision[GIT_MAX_HEXSZ + 2];
1110 const char *rev_args[] = { "rev-list", "--merges", "--ancestry-path",
1111 "--all", merged_revision, NULL };
1112 struct rev_info revs;
1113 struct setup_revision_opt rev_opts;
1114
1115 memset(result, 0, sizeof(struct object_array));
1116 memset(&rev_opts, 0, sizeof(rev_opts));
1117
1118 /* get all revisions that merge commit a */
1119 xsnprintf(merged_revision, sizeof(merged_revision), "^%s",
1120 oid_to_hex(&a->object.oid));
1121 repo_init_revisions(repo, &revs, NULL);
1122 rev_opts.submodule = path;
1123 /* FIXME: can't handle linked worktrees in submodules yet */
1124 revs.single_worktree = path != NULL;
1125 setup_revisions(ARRAY_SIZE(rev_args)-1, rev_args, &revs, &rev_opts);
1126
1127 /* save all revisions from the above list that contain b */
1128 if (prepare_revision_walk(&revs))
1129 die("revision walk setup failed");
1130 while ((commit = get_revision(&revs)) != NULL) {
1131 struct object *o = &(commit->object);
1132 if (in_merge_bases(b, commit))
1133 add_object_array(o, NULL, &merges);
1134 }
1135 reset_revision_walk();
1136
1137 /* Now we've got all merges that contain a and b. Prune all
1138 * merges that contain another found merge and save them in
1139 * result.
1140 */
1141 for (i = 0; i < merges.nr; i++) {
1142 struct commit *m1 = (struct commit *) merges.objects[i].item;
1143
1144 contains_another = 0;
1145 for (j = 0; j < merges.nr; j++) {
1146 struct commit *m2 = (struct commit *) merges.objects[j].item;
1147 if (i != j && in_merge_bases(m2, m1)) {
1148 contains_another = 1;
1149 break;
1150 }
1151 }
1152
1153 if (!contains_another)
1154 add_object_array(merges.objects[i].item, NULL, result);
1155 }
1156
1157 object_array_clear(&merges);
1158 return result->nr;
c73cda76
EN
1159}
1160
62fdec17
EN
1161static int merge_submodule(struct merge_options *opt,
1162 const char *path,
1163 const struct object_id *o,
1164 const struct object_id *a,
1165 const struct object_id *b,
1166 struct object_id *result)
1167{
c73cda76
EN
1168 struct commit *commit_o, *commit_a, *commit_b;
1169 int parent_count;
1170 struct object_array merges;
1171 struct strbuf sb = STRBUF_INIT;
1172
1173 int i;
1174 int search = !opt->priv->call_depth;
1175
1176 /* store fallback answer in result in case we fail */
1177 oidcpy(result, opt->priv->call_depth ? o : a);
1178
1179 /* we can not handle deletion conflicts */
1180 if (is_null_oid(o))
1181 return 0;
1182 if (is_null_oid(a))
1183 return 0;
1184 if (is_null_oid(b))
1185 return 0;
1186
1187 if (add_submodule_odb(path)) {
1188 path_msg(opt, path, 0,
1189 _("Failed to merge submodule %s (not checked out)"),
1190 path);
1191 return 0;
1192 }
1193
1194 if (!(commit_o = lookup_commit_reference(opt->repo, o)) ||
1195 !(commit_a = lookup_commit_reference(opt->repo, a)) ||
1196 !(commit_b = lookup_commit_reference(opt->repo, b))) {
1197 path_msg(opt, path, 0,
1198 _("Failed to merge submodule %s (commits not present)"),
1199 path);
1200 return 0;
1201 }
1202
1203 /* check whether both changes are forward */
1204 if (!in_merge_bases(commit_o, commit_a) ||
1205 !in_merge_bases(commit_o, commit_b)) {
1206 path_msg(opt, path, 0,
1207 _("Failed to merge submodule %s "
1208 "(commits don't follow merge-base)"),
1209 path);
1210 return 0;
1211 }
1212
1213 /* Case #1: a is contained in b or vice versa */
1214 if (in_merge_bases(commit_a, commit_b)) {
1215 oidcpy(result, b);
1216 path_msg(opt, path, 1,
1217 _("Note: Fast-forwarding submodule %s to %s"),
1218 path, oid_to_hex(b));
1219 return 1;
1220 }
1221 if (in_merge_bases(commit_b, commit_a)) {
1222 oidcpy(result, a);
1223 path_msg(opt, path, 1,
1224 _("Note: Fast-forwarding submodule %s to %s"),
1225 path, oid_to_hex(a));
1226 return 1;
1227 }
1228
1229 /*
1230 * Case #2: There are one or more merges that contain a and b in
1231 * the submodule. If there is only one, then present it as a
1232 * suggestion to the user, but leave it marked unmerged so the
1233 * user needs to confirm the resolution.
1234 */
1235
1236 /* Skip the search if makes no sense to the calling context. */
1237 if (!search)
1238 return 0;
1239
1240 /* find commit which merges them */
1241 parent_count = find_first_merges(opt->repo, path, commit_a, commit_b,
1242 &merges);
1243 switch (parent_count) {
1244 case 0:
1245 path_msg(opt, path, 0, _("Failed to merge submodule %s"), path);
1246 break;
1247
1248 case 1:
1249 format_commit(&sb, 4,
1250 (struct commit *)merges.objects[0].item);
1251 path_msg(opt, path, 0,
1252 _("Failed to merge submodule %s, but a possible merge "
1253 "resolution exists:\n%s\n"),
1254 path, sb.buf);
1255 path_msg(opt, path, 1,
1256 _("If this is correct simply add it to the index "
1257 "for example\n"
1258 "by using:\n\n"
1259 " git update-index --cacheinfo 160000 %s \"%s\"\n\n"
1260 "which will accept this suggestion.\n"),
1261 oid_to_hex(&merges.objects[0].item->oid), path);
1262 strbuf_release(&sb);
1263 break;
1264 default:
1265 for (i = 0; i < merges.nr; i++)
1266 format_commit(&sb, 4,
1267 (struct commit *)merges.objects[i].item);
1268 path_msg(opt, path, 0,
1269 _("Failed to merge submodule %s, but multiple "
1270 "possible merges exist:\n%s"), path, sb.buf);
1271 strbuf_release(&sb);
1272 }
1273
1274 object_array_clear(&merges);
1275 return 0;
62fdec17
EN
1276}
1277
1218b3ab
EN
1278static void initialize_attr_index(struct merge_options *opt)
1279{
1280 /*
1281 * The renormalize_buffer() functions require attributes, and
1282 * annoyingly those can only be read from the working tree or from
1283 * an index_state. merge-ort doesn't have an index_state, so we
1284 * generate a fake one containing only attribute information.
1285 */
1286 struct merged_info *mi;
1287 struct index_state *attr_index = &opt->priv->attr_index;
1288 struct cache_entry *ce;
1289
1290 attr_index->initialized = 1;
1291
1292 if (!opt->renormalize)
1293 return;
1294
1295 mi = strmap_get(&opt->priv->paths, GITATTRIBUTES_FILE);
1296 if (!mi)
1297 return;
1298
1299 if (mi->clean) {
1300 int len = strlen(GITATTRIBUTES_FILE);
1301 ce = make_empty_cache_entry(attr_index, len);
1302 ce->ce_mode = create_ce_mode(mi->result.mode);
1303 ce->ce_flags = create_ce_flags(0);
1304 ce->ce_namelen = len;
1305 oidcpy(&ce->oid, &mi->result.oid);
1306 memcpy(ce->name, GITATTRIBUTES_FILE, len);
1307 add_index_entry(attr_index, ce,
1308 ADD_CACHE_OK_TO_ADD | ADD_CACHE_OK_TO_REPLACE);
1309 get_stream_filter(attr_index, GITATTRIBUTES_FILE, &ce->oid);
1310 } else {
1311 int stage, len;
1312 struct conflict_info *ci;
1313
1314 ASSIGN_AND_VERIFY_CI(ci, mi);
1315 for (stage = 0; stage < 3; stage++) {
1316 unsigned stage_mask = (1 << stage);
1317
1318 if (!(ci->filemask & stage_mask))
1319 continue;
1320 len = strlen(GITATTRIBUTES_FILE);
1321 ce = make_empty_cache_entry(attr_index, len);
1322 ce->ce_mode = create_ce_mode(ci->stages[stage].mode);
1323 ce->ce_flags = create_ce_flags(stage);
1324 ce->ce_namelen = len;
1325 oidcpy(&ce->oid, &ci->stages[stage].oid);
1326 memcpy(ce->name, GITATTRIBUTES_FILE, len);
1327 add_index_entry(attr_index, ce,
1328 ADD_CACHE_OK_TO_ADD | ADD_CACHE_OK_TO_REPLACE);
1329 get_stream_filter(attr_index, GITATTRIBUTES_FILE,
1330 &ce->oid);
1331 }
1332 }
1333}
1334
62fdec17
EN
1335static int merge_3way(struct merge_options *opt,
1336 const char *path,
1337 const struct object_id *o,
1338 const struct object_id *a,
1339 const struct object_id *b,
1340 const char *pathnames[3],
1341 const int extra_marker_size,
1342 mmbuffer_t *result_buf)
1343{
f591c472
EN
1344 mmfile_t orig, src1, src2;
1345 struct ll_merge_options ll_opts = {0};
1346 char *base, *name1, *name2;
1347 int merge_status;
1348
1218b3ab
EN
1349 if (!opt->priv->attr_index.initialized)
1350 initialize_attr_index(opt);
1351
f591c472
EN
1352 ll_opts.renormalize = opt->renormalize;
1353 ll_opts.extra_marker_size = extra_marker_size;
1354 ll_opts.xdl_opts = opt->xdl_opts;
1355
1356 if (opt->priv->call_depth) {
1357 ll_opts.virtual_ancestor = 1;
1358 ll_opts.variant = 0;
1359 } else {
1360 switch (opt->recursive_variant) {
1361 case MERGE_VARIANT_OURS:
1362 ll_opts.variant = XDL_MERGE_FAVOR_OURS;
1363 break;
1364 case MERGE_VARIANT_THEIRS:
1365 ll_opts.variant = XDL_MERGE_FAVOR_THEIRS;
1366 break;
1367 default:
1368 ll_opts.variant = 0;
1369 break;
1370 }
1371 }
1372
1373 assert(pathnames[0] && pathnames[1] && pathnames[2] && opt->ancestor);
1374 if (pathnames[0] == pathnames[1] && pathnames[1] == pathnames[2]) {
1375 base = mkpathdup("%s", opt->ancestor);
1376 name1 = mkpathdup("%s", opt->branch1);
1377 name2 = mkpathdup("%s", opt->branch2);
1378 } else {
1379 base = mkpathdup("%s:%s", opt->ancestor, pathnames[0]);
1380 name1 = mkpathdup("%s:%s", opt->branch1, pathnames[1]);
1381 name2 = mkpathdup("%s:%s", opt->branch2, pathnames[2]);
1382 }
1383
1384 read_mmblob(&orig, o);
1385 read_mmblob(&src1, a);
1386 read_mmblob(&src2, b);
1387
1388 merge_status = ll_merge(result_buf, path, &orig, base,
1389 &src1, name1, &src2, name2,
1218b3ab 1390 &opt->priv->attr_index, &ll_opts);
f591c472
EN
1391
1392 free(base);
1393 free(name1);
1394 free(name2);
1395 free(orig.ptr);
1396 free(src1.ptr);
1397 free(src2.ptr);
1398 return merge_status;
62fdec17
EN
1399}
1400
e2e9dc03
EN
1401static int handle_content_merge(struct merge_options *opt,
1402 const char *path,
1403 const struct version_info *o,
1404 const struct version_info *a,
1405 const struct version_info *b,
1406 const char *pathnames[3],
1407 const int extra_marker_size,
1408 struct version_info *result)
1409{
991bbdca 1410 /*
62fdec17
EN
1411 * path is the target location where we want to put the file, and
1412 * is used to determine any normalization rules in ll_merge.
1413 *
1414 * The normal case is that path and all entries in pathnames are
1415 * identical, though renames can affect which path we got one of
1416 * the three blobs to merge on various sides of history.
1417 *
1418 * extra_marker_size is the amount to extend conflict markers in
1419 * ll_merge; this is neeed if we have content merges of content
1420 * merges, which happens for example with rename/rename(2to1) and
1421 * rename/add conflicts.
1422 */
1423 unsigned clean = 1;
1424
1425 /*
1426 * handle_content_merge() needs both files to be of the same type, i.e.
1427 * both files OR both submodules OR both symlinks. Conflicting types
1428 * needs to be handled elsewhere.
991bbdca 1429 */
62fdec17
EN
1430 assert((S_IFMT & a->mode) == (S_IFMT & b->mode));
1431
1432 /* Merge modes */
1433 if (a->mode == b->mode || a->mode == o->mode)
1434 result->mode = b->mode;
1435 else {
1436 /* must be the 100644/100755 case */
1437 assert(S_ISREG(a->mode));
1438 result->mode = a->mode;
1439 clean = (b->mode == o->mode);
1440 /*
1441 * FIXME: If opt->priv->call_depth && !clean, then we really
1442 * should not make result->mode match either a->mode or
1443 * b->mode; that causes t6036 "check conflicting mode for
1444 * regular file" to fail. It would be best to use some other
1445 * mode, but we'll confuse all kinds of stuff if we use one
1446 * where S_ISREG(result->mode) isn't true, and if we use
1447 * something like 0100666, then tree-walk.c's calls to
1448 * canon_mode() will just normalize that to 100644 for us and
1449 * thus not solve anything.
1450 *
1451 * Figure out if there's some kind of way we can work around
1452 * this...
1453 */
1454 }
1455
1456 /*
1457 * Trivial oid merge.
1458 *
1459 * Note: While one might assume that the next four lines would
1460 * be unnecessary due to the fact that match_mask is often
1461 * setup and already handled, renames don't always take care
1462 * of that.
1463 */
1464 if (oideq(&a->oid, &b->oid) || oideq(&a->oid, &o->oid))
1465 oidcpy(&result->oid, &b->oid);
1466 else if (oideq(&b->oid, &o->oid))
1467 oidcpy(&result->oid, &a->oid);
1468
1469 /* Remaining rules depend on file vs. submodule vs. symlink. */
1470 else if (S_ISREG(a->mode)) {
1471 mmbuffer_t result_buf;
1472 int ret = 0, merge_status;
1473 int two_way;
1474
1475 /*
1476 * If 'o' is different type, treat it as null so we do a
1477 * two-way merge.
1478 */
1479 two_way = ((S_IFMT & o->mode) != (S_IFMT & a->mode));
1480
1481 merge_status = merge_3way(opt, path,
1482 two_way ? &null_oid : &o->oid,
1483 &a->oid, &b->oid,
1484 pathnames, extra_marker_size,
1485 &result_buf);
1486
1487 if ((merge_status < 0) || !result_buf.ptr)
1488 ret = err(opt, _("Failed to execute internal merge"));
1489
1490 if (!ret &&
1491 write_object_file(result_buf.ptr, result_buf.size,
1492 blob_type, &result->oid))
1493 ret = err(opt, _("Unable to add %s to database"),
1494 path);
1495
1496 free(result_buf.ptr);
1497 if (ret)
1498 return -1;
1499 clean &= (merge_status == 0);
1500 path_msg(opt, path, 1, _("Auto-merging %s"), path);
1501 } else if (S_ISGITLINK(a->mode)) {
1502 int two_way = ((S_IFMT & o->mode) != (S_IFMT & a->mode));
1503 clean = merge_submodule(opt, pathnames[0],
1504 two_way ? &null_oid : &o->oid,
1505 &a->oid, &b->oid, &result->oid);
1506 if (opt->priv->call_depth && two_way && !clean) {
1507 result->mode = o->mode;
1508 oidcpy(&result->oid, &o->oid);
1509 }
1510 } else if (S_ISLNK(a->mode)) {
1511 if (opt->priv->call_depth) {
1512 clean = 0;
1513 result->mode = o->mode;
1514 oidcpy(&result->oid, &o->oid);
1515 } else {
1516 switch (opt->recursive_variant) {
1517 case MERGE_VARIANT_NORMAL:
1518 clean = 0;
1519 oidcpy(&result->oid, &a->oid);
1520 break;
1521 case MERGE_VARIANT_OURS:
1522 oidcpy(&result->oid, &a->oid);
1523 break;
1524 case MERGE_VARIANT_THEIRS:
1525 oidcpy(&result->oid, &b->oid);
1526 break;
1527 }
1528 }
1529 } else
1530 BUG("unsupported object type in the tree: %06o for %s",
1531 a->mode, path);
1532
991bbdca 1533 return clean;
e2e9dc03
EN
1534}
1535
04af1879
EN
1536/*** Function Grouping: functions related to detect_and_process_renames(), ***
1537 *** which are split into directory and regular rename detection sections. ***/
1538
1539/*** Function Grouping: functions related to directory rename detection ***/
1540
fa5e06d6
EN
1541struct collision_info {
1542 struct string_list source_files;
1543 unsigned reported_already:1;
1544};
1545
d9d015df
EN
1546/*
1547 * Return a new string that replaces the beginning portion (which matches
1548 * rename_info->key), with rename_info->util.new_dir. In perl-speak:
1549 * new_path_name = (old_path =~ s/rename_info->key/rename_info->value/);
1550 * NOTE:
1551 * Caller must ensure that old_path starts with rename_info->key + '/'.
1552 */
1553static char *apply_dir_rename(struct strmap_entry *rename_info,
1554 const char *old_path)
1555{
fbcfc0cc
EN
1556 struct strbuf new_path = STRBUF_INIT;
1557 const char *old_dir = rename_info->key;
1558 const char *new_dir = rename_info->value;
1559 int oldlen, newlen, new_dir_len;
1560
1561 oldlen = strlen(old_dir);
1562 if (*new_dir == '\0')
1563 /*
1564 * If someone renamed/merged a subdirectory into the root
1565 * directory (e.g. 'some/subdir' -> ''), then we want to
1566 * avoid returning
1567 * '' + '/filename'
1568 * as the rename; we need to make old_path + oldlen advance
1569 * past the '/' character.
1570 */
1571 oldlen++;
1572 new_dir_len = strlen(new_dir);
1573 newlen = new_dir_len + (strlen(old_path) - oldlen) + 1;
1574 strbuf_grow(&new_path, newlen);
1575 strbuf_add(&new_path, new_dir, new_dir_len);
1576 strbuf_addstr(&new_path, &old_path[oldlen]);
1577
1578 return strbuf_detach(&new_path, NULL);
d9d015df
EN
1579}
1580
bea43365
EN
1581static int path_in_way(struct strmap *paths, const char *path, unsigned side_mask)
1582{
1583 struct merged_info *mi = strmap_get(paths, path);
1584 struct conflict_info *ci;
1585 if (!mi)
1586 return 0;
1587 INITIALIZE_CI(ci, mi);
1588 return mi->clean || (side_mask & (ci->filemask | ci->dirmask));
1589}
1590
47325e85
EN
1591/*
1592 * See if there is a directory rename for path, and if there are any file
1593 * level conflicts on the given side for the renamed location. If there is
1594 * a rename and there are no conflicts, return the new name. Otherwise,
1595 * return NULL.
1596 */
1597static char *handle_path_level_conflicts(struct merge_options *opt,
1598 const char *path,
1599 unsigned side_index,
1600 struct strmap_entry *rename_info,
1601 struct strmap *collisions)
1602{
bea43365
EN
1603 char *new_path = NULL;
1604 struct collision_info *c_info;
1605 int clean = 1;
1606 struct strbuf collision_paths = STRBUF_INIT;
1607
1608 /*
1609 * entry has the mapping of old directory name to new directory name
1610 * that we want to apply to path.
1611 */
1612 new_path = apply_dir_rename(rename_info, path);
1613 if (!new_path)
1614 BUG("Failed to apply directory rename!");
1615
1616 /*
1617 * The caller needs to have ensured that it has pre-populated
1618 * collisions with all paths that map to new_path. Do a quick check
1619 * to ensure that's the case.
1620 */
1621 c_info = strmap_get(collisions, new_path);
1622 if (c_info == NULL)
1623 BUG("c_info is NULL");
1624
1625 /*
1626 * Check for one-sided add/add/.../add conflicts, i.e.
1627 * where implicit renames from the other side doing
1628 * directory rename(s) can affect this side of history
1629 * to put multiple paths into the same location. Warn
1630 * and bail on directory renames for such paths.
1631 */
1632 if (c_info->reported_already) {
1633 clean = 0;
1634 } else if (path_in_way(&opt->priv->paths, new_path, 1 << side_index)) {
1635 c_info->reported_already = 1;
1636 strbuf_add_separated_string_list(&collision_paths, ", ",
1637 &c_info->source_files);
1638 path_msg(opt, new_path, 0,
1639 _("CONFLICT (implicit dir rename): Existing file/dir "
1640 "at %s in the way of implicit directory rename(s) "
1641 "putting the following path(s) there: %s."),
1642 new_path, collision_paths.buf);
1643 clean = 0;
1644 } else if (c_info->source_files.nr > 1) {
1645 c_info->reported_already = 1;
1646 strbuf_add_separated_string_list(&collision_paths, ", ",
1647 &c_info->source_files);
1648 path_msg(opt, new_path, 0,
1649 _("CONFLICT (implicit dir rename): Cannot map more "
1650 "than one path to %s; implicit directory renames "
1651 "tried to put these paths there: %s"),
1652 new_path, collision_paths.buf);
1653 clean = 0;
1654 }
1655
1656 /* Free memory we no longer need */
1657 strbuf_release(&collision_paths);
1658 if (!clean && new_path) {
1659 free(new_path);
1660 return NULL;
1661 }
1662
1663 return new_path;
47325e85
EN
1664}
1665
112e1112
EN
1666static void get_provisional_directory_renames(struct merge_options *opt,
1667 unsigned side,
1668 int *clean)
1669{
04264d40
EN
1670 struct hashmap_iter iter;
1671 struct strmap_entry *entry;
1672 struct rename_info *renames = &opt->priv->renames;
1673
04264d40
EN
1674 /*
1675 * Collapse
1676 * dir_rename_count: old_directory -> {new_directory -> count}
1677 * down to
1678 * dir_renames: old_directory -> best_new_directory
1679 * where best_new_directory is the one with the unique highest count.
1680 */
1681 strmap_for_each_entry(&renames->dir_rename_count[side], &iter, entry) {
1682 const char *source_dir = entry->key;
1683 struct strintmap *counts = entry->value;
1684 struct hashmap_iter count_iter;
1685 struct strmap_entry *count_entry;
1686 int max = 0;
1687 int bad_max = 0;
1688 const char *best = NULL;
1689
1690 strintmap_for_each_entry(counts, &count_iter, count_entry) {
1691 const char *target_dir = count_entry->key;
1692 intptr_t count = (intptr_t)count_entry->value;
1693
1694 if (count == max)
1695 bad_max = max;
1696 else if (count > max) {
1697 max = count;
1698 best = target_dir;
1699 }
1700 }
1701
bf238b71
EN
1702 if (max == 0)
1703 continue;
1704
04264d40
EN
1705 if (bad_max == max) {
1706 path_msg(opt, source_dir, 0,
1707 _("CONFLICT (directory rename split): "
1708 "Unclear where to rename %s to; it was "
1709 "renamed to multiple other directories, with "
1710 "no destination getting a majority of the "
1711 "files."),
1712 source_dir);
41376b58 1713 *clean = 0;
04264d40
EN
1714 } else {
1715 strmap_put(&renames->dir_renames[side],
1716 source_dir, (void*)best);
1717 }
1718 }
112e1112
EN
1719}
1720
1721static void handle_directory_level_conflicts(struct merge_options *opt)
1722{
98d0d081
EN
1723 struct hashmap_iter iter;
1724 struct strmap_entry *entry;
1725 struct string_list duplicated = STRING_LIST_INIT_NODUP;
1726 struct rename_info *renames = &opt->priv->renames;
1727 struct strmap *side1_dir_renames = &renames->dir_renames[MERGE_SIDE1];
1728 struct strmap *side2_dir_renames = &renames->dir_renames[MERGE_SIDE2];
1729 int i;
1730
1731 strmap_for_each_entry(side1_dir_renames, &iter, entry) {
1732 if (strmap_contains(side2_dir_renames, entry->key))
1733 string_list_append(&duplicated, entry->key);
1734 }
1735
1736 for (i = 0; i < duplicated.nr; i++) {
1737 strmap_remove(side1_dir_renames, duplicated.items[i].string, 0);
1738 strmap_remove(side2_dir_renames, duplicated.items[i].string, 0);
1739 }
1740 string_list_clear(&duplicated, 0);
112e1112
EN
1741}
1742
d9d015df
EN
1743static struct strmap_entry *check_dir_renamed(const char *path,
1744 struct strmap *dir_renames)
1745{
fbcfc0cc
EN
1746 char *temp = xstrdup(path);
1747 char *end;
1748 struct strmap_entry *e = NULL;
1749
1750 while ((end = strrchr(temp, '/'))) {
1751 *end = '\0';
1752 e = strmap_get_entry(dir_renames, temp);
1753 if (e)
1754 break;
1755 }
1756 free(temp);
1757 return e;
d9d015df
EN
1758}
1759
fa5e06d6
EN
1760static void compute_collisions(struct strmap *collisions,
1761 struct strmap *dir_renames,
1762 struct diff_queue_struct *pairs)
1763{
d9d015df
EN
1764 int i;
1765
1766 strmap_init_with_options(collisions, NULL, 0);
1767 if (strmap_empty(dir_renames))
1768 return;
1769
1770 /*
1771 * Multiple files can be mapped to the same path due to directory
1772 * renames done by the other side of history. Since that other
1773 * side of history could have merged multiple directories into one,
1774 * if our side of history added the same file basename to each of
1775 * those directories, then all N of them would get implicitly
1776 * renamed by the directory rename detection into the same path,
1777 * and we'd get an add/add/.../add conflict, and all those adds
1778 * from *this* side of history. This is not representable in the
1779 * index, and users aren't going to easily be able to make sense of
1780 * it. So we need to provide a good warning about what's
1781 * happening, and fall back to no-directory-rename detection
1782 * behavior for those paths.
1783 *
1784 * See testcases 9e and all of section 5 from t6043 for examples.
1785 */
1786 for (i = 0; i < pairs->nr; ++i) {
1787 struct strmap_entry *rename_info;
1788 struct collision_info *collision_info;
1789 char *new_path;
1790 struct diff_filepair *pair = pairs->queue[i];
1791
1792 if (pair->status != 'A' && pair->status != 'R')
1793 continue;
1794 rename_info = check_dir_renamed(pair->two->path, dir_renames);
1795 if (!rename_info)
1796 continue;
1797
1798 new_path = apply_dir_rename(rename_info, pair->two->path);
1799 assert(new_path);
1800 collision_info = strmap_get(collisions, new_path);
1801 if (collision_info) {
1802 free(new_path);
1803 } else {
1804 collision_info = xcalloc(1,
1805 sizeof(struct collision_info));
1806 string_list_init(&collision_info->source_files, 0);
1807 strmap_put(collisions, new_path, collision_info);
1808 }
1809 string_list_insert(&collision_info->source_files,
1810 pair->two->path);
1811 }
fa5e06d6
EN
1812}
1813
1814static char *check_for_directory_rename(struct merge_options *opt,
1815 const char *path,
1816 unsigned side_index,
1817 struct strmap *dir_renames,
1818 struct strmap *dir_rename_exclusions,
1819 struct strmap *collisions,
1820 int *clean_merge)
1821{
47325e85
EN
1822 char *new_path = NULL;
1823 struct strmap_entry *rename_info;
1824 struct strmap_entry *otherinfo = NULL;
1825 const char *new_dir;
1826
1827 if (strmap_empty(dir_renames))
1828 return new_path;
1829 rename_info = check_dir_renamed(path, dir_renames);
1830 if (!rename_info)
1831 return new_path;
1832 /* old_dir = rename_info->key; */
1833 new_dir = rename_info->value;
1834
1835 /*
1836 * This next part is a little weird. We do not want to do an
1837 * implicit rename into a directory we renamed on our side, because
1838 * that will result in a spurious rename/rename(1to2) conflict. An
1839 * example:
1840 * Base commit: dumbdir/afile, otherdir/bfile
1841 * Side 1: smrtdir/afile, otherdir/bfile
1842 * Side 2: dumbdir/afile, dumbdir/bfile
1843 * Here, while working on Side 1, we could notice that otherdir was
1844 * renamed/merged to dumbdir, and change the diff_filepair for
1845 * otherdir/bfile into a rename into dumbdir/bfile. However, Side
1846 * 2 will notice the rename from dumbdir to smrtdir, and do the
1847 * transitive rename to move it from dumbdir/bfile to
1848 * smrtdir/bfile. That gives us bfile in dumbdir vs being in
1849 * smrtdir, a rename/rename(1to2) conflict. We really just want
1850 * the file to end up in smrtdir. And the way to achieve that is
1851 * to not let Side1 do the rename to dumbdir, since we know that is
1852 * the source of one of our directory renames.
1853 *
1854 * That's why otherinfo and dir_rename_exclusions is here.
1855 *
1856 * As it turns out, this also prevents N-way transient rename
1857 * confusion; See testcases 9c and 9d of t6043.
1858 */
1859 otherinfo = strmap_get_entry(dir_rename_exclusions, new_dir);
1860 if (otherinfo) {
1861 path_msg(opt, rename_info->key, 1,
1862 _("WARNING: Avoiding applying %s -> %s rename "
1863 "to %s, because %s itself was renamed."),
1864 rename_info->key, new_dir, path, new_dir);
1865 return NULL;
1866 }
1867
1868 new_path = handle_path_level_conflicts(opt, path, side_index,
1869 rename_info, collisions);
1870 *clean_merge &= (new_path != NULL);
1871
1872 return new_path;
fa5e06d6
EN
1873}
1874
1875static void apply_directory_rename_modifications(struct merge_options *opt,
1876 struct diff_filepair *pair,
1877 char *new_path)
1878{
089d82bc
EN
1879 /*
1880 * The basic idea is to get the conflict_info from opt->priv->paths
1881 * at old path, and insert it into new_path; basically just this:
1882 * ci = strmap_get(&opt->priv->paths, old_path);
1883 * strmap_remove(&opt->priv->paths, old_path, 0);
1884 * strmap_put(&opt->priv->paths, new_path, ci);
1885 * However, there are some factors complicating this:
1886 * - opt->priv->paths may already have an entry at new_path
1887 * - Each ci tracks its containing directory, so we need to
1888 * update that
1889 * - If another ci has the same containing directory, then
1890 * the two char*'s MUST point to the same location. See the
1891 * comment in struct merged_info. strcmp equality is not
1892 * enough; we need pointer equality.
1893 * - opt->priv->paths must hold the parent directories of any
1894 * entries that are added. So, if this directory rename
1895 * causes entirely new directories, we must recursively add
1896 * parent directories.
1897 * - For each parent directory added to opt->priv->paths, we
1898 * also need to get its parent directory stored in its
1899 * conflict_info->merged.directory_name with all the same
1900 * requirements about pointer equality.
1901 */
1902 struct string_list dirs_to_insert = STRING_LIST_INIT_NODUP;
1903 struct conflict_info *ci, *new_ci;
1904 struct strmap_entry *entry;
1905 const char *branch_with_new_path, *branch_with_dir_rename;
1906 const char *old_path = pair->two->path;
1907 const char *parent_name;
1908 const char *cur_path;
1909 int i, len;
1910
1911 entry = strmap_get_entry(&opt->priv->paths, old_path);
1912 old_path = entry->key;
1913 ci = entry->value;
1914 VERIFY_CI(ci);
1915
1916 /* Find parent directories missing from opt->priv->paths */
1917 cur_path = new_path;
1918 while (1) {
1919 /* Find the parent directory of cur_path */
1920 char *last_slash = strrchr(cur_path, '/');
1921 if (last_slash) {
1922 parent_name = xstrndup(cur_path, last_slash - cur_path);
1923 } else {
1924 parent_name = opt->priv->toplevel_dir;
1925 break;
1926 }
1927
1928 /* Look it up in opt->priv->paths */
1929 entry = strmap_get_entry(&opt->priv->paths, parent_name);
1930 if (entry) {
1931 free((char*)parent_name);
1932 parent_name = entry->key; /* reuse known pointer */
1933 break;
1934 }
1935
1936 /* Record this is one of the directories we need to insert */
1937 string_list_append(&dirs_to_insert, parent_name);
1938 cur_path = parent_name;
1939 }
1940
1941 /* Traverse dirs_to_insert and insert them into opt->priv->paths */
1942 for (i = dirs_to_insert.nr-1; i >= 0; --i) {
1943 struct conflict_info *dir_ci;
1944 char *cur_dir = dirs_to_insert.items[i].string;
1945
1946 dir_ci = xcalloc(1, sizeof(*dir_ci));
1947
1948 dir_ci->merged.directory_name = parent_name;
1949 len = strlen(parent_name);
1950 /* len+1 because of trailing '/' character */
1951 dir_ci->merged.basename_offset = (len > 0 ? len+1 : len);
1952 dir_ci->dirmask = ci->filemask;
1953 strmap_put(&opt->priv->paths, cur_dir, dir_ci);
1954
1955 parent_name = cur_dir;
1956 }
1957
1958 /*
1959 * We are removing old_path from opt->priv->paths. old_path also will
1960 * eventually need to be freed, but it may still be used by e.g.
1961 * ci->pathnames. So, store it in another string-list for now.
1962 */
1963 string_list_append(&opt->priv->paths_to_free, old_path);
1964
1965 assert(ci->filemask == 2 || ci->filemask == 4);
1966 assert(ci->dirmask == 0);
1967 strmap_remove(&opt->priv->paths, old_path, 0);
1968
1969 branch_with_new_path = (ci->filemask == 2) ? opt->branch1 : opt->branch2;
1970 branch_with_dir_rename = (ci->filemask == 2) ? opt->branch2 : opt->branch1;
1971
1972 /* Now, finally update ci and stick it into opt->priv->paths */
1973 ci->merged.directory_name = parent_name;
1974 len = strlen(parent_name);
1975 ci->merged.basename_offset = (len > 0 ? len+1 : len);
1976 new_ci = strmap_get(&opt->priv->paths, new_path);
1977 if (!new_ci) {
1978 /* Place ci back into opt->priv->paths, but at new_path */
1979 strmap_put(&opt->priv->paths, new_path, ci);
1980 } else {
1981 int index;
1982
1983 /* A few sanity checks */
1984 VERIFY_CI(new_ci);
1985 assert(ci->filemask == 2 || ci->filemask == 4);
1986 assert((new_ci->filemask & ci->filemask) == 0);
1987 assert(!new_ci->merged.clean);
1988
1989 /* Copy stuff from ci into new_ci */
1990 new_ci->filemask |= ci->filemask;
1991 if (new_ci->dirmask)
1992 new_ci->df_conflict = 1;
1993 index = (ci->filemask >> 1);
1994 new_ci->pathnames[index] = ci->pathnames[index];
1995 new_ci->stages[index].mode = ci->stages[index].mode;
1996 oidcpy(&new_ci->stages[index].oid, &ci->stages[index].oid);
1997
1998 free(ci);
1999 ci = new_ci;
2000 }
2001
2002 if (opt->detect_directory_renames == MERGE_DIRECTORY_RENAMES_TRUE) {
2003 /* Notify user of updated path */
2004 if (pair->status == 'A')
2005 path_msg(opt, new_path, 1,
2006 _("Path updated: %s added in %s inside a "
2007 "directory that was renamed in %s; moving "
2008 "it to %s."),
2009 old_path, branch_with_new_path,
2010 branch_with_dir_rename, new_path);
2011 else
2012 path_msg(opt, new_path, 1,
2013 _("Path updated: %s renamed to %s in %s, "
2014 "inside a directory that was renamed in %s; "
2015 "moving it to %s."),
2016 pair->one->path, old_path, branch_with_new_path,
2017 branch_with_dir_rename, new_path);
2018 } else {
2019 /*
2020 * opt->detect_directory_renames has the value
2021 * MERGE_DIRECTORY_RENAMES_CONFLICT, so mark these as conflicts.
2022 */
2023 ci->path_conflict = 1;
2024 if (pair->status == 'A')
2025 path_msg(opt, new_path, 0,
2026 _("CONFLICT (file location): %s added in %s "
2027 "inside a directory that was renamed in %s, "
2028 "suggesting it should perhaps be moved to "
2029 "%s."),
2030 old_path, branch_with_new_path,
2031 branch_with_dir_rename, new_path);
2032 else
2033 path_msg(opt, new_path, 0,
2034 _("CONFLICT (file location): %s renamed to %s "
2035 "in %s, inside a directory that was renamed "
2036 "in %s, suggesting it should perhaps be "
2037 "moved to %s."),
2038 pair->one->path, old_path, branch_with_new_path,
2039 branch_with_dir_rename, new_path);
2040 }
2041
2042 /*
2043 * Finally, record the new location.
2044 */
2045 pair->two->path = new_path;
fa5e06d6
EN
2046}
2047
04af1879
EN
2048/*** Function Grouping: functions related to regular rename detection ***/
2049
e1a124e8
EN
2050static int process_renames(struct merge_options *opt,
2051 struct diff_queue_struct *renames)
2052{
c2d267df
EN
2053 int clean_merge = 1, i;
2054
2055 for (i = 0; i < renames->nr; ++i) {
2056 const char *oldpath = NULL, *newpath;
2057 struct diff_filepair *pair = renames->queue[i];
2058 struct conflict_info *oldinfo = NULL, *newinfo = NULL;
2059 struct strmap_entry *old_ent, *new_ent;
2060 unsigned int old_sidemask;
2061 int target_index, other_source_index;
2062 int source_deleted, collision, type_changed;
2e91ddd2 2063 const char *rename_branch = NULL, *delete_branch = NULL;
c2d267df
EN
2064
2065 old_ent = strmap_get_entry(&opt->priv->paths, pair->one->path);
c2d267df 2066 new_ent = strmap_get_entry(&opt->priv->paths, pair->two->path);
1b6b902d
EN
2067 if (old_ent) {
2068 oldpath = old_ent->key;
2069 oldinfo = old_ent->value;
2070 }
2071 newpath = pair->two->path;
2072 if (new_ent) {
2073 newpath = new_ent->key;
2074 newinfo = new_ent->value;
2075 }
2076
2077 /*
2078 * If pair->one->path isn't in opt->priv->paths, that means
2079 * that either directory rename detection removed that
2080 * path, or a parent directory of oldpath was resolved and
2081 * we don't even need the rename; in either case, we can
2082 * skip it. If oldinfo->merged.clean, then the other side
2083 * of history had no changes to oldpath and we don't need
2084 * the rename and can skip it.
2085 */
2086 if (!oldinfo || oldinfo->merged.clean)
2087 continue;
c2d267df
EN
2088
2089 /*
2090 * diff_filepairs have copies of pathnames, thus we have to
2091 * use standard 'strcmp()' (negated) instead of '=='.
2092 */
2093 if (i + 1 < renames->nr &&
2094 !strcmp(oldpath, renames->queue[i+1]->one->path)) {
2095 /* Handle rename/rename(1to2) or rename/rename(1to1) */
2096 const char *pathnames[3];
af1e56c4
EN
2097 struct version_info merged;
2098 struct conflict_info *base, *side1, *side2;
53e88a03 2099 unsigned was_binary_blob = 0;
c2d267df
EN
2100
2101 pathnames[0] = oldpath;
2102 pathnames[1] = newpath;
2103 pathnames[2] = renames->queue[i+1]->two->path;
2104
af1e56c4
EN
2105 base = strmap_get(&opt->priv->paths, pathnames[0]);
2106 side1 = strmap_get(&opt->priv->paths, pathnames[1]);
2107 side2 = strmap_get(&opt->priv->paths, pathnames[2]);
2108
2109 VERIFY_CI(base);
2110 VERIFY_CI(side1);
2111 VERIFY_CI(side2);
2112
c2d267df 2113 if (!strcmp(pathnames[1], pathnames[2])) {
af1e56c4
EN
2114 /* Both sides renamed the same way */
2115 assert(side1 == side2);
2116 memcpy(&side1->stages[0], &base->stages[0],
2117 sizeof(merged));
2118 side1->filemask |= (1 << MERGE_BASE);
2119 /* Mark base as resolved by removal */
2120 base->merged.is_null = 1;
2121 base->merged.clean = 1;
c2d267df
EN
2122
2123 /* We handled both renames, i.e. i+1 handled */
2124 i++;
2125 /* Move to next rename */
2126 continue;
2127 }
2128
2129 /* This is a rename/rename(1to2) */
53e88a03
EN
2130 clean_merge = handle_content_merge(opt,
2131 pair->one->path,
2132 &base->stages[0],
2133 &side1->stages[1],
2134 &side2->stages[2],
2135 pathnames,
2136 1 + 2 * opt->priv->call_depth,
2137 &merged);
2138 if (!clean_merge &&
2139 merged.mode == side1->stages[1].mode &&
2140 oideq(&merged.oid, &side1->stages[1].oid))
2141 was_binary_blob = 1;
2142 memcpy(&side1->stages[1], &merged, sizeof(merged));
2143 if (was_binary_blob) {
2144 /*
2145 * Getting here means we were attempting to
2146 * merge a binary blob.
2147 *
2148 * Since we can't merge binaries,
2149 * handle_content_merge() just takes one
2150 * side. But we don't want to copy the
2151 * contents of one side to both paths. We
2152 * used the contents of side1 above for
2153 * side1->stages, let's use the contents of
2154 * side2 for side2->stages below.
2155 */
2156 oidcpy(&merged.oid, &side2->stages[2].oid);
2157 merged.mode = side2->stages[2].mode;
2158 }
2159 memcpy(&side2->stages[2], &merged, sizeof(merged));
2160
2161 side1->path_conflict = 1;
2162 side2->path_conflict = 1;
2163 /*
2164 * TODO: For renames we normally remove the path at the
2165 * old name. It would thus seem consistent to do the
2166 * same for rename/rename(1to2) cases, but we haven't
2167 * done so traditionally and a number of the regression
2168 * tests now encode an expectation that the file is
2169 * left there at stage 1. If we ever decide to change
2170 * this, add the following two lines here:
2171 * base->merged.is_null = 1;
2172 * base->merged.clean = 1;
2173 * and remove the setting of base->path_conflict to 1.
2174 */
2175 base->path_conflict = 1;
2176 path_msg(opt, oldpath, 0,
2177 _("CONFLICT (rename/rename): %s renamed to "
2178 "%s in %s and to %s in %s."),
2179 pathnames[0],
2180 pathnames[1], opt->branch1,
2181 pathnames[2], opt->branch2);
c2d267df
EN
2182
2183 i++; /* We handled both renames, i.e. i+1 handled */
2184 continue;
2185 }
2186
2187 VERIFY_CI(oldinfo);
2188 VERIFY_CI(newinfo);
2189 target_index = pair->score; /* from collect_renames() */
2190 assert(target_index == 1 || target_index == 2);
2191 other_source_index = 3 - target_index;
2192 old_sidemask = (1 << other_source_index); /* 2 or 4 */
2193 source_deleted = (oldinfo->filemask == 1);
2194 collision = ((newinfo->filemask & old_sidemask) != 0);
2195 type_changed = !source_deleted &&
2196 (S_ISREG(oldinfo->stages[other_source_index].mode) !=
2197 S_ISREG(newinfo->stages[target_index].mode));
2198 if (type_changed && collision) {
6fcccbd7
EN
2199 /*
2200 * special handling so later blocks can handle this...
2201 *
2202 * if type_changed && collision are both true, then this
2203 * was really a double rename, but one side wasn't
2204 * detected due to lack of break detection. I.e.
2205 * something like
2206 * orig: has normal file 'foo'
2207 * side1: renames 'foo' to 'bar', adds 'foo' symlink
2208 * side2: renames 'foo' to 'bar'
2209 * In this case, the foo->bar rename on side1 won't be
2210 * detected because the new symlink named 'foo' is
2211 * there and we don't do break detection. But we detect
2212 * this here because we don't want to merge the content
2213 * of the foo symlink with the foo->bar file, so we
2214 * have some logic to handle this special case. The
2215 * easiest way to do that is make 'bar' on side1 not
2216 * be considered a colliding file but the other part
2217 * of a normal rename. If the file is very different,
2218 * well we're going to get content merge conflicts
2219 * anyway so it doesn't hurt. And if the colliding
2220 * file also has a different type, that'll be handled
2221 * by the content merge logic in process_entry() too.
2222 *
2223 * See also t6430, 'rename vs. rename/symlink'
2224 */
2225 collision = 0;
c2d267df 2226 }
2e91ddd2
EN
2227 if (source_deleted) {
2228 if (target_index == 1) {
2229 rename_branch = opt->branch1;
2230 delete_branch = opt->branch2;
2231 } else {
2232 rename_branch = opt->branch2;
2233 delete_branch = opt->branch1;
2234 }
2235 }
c2d267df
EN
2236
2237 assert(source_deleted || oldinfo->filemask & old_sidemask);
2238
2239 /* Need to check for special types of rename conflicts... */
2240 if (collision && !source_deleted) {
2241 /* collision: rename/add or rename/rename(2to1) */
35e47e35
EN
2242 const char *pathnames[3];
2243 struct version_info merged;
2244
2245 struct conflict_info *base, *side1, *side2;
2246 unsigned clean;
2247
2248 pathnames[0] = oldpath;
2249 pathnames[other_source_index] = oldpath;
2250 pathnames[target_index] = newpath;
2251
2252 base = strmap_get(&opt->priv->paths, pathnames[0]);
2253 side1 = strmap_get(&opt->priv->paths, pathnames[1]);
2254 side2 = strmap_get(&opt->priv->paths, pathnames[2]);
2255
2256 VERIFY_CI(base);
2257 VERIFY_CI(side1);
2258 VERIFY_CI(side2);
2259
2260 clean = handle_content_merge(opt, pair->one->path,
2261 &base->stages[0],
2262 &side1->stages[1],
2263 &side2->stages[2],
2264 pathnames,
2265 1 + 2 * opt->priv->call_depth,
2266 &merged);
2267
2268 memcpy(&newinfo->stages[target_index], &merged,
2269 sizeof(merged));
2270 if (!clean) {
2271 path_msg(opt, newpath, 0,
2272 _("CONFLICT (rename involved in "
2273 "collision): rename of %s -> %s has "
2274 "content conflicts AND collides "
2275 "with another path; this may result "
2276 "in nested conflict markers."),
2277 oldpath, newpath);
2278 }
c2d267df 2279 } else if (collision && source_deleted) {
35e47e35
EN
2280 /*
2281 * rename/add/delete or rename/rename(2to1)/delete:
2282 * since oldpath was deleted on the side that didn't
2283 * do the rename, there's not much of a content merge
2284 * we can do for the rename. oldinfo->merged.is_null
2285 * was already set, so we just leave things as-is so
2286 * they look like an add/add conflict.
2287 */
2288
2289 newinfo->path_conflict = 1;
2290 path_msg(opt, newpath, 0,
2291 _("CONFLICT (rename/delete): %s renamed "
2292 "to %s in %s, but deleted in %s."),
2293 oldpath, newpath, rename_branch, delete_branch);
c2d267df 2294 } else {
2e91ddd2
EN
2295 /*
2296 * a few different cases...start by copying the
2297 * existing stage(s) from oldinfo over the newinfo
2298 * and update the pathname(s).
2299 */
2300 memcpy(&newinfo->stages[0], &oldinfo->stages[0],
2301 sizeof(newinfo->stages[0]));
2302 newinfo->filemask |= (1 << MERGE_BASE);
2303 newinfo->pathnames[0] = oldpath;
c2d267df
EN
2304 if (type_changed) {
2305 /* rename vs. typechange */
6fcccbd7
EN
2306 /* Mark the original as resolved by removal */
2307 memcpy(&oldinfo->stages[0].oid, &null_oid,
2308 sizeof(oldinfo->stages[0].oid));
2309 oldinfo->stages[0].mode = 0;
2310 oldinfo->filemask &= 0x06;
c2d267df
EN
2311 } else if (source_deleted) {
2312 /* rename/delete */
2e91ddd2
EN
2313 newinfo->path_conflict = 1;
2314 path_msg(opt, newpath, 0,
2315 _("CONFLICT (rename/delete): %s renamed"
2316 " to %s in %s, but deleted in %s."),
2317 oldpath, newpath,
2318 rename_branch, delete_branch);
c2d267df
EN
2319 } else {
2320 /* normal rename */
f1665e69
EN
2321 memcpy(&newinfo->stages[other_source_index],
2322 &oldinfo->stages[other_source_index],
2323 sizeof(newinfo->stages[0]));
2324 newinfo->filemask |= (1 << other_source_index);
2325 newinfo->pathnames[other_source_index] = oldpath;
c2d267df
EN
2326 }
2327 }
2328
2329 if (!type_changed) {
2330 /* Mark the original as resolved by removal */
2331 oldinfo->merged.is_null = 1;
2332 oldinfo->merged.clean = 1;
2333 }
2334
2335 }
2336
2337 return clean_merge;
e1a124e8
EN
2338}
2339
f89b4f2b
EN
2340static inline int possible_side_renames(struct rename_info *renames,
2341 unsigned side_index)
2342{
2343 return renames->pairs[side_index].nr > 0 &&
a49b55d5 2344 !strintmap_empty(&renames->relevant_sources[side_index]);
f89b4f2b
EN
2345}
2346
2347static inline int possible_renames(struct rename_info *renames)
2348{
2349 return possible_side_renames(renames, 1) ||
2350 possible_side_renames(renames, 2);
2351}
2352
f78cf976
EN
2353static void resolve_diffpair_statuses(struct diff_queue_struct *q)
2354{
2355 /*
2356 * A simplified version of diff_resolve_rename_copy(); would probably
2357 * just use that function but it's static...
2358 */
2359 int i;
2360 struct diff_filepair *p;
2361
2362 for (i = 0; i < q->nr; ++i) {
2363 p = q->queue[i];
2364 p->status = 0; /* undecided */
2365 if (!DIFF_FILE_VALID(p->one))
2366 p->status = DIFF_STATUS_ADDED;
2367 else if (!DIFF_FILE_VALID(p->two))
2368 p->status = DIFF_STATUS_DELETED;
2369 else if (DIFF_PAIR_RENAME(p))
2370 p->status = DIFF_STATUS_RENAMED;
2371 }
2372}
2373
2734f2e3
EN
2374static void cache_new_pair(struct rename_info *renames,
2375 int side,
2376 char *old_path,
2377 char *new_path,
2378 int free_old_value)
2379{
2380 char *old_value;
2381 new_path = xstrdup(new_path);
2382 old_value = strmap_put(&renames->cached_pairs[side],
2383 old_path, new_path);
2384 strset_add(&renames->cached_target_names[side], new_path);
2385 if (free_old_value)
2386 free(old_value);
2387 else
2388 assert(!old_value);
2389}
2390
2391static void possibly_cache_new_pair(struct rename_info *renames,
2392 struct diff_filepair *p,
2393 unsigned side,
2394 char *new_path)
2395{
2396 int dir_renamed_side = 0;
2397
2398 if (new_path) {
2399 /*
2400 * Directory renames happen on the other side of history from
2401 * the side that adds new files to the old directory.
2402 */
2403 dir_renamed_side = 3 - side;
2404 } else {
2405 int val = strintmap_get(&renames->relevant_sources[side],
2406 p->one->path);
2407 if (val == RELEVANT_NO_MORE) {
2408 assert(p->status == 'D');
2409 strset_add(&renames->cached_irrelevant[side],
2410 p->one->path);
2411 }
2412 if (val <= 0)
2413 return;
2414 }
2415
2416 if (p->status == 'D') {
2417 /*
2418 * If we already had this delete, we'll just set it's value
2419 * to NULL again, so no harm.
2420 */
2421 strmap_put(&renames->cached_pairs[side], p->one->path, NULL);
2422 } else if (p->status == 'R') {
2423 if (!new_path)
2424 new_path = p->two->path;
2425 else
2426 cache_new_pair(renames, dir_renamed_side,
2427 p->two->path, new_path, 0);
2428 cache_new_pair(renames, side, p->one->path, new_path, 1);
2429 } else if (p->status == 'A' && new_path) {
2430 cache_new_pair(renames, dir_renamed_side,
2431 p->two->path, new_path, 0);
2432 }
2433}
2434
e1a124e8
EN
2435static int compare_pairs(const void *a_, const void *b_)
2436{
965a7bc2
EN
2437 const struct diff_filepair *a = *((const struct diff_filepair **)a_);
2438 const struct diff_filepair *b = *((const struct diff_filepair **)b_);
2439
2440 return strcmp(a->one->path, b->one->path);
e1a124e8
EN
2441}
2442
2443/* Call diffcore_rename() to compute which files have changed on given side */
2444static void detect_regular_renames(struct merge_options *opt,
e1a124e8
EN
2445 unsigned side_index)
2446{
f39d05ca
EN
2447 struct diff_options diff_opts;
2448 struct rename_info *renames = &opt->priv->renames;
2449
f89b4f2b
EN
2450 if (!possible_side_renames(renames, side_index)) {
2451 /*
2452 * No rename detection needed for this side, but we still need
2453 * to make sure 'adds' are marked correctly in case the other
2454 * side had directory renames.
2455 */
2456 resolve_diffpair_statuses(&renames->pairs[side_index]);
2457 return;
2458 }
2459
d5098029 2460 partial_clear_dir_rename_count(&renames->dir_rename_count[side_index]);
f39d05ca
EN
2461 repo_diff_setup(opt->repo, &diff_opts);
2462 diff_opts.flags.recursive = 1;
2463 diff_opts.flags.rename_empty = 0;
2464 diff_opts.detect_rename = DIFF_DETECT_RENAME;
2465 diff_opts.rename_limit = opt->rename_limit;
2466 if (opt->rename_limit <= 0)
2467 diff_opts.rename_limit = 1000;
2468 diff_opts.rename_score = opt->rename_score;
2469 diff_opts.show_rename_progress = opt->show_rename_progress;
2470 diff_opts.output_format = DIFF_FORMAT_NO_OUTPUT;
2471 diff_setup_done(&diff_opts);
557ac035 2472
f78cf976 2473 diff_queued_diff = renames->pairs[side_index];
557ac035 2474 trace2_region_enter("diff", "diffcore_rename", opt->repo);
0c4fd732 2475 diffcore_rename_extended(&diff_opts,
174791f0 2476 &renames->relevant_sources[side_index],
0c4fd732
EN
2477 &renames->dirs_removed[side_index],
2478 &renames->dir_rename_count[side_index]);
557ac035 2479 trace2_region_leave("diff", "diffcore_rename", opt->repo);
f78cf976 2480 resolve_diffpair_statuses(&diff_queued_diff);
f39d05ca
EN
2481
2482 if (diff_opts.needed_rename_limit > renames->needed_limit)
2483 renames->needed_limit = diff_opts.needed_rename_limit;
2484
2485 renames->pairs[side_index] = diff_queued_diff;
2486
2487 diff_opts.output_format = DIFF_FORMAT_NO_OUTPUT;
2488 diff_queued_diff.nr = 0;
2489 diff_queued_diff.queue = NULL;
2490 diff_flush(&diff_opts);
e1a124e8
EN
2491}
2492
2493/*
2494 * Get information of all renames which occurred in 'side_pairs', discarding
2495 * non-renames.
2496 */
2497static int collect_renames(struct merge_options *opt,
2498 struct diff_queue_struct *result,
fa5e06d6
EN
2499 unsigned side_index,
2500 struct strmap *dir_renames_for_side,
2501 struct strmap *rename_exclusions)
e1a124e8 2502{
965a7bc2 2503 int i, clean = 1;
fa5e06d6 2504 struct strmap collisions;
965a7bc2 2505 struct diff_queue_struct *side_pairs;
fa5e06d6
EN
2506 struct hashmap_iter iter;
2507 struct strmap_entry *entry;
965a7bc2
EN
2508 struct rename_info *renames = &opt->priv->renames;
2509
2510 side_pairs = &renames->pairs[side_index];
fa5e06d6 2511 compute_collisions(&collisions, dir_renames_for_side, side_pairs);
965a7bc2
EN
2512
2513 for (i = 0; i < side_pairs->nr; ++i) {
2514 struct diff_filepair *p = side_pairs->queue[i];
fa5e06d6 2515 char *new_path; /* non-NULL only with directory renames */
965a7bc2 2516
fa5e06d6 2517 if (p->status != 'A' && p->status != 'R') {
2734f2e3 2518 possibly_cache_new_pair(renames, p, side_index, NULL);
965a7bc2
EN
2519 diff_free_filepair(p);
2520 continue;
2521 }
2522
fa5e06d6
EN
2523 new_path = check_for_directory_rename(opt, p->two->path,
2524 side_index,
2525 dir_renames_for_side,
2526 rename_exclusions,
2527 &collisions,
2528 &clean);
2529
2734f2e3 2530 possibly_cache_new_pair(renames, p, side_index, new_path);
fa5e06d6
EN
2531 if (p->status != 'R' && !new_path) {
2532 diff_free_filepair(p);
2533 continue;
2534 }
2535
2536 if (new_path)
2537 apply_directory_rename_modifications(opt, p, new_path);
2538
965a7bc2
EN
2539 /*
2540 * p->score comes back from diffcore_rename_extended() with
2541 * the similarity of the renamed file. The similarity is
2542 * was used to determine that the two files were related
2543 * and are a rename, which we have already used, but beyond
2544 * that we have no use for the similarity. So p->score is
2545 * now irrelevant. However, process_renames() will need to
2546 * know which side of the merge this rename was associated
2547 * with, so overwrite p->score with that value.
2548 */
2549 p->score = side_index;
2550 result->queue[result->nr++] = p;
2551 }
2552
fa5e06d6
EN
2553 /* Free each value in the collisions map */
2554 strmap_for_each_entry(&collisions, &iter, entry) {
2555 struct collision_info *info = entry->value;
2556 string_list_clear(&info->source_files, 0);
2557 }
2558 /*
2559 * In compute_collisions(), we set collisions.strdup_strings to 0
2560 * so that we wouldn't have to make another copy of the new_path
2561 * allocated by apply_dir_rename(). But now that we've used them
2562 * and have no other references to these strings, it is time to
2563 * deallocate them.
2564 */
2565 free_strmap_strings(&collisions);
2566 strmap_clear(&collisions, 1);
965a7bc2 2567 return clean;
e1a124e8
EN
2568}
2569
231e2dd4
EN
2570static int detect_and_process_renames(struct merge_options *opt,
2571 struct tree *merge_base,
2572 struct tree *side1,
2573 struct tree *side2)
2574{
e1a124e8
EN
2575 struct diff_queue_struct combined;
2576 struct rename_info *renames = &opt->priv->renames;
112e1112 2577 int need_dir_renames, s, clean = 1;
e1a124e8
EN
2578
2579 memset(&combined, 0, sizeof(combined));
f89b4f2b
EN
2580 if (!possible_renames(renames))
2581 goto cleanup;
e1a124e8 2582
557ac035 2583 trace2_region_enter("merge", "regular renames", opt->repo);
f78cf976
EN
2584 detect_regular_renames(opt, MERGE_SIDE1);
2585 detect_regular_renames(opt, MERGE_SIDE2);
557ac035 2586 trace2_region_leave("merge", "regular renames", opt->repo);
e1a124e8 2587
557ac035 2588 trace2_region_enter("merge", "directory renames", opt->repo);
112e1112
EN
2589 need_dir_renames =
2590 !opt->priv->call_depth &&
2591 (opt->detect_directory_renames == MERGE_DIRECTORY_RENAMES_TRUE ||
2592 opt->detect_directory_renames == MERGE_DIRECTORY_RENAMES_CONFLICT);
2593
2594 if (need_dir_renames) {
2595 get_provisional_directory_renames(opt, MERGE_SIDE1, &clean);
2596 get_provisional_directory_renames(opt, MERGE_SIDE2, &clean);
2597 handle_directory_level_conflicts(opt);
2598 }
2599
e1a124e8
EN
2600 ALLOC_GROW(combined.queue,
2601 renames->pairs[1].nr + renames->pairs[2].nr,
2602 combined.alloc);
fa5e06d6
EN
2603 clean &= collect_renames(opt, &combined, MERGE_SIDE1,
2604 &renames->dir_renames[2],
2605 &renames->dir_renames[1]);
2606 clean &= collect_renames(opt, &combined, MERGE_SIDE2,
2607 &renames->dir_renames[1],
2608 &renames->dir_renames[2]);
72b30910 2609 STABLE_QSORT(combined.queue, combined.nr, compare_pairs);
557ac035 2610 trace2_region_leave("merge", "directory renames", opt->repo);
e1a124e8 2611
557ac035 2612 trace2_region_enter("merge", "process renames", opt->repo);
e1a124e8 2613 clean &= process_renames(opt, &combined);
557ac035 2614 trace2_region_leave("merge", "process renames", opt->repo);
e1a124e8 2615
f89b4f2b
EN
2616 goto simple_cleanup; /* collect_renames() handles some of cleanup */
2617
2618cleanup:
2619 /*
2620 * Free now unneeded filepairs, which would have been handled
2621 * in collect_renames() normally but we skipped that code.
2622 */
2623 for (s = MERGE_SIDE1; s <= MERGE_SIDE2; s++) {
2624 struct diff_queue_struct *side_pairs;
2625 int i;
2626
2627 side_pairs = &renames->pairs[s];
2628 for (i = 0; i < side_pairs->nr; ++i) {
2629 struct diff_filepair *p = side_pairs->queue[i];
2630 diff_free_filepair(p);
2631 }
2632 }
2633
2634simple_cleanup:
e1a124e8
EN
2635 /* Free memory for renames->pairs[] and combined */
2636 for (s = MERGE_SIDE1; s <= MERGE_SIDE2; s++) {
2637 free(renames->pairs[s].queue);
2638 DIFF_QUEUE_CLEAR(&renames->pairs[s]);
2639 }
2640 if (combined.nr) {
2641 int i;
2642 for (i = 0; i < combined.nr; i++)
2643 diff_free_filepair(combined.queue[i]);
2644 free(combined.queue);
2645 }
231e2dd4 2646
231e2dd4
EN
2647 return clean;
2648}
2649
04af1879
EN
2650/*** Function Grouping: functions related to process_entries() ***/
2651
8adffaa8
EN
2652static int string_list_df_name_compare(const char *one, const char *two)
2653{
2654 int onelen = strlen(one);
2655 int twolen = strlen(two);
2656 /*
2657 * Here we only care that entries for D/F conflicts are
2658 * adjacent, in particular with the file of the D/F conflict
2659 * appearing before files below the corresponding directory.
2660 * The order of the rest of the list is irrelevant for us.
2661 *
2662 * To achieve this, we sort with df_name_compare and provide
2663 * the mode S_IFDIR so that D/F conflicts will sort correctly.
2664 * We use the mode S_IFDIR for everything else for simplicity,
2665 * since in other cases any changes in their order due to
2666 * sorting cause no problems for us.
2667 */
2668 int cmp = df_name_compare(one, onelen, S_IFDIR,
2669 two, twolen, S_IFDIR);
2670 /*
2671 * Now that 'foo' and 'foo/bar' compare equal, we have to make sure
2672 * that 'foo' comes before 'foo/bar'.
2673 */
2674 if (cmp)
2675 return cmp;
2676 return onelen - twolen;
2677}
2678
3860220b
EN
2679static int read_oid_strbuf(struct merge_options *opt,
2680 const struct object_id *oid,
2681 struct strbuf *dst)
2682{
2683 void *buf;
2684 enum object_type type;
2685 unsigned long size;
2686 buf = read_object_file(oid, &type, &size);
2687 if (!buf)
2688 return err(opt, _("cannot read object %s"), oid_to_hex(oid));
2689 if (type != OBJ_BLOB) {
2690 free(buf);
2691 return err(opt, _("object %s is not a blob"), oid_to_hex(oid));
2692 }
2693 strbuf_attach(dst, buf, size, size + 1);
2694 return 0;
2695}
2696
2697static int blob_unchanged(struct merge_options *opt,
2698 const struct version_info *base,
2699 const struct version_info *side,
2700 const char *path)
2701{
2702 struct strbuf basebuf = STRBUF_INIT;
2703 struct strbuf sidebuf = STRBUF_INIT;
2704 int ret = 0; /* assume changed for safety */
2705 const struct index_state *idx = &opt->priv->attr_index;
2706
2707 if (!idx->initialized)
2708 initialize_attr_index(opt);
2709
2710 if (base->mode != side->mode)
2711 return 0;
2712 if (oideq(&base->oid, &side->oid))
2713 return 1;
2714
2715 if (read_oid_strbuf(opt, &base->oid, &basebuf) ||
2716 read_oid_strbuf(opt, &side->oid, &sidebuf))
2717 goto error_return;
2718 /*
2719 * Note: binary | is used so that both renormalizations are
2720 * performed. Comparison can be skipped if both files are
2721 * unchanged since their sha1s have already been compared.
2722 */
2723 if (renormalize_buffer(idx, path, basebuf.buf, basebuf.len, &basebuf) |
2724 renormalize_buffer(idx, path, sidebuf.buf, sidebuf.len, &sidebuf))
2725 ret = (basebuf.len == sidebuf.len &&
2726 !memcmp(basebuf.buf, sidebuf.buf, basebuf.len));
2727
2728error_return:
2729 strbuf_release(&basebuf);
2730 strbuf_release(&sidebuf);
2731 return ret;
2732}
2733
a9945bba 2734struct directory_versions {
bb470f4e
EN
2735 /*
2736 * versions: list of (basename -> version_info)
2737 *
2738 * The basenames are in reverse lexicographic order of full pathnames,
2739 * as processed in process_entries(). This puts all entries within
2740 * a directory together, and covers the directory itself after
2741 * everything within it, allowing us to write subtrees before needing
2742 * to record information for the tree itself.
2743 */
a9945bba 2744 struct string_list versions;
bb470f4e
EN
2745
2746 /*
2747 * offsets: list of (full relative path directories -> integer offsets)
2748 *
2749 * Since versions contains basenames from files in multiple different
2750 * directories, we need to know which entries in versions correspond
2751 * to which directories. Values of e.g.
2752 * "" 0
2753 * src 2
2754 * src/moduleA 5
2755 * Would mean that entries 0-1 of versions are files in the toplevel
2756 * directory, entries 2-4 are files under src/, and the remaining
2757 * entries starting at index 5 are files under src/moduleA/.
2758 */
2759 struct string_list offsets;
2760
2761 /*
2762 * last_directory: directory that previously processed file found in
2763 *
2764 * last_directory starts NULL, but records the directory in which the
2765 * previous file was found within. As soon as
2766 * directory(current_file) != last_directory
2767 * then we need to start updating accounting in versions & offsets.
2768 * Note that last_directory is always the last path in "offsets" (or
2769 * NULL if "offsets" is empty) so this exists just for quick access.
2770 */
2771 const char *last_directory;
2772
2773 /* last_directory_len: cached computation of strlen(last_directory) */
2774 unsigned last_directory_len;
a9945bba
EN
2775};
2776
ee4012dc
EN
2777static int tree_entry_order(const void *a_, const void *b_)
2778{
2779 const struct string_list_item *a = a_;
2780 const struct string_list_item *b = b_;
2781
2782 const struct merged_info *ami = a->util;
2783 const struct merged_info *bmi = b->util;
2784 return base_name_compare(a->string, strlen(a->string), ami->result.mode,
2785 b->string, strlen(b->string), bmi->result.mode);
2786}
2787
2788static void write_tree(struct object_id *result_oid,
2789 struct string_list *versions,
2790 unsigned int offset,
2791 size_t hash_size)
2792{
2793 size_t maxlen = 0, extra;
2794 unsigned int nr = versions->nr - offset;
2795 struct strbuf buf = STRBUF_INIT;
2796 struct string_list relevant_entries = STRING_LIST_INIT_NODUP;
2797 int i;
2798
2799 /*
2800 * We want to sort the last (versions->nr-offset) entries in versions.
2801 * Do so by abusing the string_list API a bit: make another string_list
2802 * that contains just those entries and then sort them.
2803 *
2804 * We won't use relevant_entries again and will let it just pop off the
2805 * stack, so there won't be allocation worries or anything.
2806 */
2807 relevant_entries.items = versions->items + offset;
2808 relevant_entries.nr = versions->nr - offset;
72b30910 2809 /* No need for STABLE_QSORT -- filenames must be unique */
ee4012dc
EN
2810 QSORT(relevant_entries.items, relevant_entries.nr, tree_entry_order);
2811
2812 /* Pre-allocate some space in buf */
2813 extra = hash_size + 8; /* 8: 6 for mode, 1 for space, 1 for NUL char */
2814 for (i = 0; i < nr; i++) {
2815 maxlen += strlen(versions->items[offset+i].string) + extra;
2816 }
2817 strbuf_grow(&buf, maxlen);
2818
2819 /* Write each entry out to buf */
2820 for (i = 0; i < nr; i++) {
2821 struct merged_info *mi = versions->items[offset+i].util;
2822 struct version_info *ri = &mi->result;
2823 strbuf_addf(&buf, "%o %s%c",
2824 ri->mode,
2825 versions->items[offset+i].string, '\0');
2826 strbuf_add(&buf, ri->oid.hash, hash_size);
2827 }
2828
2829 /* Write this object file out, and record in result_oid */
2830 write_object_file(buf.buf, buf.len, tree_type, result_oid);
2831 strbuf_release(&buf);
2832}
2833
a9945bba
EN
2834static void record_entry_for_tree(struct directory_versions *dir_metadata,
2835 const char *path,
2836 struct merged_info *mi)
2837{
2838 const char *basename;
2839
2840 if (mi->is_null)
2841 /* nothing to record */
2842 return;
2843
2844 basename = path + mi->basename_offset;
2845 assert(strchr(basename, '/') == NULL);
2846 string_list_append(&dir_metadata->versions,
2847 basename)->util = &mi->result;
2848}
2849
bb470f4e
EN
2850static void write_completed_directory(struct merge_options *opt,
2851 const char *new_directory_name,
2852 struct directory_versions *info)
2853{
2854 const char *prev_dir;
2855 struct merged_info *dir_info = NULL;
2856 unsigned int offset;
2857
2858 /*
2859 * Some explanation of info->versions and info->offsets...
2860 *
2861 * process_entries() iterates over all relevant files AND
2862 * directories in reverse lexicographic order, and calls this
2863 * function. Thus, an example of the paths that process_entries()
2864 * could operate on (along with the directories for those paths
2865 * being shown) is:
2866 *
2867 * xtract.c ""
2868 * tokens.txt ""
2869 * src/moduleB/umm.c src/moduleB
2870 * src/moduleB/stuff.h src/moduleB
2871 * src/moduleB/baz.c src/moduleB
2872 * src/moduleB src
2873 * src/moduleA/foo.c src/moduleA
2874 * src/moduleA/bar.c src/moduleA
2875 * src/moduleA src
2876 * src ""
2877 * Makefile ""
2878 *
2879 * info->versions:
2880 *
2881 * always contains the unprocessed entries and their
2882 * version_info information. For example, after the first five
2883 * entries above, info->versions would be:
2884 *
2885 * xtract.c <xtract.c's version_info>
2886 * token.txt <token.txt's version_info>
2887 * umm.c <src/moduleB/umm.c's version_info>
2888 * stuff.h <src/moduleB/stuff.h's version_info>
2889 * baz.c <src/moduleB/baz.c's version_info>
2890 *
2891 * Once a subdirectory is completed we remove the entries in
2892 * that subdirectory from info->versions, writing it as a tree
2893 * (write_tree()). Thus, as soon as we get to src/moduleB,
2894 * info->versions would be updated to
2895 *
2896 * xtract.c <xtract.c's version_info>
2897 * token.txt <token.txt's version_info>
2898 * moduleB <src/moduleB's version_info>
2899 *
2900 * info->offsets:
2901 *
2902 * helps us track which entries in info->versions correspond to
2903 * which directories. When we are N directories deep (e.g. 4
2904 * for src/modA/submod/subdir/), we have up to N+1 unprocessed
2905 * directories (+1 because of toplevel dir). Corresponding to
2906 * the info->versions example above, after processing five entries
2907 * info->offsets will be:
2908 *
2909 * "" 0
2910 * src/moduleB 2
2911 *
2912 * which is used to know that xtract.c & token.txt are from the
2913 * toplevel dirctory, while umm.c & stuff.h & baz.c are from the
2914 * src/moduleB directory. Again, following the example above,
2915 * once we need to process src/moduleB, then info->offsets is
2916 * updated to
2917 *
2918 * "" 0
2919 * src 2
2920 *
2921 * which says that moduleB (and only moduleB so far) is in the
2922 * src directory.
2923 *
2924 * One unique thing to note about info->offsets here is that
2925 * "src" was not added to info->offsets until there was a path
2926 * (a file OR directory) immediately below src/ that got
2927 * processed.
2928 *
2929 * Since process_entry() just appends new entries to info->versions,
2930 * write_completed_directory() only needs to do work if the next path
2931 * is in a directory that is different than the last directory found
2932 * in info->offsets.
2933 */
2934
2935 /*
2936 * If we are working with the same directory as the last entry, there
2937 * is no work to do. (See comments above the directory_name member of
2938 * struct merged_info for why we can use pointer comparison instead of
2939 * strcmp here.)
2940 */
2941 if (new_directory_name == info->last_directory)
2942 return;
2943
2944 /*
2945 * If we are just starting (last_directory is NULL), or last_directory
2946 * is a prefix of the current directory, then we can just update
2947 * info->offsets to record the offset where we started this directory
2948 * and update last_directory to have quick access to it.
2949 */
2950 if (info->last_directory == NULL ||
2951 !strncmp(new_directory_name, info->last_directory,
2952 info->last_directory_len)) {
2953 uintptr_t offset = info->versions.nr;
2954
2955 info->last_directory = new_directory_name;
2956 info->last_directory_len = strlen(info->last_directory);
2957 /*
2958 * Record the offset into info->versions where we will
2959 * start recording basenames of paths found within
2960 * new_directory_name.
2961 */
2962 string_list_append(&info->offsets,
2963 info->last_directory)->util = (void*)offset;
2964 return;
2965 }
2966
2967 /*
2968 * The next entry that will be processed will be within
2969 * new_directory_name. Since at this point we know that
2970 * new_directory_name is within a different directory than
2971 * info->last_directory, we have all entries for info->last_directory
2972 * in info->versions and we need to create a tree object for them.
2973 */
2974 dir_info = strmap_get(&opt->priv->paths, info->last_directory);
2975 assert(dir_info);
2976 offset = (uintptr_t)info->offsets.items[info->offsets.nr-1].util;
2977 if (offset == info->versions.nr) {
2978 /*
2979 * Actually, we don't need to create a tree object in this
2980 * case. Whenever all files within a directory disappear
2981 * during the merge (e.g. unmodified on one side and
2982 * deleted on the other, or files were renamed elsewhere),
2983 * then we get here and the directory itself needs to be
2984 * omitted from its parent tree as well.
2985 */
2986 dir_info->is_null = 1;
2987 } else {
2988 /*
2989 * Write out the tree to the git object directory, and also
2990 * record the mode and oid in dir_info->result.
2991 */
2992 dir_info->is_null = 0;
2993 dir_info->result.mode = S_IFDIR;
2994 write_tree(&dir_info->result.oid, &info->versions, offset,
2995 opt->repo->hash_algo->rawsz);
2996 }
2997
2998 /*
2999 * We've now used several entries from info->versions and one entry
3000 * from info->offsets, so we get rid of those values.
3001 */
3002 info->offsets.nr--;
3003 info->versions.nr = offset;
3004
3005 /*
3006 * Now we've taken care of the completed directory, but we need to
3007 * prepare things since future entries will be in
3008 * new_directory_name. (In particular, process_entry() will be
3009 * appending new entries to info->versions.) So, we need to make
3010 * sure new_directory_name is the last entry in info->offsets.
3011 */
3012 prev_dir = info->offsets.nr == 0 ? NULL :
3013 info->offsets.items[info->offsets.nr-1].string;
3014 if (new_directory_name != prev_dir) {
3015 uintptr_t c = info->versions.nr;
3016 string_list_append(&info->offsets,
3017 new_directory_name)->util = (void*)c;
3018 }
3019
3020 /* And, of course, we need to update last_directory to match. */
3021 info->last_directory = new_directory_name;
3022 info->last_directory_len = strlen(info->last_directory);
3023}
3024
6a02dd90
EN
3025/* Per entry merge function */
3026static void process_entry(struct merge_options *opt,
3027 const char *path,
a9945bba
EN
3028 struct conflict_info *ci,
3029 struct directory_versions *dir_metadata)
6a02dd90 3030{
23366d2a
EN
3031 int df_file_index = 0;
3032
6a02dd90
EN
3033 VERIFY_CI(ci);
3034 assert(ci->filemask >= 0 && ci->filemask <= 7);
3035 /* ci->match_mask == 7 was handled in collect_merge_info_callback() */
3036 assert(ci->match_mask == 0 || ci->match_mask == 3 ||
3037 ci->match_mask == 5 || ci->match_mask == 6);
3038
a9945bba
EN
3039 if (ci->dirmask) {
3040 record_entry_for_tree(dir_metadata, path, &ci->merged);
3041 if (ci->filemask == 0)
3042 /* nothing else to handle */
3043 return;
3044 assert(ci->df_conflict);
3045 }
3046
0ccfa4e5
EN
3047 if (ci->df_conflict && ci->merged.result.mode == 0) {
3048 int i;
3049
3050 /*
3051 * directory no longer in the way, but we do have a file we
3052 * need to place here so we need to clean away the "directory
3053 * merges to nothing" result.
3054 */
3055 ci->df_conflict = 0;
3056 assert(ci->filemask != 0);
3057 ci->merged.clean = 0;
3058 ci->merged.is_null = 0;
3059 /* and we want to zero out any directory-related entries */
3060 ci->match_mask = (ci->match_mask & ~ci->dirmask);
3061 ci->dirmask = 0;
3062 for (i = MERGE_BASE; i <= MERGE_SIDE2; i++) {
3063 if (ci->filemask & (1 << i))
3064 continue;
3065 ci->stages[i].mode = 0;
3066 oidcpy(&ci->stages[i].oid, &null_oid);
3067 }
3068 } else if (ci->df_conflict && ci->merged.result.mode != 0) {
23366d2a
EN
3069 /*
3070 * This started out as a D/F conflict, and the entries in
3071 * the competing directory were not removed by the merge as
3072 * evidenced by write_completed_directory() writing a value
3073 * to ci->merged.result.mode.
3074 */
3075 struct conflict_info *new_ci;
3076 const char *branch;
3077 const char *old_path = path;
3078 int i;
3079
3080 assert(ci->merged.result.mode == S_IFDIR);
3081
3082 /*
3083 * If filemask is 1, we can just ignore the file as having
3084 * been deleted on both sides. We do not want to overwrite
3085 * ci->merged.result, since it stores the tree for all the
3086 * files under it.
3087 */
3088 if (ci->filemask == 1) {
3089 ci->filemask = 0;
3090 return;
3091 }
3092
3093 /*
3094 * This file still exists on at least one side, and we want
3095 * the directory to remain here, so we need to move this
3096 * path to some new location.
3097 */
3098 new_ci = xcalloc(1, sizeof(*new_ci));
3099 /* We don't really want new_ci->merged.result copied, but it'll
3100 * be overwritten below so it doesn't matter. We also don't
3101 * want any directory mode/oid values copied, but we'll zero
3102 * those out immediately. We do want the rest of ci copied.
3103 */
3104 memcpy(new_ci, ci, sizeof(*ci));
3105 new_ci->match_mask = (new_ci->match_mask & ~new_ci->dirmask);
3106 new_ci->dirmask = 0;
3107 for (i = MERGE_BASE; i <= MERGE_SIDE2; i++) {
3108 if (new_ci->filemask & (1 << i))
3109 continue;
3110 /* zero out any entries related to directories */
3111 new_ci->stages[i].mode = 0;
3112 oidcpy(&new_ci->stages[i].oid, &null_oid);
3113 }
3114
3115 /*
3116 * Find out which side this file came from; note that we
3117 * cannot just use ci->filemask, because renames could cause
3118 * the filemask to go back to 7. So we use dirmask, then
3119 * pick the opposite side's index.
3120 */
3121 df_file_index = (ci->dirmask & (1 << 1)) ? 2 : 1;
3122 branch = (df_file_index == 1) ? opt->branch1 : opt->branch2;
3123 path = unique_path(&opt->priv->paths, path, branch);
3124 strmap_put(&opt->priv->paths, path, new_ci);
3125
3126 path_msg(opt, path, 0,
3127 _("CONFLICT (file/directory): directory in the way "
3128 "of %s from %s; moving it to %s instead."),
3129 old_path, branch, path);
3130
3131 /*
3132 * Zero out the filemask for the old ci. At this point, ci
3133 * was just an entry for a directory, so we don't need to
3134 * do anything more with it.
3135 */
3136 ci->filemask = 0;
3137
3138 /*
3139 * Now note that we're working on the new entry (path was
3140 * updated above.
3141 */
3142 ci = new_ci;
6a02dd90
EN
3143 }
3144
3145 /*
3146 * NOTE: Below there is a long switch-like if-elseif-elseif... block
3147 * which the code goes through even for the df_conflict cases
23366d2a 3148 * above.
6a02dd90
EN
3149 */
3150 if (ci->match_mask) {
3151 ci->merged.clean = 1;
3152 if (ci->match_mask == 6) {
3153 /* stages[1] == stages[2] */
3154 ci->merged.result.mode = ci->stages[1].mode;
3155 oidcpy(&ci->merged.result.oid, &ci->stages[1].oid);
3156 } else {
3157 /* determine the mask of the side that didn't match */
3158 unsigned int othermask = 7 & ~ci->match_mask;
3159 int side = (othermask == 4) ? 2 : 1;
3160
3161 ci->merged.result.mode = ci->stages[side].mode;
3162 ci->merged.is_null = !ci->merged.result.mode;
3163 oidcpy(&ci->merged.result.oid, &ci->stages[side].oid);
3164
3165 assert(othermask == 2 || othermask == 4);
3166 assert(ci->merged.is_null ==
3167 (ci->filemask == ci->match_mask));
3168 }
3169 } else if (ci->filemask >= 6 &&
3170 (S_IFMT & ci->stages[1].mode) !=
3171 (S_IFMT & ci->stages[2].mode)) {
4ef88fc3
EN
3172 /* Two different items from (file/submodule/symlink) */
3173 if (opt->priv->call_depth) {
3174 /* Just use the version from the merge base */
3175 ci->merged.clean = 0;
3176 oidcpy(&ci->merged.result.oid, &ci->stages[0].oid);
3177 ci->merged.result.mode = ci->stages[0].mode;
3178 ci->merged.is_null = (ci->merged.result.mode == 0);
3179 } else {
3180 /* Handle by renaming one or both to separate paths. */
3181 unsigned o_mode = ci->stages[0].mode;
3182 unsigned a_mode = ci->stages[1].mode;
3183 unsigned b_mode = ci->stages[2].mode;
3184 struct conflict_info *new_ci;
3185 const char *a_path = NULL, *b_path = NULL;
3186 int rename_a = 0, rename_b = 0;
3187
3188 new_ci = xmalloc(sizeof(*new_ci));
3189
3190 if (S_ISREG(a_mode))
3191 rename_a = 1;
3192 else if (S_ISREG(b_mode))
3193 rename_b = 1;
3194 else {
3195 rename_a = 1;
3196 rename_b = 1;
3197 }
3198
3199 path_msg(opt, path, 0,
3200 _("CONFLICT (distinct types): %s had different "
3201 "types on each side; renamed %s of them so "
3202 "each can be recorded somewhere."),
3203 path,
3204 (rename_a && rename_b) ? _("both") : _("one"));
3205
3206 ci->merged.clean = 0;
3207 memcpy(new_ci, ci, sizeof(*new_ci));
3208
3209 /* Put b into new_ci, removing a from stages */
3210 new_ci->merged.result.mode = ci->stages[2].mode;
3211 oidcpy(&new_ci->merged.result.oid, &ci->stages[2].oid);
3212 new_ci->stages[1].mode = 0;
3213 oidcpy(&new_ci->stages[1].oid, &null_oid);
3214 new_ci->filemask = 5;
3215 if ((S_IFMT & b_mode) != (S_IFMT & o_mode)) {
3216 new_ci->stages[0].mode = 0;
3217 oidcpy(&new_ci->stages[0].oid, &null_oid);
3218 new_ci->filemask = 4;
3219 }
3220
3221 /* Leave only a in ci, fixing stages. */
3222 ci->merged.result.mode = ci->stages[1].mode;
3223 oidcpy(&ci->merged.result.oid, &ci->stages[1].oid);
3224 ci->stages[2].mode = 0;
3225 oidcpy(&ci->stages[2].oid, &null_oid);
3226 ci->filemask = 3;
3227 if ((S_IFMT & a_mode) != (S_IFMT & o_mode)) {
3228 ci->stages[0].mode = 0;
3229 oidcpy(&ci->stages[0].oid, &null_oid);
3230 ci->filemask = 2;
3231 }
3232
3233 /* Insert entries into opt->priv_paths */
3234 assert(rename_a || rename_b);
3235 if (rename_a) {
3236 a_path = unique_path(&opt->priv->paths,
3237 path, opt->branch1);
3238 strmap_put(&opt->priv->paths, a_path, ci);
3239 }
3240
3241 if (rename_b)
3242 b_path = unique_path(&opt->priv->paths,
3243 path, opt->branch2);
3244 else
3245 b_path = path;
3246 strmap_put(&opt->priv->paths, b_path, new_ci);
3247
3248 if (rename_a && rename_b) {
3249 strmap_remove(&opt->priv->paths, path, 0);
3250 /*
3251 * We removed path from opt->priv->paths. path
3252 * will also eventually need to be freed, but
3253 * it may still be used by e.g. ci->pathnames.
3254 * So, store it in another string-list for now.
3255 */
3256 string_list_append(&opt->priv->paths_to_free,
3257 path);
3258 }
3259
3260 /*
3261 * Do special handling for b_path since process_entry()
3262 * won't be called on it specially.
3263 */
3264 strmap_put(&opt->priv->conflicted, b_path, new_ci);
3265 record_entry_for_tree(dir_metadata, b_path,
3266 &new_ci->merged);
3267
3268 /*
3269 * Remaining code for processing this entry should
3270 * think in terms of processing a_path.
3271 */
3272 if (a_path)
3273 path = a_path;
3274 }
6a02dd90 3275 } else if (ci->filemask >= 6) {
991bbdca
EN
3276 /* Need a two-way or three-way content merge */
3277 struct version_info merged_file;
3278 unsigned clean_merge;
3279 struct version_info *o = &ci->stages[0];
3280 struct version_info *a = &ci->stages[1];
3281 struct version_info *b = &ci->stages[2];
3282
3283 clean_merge = handle_content_merge(opt, path, o, a, b,
3284 ci->pathnames,
3285 opt->priv->call_depth * 2,
3286 &merged_file);
3287 ci->merged.clean = clean_merge &&
3288 !ci->df_conflict && !ci->path_conflict;
3289 ci->merged.result.mode = merged_file.mode;
3290 ci->merged.is_null = (merged_file.mode == 0);
3291 oidcpy(&ci->merged.result.oid, &merged_file.oid);
3292 if (clean_merge && ci->df_conflict) {
3293 assert(df_file_index == 1 || df_file_index == 2);
3294 ci->filemask = 1 << df_file_index;
3295 ci->stages[df_file_index].mode = merged_file.mode;
3296 oidcpy(&ci->stages[df_file_index].oid, &merged_file.oid);
3297 }
3298 if (!clean_merge) {
3299 const char *reason = _("content");
3300 if (ci->filemask == 6)
3301 reason = _("add/add");
3302 if (S_ISGITLINK(merged_file.mode))
3303 reason = _("submodule");
3304 path_msg(opt, path, 0,
3305 _("CONFLICT (%s): Merge conflict in %s"),
3306 reason, path);
3307 }
6a02dd90
EN
3308 } else if (ci->filemask == 3 || ci->filemask == 5) {
3309 /* Modify/delete */
c5a6f655
EN
3310 const char *modify_branch, *delete_branch;
3311 int side = (ci->filemask == 5) ? 2 : 1;
3312 int index = opt->priv->call_depth ? 0 : side;
3313
3314 ci->merged.result.mode = ci->stages[index].mode;
3315 oidcpy(&ci->merged.result.oid, &ci->stages[index].oid);
3316 ci->merged.clean = 0;
3317
3318 modify_branch = (side == 1) ? opt->branch1 : opt->branch2;
3319 delete_branch = (side == 1) ? opt->branch2 : opt->branch1;
3320
3860220b
EN
3321 if (opt->renormalize &&
3322 blob_unchanged(opt, &ci->stages[0], &ci->stages[side],
3323 path)) {
3324 ci->merged.is_null = 1;
3325 ci->merged.clean = 1;
3326 } else if (ci->path_conflict &&
3327 oideq(&ci->stages[0].oid, &ci->stages[side].oid)) {
2e91ddd2
EN
3328 /*
3329 * This came from a rename/delete; no action to take,
3330 * but avoid printing "modify/delete" conflict notice
3331 * since the contents were not modified.
3332 */
3333 } else {
3334 path_msg(opt, path, 0,
3335 _("CONFLICT (modify/delete): %s deleted in %s "
3336 "and modified in %s. Version %s of %s left "
3337 "in tree."),
3338 path, delete_branch, modify_branch,
3339 modify_branch, path);
3340 }
6a02dd90
EN
3341 } else if (ci->filemask == 2 || ci->filemask == 4) {
3342 /* Added on one side */
3343 int side = (ci->filemask == 4) ? 2 : 1;
3344 ci->merged.result.mode = ci->stages[side].mode;
3345 oidcpy(&ci->merged.result.oid, &ci->stages[side].oid);
53e88a03 3346 ci->merged.clean = !ci->df_conflict && !ci->path_conflict;
6a02dd90
EN
3347 } else if (ci->filemask == 1) {
3348 /* Deleted on both sides */
3349 ci->merged.is_null = 1;
3350 ci->merged.result.mode = 0;
3351 oidcpy(&ci->merged.result.oid, &null_oid);
53e88a03 3352 ci->merged.clean = !ci->path_conflict;
6a02dd90
EN
3353 }
3354
3355 /*
3356 * If still conflicted, record it separately. This allows us to later
3357 * iterate over just conflicted entries when updating the index instead
3358 * of iterating over all entries.
3359 */
3360 if (!ci->merged.clean)
3361 strmap_put(&opt->priv->conflicted, path, ci);
a9945bba 3362 record_entry_for_tree(dir_metadata, path, &ci->merged);
6a02dd90
EN
3363}
3364
231e2dd4
EN
3365static void process_entries(struct merge_options *opt,
3366 struct object_id *result_oid)
3367{
6a02dd90
EN
3368 struct hashmap_iter iter;
3369 struct strmap_entry *e;
8adffaa8
EN
3370 struct string_list plist = STRING_LIST_INIT_NODUP;
3371 struct string_list_item *entry;
bb470f4e
EN
3372 struct directory_versions dir_metadata = { STRING_LIST_INIT_NODUP,
3373 STRING_LIST_INIT_NODUP,
3374 NULL, 0 };
6a02dd90 3375
557ac035 3376 trace2_region_enter("merge", "process_entries setup", opt->repo);
6a02dd90
EN
3377 if (strmap_empty(&opt->priv->paths)) {
3378 oidcpy(result_oid, opt->repo->hash_algo->empty_tree);
3379 return;
3380 }
3381
8adffaa8 3382 /* Hack to pre-allocate plist to the desired size */
557ac035 3383 trace2_region_enter("merge", "plist grow", opt->repo);
8adffaa8 3384 ALLOC_GROW(plist.items, strmap_get_size(&opt->priv->paths), plist.alloc);
557ac035 3385 trace2_region_leave("merge", "plist grow", opt->repo);
8adffaa8
EN
3386
3387 /* Put every entry from paths into plist, then sort */
557ac035 3388 trace2_region_enter("merge", "plist copy", opt->repo);
6a02dd90 3389 strmap_for_each_entry(&opt->priv->paths, &iter, e) {
8adffaa8
EN
3390 string_list_append(&plist, e->key)->util = e->value;
3391 }
557ac035
EN
3392 trace2_region_leave("merge", "plist copy", opt->repo);
3393
3394 trace2_region_enter("merge", "plist special sort", opt->repo);
8adffaa8
EN
3395 plist.cmp = string_list_df_name_compare;
3396 string_list_sort(&plist);
557ac035
EN
3397 trace2_region_leave("merge", "plist special sort", opt->repo);
3398
3399 trace2_region_leave("merge", "process_entries setup", opt->repo);
8adffaa8
EN
3400
3401 /*
3402 * Iterate over the items in reverse order, so we can handle paths
3403 * below a directory before needing to handle the directory itself.
bb470f4e
EN
3404 *
3405 * This allows us to write subtrees before we need to write trees,
3406 * and it also enables sane handling of directory/file conflicts
3407 * (because it allows us to know whether the directory is still in
3408 * the way when it is time to process the file at the same path).
8adffaa8 3409 */
557ac035 3410 trace2_region_enter("merge", "processing", opt->repo);
8adffaa8
EN
3411 for (entry = &plist.items[plist.nr-1]; entry >= plist.items; --entry) {
3412 char *path = entry->string;
6a02dd90
EN
3413 /*
3414 * NOTE: mi may actually be a pointer to a conflict_info, but
3415 * we have to check mi->clean first to see if it's safe to
3416 * reassign to such a pointer type.
3417 */
8adffaa8 3418 struct merged_info *mi = entry->util;
6a02dd90 3419
bb470f4e
EN
3420 write_completed_directory(opt, mi->directory_name,
3421 &dir_metadata);
a9945bba
EN
3422 if (mi->clean)
3423 record_entry_for_tree(&dir_metadata, path, mi);
3424 else {
8adffaa8 3425 struct conflict_info *ci = (struct conflict_info *)mi;
a9945bba 3426 process_entry(opt, path, ci, &dir_metadata);
8adffaa8 3427 }
6a02dd90 3428 }
557ac035 3429 trace2_region_leave("merge", "processing", opt->repo);
6a02dd90 3430
557ac035 3431 trace2_region_enter("merge", "process_entries cleanup", opt->repo);
bb470f4e
EN
3432 if (dir_metadata.offsets.nr != 1 ||
3433 (uintptr_t)dir_metadata.offsets.items[0].util != 0) {
3434 printf("dir_metadata.offsets.nr = %d (should be 1)\n",
3435 dir_metadata.offsets.nr);
3436 printf("dir_metadata.offsets.items[0].util = %u (should be 0)\n",
3437 (unsigned)(uintptr_t)dir_metadata.offsets.items[0].util);
3438 fflush(stdout);
3439 BUG("dir_metadata accounting completely off; shouldn't happen");
3440 }
ee4012dc
EN
3441 write_tree(result_oid, &dir_metadata.versions, 0,
3442 opt->repo->hash_algo->rawsz);
8adffaa8 3443 string_list_clear(&plist, 0);
a9945bba 3444 string_list_clear(&dir_metadata.versions, 0);
bb470f4e 3445 string_list_clear(&dir_metadata.offsets, 0);
557ac035 3446 trace2_region_leave("merge", "process_entries cleanup", opt->repo);
231e2dd4
EN
3447}
3448
04af1879
EN
3449/*** Function Grouping: functions related to merge_switch_to_result() ***/
3450
9fefce68
EN
3451static int checkout(struct merge_options *opt,
3452 struct tree *prev,
3453 struct tree *next)
3454{
6681ce5c
EN
3455 /* Switch the index/working copy from old to new */
3456 int ret;
3457 struct tree_desc trees[2];
3458 struct unpack_trees_options unpack_opts;
3459
3460 memset(&unpack_opts, 0, sizeof(unpack_opts));
3461 unpack_opts.head_idx = -1;
3462 unpack_opts.src_index = opt->repo->index;
3463 unpack_opts.dst_index = opt->repo->index;
3464
3465 setup_unpack_trees_porcelain(&unpack_opts, "merge");
3466
3467 /*
3468 * NOTE: if this were just "git checkout" code, we would probably
3469 * read or refresh the cache and check for a conflicted index, but
3470 * builtin/merge.c or sequencer.c really needs to read the index
3471 * and check for conflicted entries before starting merging for a
3472 * good user experience (no sense waiting for merges/rebases before
3473 * erroring out), so there's no reason to duplicate that work here.
3474 */
3475
3476 /* 2-way merge to the new branch */
3477 unpack_opts.update = 1;
3478 unpack_opts.merge = 1;
3479 unpack_opts.quiet = 0; /* FIXME: sequencer might want quiet? */
3480 unpack_opts.verbose_update = (opt->verbosity > 2);
3481 unpack_opts.fn = twoway_merge;
3482 if (1/* FIXME: opts->overwrite_ignore*/) {
3483 unpack_opts.dir = xcalloc(1, sizeof(*unpack_opts.dir));
3484 unpack_opts.dir->flags |= DIR_SHOW_IGNORED;
3485 setup_standard_excludes(unpack_opts.dir);
3486 }
3487 parse_tree(prev);
3488 init_tree_desc(&trees[0], prev->buffer, prev->size);
3489 parse_tree(next);
3490 init_tree_desc(&trees[1], next->buffer, next->size);
3491
3492 ret = unpack_trees(2, trees, &unpack_opts);
3493 clear_unpack_trees_porcelain(&unpack_opts);
3494 dir_clear(unpack_opts.dir);
3495 FREE_AND_NULL(unpack_opts.dir);
3496 return ret;
9fefce68
EN
3497}
3498
66b209b8 3499static int record_conflicted_index_entries(struct merge_options *opt)
9fefce68 3500{
ef2b3693
EN
3501 struct hashmap_iter iter;
3502 struct strmap_entry *e;
66b209b8
EN
3503 struct index_state *index = opt->repo->index;
3504 struct checkout state = CHECKOUT_INIT;
ef2b3693
EN
3505 int errs = 0;
3506 int original_cache_nr;
3507
66b209b8 3508 if (strmap_empty(&opt->priv->conflicted))
9fefce68
EN
3509 return 0;
3510
66b209b8
EN
3511 /* If any entries have skip_worktree set, we'll have to check 'em out */
3512 state.force = 1;
3513 state.quiet = 1;
3514 state.refresh_cache = 1;
3515 state.istate = index;
ef2b3693
EN
3516 original_cache_nr = index->cache_nr;
3517
3518 /* Put every entry from paths into plist, then sort */
66b209b8 3519 strmap_for_each_entry(&opt->priv->conflicted, &iter, e) {
ef2b3693
EN
3520 const char *path = e->key;
3521 struct conflict_info *ci = e->value;
3522 int pos;
3523 struct cache_entry *ce;
3524 int i;
3525
3526 VERIFY_CI(ci);
3527
3528 /*
3529 * The index will already have a stage=0 entry for this path,
3530 * because we created an as-merged-as-possible version of the
3531 * file and checkout() moved the working copy and index over
3532 * to that version.
3533 *
3534 * However, previous iterations through this loop will have
3535 * added unstaged entries to the end of the cache which
3536 * ignore the standard alphabetical ordering of cache
3537 * entries and break invariants needed for index_name_pos()
3538 * to work. However, we know the entry we want is before
3539 * those appended cache entries, so do a temporary swap on
3540 * cache_nr to only look through entries of interest.
3541 */
3542 SWAP(index->cache_nr, original_cache_nr);
3543 pos = index_name_pos(index, path, strlen(path));
3544 SWAP(index->cache_nr, original_cache_nr);
3545 if (pos < 0) {
3546 if (ci->filemask != 1)
3547 BUG("Conflicted %s but nothing in basic working tree or index; this shouldn't happen", path);
3548 cache_tree_invalidate_path(index, path);
3549 } else {
3550 ce = index->cache[pos];
3551
3552 /*
3553 * Clean paths with CE_SKIP_WORKTREE set will not be
3554 * written to the working tree by the unpack_trees()
3555 * call in checkout(). Our conflicted entries would
3556 * have appeared clean to that code since we ignored
3557 * the higher order stages. Thus, we need override
3558 * the CE_SKIP_WORKTREE bit and manually write those
3559 * files to the working disk here.
ef2b3693 3560 */
66b209b8
EN
3561 if (ce_skip_worktree(ce)) {
3562 struct stat st;
3563
3564 if (!lstat(path, &st)) {
3565 char *new_name = unique_path(&opt->priv->paths,
3566 path,
3567 "cruft");
3568
3569 path_msg(opt, path, 1,
3570 _("Note: %s not up to date and in way of checking out conflicted version; old copy renamed to %s"),
3571 path, new_name);
3572 errs |= rename(path, new_name);
3573 free(new_name);
3574 }
3575 errs |= checkout_entry(ce, &state, NULL, NULL);
3576 }
ef2b3693
EN
3577
3578 /*
3579 * Mark this cache entry for removal and instead add
3580 * new stage>0 entries corresponding to the
3581 * conflicts. If there are many conflicted entries, we
3582 * want to avoid memmove'ing O(NM) entries by
3583 * inserting the new entries one at a time. So,
3584 * instead, we just add the new cache entries to the
3585 * end (ignoring normal index requirements on sort
3586 * order) and sort the index once we're all done.
3587 */
3588 ce->ce_flags |= CE_REMOVE;
3589 }
3590
3591 for (i = MERGE_BASE; i <= MERGE_SIDE2; i++) {
3592 struct version_info *vi;
3593 if (!(ci->filemask & (1ul << i)))
3594 continue;
3595 vi = &ci->stages[i];
3596 ce = make_cache_entry(index, vi->mode, &vi->oid,
3597 path, i+1, 0);
3598 add_index_entry(index, ce, ADD_CACHE_JUST_APPEND);
3599 }
3600 }
3601
3602 /*
3603 * Remove the unused cache entries (and invalidate the relevant
3604 * cache-trees), then sort the index entries to get the conflicted
3605 * entries we added to the end into their right locations.
3606 */
3607 remove_marked_cache_entries(index, 1);
72b30910
EN
3608 /*
3609 * No need for STABLE_QSORT -- cmp_cache_name_compare sorts primarily
3610 * on filename and secondarily on stage, and (name, stage #) are a
3611 * unique tuple.
3612 */
ef2b3693
EN
3613 QSORT(index->cache, index->cache_nr, cmp_cache_name_compare);
3614
3615 return errs;
9fefce68
EN
3616}
3617
17e5574b
EN
3618void merge_switch_to_result(struct merge_options *opt,
3619 struct tree *head,
3620 struct merge_result *result,
3621 int update_worktree_and_index,
3622 int display_update_msgs)
3623{
9fefce68
EN
3624 assert(opt->priv == NULL);
3625 if (result->clean >= 0 && update_worktree_and_index) {
5291828d
EN
3626 const char *filename;
3627 FILE *fp;
3628
557ac035 3629 trace2_region_enter("merge", "checkout", opt->repo);
9fefce68
EN
3630 if (checkout(opt, head, result->tree)) {
3631 /* failure to function */
3632 result->clean = -1;
3633 return;
3634 }
557ac035 3635 trace2_region_leave("merge", "checkout", opt->repo);
9fefce68 3636
557ac035 3637 trace2_region_enter("merge", "record_conflicted", opt->repo);
66b209b8
EN
3638 opt->priv = result->priv;
3639 if (record_conflicted_index_entries(opt)) {
9fefce68 3640 /* failure to function */
66b209b8 3641 opt->priv = NULL;
9fefce68
EN
3642 result->clean = -1;
3643 return;
3644 }
66b209b8 3645 opt->priv = NULL;
557ac035 3646 trace2_region_leave("merge", "record_conflicted", opt->repo);
5291828d
EN
3647
3648 trace2_region_enter("merge", "write_auto_merge", opt->repo);
3649 filename = git_path_auto_merge(opt->repo);
3650 fp = xfopen(filename, "w");
3651 fprintf(fp, "%s\n", oid_to_hex(&result->tree->object.oid));
3652 fclose(fp);
3653 trace2_region_leave("merge", "write_auto_merge", opt->repo);
9fefce68
EN
3654 }
3655
3656 if (display_update_msgs) {
c5a6f655
EN
3657 struct merge_options_internal *opti = result->priv;
3658 struct hashmap_iter iter;
3659 struct strmap_entry *e;
3660 struct string_list olist = STRING_LIST_INIT_NODUP;
3661 int i;
3662
557ac035
EN
3663 trace2_region_enter("merge", "display messages", opt->repo);
3664
c5a6f655
EN
3665 /* Hack to pre-allocate olist to the desired size */
3666 ALLOC_GROW(olist.items, strmap_get_size(&opti->output),
3667 olist.alloc);
3668
3669 /* Put every entry from output into olist, then sort */
3670 strmap_for_each_entry(&opti->output, &iter, e) {
3671 string_list_append(&olist, e->key)->util = e->value;
3672 }
3673 string_list_sort(&olist);
3674
3675 /* Iterate over the items, printing them */
3676 for (i = 0; i < olist.nr; ++i) {
3677 struct strbuf *sb = olist.items[i].util;
3678
3679 printf("%s", sb->buf);
3680 }
3681 string_list_clear(&olist, 0);
f39d05ca
EN
3682
3683 /* Also include needed rename limit adjustment now */
3684 diff_warn_rename_limit("merge.renamelimit",
3685 opti->renames.needed_limit, 0);
557ac035
EN
3686
3687 trace2_region_leave("merge", "display messages", opt->repo);
9fefce68
EN
3688 }
3689
17e5574b
EN
3690 merge_finalize(opt, result);
3691}
3692
3693void merge_finalize(struct merge_options *opt,
3694 struct merge_result *result)
3695{
89422d29
EN
3696 struct merge_options_internal *opti = result->priv;
3697
ea305a68
EN
3698 if (opt->renormalize)
3699 git_attr_set_direction(GIT_ATTR_CHECKIN);
89422d29
EN
3700 assert(opt->priv == NULL);
3701
43e9c4ee 3702 clear_or_reinit_internal_opts(opti, 0);
89422d29 3703 FREE_AND_NULL(opti);
17e5574b
EN
3704}
3705
04af1879
EN
3706/*** Function Grouping: helper functions for merge_incore_*() ***/
3707
3639dfb3
EN
3708static struct tree *shift_tree_object(struct repository *repo,
3709 struct tree *one, struct tree *two,
3710 const char *subtree_shift)
3711{
3712 struct object_id shifted;
3713
3714 if (!*subtree_shift) {
3715 shift_tree(repo, &one->object.oid, &two->object.oid, &shifted, 0);
3716 } else {
3717 shift_tree_by(repo, &one->object.oid, &two->object.oid, &shifted,
3718 subtree_shift);
3719 }
3720 if (oideq(&two->object.oid, &shifted))
3721 return two;
3722 return lookup_tree(repo, &shifted);
3723}
3724
4296d8f1
EN
3725static inline void set_commit_tree(struct commit *c, struct tree *t)
3726{
3727 c->maybe_tree = t;
3728}
3729
4296d8f1
EN
3730static struct commit *make_virtual_commit(struct repository *repo,
3731 struct tree *tree,
3732 const char *comment)
3733{
3734 struct commit *commit = alloc_commit_node(repo);
3735
3736 set_merge_remote_desc(commit, comment, (struct object *)commit);
3737 set_commit_tree(commit, tree);
3738 commit->object.parsed = 1;
3739 return commit;
3740}
3741
231e2dd4
EN
3742static void merge_start(struct merge_options *opt, struct merge_result *result)
3743{
f5d9fbc2
EN
3744 struct rename_info *renames;
3745 int i;
3746
e4171b1b 3747 /* Sanity checks on opt */
557ac035 3748 trace2_region_enter("merge", "sanity checks", opt->repo);
e4171b1b
EN
3749 assert(opt->repo);
3750
3751 assert(opt->branch1 && opt->branch2);
3752
3753 assert(opt->detect_directory_renames >= MERGE_DIRECTORY_RENAMES_NONE &&
3754 opt->detect_directory_renames <= MERGE_DIRECTORY_RENAMES_TRUE);
3755 assert(opt->rename_limit >= -1);
3756 assert(opt->rename_score >= 0 && opt->rename_score <= MAX_SCORE);
3757 assert(opt->show_rename_progress >= 0 && opt->show_rename_progress <= 1);
3758
3759 assert(opt->xdl_opts >= 0);
3760 assert(opt->recursive_variant >= MERGE_VARIANT_NORMAL &&
3761 opt->recursive_variant <= MERGE_VARIANT_THEIRS);
3762
3763 /*
3764 * detect_renames, verbosity, buffer_output, and obuf are ignored
3765 * fields that were used by "recursive" rather than "ort" -- but
3766 * sanity check them anyway.
3767 */
3768 assert(opt->detect_renames >= -1 &&
3769 opt->detect_renames <= DIFF_DETECT_COPY);
3770 assert(opt->verbosity >= 0 && opt->verbosity <= 5);
3771 assert(opt->buffer_output <= 2);
3772 assert(opt->obuf.len == 0);
3773
3774 assert(opt->priv == NULL);
19ceb486
EN
3775 if (result->_properly_initialized != 0 &&
3776 result->_properly_initialized != RESULT_INITIALIZED)
3777 BUG("struct merge_result passed to merge_incore_*recursive() must be zeroed or filled with values from a previous run");
3778 assert(!!result->priv == !!result->_properly_initialized);
cf8937ac
EN
3779 if (result->priv) {
3780 opt->priv = result->priv;
3781 result->priv = NULL;
3782 /*
3783 * opt->priv non-NULL means we had results from a previous
3784 * run; do a few sanity checks that user didn't mess with
3785 * it in an obvious fashion.
3786 */
3787 assert(opt->priv->call_depth == 0);
3788 assert(!opt->priv->toplevel_dir ||
3789 0 == strlen(opt->priv->toplevel_dir));
3790 }
557ac035 3791 trace2_region_leave("merge", "sanity checks", opt->repo);
e4171b1b 3792
c8017176
EN
3793 /* Default to histogram diff. Actually, just hardcode it...for now. */
3794 opt->xdl_opts = DIFF_WITH_ALG(opt, HISTOGRAM_DIFF);
3795
ea305a68
EN
3796 /* Handle attr direction stuff for renormalization */
3797 if (opt->renormalize)
3798 git_attr_set_direction(GIT_ATTR_CHECKOUT);
3799
e4171b1b 3800 /* Initialization of opt->priv, our internal merge data */
557ac035 3801 trace2_region_enter("merge", "allocate/init", opt->repo);
cf8937ac
EN
3802 if (opt->priv) {
3803 clear_or_reinit_internal_opts(opt->priv, 1);
3804 trace2_region_leave("merge", "allocate/init", opt->repo);
3805 return;
3806 }
e4171b1b
EN
3807 opt->priv = xcalloc(1, sizeof(*opt->priv));
3808
f5d9fbc2
EN
3809 /* Initialization of various renames fields */
3810 renames = &opt->priv->renames;
3811 for (i = MERGE_SIDE1; i <= MERGE_SIDE2; i++) {
a49b55d5 3812 strintmap_init_with_options(&renames->dirs_removed[i],
fb52938e 3813 NOT_RELEVANT, NULL, 0);
f5d9fbc2
EN
3814 strmap_init_with_options(&renames->dir_rename_count[i],
3815 NULL, 1);
3816 strmap_init_with_options(&renames->dir_renames[i],
3817 NULL, 0);
2734f2e3
EN
3818 /*
3819 * relevant_sources uses -1 for the default, because we need
3820 * to be able to distinguish not-in-strintmap from valid
3821 * relevant_source values from enum file_rename_relevance.
3822 * In particular, possibly_cache_new_pair() expects a negative
3823 * value for not-found entries.
3824 */
a49b55d5 3825 strintmap_init_with_options(&renames->relevant_sources[i],
2734f2e3
EN
3826 -1 /* explicitly invalid */,
3827 NULL, 0);
d29bd6d7
EN
3828 strmap_init_with_options(&renames->cached_pairs[i],
3829 NULL, 1);
3830 strset_init_with_options(&renames->cached_irrelevant[i],
3831 NULL, 1);
3832 strset_init_with_options(&renames->cached_target_names[i],
3833 NULL, 0);
f5d9fbc2
EN
3834 }
3835
e4171b1b
EN
3836 /*
3837 * Although we initialize opt->priv->paths with strdup_strings=0,
3838 * that's just to avoid making yet another copy of an allocated
3839 * string. Putting the entry into paths means we are taking
43c1dccb 3840 * ownership, so we will later free it. paths_to_free is similar.
e4171b1b
EN
3841 *
3842 * In contrast, conflicted just has a subset of keys from paths, so
3843 * we don't want to free those (it'd be a duplicate free).
3844 */
3845 strmap_init_with_options(&opt->priv->paths, NULL, 0);
3846 strmap_init_with_options(&opt->priv->conflicted, NULL, 0);
43c1dccb 3847 string_list_init(&opt->priv->paths_to_free, 0);
c5a6f655
EN
3848
3849 /*
3850 * keys & strbufs in output will sometimes need to outlive "paths",
3851 * so it will have a copy of relevant keys. It's probably a small
3852 * subset of the overall paths that have special output.
3853 */
3854 strmap_init(&opt->priv->output);
557ac035
EN
3855
3856 trace2_region_leave("merge", "allocate/init", opt->repo);
231e2dd4
EN
3857}
3858
64aceb6d
EN
3859static void merge_check_renames_reusable(struct merge_options *opt,
3860 struct merge_result *result,
3861 struct tree *merge_base,
3862 struct tree *side1,
3863 struct tree *side2)
3864{
3865 struct rename_info *renames;
3866 struct tree **merge_trees;
3867 struct merge_options_internal *opti = result->priv;
3868
3869 if (!opti)
3870 return;
3871
3872 renames = &opti->renames;
3873 merge_trees = renames->merge_trees;
3874 /* merge_trees[0..2] will only be NULL if opti is */
3875 assert(merge_trees[0] && merge_trees[1] && merge_trees[2]);
3876
3877 /* Check if we meet a condition for re-using cached_pairs */
3878 if (oideq(&merge_base->object.oid, &merge_trees[2]->object.oid) &&
3879 oideq(&side1->object.oid, &result->tree->object.oid))
3880 renames->cached_pairs_valid_side = MERGE_SIDE1;
3881 else if (oideq(&merge_base->object.oid, &merge_trees[1]->object.oid) &&
3882 oideq(&side2->object.oid, &result->tree->object.oid))
3883 renames->cached_pairs_valid_side = MERGE_SIDE2;
3884 else
3885 renames->cached_pairs_valid_side = 0; /* neither side valid */
3886}
3887
04af1879
EN
3888/*** Function Grouping: merge_incore_*() and their internal variants ***/
3889
231e2dd4
EN
3890/*
3891 * Originally from merge_trees_internal(); heavily adapted, though.
3892 */
3893static void merge_ort_nonrecursive_internal(struct merge_options *opt,
3894 struct tree *merge_base,
3895 struct tree *side1,
3896 struct tree *side2,
3897 struct merge_result *result)
3898{
3899 struct object_id working_tree_oid;
3900
3639dfb3
EN
3901 if (opt->subtree_shift) {
3902 side2 = shift_tree_object(opt->repo, side1, side2,
3903 opt->subtree_shift);
3904 merge_base = shift_tree_object(opt->repo, side1, merge_base,
3905 opt->subtree_shift);
3906 }
3907
557ac035 3908 trace2_region_enter("merge", "collect_merge_info", opt->repo);
0c0d705b
EN
3909 if (collect_merge_info(opt, merge_base, side1, side2) != 0) {
3910 /*
3911 * TRANSLATORS: The %s arguments are: 1) tree hash of a merge
3912 * base, and 2-3) the trees for the two trees we're merging.
3913 */
3914 err(opt, _("collecting merge info failed for trees %s, %s, %s"),
3915 oid_to_hex(&merge_base->object.oid),
3916 oid_to_hex(&side1->object.oid),
3917 oid_to_hex(&side2->object.oid));
3918 result->clean = -1;
3919 return;
3920 }
557ac035 3921 trace2_region_leave("merge", "collect_merge_info", opt->repo);
0c0d705b 3922
557ac035 3923 trace2_region_enter("merge", "renames", opt->repo);
231e2dd4
EN
3924 result->clean = detect_and_process_renames(opt, merge_base,
3925 side1, side2);
557ac035
EN
3926 trace2_region_leave("merge", "renames", opt->repo);
3927
3928 trace2_region_enter("merge", "process_entries", opt->repo);
231e2dd4 3929 process_entries(opt, &working_tree_oid);
557ac035 3930 trace2_region_leave("merge", "process_entries", opt->repo);
231e2dd4
EN
3931
3932 /* Set return values */
3933 result->tree = parse_tree_indirect(&working_tree_oid);
3934 /* existence of conflicted entries implies unclean */
3935 result->clean &= strmap_empty(&opt->priv->conflicted);
3936 if (!opt->priv->call_depth) {
3937 result->priv = opt->priv;
19ceb486 3938 result->_properly_initialized = RESULT_INITIALIZED;
231e2dd4
EN
3939 opt->priv = NULL;
3940 }
3941}
3942
8119214f
EN
3943/*
3944 * Originally from merge_recursive_internal(); somewhat adapted, though.
3945 */
3946static void merge_ort_internal(struct merge_options *opt,
3947 struct commit_list *merge_bases,
3948 struct commit *h1,
3949 struct commit *h2,
3950 struct merge_result *result)
3951{
3952 struct commit_list *iter;
3953 struct commit *merged_merge_bases;
3954 const char *ancestor_name;
3955 struct strbuf merge_base_abbrev = STRBUF_INIT;
3956
3957 if (!merge_bases) {
3958 merge_bases = get_merge_bases(h1, h2);
3959 /* See merge-ort.h:merge_incore_recursive() declaration NOTE */
3960 merge_bases = reverse_commit_list(merge_bases);
3961 }
3962
3963 merged_merge_bases = pop_commit(&merge_bases);
3964 if (merged_merge_bases == NULL) {
3965 /* if there is no common ancestor, use an empty tree */
3966 struct tree *tree;
3967
3968 tree = lookup_tree(opt->repo, opt->repo->hash_algo->empty_tree);
3969 merged_merge_bases = make_virtual_commit(opt->repo, tree,
3970 "ancestor");
3971 ancestor_name = "empty tree";
3972 } else if (merge_bases) {
3973 ancestor_name = "merged common ancestors";
3974 } else {
3975 strbuf_add_unique_abbrev(&merge_base_abbrev,
3976 &merged_merge_bases->object.oid,
3977 DEFAULT_ABBREV);
3978 ancestor_name = merge_base_abbrev.buf;
3979 }
3980
3981 for (iter = merge_bases; iter; iter = iter->next) {
3982 const char *saved_b1, *saved_b2;
3983 struct commit *prev = merged_merge_bases;
3984
3985 opt->priv->call_depth++;
3986 /*
3987 * When the merge fails, the result contains files
3988 * with conflict markers. The cleanness flag is
3989 * ignored (unless indicating an error), it was never
3990 * actually used, as result of merge_trees has always
3991 * overwritten it: the committed "conflicts" were
3992 * already resolved.
3993 */
3994 saved_b1 = opt->branch1;
3995 saved_b2 = opt->branch2;
3996 opt->branch1 = "Temporary merge branch 1";
3997 opt->branch2 = "Temporary merge branch 2";
3998 merge_ort_internal(opt, NULL, prev, iter->item, result);
3999 if (result->clean < 0)
4000 return;
4001 opt->branch1 = saved_b1;
4002 opt->branch2 = saved_b2;
4003 opt->priv->call_depth--;
4004
4005 merged_merge_bases = make_virtual_commit(opt->repo,
4006 result->tree,
4007 "merged tree");
4008 commit_list_insert(prev, &merged_merge_bases->parents);
4009 commit_list_insert(iter->item,
4010 &merged_merge_bases->parents->next);
4011
4012 clear_or_reinit_internal_opts(opt->priv, 1);
4013 }
4014
4015 opt->ancestor = ancestor_name;
4016 merge_ort_nonrecursive_internal(opt,
4017 repo_get_commit_tree(opt->repo,
4018 merged_merge_bases),
4019 repo_get_commit_tree(opt->repo, h1),
4020 repo_get_commit_tree(opt->repo, h2),
4021 result);
4022 strbuf_release(&merge_base_abbrev);
4023 opt->ancestor = NULL; /* avoid accidental re-use of opt->ancestor */
4024}
4025
17e5574b
EN
4026void merge_incore_nonrecursive(struct merge_options *opt,
4027 struct tree *merge_base,
4028 struct tree *side1,
4029 struct tree *side2,
4030 struct merge_result *result)
4031{
557ac035
EN
4032 trace2_region_enter("merge", "incore_nonrecursive", opt->repo);
4033
4034 trace2_region_enter("merge", "merge_start", opt->repo);
231e2dd4 4035 assert(opt->ancestor != NULL);
64aceb6d 4036 merge_check_renames_reusable(opt, result, merge_base, side1, side2);
231e2dd4 4037 merge_start(opt, result);
64aceb6d
EN
4038 /*
4039 * Record the trees used in this merge, so if there's a next merge in
4040 * a cherry-pick or rebase sequence it might be able to take advantage
4041 * of the cached_pairs in that next merge.
4042 */
4043 opt->priv->renames.merge_trees[0] = merge_base;
4044 opt->priv->renames.merge_trees[1] = side1;
4045 opt->priv->renames.merge_trees[2] = side2;
557ac035
EN
4046 trace2_region_leave("merge", "merge_start", opt->repo);
4047
231e2dd4 4048 merge_ort_nonrecursive_internal(opt, merge_base, side1, side2, result);
557ac035 4049 trace2_region_leave("merge", "incore_nonrecursive", opt->repo);
17e5574b
EN
4050}
4051
4052void merge_incore_recursive(struct merge_options *opt,
4053 struct commit_list *merge_bases,
4054 struct commit *side1,
4055 struct commit *side2,
4056 struct merge_result *result)
4057{
557ac035
EN
4058 trace2_region_enter("merge", "incore_recursive", opt->repo);
4059
8119214f
EN
4060 /* We set the ancestor label based on the merge_bases */
4061 assert(opt->ancestor == NULL);
4062
557ac035 4063 trace2_region_enter("merge", "merge_start", opt->repo);
8119214f 4064 merge_start(opt, result);
557ac035
EN
4065 trace2_region_leave("merge", "merge_start", opt->repo);
4066
8119214f 4067 merge_ort_internal(opt, merge_bases, side1, side2, result);
557ac035 4068 trace2_region_leave("merge", "incore_recursive", opt->repo);
17e5574b 4069}