[thirdparty/git.git] / merge-ort.c

/*
 * "Ostensibly Recursive's Twin" merge strategy, or "ort" for short.  Meant
 * as a drop-in replacement for the "recursive" merge strategy, allowing one
 * to replace
 *
 *   git merge [-s recursive]
 *
 * with
 *
 *   git merge -s ort
 *
 * Note: git's parser allows the space between '-s' and its argument to be
 * missing.  (Should I have backronymed "ham", "alsa", "kip", "nap, "alvo",
 * "cale", "peedy", or "ins" instead of "ort"?)
 */

#include "cache.h"
#include "merge-ort.h"

#include "diff.h"
#include "diffcore.h"
#include "strmap.h"
#include "tree.h"
#include "xdiff-interface.h"

struct merge_options_internal {
	/*
	 * paths: primary data structure in all of merge ort.
	 *
	 * The keys of paths:
	 *   * are full relative paths from the toplevel of the repository
	 *     (e.g. "drivers/firmware/raspberrypi.c").
	 *   * store all relevant paths in the repo, both directories and
	 *     files (e.g. drivers, drivers/firmware would also be included)
	 *   * these keys serve to intern all the path strings, which allows
	 *     us to do pointer comparison on directory names instead of
	 *     strcmp; we just have to be careful to use the interned strings.
	 *
	 * The values of paths:
	 *   * either a pointer to a merged_info, or a conflict_info struct
	 *   * merged_info contains all relevant information for a
	 *     non-conflicted entry.
	 *   * conflict_info contains a merged_info, plus any additional
	 *     information about a conflict such as the higher orders stages
	 *     involved and the names of the paths those came from (handy
	 *     once renames get involved).
	 *   * a path may start "conflicted" (i.e. point to a conflict_info)
	 *     and then a later step (e.g. three-way content merge) determines
	 *     it can be cleanly merged, at which point it'll be marked clean
	 *     and the algorithm will ignore any data outside the contained
	 *     merged_info for that entry
	 *   * If an entry remains conflicted, the merged_info portion of a
	 *     conflict_info will later be filled with whatever version of
	 *     the file should be placed in the working directory (e.g. an
	 *     as-merged-as-possible variation that contains conflict markers).
	 */
	struct strmap paths;

	/*
	 * conflicted: a subset of keys->values from "paths"
	 *
	 * conflicted is basically an optimization between process_entries()
	 * and record_conflicted_index_entries(); the latter could loop over
	 * ALL the entries in paths AGAIN and look for the ones that are
	 * still conflicted, but since process_entries() has to loop over
	 * all of them, it saves the ones it couldn't resolve in this strmap
	 * so that record_conflicted_index_entries() can iterate just the
	 * relevant entries.
	 */
	struct strmap conflicted;

	/*
	 * current_dir_name: temporary var used in collect_merge_info_callback()
	 *
	 * Used to set merged_info.directory_name; see documentation for that
	 * variable and the requirements placed on that field.
	 */
	const char *current_dir_name;

	/* call_depth: recursion level counter for merging merge bases */
	int call_depth;
};

struct version_info {
	struct object_id oid;
	unsigned short mode;
};

struct merged_info {
	/* if is_null, ignore result.  otherwise result has oid & mode */
	struct version_info result;
	unsigned is_null:1;

	/*
	 * clean: whether the path in question is cleanly merged.
	 *
	 * see conflict_info.merged for more details.
	 */
	unsigned clean:1;

	/*
	 * basename_offset: offset of basename of path.
	 *
	 * perf optimization to avoid recomputing offset of final '/'
	 * character in pathname (0 if no '/' in pathname).
	 */
	size_t basename_offset;

	 /*
	  * directory_name: containing directory name.
	  *
	  * Note that we assume directory_name is constructed such that
	  *    strcmp(dir1_name, dir2_name) == 0 iff dir1_name == dir2_name,
	  * i.e. string equality is equivalent to pointer equality.  For this
	  * to hold, we have to be careful setting directory_name.
	  */
	const char *directory_name;
};

struct conflict_info {
	/*
	 * merged: the version of the path that will be written to working tree
	 *
	 * WARNING: It is critical to check merged.clean and ensure it is 0
	 * before reading any conflict_info fields outside of merged.
	 * Allocated merge_info structs will always have clean set to 1.
	 * Allocated conflict_info structs will have merged.clean set to 0
	 * initially.  The merged.clean field is how we know if it is safe
	 * to access other parts of conflict_info besides merged; if a
	 * conflict_info's merged.clean is changed to 1, the rest of the
	 * algorithm is not allowed to look at anything outside of the
	 * merged member anymore.
	 */
	struct merged_info merged;

	/* oids & modes from each of the three trees for this path */
	struct version_info stages[3];

	/* pathnames for each stage; may differ due to rename detection */
	const char *pathnames[3];

	/* Whether this path is/was involved in a directory/file conflict */
	unsigned df_conflict:1;

	/*
	 * For filemask and dirmask, the ith bit corresponds to whether the
	 * ith entry is a file (filemask) or a directory (dirmask).  Thus,
	 * filemask & dirmask is always zero, and filemask | dirmask is at
	 * most 7 but can be less when a path does not appear as either a
	 * file or a directory on at least one side of history.
	 *
	 * Note that these masks are related to enum merge_side, as the ith
	 * entry corresponds to side i.
	 *
	 * These values come from a traverse_trees() call; more info may be
	 * found looking at tree-walk.h's struct traverse_info,
	 * particularly the documentation above the "fn" member (note that
	 * filemask = mask & ~dirmask from that documentation).
	 */
	unsigned filemask:3;
	unsigned dirmask:3;

	/*
	 * Optimization to track which stages match, to avoid the need to
	 * recompute it in multiple steps. Either 0 or at least 2 bits are
	 * set; if at least 2 bits are set, their corresponding stages match.
	 */
	unsigned match_mask:3;
};

static int err(struct merge_options *opt, const char *err, ...)
{
	va_list params;
	struct strbuf sb = STRBUF_INIT;

	strbuf_addstr(&sb, "error: ");
	va_start(params, err);
	strbuf_vaddf(&sb, err, params);
	va_end(params);

	error("%s", sb.buf);
	strbuf_release(&sb);

	return -1;
}

static int collect_merge_info(struct merge_options *opt,
			      struct tree *merge_base,
			      struct tree *side1,
			      struct tree *side2)
{
	die("Not yet implemented.");
}

static int detect_and_process_renames(struct merge_options *opt,
				      struct tree *merge_base,
				      struct tree *side1,
				      struct tree *side2)
{
	int clean = 1;

	/*
	 * Rename detection works by detecting file similarity.  Here we use
	 * a really easy-to-implement scheme: files are similar IFF they have
	 * the same filename.  Therefore, by this scheme, there are no renames.
	 *
	 * TODO: Actually implement a real rename detection scheme.
	 */
	return clean;
}

static void process_entries(struct merge_options *opt,
			    struct object_id *result_oid)
{
	die("Not yet implemented.");
}

void merge_switch_to_result(struct merge_options *opt,
			    struct tree *head,
			    struct merge_result *result,
			    int update_worktree_and_index,
			    int display_update_msgs)
{
	die("Not yet implemented");
	merge_finalize(opt, result);
}

void merge_finalize(struct merge_options *opt,
		    struct merge_result *result)
{
	die("Not yet implemented");
}

static void merge_start(struct merge_options *opt, struct merge_result *result)
{
	/* Sanity checks on opt */
	assert(opt->repo);

	assert(opt->branch1 && opt->branch2);

	assert(opt->detect_directory_renames >= MERGE_DIRECTORY_RENAMES_NONE &&
	       opt->detect_directory_renames <= MERGE_DIRECTORY_RENAMES_TRUE);
	assert(opt->rename_limit >= -1);
	assert(opt->rename_score >= 0 && opt->rename_score <= MAX_SCORE);
	assert(opt->show_rename_progress >= 0 && opt->show_rename_progress <= 1);

	assert(opt->xdl_opts >= 0);
	assert(opt->recursive_variant >= MERGE_VARIANT_NORMAL &&
	       opt->recursive_variant <= MERGE_VARIANT_THEIRS);

	/*
	 * detect_renames, verbosity, buffer_output, and obuf are ignored
	 * fields that were used by "recursive" rather than "ort" -- but
	 * sanity check them anyway.
	 */
	assert(opt->detect_renames >= -1 &&
	       opt->detect_renames <= DIFF_DETECT_COPY);
	assert(opt->verbosity >= 0 && opt->verbosity <= 5);
	assert(opt->buffer_output <= 2);
	assert(opt->obuf.len == 0);

	assert(opt->priv == NULL);

	/* Default to histogram diff.  Actually, just hardcode it...for now. */
	opt->xdl_opts = DIFF_WITH_ALG(opt, HISTOGRAM_DIFF);

	/* Initialization of opt->priv, our internal merge data */
	opt->priv = xcalloc(1, sizeof(*opt->priv));

	/*
	 * Although we initialize opt->priv->paths with strdup_strings=0,
	 * that's just to avoid making yet another copy of an allocated
	 * string.  Putting the entry into paths means we are taking
	 * ownership, so we will later free it.
	 *
	 * In contrast, conflicted just has a subset of keys from paths, so
	 * we don't want to free those (it'd be a duplicate free).
	 */
	strmap_init_with_options(&opt->priv->paths, NULL, 0);
	strmap_init_with_options(&opt->priv->conflicted, NULL, 0);
}

/*
 * Originally from merge_trees_internal(); heavily adapted, though.
 */
static void merge_ort_nonrecursive_internal(struct merge_options *opt,
					    struct tree *merge_base,
					    struct tree *side1,
					    struct tree *side2,
					    struct merge_result *result)
{
	struct object_id working_tree_oid;

	if (collect_merge_info(opt, merge_base, side1, side2) != 0) {
		/*
		 * TRANSLATORS: The %s arguments are: 1) tree hash of a merge
		 * base, and 2-3) the trees for the two trees we're merging.
		 */
		err(opt, _("collecting merge info failed for trees %s, %s, %s"),
		    oid_to_hex(&merge_base->object.oid),
		    oid_to_hex(&side1->object.oid),
		    oid_to_hex(&side2->object.oid));
		result->clean = -1;
		return;
	}

	result->clean = detect_and_process_renames(opt, merge_base,
						   side1, side2);
	process_entries(opt, &working_tree_oid);

	/* Set return values */
	result->tree = parse_tree_indirect(&working_tree_oid);
	/* existence of conflicted entries implies unclean */
	result->clean &= strmap_empty(&opt->priv->conflicted);
	if (!opt->priv->call_depth) {
		result->priv = opt->priv;
		opt->priv = NULL;
	}
}

void merge_incore_nonrecursive(struct merge_options *opt,
			       struct tree *merge_base,
			       struct tree *side1,
			       struct tree *side2,
			       struct merge_result *result)
{
	assert(opt->ancestor != NULL);
	merge_start(opt, result);
	merge_ort_nonrecursive_internal(opt, merge_base, side1, side2, result);
}

void merge_incore_recursive(struct merge_options *opt,
			    struct commit_list *merge_bases,
			    struct commit *side1,
			    struct commit *side2,
			    struct merge_result *result)
{
	die("Not yet implemented");
}
Commit	Line	Data
17e5574b EN	1	/*
	2	* "Ostensibly Recursive's Twin" merge strategy, or "ort" for short. Meant
	3	* as a drop-in replacement for the "recursive" merge strategy, allowing one
	4	* to replace
	5	*
	6	* git merge [-s recursive]
	7	*
	8	* with
	9	*
	10	* git merge -s ort
	11	*
	12	* Note: git's parser allows the space between '-s' and its argument to be
	13	* missing. (Should I have backronymed "ham", "alsa", "kip", "nap, "alvo",
	14	* "cale", "peedy", or "ins" instead of "ort"?)
	15	*/
	16
	17	#include "cache.h"
	18	#include "merge-ort.h"
	19
e4171b1b EN	20	#include "diff.h"
e4171b1b EN	21	#include "diffcore.h"
5b59c3db	22	#include "strmap.h"
231e2dd4	23	#include "tree.h"
c8017176	24	#include "xdiff-interface.h"
5b59c3db EN	25
	26	struct merge_options_internal {
	27	/*
	28	* paths: primary data structure in all of merge ort.
	29	*
	30	* The keys of paths:
	31	* * are full relative paths from the toplevel of the repository
	32	* (e.g. "drivers/firmware/raspberrypi.c").
	33	* * store all relevant paths in the repo, both directories and
	34	* files (e.g. drivers, drivers/firmware would also be included)
	35	* * these keys serve to intern all the path strings, which allows
	36	* us to do pointer comparison on directory names instead of
	37	* strcmp; we just have to be careful to use the interned strings.
	38	*
	39	* The values of paths:
	40	* * either a pointer to a merged_info, or a conflict_info struct
	41	* * merged_info contains all relevant information for a
	42	* non-conflicted entry.
	43	* * conflict_info contains a merged_info, plus any additional
	44	* information about a conflict such as the higher orders stages
	45	* involved and the names of the paths those came from (handy
	46	* once renames get involved).
	47	* * a path may start "conflicted" (i.e. point to a conflict_info)
	48	* and then a later step (e.g. three-way content merge) determines
	49	* it can be cleanly merged, at which point it'll be marked clean
	50	* and the algorithm will ignore any data outside the contained
	51	* merged_info for that entry
	52	* * If an entry remains conflicted, the merged_info portion of a
	53	* conflict_info will later be filled with whatever version of
	54	* the file should be placed in the working directory (e.g. an
	55	* as-merged-as-possible variation that contains conflict markers).
	56	*/
	57	struct strmap paths;
	58
	59	/*
	60	* conflicted: a subset of keys->values from "paths"
	61	*
	62	* conflicted is basically an optimization between process_entries()
	63	* and record_conflicted_index_entries(); the latter could loop over
	64	* ALL the entries in paths AGAIN and look for the ones that are
	65	* still conflicted, but since process_entries() has to loop over
	66	* all of them, it saves the ones it couldn't resolve in this strmap
	67	* so that record_conflicted_index_entries() can iterate just the
	68	* relevant entries.
	69	*/
	70	struct strmap conflicted;
	71
	72	/*
	73	* current_dir_name: temporary var used in collect_merge_info_callback()
	74	*
	75	* Used to set merged_info.directory_name; see documentation for that
	76	* variable and the requirements placed on that field.
	77	*/
	78	const char *current_dir_name;
	79
	80	/* call_depth: recursion level counter for merging merge bases */
	81	int call_depth;
	82	};
	83
	84	struct version_info {
	85	struct object_id oid;
	86	unsigned short mode;
	87	};
	88
89	struct merged_info {
90	/* if is_null, ignore result. otherwise result has oid & mode */
91	struct version_info result;
92	unsigned is_null:1;
93
94	/*
95	* clean: whether the path in question is cleanly merged.
96	*
97	* see conflict_info.merged for more details.
98	*/
99	unsigned clean:1;
100
101	/*
102	* basename_offset: offset of basename of path.
103	*
104	* perf optimization to avoid recomputing offset of final '/'
105	* character in pathname (0 if no '/' in pathname).
106	*/
107	size_t basename_offset;
108
109	/*
110	* directory_name: containing directory name.
111	*
112	* Note that we assume directory_name is constructed such that
113	* strcmp(dir1_name, dir2_name) == 0 iff dir1_name == dir2_name,
114	* i.e. string equality is equivalent to pointer equality. For this
115	* to hold, we have to be careful setting directory_name.
116	*/
117	const char *directory_name;
118	};
119
120	struct conflict_info {
121	/*
122	* merged: the version of the path that will be written to working tree
123	*
124	* WARNING: It is critical to check merged.clean and ensure it is 0
125	* before reading any conflict_info fields outside of merged.
126	* Allocated merge_info structs will always have clean set to 1.
127	* Allocated conflict_info structs will have merged.clean set to 0
128	* initially. The merged.clean field is how we know if it is safe
129	* to access other parts of conflict_info besides merged; if a
130	* conflict_info's merged.clean is changed to 1, the rest of the
131	* algorithm is not allowed to look at anything outside of the
132	* merged member anymore.
133	*/
134	struct merged_info merged;
135
136	/* oids & modes from each of the three trees for this path */
137	struct version_info stages[3];
138
139	/* pathnames for each stage; may differ due to rename detection */
140	const char *pathnames[3];
141
142	/* Whether this path is/was involved in a directory/file conflict */
143	unsigned df_conflict:1;
144
145	/*
146	* For filemask and dirmask, the ith bit corresponds to whether the
147	* ith entry is a file (filemask) or a directory (dirmask). Thus,
148	* filemask & dirmask is always zero, and filemask \| dirmask is at
149	* most 7 but can be less when a path does not appear as either a
150	* file or a directory on at least one side of history.
151	*
152	* Note that these masks are related to enum merge_side, as the ith
153	* entry corresponds to side i.
154	*
155	* These values come from a traverse_trees() call; more info may be
156	* found looking at tree-walk.h's struct traverse_info,
157	* particularly the documentation above the "fn" member (note that
158	* filemask = mask & ~dirmask from that documentation).
159	*/
160	unsigned filemask:3;
161	unsigned dirmask:3;
162
163	/*
164	* Optimization to track which stages match, to avoid the need to
165	* recompute it in multiple steps. Either 0 or at least 2 bits are
166	* set; if at least 2 bits are set, their corresponding stages match.
167	*/
168	unsigned match_mask:3;
169	};
170
0c0d705b EN	171	static int err(struct merge_options opt, const char err, ...)
	172	{
	173	va_list params;
	174	struct strbuf sb = STRBUF_INIT;
	175
	176	strbuf_addstr(&sb, "error: ");
	177	va_start(params, err);
	178	strbuf_vaddf(&sb, err, params);
	179	va_end(params);
	180
	181	error("%s", sb.buf);
	182	strbuf_release(&sb);
	183
	184	return -1;
	185	}
	186
231e2dd4 EN	187	static int collect_merge_info(struct merge_options *opt,
	188	struct tree *merge_base,
	189	struct tree *side1,
	190	struct tree *side2)
	191	{
231e2dd4 EN	192	die("Not yet implemented.");
	193	}
	194
	195	static int detect_and_process_renames(struct merge_options *opt,
	196	struct tree *merge_base,
	197	struct tree *side1,
	198	struct tree *side2)
	199	{
	200	int clean = 1;
	201
	202	/*
	203	* Rename detection works by detecting file similarity. Here we use
	204	* a really easy-to-implement scheme: files are similar IFF they have
	205	* the same filename. Therefore, by this scheme, there are no renames.
	206	*
	207	* TODO: Actually implement a real rename detection scheme.
	208	*/
	209	return clean;
	210	}
	211
	212	static void process_entries(struct merge_options *opt,
	213	struct object_id *result_oid)
	214	{
	215	die("Not yet implemented.");
	216	}
	217
17e5574b EN	218	void merge_switch_to_result(struct merge_options *opt,
	219	struct tree *head,
	220	struct merge_result *result,
	221	int update_worktree_and_index,
	222	int display_update_msgs)
	223	{
	224	die("Not yet implemented");
	225	merge_finalize(opt, result);
	226	}
	227
	228	void merge_finalize(struct merge_options *opt,
	229	struct merge_result *result)
	230	{
	231	die("Not yet implemented");
	232	}
	233
231e2dd4 EN	234	static void merge_start(struct merge_options opt, struct merge_result result)
231e2dd4 EN	235	{
e4171b1b EN	236	/* Sanity checks on opt */
	237	assert(opt->repo);
	238
	239	assert(opt->branch1 && opt->branch2);
	240
	241	assert(opt->detect_directory_renames >= MERGE_DIRECTORY_RENAMES_NONE &&
	242	opt->detect_directory_renames <= MERGE_DIRECTORY_RENAMES_TRUE);
	243	assert(opt->rename_limit >= -1);
	244	assert(opt->rename_score >= 0 && opt->rename_score <= MAX_SCORE);
	245	assert(opt->show_rename_progress >= 0 && opt->show_rename_progress <= 1);
	246
	247	assert(opt->xdl_opts >= 0);
	248	assert(opt->recursive_variant >= MERGE_VARIANT_NORMAL &&
	249	opt->recursive_variant <= MERGE_VARIANT_THEIRS);
	250
	251	/*
	252	* detect_renames, verbosity, buffer_output, and obuf are ignored
	253	* fields that were used by "recursive" rather than "ort" -- but
	254	* sanity check them anyway.
	255	*/
	256	assert(opt->detect_renames >= -1 &&
	257	opt->detect_renames <= DIFF_DETECT_COPY);
	258	assert(opt->verbosity >= 0 && opt->verbosity <= 5);
	259	assert(opt->buffer_output <= 2);
	260	assert(opt->obuf.len == 0);
	261
	262	assert(opt->priv == NULL);
	263
c8017176 EN	264	/* Default to histogram diff. Actually, just hardcode it...for now. */
	265	opt->xdl_opts = DIFF_WITH_ALG(opt, HISTOGRAM_DIFF);
	266
e4171b1b EN	267	/* Initialization of opt->priv, our internal merge data */
	268	opt->priv = xcalloc(1, sizeof(*opt->priv));
	269
	270	/*
	271	* Although we initialize opt->priv->paths with strdup_strings=0,
	272	* that's just to avoid making yet another copy of an allocated
	273	* string. Putting the entry into paths means we are taking
	274	* ownership, so we will later free it.
	275	*
	276	* In contrast, conflicted just has a subset of keys from paths, so
	277	* we don't want to free those (it'd be a duplicate free).
	278	*/
	279	strmap_init_with_options(&opt->priv->paths, NULL, 0);
	280	strmap_init_with_options(&opt->priv->conflicted, NULL, 0);
231e2dd4 EN	281	}
	282
	283	/*
	284	* Originally from merge_trees_internal(); heavily adapted, though.
	285	*/
	286	static void merge_ort_nonrecursive_internal(struct merge_options *opt,
	287	struct tree *merge_base,
	288	struct tree *side1,
	289	struct tree *side2,
	290	struct merge_result *result)
	291	{
	292	struct object_id working_tree_oid;
	293
0c0d705b EN	294	if (collect_merge_info(opt, merge_base, side1, side2) != 0) {
	295	/*
	296	* TRANSLATORS: The %s arguments are: 1) tree hash of a merge
	297	* base, and 2-3) the trees for the two trees we're merging.
	298	*/
	299	err(opt, _("collecting merge info failed for trees %s, %s, %s"),
	300	oid_to_hex(&merge_base->object.oid),
	301	oid_to_hex(&side1->object.oid),
	302	oid_to_hex(&side2->object.oid));
	303	result->clean = -1;
	304	return;
	305	}
	306
231e2dd4 EN	307	result->clean = detect_and_process_renames(opt, merge_base,
	308	side1, side2);
	309	process_entries(opt, &working_tree_oid);
	310
	311	/* Set return values */
	312	result->tree = parse_tree_indirect(&working_tree_oid);
	313	/* existence of conflicted entries implies unclean */
	314	result->clean &= strmap_empty(&opt->priv->conflicted);
	315	if (!opt->priv->call_depth) {
	316	result->priv = opt->priv;
	317	opt->priv = NULL;
	318	}
	319	}
	320
17e5574b EN	321	void merge_incore_nonrecursive(struct merge_options *opt,
	322	struct tree *merge_base,
	323	struct tree *side1,
	324	struct tree *side2,
	325	struct merge_result *result)
	326	{
231e2dd4 EN	327	assert(opt->ancestor != NULL);
	328	merge_start(opt, result);
	329	merge_ort_nonrecursive_internal(opt, merge_base, side1, side2, result);
17e5574b EN	330	}
	331
	332	void merge_incore_recursive(struct merge_options *opt,
	333	struct commit_list *merge_bases,
	334	struct commit *side1,
	335	struct commit *side2,
	336	struct merge_result *result)
	337	{
	338	die("Not yet implemented");
	339	}