[thirdparty/git.git] / bisect.c

#include "cache.h"
#include "commit.h"
#include "diff.h"
#include "revision.h"
#include "bisect.h"

static unsigned char (*skipped_sha1)[20];
static int skipped_sha1_nr;

/* bits #0-15 in revision.h */

#define COUNTED		(1u<<16)

/*
 * This is a truly stupid algorithm, but it's only
 * used for bisection, and we just don't care enough.
 *
 * We care just barely enough to avoid recursing for
 * non-merge entries.
 */
static int count_distance(struct commit_list *entry)
{
	int nr = 0;

	while (entry) {
		struct commit *commit = entry->item;
		struct commit_list *p;

		if (commit->object.flags & (UNINTERESTING | COUNTED))
			break;
		if (!(commit->object.flags & TREESAME))
			nr++;
		commit->object.flags |= COUNTED;
		p = commit->parents;
		entry = p;
		if (p) {
			p = p->next;
			while (p) {
				nr += count_distance(p);
				p = p->next;
			}
		}
	}

	return nr;
}

static void clear_distance(struct commit_list *list)
{
	while (list) {
		struct commit *commit = list->item;
		commit->object.flags &= ~COUNTED;
		list = list->next;
	}
}

#define DEBUG_BISECT 0

static inline int weight(struct commit_list *elem)
{
	return *((int*)(elem->item->util));
}

static inline void weight_set(struct commit_list *elem, int weight)
{
	*((int*)(elem->item->util)) = weight;
}

static int count_interesting_parents(struct commit *commit)
{
	struct commit_list *p;
	int count;

	for (count = 0, p = commit->parents; p; p = p->next) {
		if (p->item->object.flags & UNINTERESTING)
			continue;
		count++;
	}
	return count;
}

static inline int halfway(struct commit_list *p, int nr)
{
	/*
	 * Don't short-cut something we are not going to return!
	 */
	if (p->item->object.flags & TREESAME)
		return 0;
	if (DEBUG_BISECT)
		return 0;
	/*
	 * 2 and 3 are halfway of 5.
	 * 3 is halfway of 6 but 2 and 4 are not.
	 */
	switch (2 * weight(p) - nr) {
	case -1: case 0: case 1:
		return 1;
	default:
		return 0;
	}
}

#if !DEBUG_BISECT
#define show_list(a,b,c,d) do { ; } while (0)
#else
static void show_list(const char *debug, int counted, int nr,
		      struct commit_list *list)
{
	struct commit_list *p;

	fprintf(stderr, "%s (%d/%d)\n", debug, counted, nr);

	for (p = list; p; p = p->next) {
		struct commit_list *pp;
		struct commit *commit = p->item;
		unsigned flags = commit->object.flags;
		enum object_type type;
		unsigned long size;
		char *buf = read_sha1_file(commit->object.sha1, &type, &size);
		char *ep, *sp;

		fprintf(stderr, "%c%c%c ",
			(flags & TREESAME) ? ' ' : 'T',
			(flags & UNINTERESTING) ? 'U' : ' ',
			(flags & COUNTED) ? 'C' : ' ');
		if (commit->util)
			fprintf(stderr, "%3d", weight(p));
		else
			fprintf(stderr, "---");
		fprintf(stderr, " %.*s", 8, sha1_to_hex(commit->object.sha1));
		for (pp = commit->parents; pp; pp = pp->next)
			fprintf(stderr, " %.*s", 8,
				sha1_to_hex(pp->item->object.sha1));

		sp = strstr(buf, "\n\n");
		if (sp) {
			sp += 2;
			for (ep = sp; *ep && *ep != '\n'; ep++)
				;
			fprintf(stderr, " %.*s", (int)(ep - sp), sp);
		}
		fprintf(stderr, "\n");
	}
}
#endif /* DEBUG_BISECT */

static struct commit_list *best_bisection(struct commit_list *list, int nr)
{
	struct commit_list *p, *best;
	int best_distance = -1;

	best = list;
	for (p = list; p; p = p->next) {
		int distance;
		unsigned flags = p->item->object.flags;

		if (flags & TREESAME)
			continue;
		distance = weight(p);
		if (nr - distance < distance)
			distance = nr - distance;
		if (distance > best_distance) {
			best = p;
			best_distance = distance;
		}
	}

	return best;
}

struct commit_dist {
	struct commit *commit;
	int distance;
};

static int compare_commit_dist(const void *a_, const void *b_)
{
	struct commit_dist *a, *b;

	a = (struct commit_dist *)a_;
	b = (struct commit_dist *)b_;
	if (a->distance != b->distance)
		return b->distance - a->distance; /* desc sort */
	return hashcmp(a->commit->object.sha1, b->commit->object.sha1);
}

static struct commit_list *best_bisection_sorted(struct commit_list *list, int nr)
{
	struct commit_list *p;
	struct commit_dist *array = xcalloc(nr, sizeof(*array));
	int cnt, i;

	for (p = list, cnt = 0; p; p = p->next) {
		int distance;
		unsigned flags = p->item->object.flags;

		if (flags & TREESAME)
			continue;
		distance = weight(p);
		if (nr - distance < distance)
			distance = nr - distance;
		array[cnt].commit = p->item;
		array[cnt].distance = distance;
		cnt++;
	}
	qsort(array, cnt, sizeof(*array), compare_commit_dist);
	for (p = list, i = 0; i < cnt; i++) {
		struct name_decoration *r = xmalloc(sizeof(*r) + 100);
		struct object *obj = &(array[i].commit->object);

		sprintf(r->name, "dist=%d", array[i].distance);
		r->next = add_decoration(&name_decoration, obj, r);
		p->item = array[i].commit;
		p = p->next;
	}
	if (p)
		p->next = NULL;
	free(array);
	return list;
}

/*
 * zero or positive weight is the number of interesting commits it can
 * reach, including itself.  Especially, weight = 0 means it does not
 * reach any tree-changing commits (e.g. just above uninteresting one
 * but traversal is with pathspec).
 *
 * weight = -1 means it has one parent and its distance is yet to
 * be computed.
 *
 * weight = -2 means it has more than one parent and its distance is
 * unknown.  After running count_distance() first, they will get zero
 * or positive distance.
 */
static struct commit_list *do_find_bisection(struct commit_list *list,
					     int nr, int *weights,
					     int find_all)
{
	int n, counted;
	struct commit_list *p;

	counted = 0;

	for (n = 0, p = list; p; p = p->next) {
		struct commit *commit = p->item;
		unsigned flags = commit->object.flags;

		p->item->util = &weights[n++];
		switch (count_interesting_parents(commit)) {
		case 0:
			if (!(flags & TREESAME)) {
				weight_set(p, 1);
				counted++;
				show_list("bisection 2 count one",
					  counted, nr, list);
			}
			/*
			 * otherwise, it is known not to reach any
			 * tree-changing commit and gets weight 0.
			 */
			break;
		case 1:
			weight_set(p, -1);
			break;
		default:
			weight_set(p, -2);
			break;
		}
	}

	show_list("bisection 2 initialize", counted, nr, list);

	/*
	 * If you have only one parent in the resulting set
	 * then you can reach one commit more than that parent
	 * can reach.  So we do not have to run the expensive
	 * count_distance() for single strand of pearls.
	 *
	 * However, if you have more than one parents, you cannot
	 * just add their distance and one for yourself, since
	 * they usually reach the same ancestor and you would
	 * end up counting them twice that way.
	 *
	 * So we will first count distance of merges the usual
	 * way, and then fill the blanks using cheaper algorithm.
	 */
	for (p = list; p; p = p->next) {
		if (p->item->object.flags & UNINTERESTING)
			continue;
		if (weight(p) != -2)
			continue;
		weight_set(p, count_distance(p));
		clear_distance(list);

		/* Does it happen to be at exactly half-way? */
		if (!find_all && halfway(p, nr))
			return p;
		counted++;
	}

	show_list("bisection 2 count_distance", counted, nr, list);

	while (counted < nr) {
		for (p = list; p; p = p->next) {
			struct commit_list *q;
			unsigned flags = p->item->object.flags;

			if (0 <= weight(p))
				continue;
			for (q = p->item->parents; q; q = q->next) {
				if (q->item->object.flags & UNINTERESTING)
					continue;
				if (0 <= weight(q))
					break;
			}
			if (!q)
				continue;

			/*
			 * weight for p is unknown but q is known.
			 * add one for p itself if p is to be counted,
			 * otherwise inherit it from q directly.
			 */
			if (!(flags & TREESAME)) {
				weight_set(p, weight(q)+1);
				counted++;
				show_list("bisection 2 count one",
					  counted, nr, list);
			}
			else
				weight_set(p, weight(q));

			/* Does it happen to be at exactly half-way? */
			if (!find_all && halfway(p, nr))
				return p;
		}
	}

	show_list("bisection 2 counted all", counted, nr, list);

	if (!find_all)
		return best_bisection(list, nr);
	else
		return best_bisection_sorted(list, nr);
}

struct commit_list *find_bisection(struct commit_list *list,
					  int *reaches, int *all,
					  int find_all)
{
	int nr, on_list;
	struct commit_list *p, *best, *next, *last;
	int *weights;

	show_list("bisection 2 entry", 0, 0, list);

	/*
	 * Count the number of total and tree-changing items on the
	 * list, while reversing the list.
	 */
	for (nr = on_list = 0, last = NULL, p = list;
	     p;
	     p = next) {
		unsigned flags = p->item->object.flags;

		next = p->next;
		if (flags & UNINTERESTING)
			continue;
		p->next = last;
		last = p;
		if (!(flags & TREESAME))
			nr++;
		on_list++;
	}
	list = last;
	show_list("bisection 2 sorted", 0, nr, list);

	*all = nr;
	weights = xcalloc(on_list, sizeof(*weights));

	/* Do the real work of finding bisection commit. */
	best = do_find_bisection(list, nr, weights, find_all);
	if (best) {
		if (!find_all)
			best->next = NULL;
		*reaches = weight(best);
	}
	free(weights);
	return best;
}

static int skipcmp(const void *a, const void *b)
{
	return hashcmp(a, b);
}

static void prepare_skipped(void)
{
	qsort(skipped_sha1, skipped_sha1_nr, sizeof(*skipped_sha1), skipcmp);
}

static int lookup_skipped(unsigned char *sha1)
{
	int lo, hi;
	lo = 0;
	hi = skipped_sha1_nr;
	while (lo < hi) {
		int mi = (lo + hi) / 2;
		int cmp = hashcmp(sha1, skipped_sha1[mi]);
		if (!cmp)
			return mi;
		if (cmp < 0)
			hi = mi;
		else
			lo = mi + 1;
	}
	return -lo - 1;
}

struct commit_list *filter_skipped(struct commit_list *list,
				   struct commit_list **tried,
				   int show_all)
{
	struct commit_list *filtered = NULL, **f = &filtered;

	*tried = NULL;

	if (!skipped_sha1_nr)
		return list;

	prepare_skipped();

	while (list) {
		struct commit_list *next = list->next;
		list->next = NULL;
		if (0 <= lookup_skipped(list->item->object.sha1)) {
			/* Move current to tried list */
			*tried = list;
			tried = &list->next;
		} else {
			if (!show_all)
				return list;
			/* Move current to filtered list */
			*f = list;
			f = &list->next;
		}
		list = next;
	}

	return filtered;
}
Commit	Line	Data
a2ad79ce CC	1	#include "cache.h"
	2	#include "commit.h"
	3	#include "diff.h"
	4	#include "revision.h"
	5	#include "bisect.h"
	6
95188648 CC	7	static unsigned char (*skipped_sha1)[20];
	8	static int skipped_sha1_nr;
	9
a2ad79ce CC	10	/* bits #0-15 in revision.h */
	11
	12	#define COUNTED (1u<<16)
	13
	14	/*
	15	* This is a truly stupid algorithm, but it's only
	16	* used for bisection, and we just don't care enough.
	17	*
	18	* We care just barely enough to avoid recursing for
	19	* non-merge entries.
	20	*/
	21	static int count_distance(struct commit_list *entry)
	22	{
	23	int nr = 0;
	24
	25	while (entry) {
	26	struct commit *commit = entry->item;
	27	struct commit_list *p;
	28
	29	if (commit->object.flags & (UNINTERESTING \| COUNTED))
	30	break;
	31	if (!(commit->object.flags & TREESAME))
	32	nr++;
	33	commit->object.flags \|= COUNTED;
	34	p = commit->parents;
	35	entry = p;
	36	if (p) {
	37	p = p->next;
	38	while (p) {
	39	nr += count_distance(p);
	40	p = p->next;
	41	}
	42	}
	43	}
	44
	45	return nr;
	46	}
	47
	48	static void clear_distance(struct commit_list *list)
	49	{
	50	while (list) {
	51	struct commit *commit = list->item;
	52	commit->object.flags &= ~COUNTED;
	53	list = list->next;
	54	}
	55	}
	56
	57	#define DEBUG_BISECT 0
	58
	59	static inline int weight(struct commit_list *elem)
	60	{
	61	return ((int)(elem->item->util));
	62	}
	63
	64	static inline void weight_set(struct commit_list *elem, int weight)
	65	{
	66	((int)(elem->item->util)) = weight;
	67	}
	68
	69	static int count_interesting_parents(struct commit *commit)
	70	{
	71	struct commit_list *p;
	72	int count;
	73
74	for (count = 0, p = commit->parents; p; p = p->next) {
75	if (p->item->object.flags & UNINTERESTING)
76	continue;
77	count++;
78	}
79	return count;
80	}
81
82	static inline int halfway(struct commit_list *p, int nr)
83	{
84	/*
85	* Don't short-cut something we are not going to return!
86	*/
87	if (p->item->object.flags & TREESAME)
88	return 0;
89	if (DEBUG_BISECT)
90	return 0;
91	/*
92	* 2 and 3 are halfway of 5.
93	* 3 is halfway of 6 but 2 and 4 are not.
94	*/
95	switch (2 * weight(p) - nr) {
96	case -1: case 0: case 1:
97	return 1;
98	default:
99	return 0;
100	}
101	}
102
103	#if !DEBUG_BISECT
104	#define show_list(a,b,c,d) do { ; } while (0)
105	#else
106	static void show_list(const char *debug, int counted, int nr,
107	struct commit_list *list)
108	{
109	struct commit_list *p;
110
111	fprintf(stderr, "%s (%d/%d)\n", debug, counted, nr);
112
113	for (p = list; p; p = p->next) {
114	struct commit_list *pp;
115	struct commit *commit = p->item;
116	unsigned flags = commit->object.flags;
117	enum object_type type;
118	unsigned long size;
119	char *buf = read_sha1_file(commit->object.sha1, &type, &size);
120	char ep, sp;
121
122	fprintf(stderr, "%c%c%c ",
123	(flags & TREESAME) ? ' ' : 'T',
124	(flags & UNINTERESTING) ? 'U' : ' ',
125	(flags & COUNTED) ? 'C' : ' ');
126	if (commit->util)
127	fprintf(stderr, "%3d", weight(p));
128	else
129	fprintf(stderr, "---");
130	fprintf(stderr, " %.*s", 8, sha1_to_hex(commit->object.sha1));
131	for (pp = commit->parents; pp; pp = pp->next)
132	fprintf(stderr, " %.*s", 8,
133	sha1_to_hex(pp->item->object.sha1));
134
135	sp = strstr(buf, "\n\n");
136	if (sp) {
137	sp += 2;
138	for (ep = sp; ep && ep != '\n'; ep++)
139	;
140	fprintf(stderr, " %.*s", (int)(ep - sp), sp);
141	}
142	fprintf(stderr, "\n");
143	}
144	}
145	#endif /* DEBUG_BISECT */
146
147	static struct commit_list best_bisection(struct commit_list list, int nr)
148	{
149	struct commit_list p, best;
150	int best_distance = -1;
151
152	best = list;
153	for (p = list; p; p = p->next) {
154	int distance;
155	unsigned flags = p->item->object.flags;
156
157	if (flags & TREESAME)
158	continue;
159	distance = weight(p);
160	if (nr - distance < distance)
161	distance = nr - distance;
162	if (distance > best_distance) {
163	best = p;
164	best_distance = distance;
165	}
166	}
167
168	return best;
169	}
170
171	struct commit_dist {
172	struct commit *commit;
173	int distance;
174	};
175
176	static int compare_commit_dist(const void a_, const void b_)
177	{
178	struct commit_dist a, b;
179
180	a = (struct commit_dist *)a_;
181	b = (struct commit_dist *)b_;
182	if (a->distance != b->distance)
183	return b->distance - a->distance; /* desc sort */
184	return hashcmp(a->commit->object.sha1, b->commit->object.sha1);
185	}
186
187	static struct commit_list best_bisection_sorted(struct commit_list list, int nr)
188	{
189	struct commit_list *p;
190	struct commit_dist array = xcalloc(nr, sizeof(array));
191	int cnt, i;
192
193	for (p = list, cnt = 0; p; p = p->next) {
194	int distance;
195	unsigned flags = p->item->object.flags;
196
197	if (flags & TREESAME)
198	continue;
199	distance = weight(p);
200	if (nr - distance < distance)
201	distance = nr - distance;
202	array[cnt].commit = p->item;
203	array[cnt].distance = distance;
204	cnt++;
205	}
206	qsort(array, cnt, sizeof(*array), compare_commit_dist);
207	for (p = list, i = 0; i < cnt; i++) {
208	struct name_decoration r = xmalloc(sizeof(r) + 100);
209	struct object *obj = &(array[i].commit->object);
210
211	sprintf(r->name, "dist=%d", array[i].distance);
212	r->next = add_decoration(&name_decoration, obj, r);
213	p->item = array[i].commit;
214	p = p->next;
215	}
216	if (p)
217	p->next = NULL;
218	free(array);
219	return list;
220	}
221
222	/*
223	* zero or positive weight is the number of interesting commits it can
224	* reach, including itself. Especially, weight = 0 means it does not
225	* reach any tree-changing commits (e.g. just above uninteresting one
226	* but traversal is with pathspec).
227	*
228	* weight = -1 means it has one parent and its distance is yet to
229	* be computed.
230	*
231	* weight = -2 means it has more than one parent and its distance is
232	* unknown. After running count_distance() first, they will get zero
233	* or positive distance.
234	*/
235	static struct commit_list do_find_bisection(struct commit_list list,
236	int nr, int *weights,
237	int find_all)
238	{
239	int n, counted;
240	struct commit_list *p;
241
242	counted = 0;
243
244	for (n = 0, p = list; p; p = p->next) {
245	struct commit *commit = p->item;
246	unsigned flags = commit->object.flags;
247
248	p->item->util = &weights[n++];
249	switch (count_interesting_parents(commit)) {
250	case 0:
251	if (!(flags & TREESAME)) {
252	weight_set(p, 1);
253	counted++;
254	show_list("bisection 2 count one",
255	counted, nr, list);
256	}
257	/*
258	* otherwise, it is known not to reach any
259	* tree-changing commit and gets weight 0.
260	*/
261	break;
262	case 1:
263	weight_set(p, -1);
264	break;
265	default:
266	weight_set(p, -2);
267	break;
268	}
269	}
270
271	show_list("bisection 2 initialize", counted, nr, list);
272
273	/*
274	* If you have only one parent in the resulting set
275	* then you can reach one commit more than that parent
276	* can reach. So we do not have to run the expensive
277	* count_distance() for single strand of pearls.
278	*
279	* However, if you have more than one parents, you cannot
280	* just add their distance and one for yourself, since
281	* they usually reach the same ancestor and you would
282	* end up counting them twice that way.
283	*
284	* So we will first count distance of merges the usual
285	* way, and then fill the blanks using cheaper algorithm.
286	*/
287	for (p = list; p; p = p->next) {
288	if (p->item->object.flags & UNINTERESTING)
289	continue;
290	if (weight(p) != -2)
291	continue;
292	weight_set(p, count_distance(p));
293	clear_distance(list);
294
295	/* Does it happen to be at exactly half-way? */
296	if (!find_all && halfway(p, nr))
297	return p;
298	counted++;
299	}
300
301	show_list("bisection 2 count_distance", counted, nr, list);
302
303	while (counted < nr) {
304	for (p = list; p; p = p->next) {
305	struct commit_list *q;
306	unsigned flags = p->item->object.flags;
307
308	if (0 <= weight(p))
309	continue;
310	for (q = p->item->parents; q; q = q->next) {
311	if (q->item->object.flags & UNINTERESTING)
312	continue;
313	if (0 <= weight(q))
314	break;
315	}
316	if (!q)
317	continue;
318
319	/*
320	* weight for p is unknown but q is known.
321	* add one for p itself if p is to be counted,
322	* otherwise inherit it from q directly.
323	*/
324	if (!(flags & TREESAME)) {
325	weight_set(p, weight(q)+1);
326	counted++;
327	show_list("bisection 2 count one",
328	counted, nr, list);
329	}
330	else
331	weight_set(p, weight(q));
332
333	/* Does it happen to be at exactly half-way? */
334	if (!find_all && halfway(p, nr))
335	return p;
336	}
337	}
338
339	show_list("bisection 2 counted all", counted, nr, list);
340
341	if (!find_all)
342	return best_bisection(list, nr);
343	else
344	return best_bisection_sorted(list, nr);
345	}
346
347	struct commit_list find_bisection(struct commit_list list,
348	int reaches, int all,
349	int find_all)
350	{
351	int nr, on_list;
352	struct commit_list p, best, next, last;
353	int *weights;
354
355	show_list("bisection 2 entry", 0, 0, list);
356
357	/*
358	* Count the number of total and tree-changing items on the
359	* list, while reversing the list.
360	*/
361	for (nr = on_list = 0, last = NULL, p = list;
362	p;
363	p = next) {
364	unsigned flags = p->item->object.flags;
365
366	next = p->next;
367	if (flags & UNINTERESTING)
368	continue;
369	p->next = last;
370	last = p;
371	if (!(flags & TREESAME))
372	nr++;
373	on_list++;
374	}
375	list = last;
376	show_list("bisection 2 sorted", 0, nr, list);
377
378	*all = nr;
379	weights = xcalloc(on_list, sizeof(*weights));
380
381	/* Do the real work of finding bisection commit. */
382	best = do_find_bisection(list, nr, weights, find_all);
383	if (best) {
384	if (!find_all)
385	best->next = NULL;
386	*reaches = weight(best);
387	}
388	free(weights);
389	return best;
390	}
391
95188648 CC	392	static int skipcmp(const void a, const void b)
	393	{
	394	return hashcmp(a, b);
	395	}
	396
	397	static void prepare_skipped(void)
	398	{
	399	qsort(skipped_sha1, skipped_sha1_nr, sizeof(*skipped_sha1), skipcmp);
	400	}
	401
	402	static int lookup_skipped(unsigned char *sha1)
	403	{
	404	int lo, hi;
	405	lo = 0;
	406	hi = skipped_sha1_nr;
	407	while (lo < hi) {
	408	int mi = (lo + hi) / 2;
	409	int cmp = hashcmp(sha1, skipped_sha1[mi]);
	410	if (!cmp)
	411	return mi;
	412	if (cmp < 0)
	413	hi = mi;
	414	else
	415	lo = mi + 1;
	416	}
	417	return -lo - 1;
	418	}
	419
	420	struct commit_list filter_skipped(struct commit_list list,
	421	struct commit_list **tried,
	422	int show_all)
	423	{
	424	struct commit_list filtered = NULL, *f = &filtered;
	425
	426	*tried = NULL;
	427
	428	if (!skipped_sha1_nr)
	429	return list;
	430
	431	prepare_skipped();
	432
	433	while (list) {
	434	struct commit_list *next = list->next;
	435	list->next = NULL;
	436	if (0 <= lookup_skipped(list->item->object.sha1)) {
	437	/* Move current to tried list */
	438	*tried = list;
	439	tried = &list->next;
	440	} else {
	441	if (!show_all)
	442	return list;
	443	/* Move current to filtered list */
	444	*f = list;
	445	f = &list->next;
	446	}
	447	list = next;
	448	}
	449
	450	return filtered;
	451	}