]> git.ipfire.org Git - thirdparty/git.git/blob - diff.c
diff.c: remove left-over scoring debug message
[thirdparty/git.git] / diff.c
1 /*
2 * Copyright (C) 2005 Junio C Hamano
3 */
4 #include <sys/types.h>
5 #include <sys/wait.h>
6 #include <signal.h>
7 #include <limits.h>
8 #include "cache.h"
9 #include "diff.h"
10 #include "delta.h"
11
12 static const char *diff_opts = "-pu";
13 static unsigned char null_sha1[20] = { 0, };
14
15 static const char *external_diff(void)
16 {
17 static const char *external_diff_cmd = NULL;
18 static int done_preparing = 0;
19
20 if (done_preparing)
21 return external_diff_cmd;
22
23 /*
24 * Default values above are meant to match the
25 * Linux kernel development style. Examples of
26 * alternative styles you can specify via environment
27 * variables are:
28 *
29 * GIT_DIFF_OPTS="-c";
30 */
31 if (gitenv("GIT_EXTERNAL_DIFF"))
32 external_diff_cmd = gitenv("GIT_EXTERNAL_DIFF");
33
34 /* In case external diff fails... */
35 diff_opts = gitenv("GIT_DIFF_OPTS") ? : diff_opts;
36
37 done_preparing = 1;
38 return external_diff_cmd;
39 }
40
41 /* Help to copy the thing properly quoted for the shell safety.
42 * any single quote is replaced with '\'', and the caller is
43 * expected to enclose the result within a single quote pair.
44 *
45 * E.g.
46 * original sq_expand result
47 * name ==> name ==> 'name'
48 * a b ==> a b ==> 'a b'
49 * a'b ==> a'\''b ==> 'a'\''b'
50 */
51 static char *sq_expand(const char *src)
52 {
53 static char *buf = NULL;
54 int cnt, c;
55 const char *cp;
56 char *bp;
57
58 /* count bytes needed to store the quoted string. */
59 for (cnt = 1, cp = src; *cp; cnt++, cp++)
60 if (*cp == '\'')
61 cnt += 3;
62
63 buf = xmalloc(cnt);
64 bp = buf;
65 while ((c = *src++)) {
66 if (c != '\'')
67 *bp++ = c;
68 else {
69 bp = strcpy(bp, "'\\''");
70 bp += 4;
71 }
72 }
73 *bp = 0;
74 return buf;
75 }
76
77 static struct diff_tempfile {
78 const char *name;
79 char hex[41];
80 char mode[10];
81 char tmp_path[50];
82 } diff_temp[2];
83
84 struct diff_spec {
85 unsigned char blob_sha1[20];
86 unsigned short mode; /* file mode */
87 unsigned sha1_valid : 1; /* if true, use blob_sha1 and trust mode;
88 * however with a NULL SHA1, read them
89 * from the file system.
90 * if false, use the name and read mode from
91 * the filesystem.
92 */
93 unsigned file_valid : 1; /* if false the file does not even exist */
94 };
95
96 static void builtin_diff(const char *name_a,
97 const char *name_b,
98 struct diff_tempfile *temp)
99 {
100 int i, next_at;
101 const char *diff_cmd = "diff -L'%s%s' -L'%s%s'";
102 const char *diff_arg = "'%s' '%s'||:"; /* "||:" is to return 0 */
103 const char *input_name_sq[2];
104 const char *path0[2];
105 const char *path1[2];
106 const char *name_sq[2];
107 char *cmd;
108
109 name_sq[0] = sq_expand(name_a);
110 name_sq[1] = sq_expand(name_b);
111
112 /* diff_cmd and diff_arg have 6 %s in total which makes
113 * the sum of these strings 12 bytes larger than required.
114 * we use 2 spaces around diff-opts, and we need to count
115 * terminating NUL, so we subtract 9 here.
116 */
117 int cmd_size = (strlen(diff_cmd) + strlen(diff_opts) +
118 strlen(diff_arg) - 9);
119 for (i = 0; i < 2; i++) {
120 input_name_sq[i] = sq_expand(temp[i].name);
121 if (!strcmp(temp[i].name, "/dev/null")) {
122 path0[i] = "/dev/null";
123 path1[i] = "";
124 } else {
125 path0[i] = i ? "b/" : "a/";
126 path1[i] = name_sq[i];
127 }
128 cmd_size += (strlen(path0[i]) + strlen(path1[i]) +
129 strlen(input_name_sq[i]));
130 }
131
132 cmd = xmalloc(cmd_size);
133
134 next_at = 0;
135 next_at += snprintf(cmd+next_at, cmd_size-next_at,
136 diff_cmd,
137 path0[0], path1[0], path0[1], path1[1]);
138 next_at += snprintf(cmd+next_at, cmd_size-next_at,
139 " %s ", diff_opts);
140 next_at += snprintf(cmd+next_at, cmd_size-next_at,
141 diff_arg, input_name_sq[0], input_name_sq[1]);
142
143 printf("diff --git a/%s b/%s\n", name_a, name_b);
144 if (!path1[0][0])
145 printf("new file mode %s\n", temp[1].mode);
146 else if (!path1[1][0])
147 printf("deleted file mode %s\n", temp[0].mode);
148 else {
149 if (strcmp(temp[0].mode, temp[1].mode)) {
150 printf("old mode %s\n", temp[0].mode);
151 printf("new mode %s\n", temp[1].mode);
152 }
153 if (strcmp(name_a, name_b)) {
154 printf("rename old %s\n", name_a);
155 printf("rename new %s\n", name_b);
156 }
157 if (strncmp(temp[0].mode, temp[1].mode, 3))
158 /* we do not run diff between different kind
159 * of objects.
160 */
161 exit(0);
162 }
163 fflush(NULL);
164 execlp("/bin/sh","sh", "-c", cmd, NULL);
165 }
166
167 /*
168 * Given a name and sha1 pair, if the dircache tells us the file in
169 * the work tree has that object contents, return true, so that
170 * prepare_temp_file() does not have to inflate and extract.
171 */
172 static int work_tree_matches(const char *name, const unsigned char *sha1)
173 {
174 struct cache_entry *ce;
175 struct stat st;
176 int pos, len;
177
178 /* We do not read the cache ourselves here, because the
179 * benchmark with my previous version that always reads cache
180 * shows that it makes things worse for diff-tree comparing
181 * two linux-2.6 kernel trees in an already checked out work
182 * tree. This is because most diff-tree comparisons deal with
183 * only a small number of files, while reading the cache is
184 * expensive for a large project, and its cost outweighs the
185 * savings we get by not inflating the object to a temporary
186 * file. Practically, this code only helps when we are used
187 * by diff-cache --cached, which does read the cache before
188 * calling us.
189 */
190 if (!active_cache)
191 return 0;
192
193 len = strlen(name);
194 pos = cache_name_pos(name, len);
195 if (pos < 0)
196 return 0;
197 ce = active_cache[pos];
198 if ((lstat(name, &st) < 0) ||
199 !S_ISREG(st.st_mode) ||
200 ce_match_stat(ce, &st) ||
201 memcmp(sha1, ce->sha1, 20))
202 return 0;
203 return 1;
204 }
205
206 static void prep_temp_blob(struct diff_tempfile *temp,
207 void *blob,
208 unsigned long size,
209 unsigned char *sha1,
210 int mode)
211 {
212 int fd;
213
214 strcpy(temp->tmp_path, ".diff_XXXXXX");
215 fd = mkstemp(temp->tmp_path);
216 if (fd < 0)
217 die("unable to create temp-file");
218 if (write(fd, blob, size) != size)
219 die("unable to write temp-file");
220 close(fd);
221 temp->name = temp->tmp_path;
222 strcpy(temp->hex, sha1_to_hex(sha1));
223 temp->hex[40] = 0;
224 sprintf(temp->mode, "%06o", mode);
225 }
226
227 static void prepare_temp_file(const char *name,
228 struct diff_tempfile *temp,
229 struct diff_spec *one)
230 {
231 if (!one->file_valid) {
232 not_a_valid_file:
233 /* A '-' entry produces this for file-2, and
234 * a '+' entry produces this for file-1.
235 */
236 temp->name = "/dev/null";
237 strcpy(temp->hex, ".");
238 strcpy(temp->mode, ".");
239 return;
240 }
241
242 if (!one->sha1_valid ||
243 work_tree_matches(name, one->blob_sha1)) {
244 struct stat st;
245 temp->name = name;
246 if (lstat(temp->name, &st) < 0) {
247 if (errno == ENOENT)
248 goto not_a_valid_file;
249 die("stat(%s): %s", temp->name, strerror(errno));
250 }
251 if (S_ISLNK(st.st_mode)) {
252 int ret;
253 char *buf, buf_[1024];
254 buf = ((sizeof(buf_) < st.st_size) ?
255 xmalloc(st.st_size) : buf_);
256 ret = readlink(name, buf, st.st_size);
257 if (ret < 0)
258 die("readlink(%s)", name);
259 prep_temp_blob(temp, buf, st.st_size,
260 (one->sha1_valid ?
261 one->blob_sha1 : null_sha1),
262 (one->sha1_valid ?
263 one->mode : S_IFLNK));
264 }
265 else {
266 if (!one->sha1_valid)
267 strcpy(temp->hex, sha1_to_hex(null_sha1));
268 else
269 strcpy(temp->hex, sha1_to_hex(one->blob_sha1));
270 sprintf(temp->mode, "%06o",
271 S_IFREG |ce_permissions(st.st_mode));
272 }
273 return;
274 }
275 else {
276 void *blob;
277 char type[20];
278 unsigned long size;
279
280 blob = read_sha1_file(one->blob_sha1, type, &size);
281 if (!blob || strcmp(type, "blob"))
282 die("unable to read blob object for %s (%s)",
283 name, sha1_to_hex(one->blob_sha1));
284 prep_temp_blob(temp, blob, size, one->blob_sha1, one->mode);
285 free(blob);
286 }
287 }
288
289 static void remove_tempfile(void)
290 {
291 int i;
292
293 for (i = 0; i < 2; i++)
294 if (diff_temp[i].name == diff_temp[i].tmp_path) {
295 unlink(diff_temp[i].name);
296 diff_temp[i].name = NULL;
297 }
298 }
299
300 static void remove_tempfile_on_signal(int signo)
301 {
302 remove_tempfile();
303 }
304
305 static int detect_rename;
306 static int reverse_diff;
307 static const char **pathspec;
308 static int speccnt;
309 static int diff_rename_minimum_score;
310
311 static int matches_pathspec(const char *name)
312 {
313 int i;
314 int namelen;
315
316 if (speccnt == 0)
317 return 1;
318
319 namelen = strlen(name);
320 for (i = 0; i < speccnt; i++) {
321 int speclen = strlen(pathspec[i]);
322 if (! strncmp(pathspec[i], name, speclen) &&
323 speclen <= namelen &&
324 (name[speclen] == 0 || name[speclen] == '/'))
325 return 1;
326 }
327 return 0;
328 }
329
330 /* An external diff command takes:
331 *
332 * diff-cmd name infile1 infile1-sha1 infile1-mode \
333 * infile2 infile2-sha1 infile2-mode [ rename-to ]
334 *
335 */
336 static void run_external_diff(const char *name,
337 const char *other,
338 struct diff_spec *one,
339 struct diff_spec *two)
340 {
341 struct diff_tempfile *temp = diff_temp;
342 pid_t pid;
343 int status;
344 static int atexit_asked = 0;
345
346 if (reverse_diff) {
347 struct diff_spec *tmp_spec;
348 tmp_spec = one; one = two; two = tmp_spec;
349 if (other) {
350 const char *tmp;
351 tmp = name; name = other; other = tmp;
352 }
353 }
354
355 if (!matches_pathspec(name) && (!other || !matches_pathspec(other)))
356 return;
357
358 if (one && two) {
359 prepare_temp_file(name, &temp[0], one);
360 prepare_temp_file(other ? : name, &temp[1], two);
361 if (! atexit_asked &&
362 (temp[0].name == temp[0].tmp_path ||
363 temp[1].name == temp[1].tmp_path)) {
364 atexit_asked = 1;
365 atexit(remove_tempfile);
366 }
367 signal(SIGINT, remove_tempfile_on_signal);
368 }
369
370 fflush(NULL);
371 pid = fork();
372 if (pid < 0)
373 die("unable to fork");
374 if (!pid) {
375 const char *pgm = external_diff();
376 if (pgm) {
377 if (one && two) {
378 const char *exec_arg[9];
379 const char **arg = &exec_arg[0];
380 *arg++ = pgm;
381 *arg++ = name;
382 *arg++ = temp[0].name;
383 *arg++ = temp[0].hex;
384 *arg++ = temp[0].mode;
385 *arg++ = temp[1].name;
386 *arg++ = temp[1].hex;
387 *arg++ = temp[1].mode;
388 if (other)
389 *arg++ = other;
390 *arg = 0;
391 execvp(pgm, (char *const*) exec_arg);
392 }
393 else
394 execlp(pgm, pgm, name, NULL);
395 }
396 /*
397 * otherwise we use the built-in one.
398 */
399 if (one && two)
400 builtin_diff(name, other ? : name, temp);
401 else
402 printf("* Unmerged path %s\n", name);
403 exit(0);
404 }
405 if (waitpid(pid, &status, 0) < 0 ||
406 !WIFEXITED(status) || WEXITSTATUS(status)) {
407 /* Earlier we did not check the exit status because
408 * diff exits non-zero if files are different, and
409 * we are not interested in knowing that. It was a
410 * mistake which made it harder to quit a diff-*
411 * session that uses the git-apply-patch-script as
412 * the GIT_EXTERNAL_DIFF. A custom GIT_EXTERNAL_DIFF
413 * should also exit non-zero only when it wants to
414 * abort the entire diff-* session.
415 */
416 remove_tempfile();
417 fprintf(stderr, "external diff died, stopping at %s.\n", name);
418 exit(1);
419 }
420 remove_tempfile();
421 }
422
423 /*
424 * We do not detect circular renames. Just hold created and deleted
425 * entries and later attempt to match them up. If they do not match,
426 * then spit them out as deletes or creates as original.
427 */
428
429 static struct diff_spec_hold {
430 struct diff_spec_hold *next;
431 struct diff_spec it;
432 unsigned long size;
433 int flags;
434 #define MATCHED 1
435 #define SHOULD_FREE 2
436 #define SHOULD_MUNMAP 4
437 void *data;
438 char path[1];
439 } *createdfile, *deletedfile;
440
441 static void hold_diff(const char *name,
442 struct diff_spec *one,
443 struct diff_spec *two)
444 {
445 struct diff_spec_hold **list, *elem;
446
447 if (one->file_valid && two->file_valid)
448 die("internal error");
449
450 if (!detect_rename) {
451 run_external_diff(name, NULL, one, two);
452 return;
453 }
454 elem = xmalloc(sizeof(*elem) + strlen(name));
455 strcpy(elem->path, name);
456 elem->size = 0;
457 elem->data = NULL;
458 elem->flags = 0;
459 if (one->file_valid) {
460 list = &deletedfile;
461 elem->it = *one;
462 }
463 else {
464 list = &createdfile;
465 elem->it = *two;
466 }
467 elem->next = *list;
468 *list = elem;
469 }
470
471 static int populate_data(struct diff_spec_hold *s)
472 {
473 char type[20];
474
475 if (s->data)
476 return 0;
477 if (s->it.sha1_valid) {
478 s->data = read_sha1_file(s->it.blob_sha1, type, &s->size);
479 s->flags |= SHOULD_FREE;
480 }
481 else {
482 struct stat st;
483 int fd;
484 fd = open(s->path, O_RDONLY);
485 if (fd < 0)
486 return -1;
487 if (fstat(fd, &st)) {
488 close(fd);
489 return -1;
490 }
491 s->size = st.st_size;
492 s->data = mmap(NULL, s->size, PROT_READ, MAP_PRIVATE, fd, 0);
493 close(fd);
494 if (!s->size)
495 s->data = "";
496 else
497 s->flags |= SHOULD_MUNMAP;
498 }
499 return 0;
500 }
501
502 static void free_data(struct diff_spec_hold *s)
503 {
504 if (s->flags & SHOULD_FREE)
505 free(s->data);
506 else if (s->flags & SHOULD_MUNMAP)
507 munmap(s->data, s->size);
508 s->flags &= ~(SHOULD_FREE|SHOULD_MUNMAP);
509 }
510
511 static void flush_remaining_diff(struct diff_spec_hold *elem,
512 int on_created_list)
513 {
514 static struct diff_spec null_file_spec;
515
516 null_file_spec.file_valid = 0;
517 for ( ; elem ; elem = elem->next) {
518 free_data(elem);
519 if (elem->flags & MATCHED)
520 continue;
521 if (on_created_list)
522 run_external_diff(elem->path, NULL,
523 &null_file_spec, &elem->it);
524 else
525 run_external_diff(elem->path, NULL,
526 &elem->it, &null_file_spec);
527 }
528 }
529
530 static int is_exact_match(struct diff_spec_hold *src,
531 struct diff_spec_hold *dst)
532 {
533 if (src->it.sha1_valid && dst->it.sha1_valid &&
534 !memcmp(src->it.blob_sha1, dst->it.blob_sha1, 20))
535 return 1;
536 if (populate_data(src) || populate_data(dst))
537 /* this is an error but will be caught downstream */
538 return 0;
539 if (src->size == dst->size &&
540 !memcmp(src->data, dst->data, src->size))
541 return 1;
542 return 0;
543 }
544
545 #define MINIMUM_SCORE 5000
546 int estimate_similarity(struct diff_spec_hold *src, struct diff_spec_hold *dst)
547 {
548 /* src points at a deleted file and dst points at a created
549 * file. They may be quite similar, in which case we want to
550 * say src is renamed to dst.
551 *
552 * Compare them and return how similar they are, representing
553 * the score as an integer between 0 and 10000. 10000 is
554 * reserved for the case where they match exactly.
555 */
556 void *delta;
557 unsigned long delta_size;
558
559 delta_size = ((src->size < dst->size) ?
560 (dst->size - src->size) : (src->size - dst->size));
561
562 /* We would not consider rename followed by more than
563 * 20% edits; that is, delta_size must be smaller than
564 * (src->size + dst->size)/2 * 0.2, which means...
565 */
566 if ((src->size + dst->size) < delta_size * 10)
567 return 0;
568
569 delta = diff_delta(src->data, src->size,
570 dst->data, dst->size,
571 &delta_size);
572 free(delta);
573
574 /* This "delta" is really xdiff with adler32 and all the
575 * overheads but it is a quick and dirty approximation.
576 *
577 * Now we will give some score to it. Let's say 20% edit gets
578 * 5000 points and 0% edit gets 9000 points. That is, every
579 * 1/20000 edit gets 1 point penalty. The amount of penalty is:
580 *
581 * (delta_size * 2 / (src->size + dst->size)) * 20000
582 *
583 */
584 return 9000 - (40000 * delta_size / (src->size+dst->size));
585 }
586
587 struct diff_score {
588 struct diff_spec_hold *src;
589 struct diff_spec_hold *dst;
590 int score;
591 };
592
593 static int score_compare(const void *a_, const void *b_)
594 {
595 const struct diff_score *a = a_, *b = b_;
596 return b->score - a->score;
597 }
598
599 static void flush_rename_pair(struct diff_spec_hold *src,
600 struct diff_spec_hold *dst)
601 {
602 src->flags |= MATCHED;
603 dst->flags |= MATCHED;
604 free_data(src);
605 free_data(dst);
606 run_external_diff(src->path, dst->path,
607 &src->it, &dst->it);
608 }
609
610 static void free_held_diff(struct diff_spec_hold *list)
611 {
612 struct diff_spec_hold *h;
613 for (h = list; list; list = h) {
614 h = list->next;
615 free_data(list);
616 free(list);
617 }
618 }
619
620 void diff_flush(void)
621 {
622 int num_create, num_delete, c, d;
623 struct diff_spec_hold *elem, *src, *dst;
624 struct diff_score *mx;
625
626 /* We really want to cull the candidates list early
627 * with cheap tests in order to avoid doing deltas.
628 */
629 for (dst = createdfile; dst; dst = dst->next) {
630 for (src = deletedfile; src; src = src->next) {
631 if (! is_exact_match(src, dst))
632 continue;
633 flush_rename_pair(src, dst);
634 break;
635 }
636 }
637
638 /* Count surviving candidates */
639 for (num_create = 0, elem = createdfile; elem; elem = elem->next)
640 if (!(elem->flags & MATCHED))
641 num_create++;
642
643 for (num_delete = 0, elem = deletedfile; elem; elem = elem->next)
644 if (!(elem->flags & MATCHED))
645 num_delete++;
646
647 if (num_create == 0 || num_delete == 0)
648 goto exit_path;
649
650 mx = xmalloc(sizeof(*mx) * num_create * num_delete);
651 for (c = 0, dst = createdfile; dst; dst = dst->next) {
652 int base = c * num_delete;
653 if (dst->flags & MATCHED)
654 continue;
655 for (d = 0, src = deletedfile; src; src = src->next) {
656 struct diff_score *m = &mx[base+d];
657 if (src->flags & MATCHED)
658 continue;
659 m->src = src;
660 m->dst = dst;
661 m->score = estimate_similarity(src, dst);
662 d++;
663 }
664 c++;
665 }
666 qsort(mx, num_create*num_delete, sizeof(*mx), score_compare);
667
668 for (c = 0; c < num_create * num_delete; c++) {
669 src = mx[c].src;
670 dst = mx[c].dst;
671 if ((src->flags & MATCHED) || (dst->flags & MATCHED))
672 continue;
673 }
674
675 for (c = 0; c < num_create * num_delete; c++) {
676 src = mx[c].src;
677 dst = mx[c].dst;
678 if ((src->flags & MATCHED) || (dst->flags & MATCHED))
679 continue;
680 if (mx[c].score < diff_rename_minimum_score)
681 break;
682 flush_rename_pair(src, dst);
683 }
684
685 exit_path:
686 flush_remaining_diff(createdfile, 1);
687 flush_remaining_diff(deletedfile, 0);
688 free_held_diff(createdfile);
689 free_held_diff(deletedfile);
690 createdfile = deletedfile = NULL;
691 }
692
693 void diff_setup(int detect_rename_, int minimum_score_, int reverse_diff_,
694 const char **pathspec_, int speccnt_)
695 {
696 free_held_diff(createdfile);
697 free_held_diff(deletedfile);
698 createdfile = deletedfile = NULL;
699
700 detect_rename = detect_rename_;
701 reverse_diff = reverse_diff_;
702 pathspec = pathspec_;
703 speccnt = speccnt_;
704 diff_rename_minimum_score = minimum_score_ ? : MINIMUM_SCORE;
705 }
706
707 void diff_addremove(int addremove, unsigned mode,
708 const unsigned char *sha1,
709 const char *base, const char *path)
710 {
711 char concatpath[PATH_MAX];
712 struct diff_spec spec[2], *one, *two;
713
714 memcpy(spec[0].blob_sha1, sha1, 20);
715 spec[0].mode = mode;
716 spec[0].sha1_valid = !!memcmp(sha1, null_sha1, 20);
717 spec[0].file_valid = 1;
718 spec[1].file_valid = 0;
719
720 if (addremove == '+') {
721 one = spec + 1; two = spec;
722 } else {
723 one = spec; two = one + 1;
724 }
725
726 if (path) {
727 strcpy(concatpath, base);
728 strcat(concatpath, path);
729 }
730 hold_diff(path ? concatpath : base, one, two);
731 }
732
733 void diff_change(unsigned old_mode, unsigned new_mode,
734 const unsigned char *old_sha1,
735 const unsigned char *new_sha1,
736 const char *base, const char *path) {
737 char concatpath[PATH_MAX];
738 struct diff_spec spec[2];
739
740 if (path) {
741 strcpy(concatpath, base);
742 strcat(concatpath, path);
743 }
744
745 memcpy(spec[0].blob_sha1, old_sha1, 20);
746 spec[0].mode = old_mode;
747 memcpy(spec[1].blob_sha1, new_sha1, 20);
748 spec[1].mode = new_mode;
749 spec[0].sha1_valid = !!memcmp(old_sha1, null_sha1, 20);
750 spec[1].sha1_valid = !!memcmp(new_sha1, null_sha1, 20);
751 spec[1].file_valid = spec[0].file_valid = 1;
752
753 /* We do not look at changed files as candidate for
754 * rename detection ever.
755 */
756 run_external_diff(path ? concatpath : base, NULL, &spec[0], &spec[1]);
757 }
758
759 void diff_unmerge(const char *path)
760 {
761 run_external_diff(path, NULL, NULL, NULL);
762 }