]>
Commit | Line | Data |
---|---|---|
d9c66f0b JS |
1 | #include "cache.h" |
2 | #include "range-diff.h" | |
3 | #include "string-list.h" | |
4 | #include "run-command.h" | |
5 | #include "argv-array.h" | |
6 | #include "hashmap.h" | |
7 | #include "xdiff-interface.h" | |
8 | #include "linear-assignment.h" | |
9 | ||
10 | struct patch_util { | |
11 | /* For the search for an exact match */ | |
12 | struct hashmap_entry e; | |
13 | const char *diff, *patch; | |
14 | ||
15 | int i; | |
16 | int diffsize; | |
17 | size_t diff_offset; | |
18 | /* the index of the matching item in the other branch, or -1 */ | |
19 | int matching; | |
20 | struct object_id oid; | |
21 | }; | |
22 | ||
23 | /* | |
24 | * Reads the patches into a string list, with the `util` field being populated | |
25 | * as struct object_id (will need to be free()d). | |
26 | */ | |
27 | static int read_patches(const char *range, struct string_list *list) | |
28 | { | |
29 | struct child_process cp = CHILD_PROCESS_INIT; | |
30 | FILE *in; | |
31 | struct strbuf buf = STRBUF_INIT, line = STRBUF_INIT; | |
32 | struct patch_util *util = NULL; | |
33 | int in_header = 1; | |
34 | ||
35 | argv_array_pushl(&cp.args, "log", "--no-color", "-p", "--no-merges", | |
36 | "--reverse", "--date-order", "--decorate=no", | |
37 | "--no-abbrev-commit", range, | |
38 | NULL); | |
39 | cp.out = -1; | |
40 | cp.no_stdin = 1; | |
41 | cp.git_cmd = 1; | |
42 | ||
43 | if (start_command(&cp)) | |
44 | return error_errno(_("could not start `log`")); | |
45 | in = fdopen(cp.out, "r"); | |
46 | if (!in) { | |
47 | error_errno(_("could not read `log` output")); | |
48 | finish_command(&cp); | |
49 | return -1; | |
50 | } | |
51 | ||
52 | while (strbuf_getline(&line, in) != EOF) { | |
53 | const char *p; | |
54 | ||
55 | if (skip_prefix(line.buf, "commit ", &p)) { | |
56 | if (util) { | |
57 | string_list_append(list, buf.buf)->util = util; | |
58 | strbuf_reset(&buf); | |
59 | } | |
60 | util = xcalloc(sizeof(*util), 1); | |
61 | if (get_oid(p, &util->oid)) { | |
62 | error(_("could not parse commit '%s'"), p); | |
63 | free(util); | |
64 | string_list_clear(list, 1); | |
65 | strbuf_release(&buf); | |
66 | strbuf_release(&line); | |
67 | fclose(in); | |
68 | finish_command(&cp); | |
69 | return -1; | |
70 | } | |
71 | util->matching = -1; | |
72 | in_header = 1; | |
73 | continue; | |
74 | } | |
75 | ||
76 | if (starts_with(line.buf, "diff --git")) { | |
77 | in_header = 0; | |
78 | strbuf_addch(&buf, '\n'); | |
79 | if (!util->diff_offset) | |
80 | util->diff_offset = buf.len; | |
81 | strbuf_addbuf(&buf, &line); | |
82 | } else if (in_header) { | |
83 | if (starts_with(line.buf, "Author: ")) { | |
84 | strbuf_addbuf(&buf, &line); | |
85 | strbuf_addstr(&buf, "\n\n"); | |
86 | } else if (starts_with(line.buf, " ")) { | |
87 | strbuf_addbuf(&buf, &line); | |
88 | strbuf_addch(&buf, '\n'); | |
89 | } | |
90 | continue; | |
91 | } else if (starts_with(line.buf, "@@ ")) | |
92 | strbuf_addstr(&buf, "@@"); | |
93 | else if (!line.buf[0] || starts_with(line.buf, "index ")) | |
94 | /* | |
95 | * A completely blank (not ' \n', which is context) | |
96 | * line is not valid in a diff. We skip it | |
97 | * silently, because this neatly handles the blank | |
98 | * separator line between commits in git-log | |
99 | * output. | |
100 | * | |
101 | * We also want to ignore the diff's `index` lines | |
102 | * because they contain exact blob hashes in which | |
103 | * we are not interested. | |
104 | */ | |
105 | continue; | |
106 | else | |
107 | strbuf_addbuf(&buf, &line); | |
108 | ||
109 | strbuf_addch(&buf, '\n'); | |
110 | util->diffsize++; | |
111 | } | |
112 | fclose(in); | |
113 | strbuf_release(&line); | |
114 | ||
115 | if (util) | |
116 | string_list_append(list, buf.buf)->util = util; | |
117 | strbuf_release(&buf); | |
118 | ||
119 | if (finish_command(&cp)) | |
120 | return -1; | |
121 | ||
122 | return 0; | |
123 | } | |
124 | ||
125 | static int patch_util_cmp(const void *dummy, const struct patch_util *a, | |
126 | const struct patch_util *b, const char *keydata) | |
127 | { | |
128 | return strcmp(a->diff, keydata ? keydata : b->diff); | |
129 | } | |
130 | ||
131 | static void find_exact_matches(struct string_list *a, struct string_list *b) | |
132 | { | |
133 | struct hashmap map; | |
134 | int i; | |
135 | ||
136 | hashmap_init(&map, (hashmap_cmp_fn)patch_util_cmp, NULL, 0); | |
137 | ||
138 | /* First, add the patches of a to a hash map */ | |
139 | for (i = 0; i < a->nr; i++) { | |
140 | struct patch_util *util = a->items[i].util; | |
141 | ||
142 | util->i = i; | |
143 | util->patch = a->items[i].string; | |
144 | util->diff = util->patch + util->diff_offset; | |
145 | hashmap_entry_init(util, strhash(util->diff)); | |
146 | hashmap_add(&map, util); | |
147 | } | |
148 | ||
149 | /* Now try to find exact matches in b */ | |
150 | for (i = 0; i < b->nr; i++) { | |
151 | struct patch_util *util = b->items[i].util, *other; | |
152 | ||
153 | util->i = i; | |
154 | util->patch = b->items[i].string; | |
155 | util->diff = util->patch + util->diff_offset; | |
156 | hashmap_entry_init(util, strhash(util->diff)); | |
157 | other = hashmap_remove(&map, util, NULL); | |
158 | if (other) { | |
159 | if (other->matching >= 0) | |
160 | BUG("already assigned!"); | |
161 | ||
162 | other->matching = i; | |
163 | util->matching = other->i; | |
164 | } | |
165 | } | |
166 | ||
167 | hashmap_free(&map, 0); | |
168 | } | |
169 | ||
/*
 * Line callback for the diff machinery: `data` points to an int counter
 * that is bumped once per call. The line contents are ignored.
 */
static void diffsize_consume(void *data, char *line, unsigned long len)
{
	int *counter = data;

	*counter += 1;
}
174 | ||
175 | static int diffsize(const char *a, const char *b) | |
176 | { | |
177 | xpparam_t pp = { 0 }; | |
178 | xdemitconf_t cfg = { 0 }; | |
179 | mmfile_t mf1, mf2; | |
180 | int count = 0; | |
181 | ||
182 | mf1.ptr = (char *)a; | |
183 | mf1.size = strlen(a); | |
184 | mf2.ptr = (char *)b; | |
185 | mf2.size = strlen(b); | |
186 | ||
187 | cfg.ctxlen = 3; | |
188 | if (!xdi_diff_outf(&mf1, &mf2, diffsize_consume, &count, &pp, &cfg)) | |
189 | return count; | |
190 | ||
191 | error(_("failed to generate diff")); | |
192 | return COST_MAX; | |
193 | } | |
194 | ||
195 | static void get_correspondences(struct string_list *a, struct string_list *b, | |
196 | int creation_factor) | |
197 | { | |
198 | int n = a->nr + b->nr; | |
199 | int *cost, c, *a2b, *b2a; | |
200 | int i, j; | |
201 | ||
202 | ALLOC_ARRAY(cost, st_mult(n, n)); | |
203 | ALLOC_ARRAY(a2b, n); | |
204 | ALLOC_ARRAY(b2a, n); | |
205 | ||
206 | for (i = 0; i < a->nr; i++) { | |
207 | struct patch_util *a_util = a->items[i].util; | |
208 | ||
209 | for (j = 0; j < b->nr; j++) { | |
210 | struct patch_util *b_util = b->items[j].util; | |
211 | ||
212 | if (a_util->matching == j) | |
213 | c = 0; | |
214 | else if (a_util->matching < 0 && b_util->matching < 0) | |
215 | c = diffsize(a_util->diff, b_util->diff); | |
216 | else | |
217 | c = COST_MAX; | |
218 | cost[i + n * j] = c; | |
219 | } | |
220 | ||
221 | c = a_util->matching < 0 ? | |
222 | a_util->diffsize * creation_factor / 100 : COST_MAX; | |
223 | for (j = b->nr; j < n; j++) | |
224 | cost[i + n * j] = c; | |
225 | } | |
226 | ||
227 | for (j = 0; j < b->nr; j++) { | |
228 | struct patch_util *util = b->items[j].util; | |
229 | ||
230 | c = util->matching < 0 ? | |
231 | util->diffsize * creation_factor / 100 : COST_MAX; | |
232 | for (i = a->nr; i < n; i++) | |
233 | cost[i + n * j] = c; | |
234 | } | |
235 | ||
236 | for (i = a->nr; i < n; i++) | |
237 | for (j = b->nr; j < n; j++) | |
238 | cost[i + n * j] = 0; | |
239 | ||
240 | compute_assignment(n, n, cost, a2b, b2a); | |
241 | ||
242 | for (i = 0; i < a->nr; i++) | |
243 | if (a2b[i] >= 0 && a2b[i] < b->nr) { | |
244 | struct patch_util *a_util = a->items[i].util; | |
245 | struct patch_util *b_util = b->items[a2b[i]].util; | |
246 | ||
247 | a_util->matching = a2b[i]; | |
248 | b_util->matching = i; | |
249 | } | |
250 | ||
251 | free(cost); | |
252 | free(a2b); | |
253 | free(b2a); | |
254 | } | |
255 | ||
256 | static const char *short_oid(struct patch_util *util) | |
257 | { | |
258 | return find_unique_abbrev(&util->oid, DEFAULT_ABBREV); | |
259 | } | |
260 | ||
261 | static void output(struct string_list *a, struct string_list *b) | |
262 | { | |
263 | int i; | |
264 | ||
265 | for (i = 0; i < b->nr; i++) { | |
266 | struct patch_util *util = b->items[i].util, *prev; | |
267 | ||
268 | if (util->matching < 0) | |
269 | printf("-: -------- > %d: %s\n", | |
270 | i + 1, short_oid(util)); | |
271 | else { | |
272 | prev = a->items[util->matching].util; | |
273 | printf("%d: %s ! %d: %s\n", | |
274 | util->matching + 1, short_oid(prev), | |
275 | i + 1, short_oid(util)); | |
276 | } | |
277 | } | |
278 | ||
279 | for (i = 0; i < a->nr; i++) { | |
280 | struct patch_util *util = a->items[i].util; | |
281 | ||
282 | if (util->matching < 0) | |
283 | printf("%d: %s < -: --------\n", | |
284 | i + 1, short_oid(util)); | |
285 | } | |
286 | } | |
287 | ||
288 | int show_range_diff(const char *range1, const char *range2, | |
289 | int creation_factor) | |
290 | { | |
291 | int res = 0; | |
292 | ||
293 | struct string_list branch1 = STRING_LIST_INIT_DUP; | |
294 | struct string_list branch2 = STRING_LIST_INIT_DUP; | |
295 | ||
296 | if (read_patches(range1, &branch1)) | |
297 | res = error(_("could not parse log for '%s'"), range1); | |
298 | if (!res && read_patches(range2, &branch2)) | |
299 | res = error(_("could not parse log for '%s'"), range2); | |
300 | ||
301 | if (!res) { | |
302 | find_exact_matches(&branch1, &branch2); | |
303 | get_correspondences(&branch1, &branch2, creation_factor); | |
304 | output(&branch1, &branch2); | |
305 | } | |
306 | ||
307 | string_list_clear(&branch1, 1); | |
308 | string_list_clear(&branch2, 1); | |
309 | ||
310 | return res; | |
311 | } |