]> git.ipfire.org Git - thirdparty/git.git/blame - convert.c
Merge branch 'master' into ph/strbuf
[thirdparty/git.git] / convert.c
CommitLineData
6c510bee 1#include "cache.h"
35ebfd6a 2#include "attr.h"
3fed15f5 3#include "run-command.h"
35ebfd6a 4
6c510bee
LT
5/*
6 * convert.c - convert a file when checking it out and checking it in.
7 *
8 * This should use the pathname to decide on whether it wants to do some
9 * more interesting conversions (automatic gzip/unzip, general format
10 * conversions etc etc), but by default it just does automatic CRLF<->LF
11 * translation when the "auto_crlf" option is set.
12 */
13
163b9591
JH
/*
 * CRLF conversion actions.  git_path_check_crlf() derives one of these
 * from the "crlf" attribute of a path.
 */
#define CRLF_GUESS	(-1)	/* attribute unset or unrecognized: use content heuristics */
#define CRLF_BINARY	0	/* attribute set to false: never convert */
#define CRLF_TEXT	1	/* attribute set to true: convert without guessing */
#define CRLF_INPUT	2	/* attribute is "input": convert into git only */
18
6c510bee
LT
/* Per-buffer byte tallies filled in by gather_stats(). */
struct text_stat {
	/*
	 * Line-ending tallies: every CR, every LF, and every CR that is
	 * immediately followed by LF.
	 */
	unsigned cr;
	unsigned lf;
	unsigned crlf;

	/* These are just approximations! */
	unsigned printable;
	unsigned nonprintable;
};
26
27static void gather_stats(const char *buf, unsigned long size, struct text_stat *stats)
28{
29 unsigned long i;
30
31 memset(stats, 0, sizeof(*stats));
32
33 for (i = 0; i < size; i++) {
34 unsigned char c = buf[i];
35 if (c == '\r') {
36 stats->cr++;
37 if (i+1 < size && buf[i+1] == '\n')
38 stats->crlf++;
39 continue;
40 }
41 if (c == '\n') {
42 stats->lf++;
43 continue;
44 }
45 if (c == 127)
46 /* DEL */
47 stats->nonprintable++;
48 else if (c < 32) {
49 switch (c) {
50 /* BS, HT, ESC and FF */
51 case '\b': case '\t': case '\033': case '\014':
52 stats->printable++;
53 break;
54 default:
55 stats->nonprintable++;
56 }
57 }
58 else
59 stats->printable++;
60 }
61}
62
63/*
64 * The same heuristics as diff.c::mmfile_is_binary()
65 */
66static int is_binary(unsigned long size, struct text_stat *stats)
67{
68
69 if ((stats->printable >> 7) < stats->nonprintable)
70 return 1;
71 /*
72 * Other heuristics? Average line length might be relevant,
73 * as might LF vs CR vs CRLF counts..
74 *
75 * NOTE! It might be normal to have a low ratio of CRLF to LF
76 * (somebody starts with a LF-only file and edits it with an editor
77 * that adds CRLF only to lines that are added..). But do we
78 * want to support CR-only? Probably not.
79 */
80 return 0;
81}
82
5ecd293d
PH
83static int crlf_to_git(const char *path, const char *src, size_t len,
84 struct strbuf *buf, int action)
6c510bee 85{
6c510bee 86 struct text_stat stats;
5ecd293d 87 char *dst;
6c510bee 88
5ecd293d
PH
89 if ((action == CRLF_BINARY) || !auto_crlf || !len)
90 return 0;
6c510bee 91
5ecd293d 92 gather_stats(src, len, &stats);
6c510bee
LT
93 /* No CR? Nothing to convert, regardless. */
94 if (!stats.cr)
5ecd293d 95 return 0;
6c510bee 96
163b9591 97 if (action == CRLF_GUESS) {
201ac8ef
JH
98 /*
99 * We're currently not going to even try to convert stuff
100 * that has bare CR characters. Does anybody do that crazy
101 * stuff?
102 */
103 if (stats.cr != stats.crlf)
5ecd293d 104 return 0;
201ac8ef
JH
105
106 /*
107 * And add some heuristics for binary vs text, of course...
108 */
5ecd293d
PH
109 if (is_binary(len, &stats))
110 return 0;
201ac8ef 111 }
6c510bee 112
5ecd293d
PH
113 strbuf_grow(buf, len);
114 dst = buf->buf;
163b9591
JH
115 if (action == CRLF_GUESS) {
116 /*
117 * If we guessed, we already know we rejected a file with
118 * lone CR, and we can strip a CR without looking at what
119 * follow it.
120 */
201ac8ef 121 do {
ac78e548 122 unsigned char c = *src++;
201ac8ef 123 if (c != '\r')
ac78e548 124 *dst++ = c;
5ecd293d 125 } while (--len);
201ac8ef
JH
126 } else {
127 do {
ac78e548 128 unsigned char c = *src++;
5ecd293d 129 if (! (c == '\r' && (1 < len && *src == '\n')))
ac78e548 130 *dst++ = c;
5ecd293d 131 } while (--len);
201ac8ef 132 }
5ecd293d
PH
133 strbuf_setlen(buf, dst - buf->buf);
134 return 1;
6c510bee
LT
135}
136
5ecd293d
PH
137static int crlf_to_worktree(const char *path, const char *src, size_t len,
138 struct strbuf *buf, int action)
6c510bee 139{
5ecd293d 140 char *to_free = NULL;
6c510bee 141 struct text_stat stats;
6c510bee 142
163b9591 143 if ((action == CRLF_BINARY) || (action == CRLF_INPUT) ||
760f0c62 144 auto_crlf <= 0)
5ecd293d 145 return 0;
6c510bee 146
5ecd293d
PH
147 if (!len)
148 return 0;
6c510bee 149
5ecd293d 150 gather_stats(src, len, &stats);
6c510bee
LT
151
152 /* No LF? Nothing to convert, regardless. */
153 if (!stats.lf)
5ecd293d 154 return 0;
6c510bee
LT
155
156 /* Was it already in CRLF format? */
157 if (stats.lf == stats.crlf)
5ecd293d 158 return 0;
6c510bee 159
163b9591 160 if (action == CRLF_GUESS) {
201ac8ef
JH
161 /* If we have any bare CR characters, we're not going to touch it */
162 if (stats.cr != stats.crlf)
5ecd293d 163 return 0;
6c510bee 164
5ecd293d
PH
165 if (is_binary(len, &stats))
166 return 0;
201ac8ef 167 }
6c510bee 168
5ecd293d
PH
169 /* are we "faking" in place editing ? */
170 if (src == buf->buf)
171 to_free = strbuf_detach(buf);
172
173 strbuf_grow(buf, len + stats.lf - stats.crlf);
174 for (;;) {
175 const char *nl = memchr(src, '\n', len);
176 if (!nl)
177 break;
178 if (nl > src && nl[-1] == '\r') {
179 strbuf_add(buf, src, nl + 1 - src);
180 } else {
181 strbuf_add(buf, src, nl - src);
182 strbuf_addstr(buf, "\r\n");
183 }
184 len -= nl + 1 - src;
185 src = nl + 1;
186 }
187 strbuf_add(buf, src, len);
188
189 free(to_free);
190 return 1;
6c510bee 191}
35ebfd6a 192
aa4ed402
JH
193static int filter_buffer(const char *path, const char *src,
194 unsigned long size, const char *cmd)
195{
196 /*
197 * Spawn cmd and feed the buffer contents through its stdin.
198 */
199 struct child_process child_process;
200 int pipe_feed[2];
201 int write_err, status;
202
203 memset(&child_process, 0, sizeof(child_process));
204
205 if (pipe(pipe_feed) < 0) {
206 error("cannot create pipe to run external filter %s", cmd);
207 return 1;
208 }
209
210 child_process.pid = fork();
211 if (child_process.pid < 0) {
212 error("cannot fork to run external filter %s", cmd);
213 close(pipe_feed[0]);
214 close(pipe_feed[1]);
215 return 1;
216 }
217 if (!child_process.pid) {
218 dup2(pipe_feed[0], 0);
219 close(pipe_feed[0]);
220 close(pipe_feed[1]);
221 execlp("sh", "sh", "-c", cmd, NULL);
222 return 1;
223 }
224 close(pipe_feed[0]);
225
226 write_err = (write_in_full(pipe_feed[1], src, size) < 0);
227 if (close(pipe_feed[1]))
228 write_err = 1;
229 if (write_err)
230 error("cannot feed the input to external filter %s", cmd);
231
232 status = finish_command(&child_process);
233 if (status)
234 error("external filter %s failed %d", cmd, -status);
235 return (write_err || status);
236}
237
5ecd293d
PH
238static int apply_filter(const char *path, const char *src, size_t len,
239 struct strbuf *dst, const char *cmd)
aa4ed402
JH
240{
241 /*
242 * Create a pipeline to have the command filter the buffer's
243 * contents.
244 *
245 * (child --> cmd) --> us
246 */
aa4ed402 247 int pipe_feed[2];
5ecd293d 248 int status, ret = 1;
aa4ed402 249 struct child_process child_process;
5ecd293d 250 struct strbuf nbuf;
aa4ed402
JH
251
252 if (!cmd)
5ecd293d 253 return 0;
aa4ed402
JH
254
255 memset(&child_process, 0, sizeof(child_process));
256
257 if (pipe(pipe_feed) < 0) {
258 error("cannot create pipe to run external filter %s", cmd);
5ecd293d 259 return 0;
aa4ed402
JH
260 }
261
262 fflush(NULL);
263 child_process.pid = fork();
264 if (child_process.pid < 0) {
265 error("cannot fork to run external filter %s", cmd);
266 close(pipe_feed[0]);
267 close(pipe_feed[1]);
5ecd293d 268 return 0;
aa4ed402
JH
269 }
270 if (!child_process.pid) {
271 dup2(pipe_feed[1], 1);
272 close(pipe_feed[0]);
273 close(pipe_feed[1]);
5ecd293d 274 exit(filter_buffer(path, src, len, cmd));
aa4ed402
JH
275 }
276 close(pipe_feed[1]);
277
5ecd293d
PH
278 strbuf_init(&nbuf, 0);
279 if (strbuf_read(&nbuf, pipe_feed[0], len) < 0) {
280 error("read from external filter %s failed", cmd);
281 ret = 0;
aa4ed402
JH
282 }
283 if (close(pipe_feed[0])) {
5ecd293d
PH
284 ret = error("read from external filter %s failed", cmd);
285 ret = 0;
aa4ed402 286 }
aa4ed402
JH
287 status = finish_command(&child_process);
288 if (status) {
5ecd293d
PH
289 ret = error("external filter %s failed %d", cmd, -status);
290 ret = 0;
aa4ed402
JH
291 }
292
5ecd293d
PH
293 if (ret) {
294 *dst = nbuf;
295 } else {
296 strbuf_release(&nbuf);
297 }
298 return ret;
aa4ed402
JH
299}
300
/*
 * One user-configured conversion driver ("filter.<name>.*" in the
 * configuration), kept on a singly linked list.
 */
struct convert_driver {
	const char *name;		/* the <name> part of the config key */
	struct convert_driver *next;	/* next driver on the list, or NULL */
	char *smudge;			/* "filter.<name>.smudge" command, if set */
	char *clean;			/* "filter.<name>.clean" command, if set */
};

/* head of the driver list, and the link where the next driver is appended */
static struct convert_driver *user_convert;
static struct convert_driver **user_convert_tail;
307
308static int read_convert_config(const char *var, const char *value)
309{
310 const char *ep, *name;
311 int namelen;
312 struct convert_driver *drv;
313
314 /*
315 * External conversion drivers are configured using
316 * "filter.<name>.variable".
317 */
318 if (prefixcmp(var, "filter.") || (ep = strrchr(var, '.')) == var + 6)
319 return 0;
320 name = var + 7;
321 namelen = ep - name;
322 for (drv = user_convert; drv; drv = drv->next)
323 if (!strncmp(drv->name, name, namelen) && !drv->name[namelen])
324 break;
325 if (!drv) {
326 char *namebuf;
327 drv = xcalloc(1, sizeof(struct convert_driver));
328 namebuf = xmalloc(namelen + 1);
329 memcpy(namebuf, name, namelen);
330 namebuf[namelen] = 0;
331 drv->name = namebuf;
332 drv->next = NULL;
333 *user_convert_tail = drv;
334 user_convert_tail = &(drv->next);
335 }
336
337 ep++;
338
339 /*
340 * filter.<name>.smudge and filter.<name>.clean specifies
341 * the command line:
342 *
343 * command-line
344 *
345 * The command-line will not be interpolated in any way.
346 */
347
348 if (!strcmp("smudge", ep)) {
349 if (!value)
350 return error("%s: lacks value", var);
351 drv->smudge = strdup(value);
352 return 0;
353 }
354
355 if (!strcmp("clean", ep)) {
356 if (!value)
357 return error("%s: lacks value", var);
358 drv->clean = strdup(value);
359 return 0;
360 }
361 return 0;
362}
363
6073ee85 364static void setup_convert_check(struct git_attr_check *check)
35ebfd6a
JH
365{
366 static struct git_attr *attr_crlf;
3fed15f5 367 static struct git_attr *attr_ident;
aa4ed402 368 static struct git_attr *attr_filter;
35ebfd6a 369
3fed15f5 370 if (!attr_crlf) {
35ebfd6a 371 attr_crlf = git_attr("crlf", 4);
3fed15f5 372 attr_ident = git_attr("ident", 5);
aa4ed402
JH
373 attr_filter = git_attr("filter", 6);
374 user_convert_tail = &user_convert;
375 git_config(read_convert_config);
3fed15f5
JH
376 }
377 check[0].attr = attr_crlf;
378 check[1].attr = attr_ident;
aa4ed402 379 check[2].attr = attr_filter;
3fed15f5
JH
380}
381
/*
 * Count the ident keywords in cp[0..size): both the collapsed form
 * "$Id$" and the expanded form "$Id: ... $", e.g.
 *
 * "$Id: 0000000000000000000000000000000000000000 $" <=> "$Id$"
 *
 * An expanded keyword without a closing '$' is not counted.
 */
static int count_ident(const char *cp, unsigned long size)
{
	unsigned long pos = 0;
	int found = 0;

	while (pos < size) {
		char marker;

		if (cp[pos++] != '$')
			continue;
		/* need room for "Id" plus the character that follows it */
		if (size - pos < 3)
			break;
		if (cp[pos] != 'I' || cp[pos + 1] != 'd')
			continue;

		marker = cp[pos + 2];
		pos += 3;
		if (marker == '$') {
			found++;	/* $Id$ */
		} else if (marker == ':') {
			/*
			 * "$Id: ... "; scan up to the closing dollar sign
			 * and discard.
			 */
			while (pos < size) {
				if (cp[pos++] == '$') {
					found++;
					break;
				}
			}
		}
	}
	return found;
}
421
5ecd293d
PH
422static int ident_to_git(const char *path, const char *src, size_t len,
423 struct strbuf *buf, int ident)
3fed15f5 424{
5ecd293d 425 char *dst, *dollar;
3fed15f5 426
5ecd293d
PH
427 if (!ident || !count_ident(src, len))
428 return 0;
429
430 strbuf_grow(buf, len);
431 dst = buf->buf;
432 for (;;) {
433 dollar = memchr(src, '$', len);
434 if (!dollar)
435 break;
436 memcpy(dst, src, dollar + 1 - src);
437 dst += dollar + 1 - src;
438 len -= dollar + 1 - src;
439 src = dollar + 1;
440
441 if (len > 3 && !memcmp(src, "Id:", 3)) {
442 dollar = memchr(src + 3, '$', len - 3);
443 if (!dollar)
444 break;
af9b54bb
AP
445 memcpy(dst, "Id$", 3);
446 dst += 3;
5ecd293d
PH
447 len -= dollar + 1 - src;
448 src = dollar + 1;
3fed15f5
JH
449 }
450 }
5ecd293d
PH
451 memcpy(dst, src, len);
452 strbuf_setlen(buf, dst + len - buf->buf);
453 return 1;
3fed15f5
JH
454}
455
5ecd293d
PH
456static int ident_to_worktree(const char *path, const char *src, size_t len,
457 struct strbuf *buf, int ident)
3fed15f5 458{
3fed15f5 459 unsigned char sha1[20];
5ecd293d
PH
460 char *to_free = NULL, *dollar;
461 int cnt;
3fed15f5
JH
462
463 if (!ident)
5ecd293d 464 return 0;
3fed15f5 465
5ecd293d 466 cnt = count_ident(src, len);
3fed15f5 467 if (!cnt)
5ecd293d 468 return 0;
3fed15f5 469
5ecd293d
PH
470 /* are we "faking" in place editing ? */
471 if (src == buf->buf)
472 to_free = strbuf_detach(buf);
473 hash_sha1_file(src, len, "blob", sha1);
3fed15f5 474
5ecd293d
PH
475 strbuf_grow(buf, len + cnt * 43);
476 for (;;) {
477 /* step 1: run to the next '$' */
478 dollar = memchr(src, '$', len);
479 if (!dollar)
480 break;
481 strbuf_add(buf, src, dollar + 1 - src);
482 len -= dollar + 1 - src;
483 src = dollar + 1;
c23290d5 484
5ecd293d
PH
485 /* step 2: does it looks like a bit like Id:xxx$ or Id$ ? */
486 if (len < 3 || memcmp("Id", src, 2))
3fed15f5
JH
487 continue;
488
5ecd293d
PH
489 /* step 3: skip over Id$ or Id:xxxxx$ */
490 if (src[2] == '$') {
491 src += 3;
492 len -= 3;
493 } else if (src[2] == ':') {
494 /*
495 * It's possible that an expanded Id has crept its way into the
496 * repository, we cope with that by stripping the expansion out
497 */
498 dollar = memchr(src + 3, '$', len - 3);
499 if (!dollar) {
500 /* incomplete keyword, no more '$', so just quit the loop */
501 break;
502 }
c23290d5 503
5ecd293d
PH
504 len -= dollar + 1 - src;
505 src = dollar + 1;
506 } else {
507 /* it wasn't a "Id$" or "Id:xxxx$" */
508 continue;
509 }
c23290d5 510
5ecd293d
PH
511 /* step 4: substitute */
512 strbuf_addstr(buf, "Id: ");
513 strbuf_add(buf, sha1_to_hex(sha1), 40);
514 strbuf_addstr(buf, " $");
3fed15f5 515 }
5ecd293d 516 strbuf_add(buf, src, len);
3fed15f5 517
5ecd293d
PH
518 free(to_free);
519 return 1;
35ebfd6a
JH
520}
521
6073ee85 522static int git_path_check_crlf(const char *path, struct git_attr_check *check)
35ebfd6a 523{
6073ee85
JH
524 const char *value = check->value;
525
526 if (ATTR_TRUE(value))
527 return CRLF_TEXT;
528 else if (ATTR_FALSE(value))
529 return CRLF_BINARY;
530 else if (ATTR_UNSET(value))
531 ;
532 else if (!strcmp(value, "input"))
533 return CRLF_INPUT;
163b9591 534 return CRLF_GUESS;
35ebfd6a
JH
535}
536
aa4ed402
JH
537static struct convert_driver *git_path_check_convert(const char *path,
538 struct git_attr_check *check)
539{
540 const char *value = check->value;
541 struct convert_driver *drv;
542
543 if (ATTR_TRUE(value) || ATTR_FALSE(value) || ATTR_UNSET(value))
544 return NULL;
545 for (drv = user_convert; drv; drv = drv->next)
546 if (!strcmp(value, drv->name))
547 return drv;
548 return NULL;
549}
550
3fed15f5
JH
551static int git_path_check_ident(const char *path, struct git_attr_check *check)
552{
553 const char *value = check->value;
554
555 return !!ATTR_TRUE(value);
556}
557
5ecd293d 558int convert_to_git(const char *path, const char *src, size_t len, struct strbuf *dst)
35ebfd6a 559{
aa4ed402 560 struct git_attr_check check[3];
6073ee85 561 int crlf = CRLF_GUESS;
5ecd293d 562 int ident = 0, ret = 0;
aa4ed402 563 char *filter = NULL;
6073ee85
JH
564
565 setup_convert_check(check);
3fed15f5 566 if (!git_checkattr(path, ARRAY_SIZE(check), check)) {
aa4ed402 567 struct convert_driver *drv;
3fed15f5
JH
568 crlf = git_path_check_crlf(path, check + 0);
569 ident = git_path_check_ident(path, check + 1);
aa4ed402
JH
570 drv = git_path_check_convert(path, check + 2);
571 if (drv && drv->clean)
572 filter = drv->clean;
3fed15f5
JH
573 }
574
5ecd293d
PH
575 ret |= apply_filter(path, src, len, dst, filter);
576 if (ret) {
577 src = dst->buf;
578 len = dst->len;
aa4ed402 579 }
5ecd293d
PH
580 ret |= crlf_to_git(path, src, len, dst, crlf);
581 if (ret) {
582 src = dst->buf;
583 len = dst->len;
6073ee85 584 }
5ecd293d 585 return ret | ident_to_git(path, src, len, dst, ident);
35ebfd6a
JH
586}
587
5ecd293d 588int convert_to_working_tree(const char *path, const char *src, size_t len, struct strbuf *dst)
35ebfd6a 589{
aa4ed402 590 struct git_attr_check check[3];
6073ee85 591 int crlf = CRLF_GUESS;
5ecd293d 592 int ident = 0, ret = 0;
aa4ed402 593 char *filter = NULL;
6073ee85
JH
594
595 setup_convert_check(check);
3fed15f5 596 if (!git_checkattr(path, ARRAY_SIZE(check), check)) {
aa4ed402 597 struct convert_driver *drv;
3fed15f5
JH
598 crlf = git_path_check_crlf(path, check + 0);
599 ident = git_path_check_ident(path, check + 1);
aa4ed402
JH
600 drv = git_path_check_convert(path, check + 2);
601 if (drv && drv->smudge)
602 filter = drv->smudge;
6073ee85 603 }
3fed15f5 604
5ecd293d
PH
605 ret |= ident_to_worktree(path, src, len, dst, ident);
606 if (ret) {
607 src = dst->buf;
608 len = dst->len;
3fed15f5 609 }
5ecd293d
PH
610 ret |= crlf_to_worktree(path, src, len, dst, crlf);
611 if (ret) {
612 src = dst->buf;
613 len = dst->len;
aa4ed402 614 }
5ecd293d 615 return ret | apply_filter(path, src, len, dst, filter);
35ebfd6a 616}