]>
Commit | Line | Data |
---|---|---|
6c510bee | 1 | #include "cache.h" |
35ebfd6a | 2 | #include "attr.h" |
3fed15f5 | 3 | #include "run-command.h" |
35ebfd6a | 4 | |
6c510bee LT |
5 | /* |
6 | * convert.c - convert a file when checking it out and checking it in. | |
7 | * | |
8 | * This should use the pathname to decide on whether it wants to do some | |
9 | * more interesting conversions (automatic gzip/unzip, general format | |
10 | * conversions etc etc), but by default it just does automatic CRLF<->LF | |
11 | * translation when the "auto_crlf" option is set. | |
12 | */ | |
13 | ||
163b9591 JH |
/*
 * Per-path line-ending conversion modes, passed as the "action"
 * argument of crlf_to_git() and crlf_to_worktree().
 */
#define CRLF_GUESS	(-1)	/* no explicit setting: obey auto_crlf and the text/binary heuristics */
#define CRLF_BINARY	0	/* never convert */
#define CRLF_TEXT	1	/* convert in both directions, skipping the heuristics */
#define CRLF_INPUT	2	/* convert on the way into git only */
18 | ||
6c510bee LT |
/* Byte-class tallies for one buffer, as filled in by gather_stats(). */
struct text_stat {
	/*
	 * Line-ending tallies: every CR, every LF, and every CR that is
	 * immediately followed by LF.
	 */
	unsigned cr;
	unsigned lf;
	unsigned crlf;

	/* Rough tallies only -- do not treat these as exact. */
	unsigned printable;
	unsigned nonprintable;
};
26 | ||
27 | static void gather_stats(const char *buf, unsigned long size, struct text_stat *stats) | |
28 | { | |
29 | unsigned long i; | |
30 | ||
31 | memset(stats, 0, sizeof(*stats)); | |
32 | ||
33 | for (i = 0; i < size; i++) { | |
34 | unsigned char c = buf[i]; | |
35 | if (c == '\r') { | |
36 | stats->cr++; | |
37 | if (i+1 < size && buf[i+1] == '\n') | |
38 | stats->crlf++; | |
39 | continue; | |
40 | } | |
41 | if (c == '\n') { | |
42 | stats->lf++; | |
43 | continue; | |
44 | } | |
45 | if (c == 127) | |
46 | /* DEL */ | |
47 | stats->nonprintable++; | |
48 | else if (c < 32) { | |
49 | switch (c) { | |
50 | /* BS, HT, ESC and FF */ | |
51 | case '\b': case '\t': case '\033': case '\014': | |
52 | stats->printable++; | |
53 | break; | |
54 | default: | |
55 | stats->nonprintable++; | |
56 | } | |
57 | } | |
58 | else | |
59 | stats->printable++; | |
60 | } | |
61 | } | |
62 | ||
63 | /* | |
64 | * The same heuristics as diff.c::mmfile_is_binary() | |
65 | */ | |
66 | static int is_binary(unsigned long size, struct text_stat *stats) | |
67 | { | |
68 | ||
69 | if ((stats->printable >> 7) < stats->nonprintable) | |
70 | return 1; | |
71 | /* | |
72 | * Other heuristics? Average line length might be relevant, | |
73 | * as might LF vs CR vs CRLF counts.. | |
74 | * | |
75 | * NOTE! It might be normal to have a low ratio of CRLF to LF | |
76 | * (somebody starts with a LF-only file and edits it with an editor | |
77 | * that adds CRLF only to lines that are added..). But do we | |
78 | * want to support CR-only? Probably not. | |
79 | */ | |
80 | return 0; | |
81 | } | |
82 | ||
ac78e548 | 83 | static char *crlf_to_git(const char *path, const char *src, unsigned long *sizep, int action) |
6c510bee | 84 | { |
ac78e548 | 85 | char *buffer, *dst; |
6c510bee LT |
86 | unsigned long size, nsize; |
87 | struct text_stat stats; | |
88 | ||
163b9591 | 89 | if ((action == CRLF_BINARY) || (action == CRLF_GUESS && !auto_crlf)) |
ac78e548 | 90 | return NULL; |
6c510bee LT |
91 | |
92 | size = *sizep; | |
93 | if (!size) | |
ac78e548 | 94 | return NULL; |
6c510bee | 95 | |
ac78e548 | 96 | gather_stats(src, size, &stats); |
6c510bee LT |
97 | |
98 | /* No CR? Nothing to convert, regardless. */ | |
99 | if (!stats.cr) | |
ac78e548 | 100 | return NULL; |
6c510bee | 101 | |
163b9591 | 102 | if (action == CRLF_GUESS) { |
201ac8ef JH |
103 | /* |
104 | * We're currently not going to even try to convert stuff | |
105 | * that has bare CR characters. Does anybody do that crazy | |
106 | * stuff? | |
107 | */ | |
108 | if (stats.cr != stats.crlf) | |
ac78e548 | 109 | return NULL; |
201ac8ef JH |
110 | |
111 | /* | |
112 | * And add some heuristics for binary vs text, of course... | |
113 | */ | |
114 | if (is_binary(size, &stats)) | |
ac78e548 | 115 | return NULL; |
201ac8ef | 116 | } |
6c510bee LT |
117 | |
118 | /* | |
67e22ed5 AR |
119 | * Ok, allocate a new buffer, fill it in, and return it |
120 | * to let the caller know that we switched buffers. | |
6c510bee LT |
121 | */ |
122 | nsize = size - stats.crlf; | |
ac78e548 | 123 | buffer = xmalloc(nsize); |
6c510bee | 124 | *sizep = nsize; |
201ac8ef | 125 | |
ac78e548 | 126 | dst = buffer; |
163b9591 JH |
127 | if (action == CRLF_GUESS) { |
128 | /* | |
129 | * If we guessed, we already know we rejected a file with | |
130 | * lone CR, and we can strip a CR without looking at what | |
131 | * follow it. | |
132 | */ | |
201ac8ef | 133 | do { |
ac78e548 | 134 | unsigned char c = *src++; |
201ac8ef | 135 | if (c != '\r') |
ac78e548 | 136 | *dst++ = c; |
201ac8ef JH |
137 | } while (--size); |
138 | } else { | |
139 | do { | |
ac78e548 | 140 | unsigned char c = *src++; |
67e22ed5 | 141 | if (! (c == '\r' && (1 < size && *src == '\n'))) |
ac78e548 | 142 | *dst++ = c; |
201ac8ef JH |
143 | } while (--size); |
144 | } | |
6c510bee | 145 | |
ac78e548 | 146 | return buffer; |
6c510bee LT |
147 | } |
148 | ||
ac78e548 | 149 | static char *crlf_to_worktree(const char *path, const char *src, unsigned long *sizep, int action) |
6c510bee | 150 | { |
ac78e548 | 151 | char *buffer, *dst; |
6c510bee LT |
152 | unsigned long size, nsize; |
153 | struct text_stat stats; | |
154 | unsigned char last; | |
155 | ||
163b9591 JH |
156 | if ((action == CRLF_BINARY) || (action == CRLF_INPUT) || |
157 | (action == CRLF_GUESS && auto_crlf <= 0)) | |
ac78e548 | 158 | return NULL; |
6c510bee LT |
159 | |
160 | size = *sizep; | |
161 | if (!size) | |
ac78e548 | 162 | return NULL; |
6c510bee | 163 | |
ac78e548 | 164 | gather_stats(src, size, &stats); |
6c510bee LT |
165 | |
166 | /* No LF? Nothing to convert, regardless. */ | |
167 | if (!stats.lf) | |
ac78e548 | 168 | return NULL; |
6c510bee LT |
169 | |
170 | /* Was it already in CRLF format? */ | |
171 | if (stats.lf == stats.crlf) | |
ac78e548 | 172 | return NULL; |
6c510bee | 173 | |
163b9591 | 174 | if (action == CRLF_GUESS) { |
201ac8ef JH |
175 | /* If we have any bare CR characters, we're not going to touch it */ |
176 | if (stats.cr != stats.crlf) | |
ac78e548 | 177 | return NULL; |
6c510bee | 178 | |
201ac8ef | 179 | if (is_binary(size, &stats)) |
ac78e548 | 180 | return NULL; |
201ac8ef | 181 | } |
6c510bee LT |
182 | |
183 | /* | |
67e22ed5 AR |
184 | * Ok, allocate a new buffer, fill it in, and return it |
185 | * to let the caller know that we switched buffers. | |
6c510bee LT |
186 | */ |
187 | nsize = size + stats.lf - stats.crlf; | |
ac78e548 | 188 | buffer = xmalloc(nsize); |
6c510bee LT |
189 | *sizep = nsize; |
190 | last = 0; | |
ac78e548 AR |
191 | |
192 | dst = buffer; | |
6c510bee | 193 | do { |
ac78e548 | 194 | unsigned char c = *src++; |
6c510bee | 195 | if (c == '\n' && last != '\r') |
ac78e548 AR |
196 | *dst++ = '\r'; |
197 | *dst++ = c; | |
6c510bee LT |
198 | last = c; |
199 | } while (--size); | |
200 | ||
ac78e548 | 201 | return buffer; |
6c510bee | 202 | } |
35ebfd6a | 203 | |
6073ee85 | 204 | static void setup_convert_check(struct git_attr_check *check) |
35ebfd6a JH |
205 | { |
206 | static struct git_attr *attr_crlf; | |
3fed15f5 | 207 | static struct git_attr *attr_ident; |
35ebfd6a | 208 | |
3fed15f5 | 209 | if (!attr_crlf) { |
35ebfd6a | 210 | attr_crlf = git_attr("crlf", 4); |
3fed15f5 JH |
211 | attr_ident = git_attr("ident", 5); |
212 | } | |
213 | check[0].attr = attr_crlf; | |
214 | check[1].attr = attr_ident; | |
215 | } | |
216 | ||
/*
 * Count the ident keywords in cp[0..size).
 *
 * Two spellings are recognized and are interchangeable:
 *
 *   "$ident: 0000000000000000000000000000000000000000 $" <=> "$ident$"
 *
 * An expanded keyword is counted only if a closing '$' follows the
 * "$ident:" prefix somewhere in the rest of the buffer.
 */
static int count_ident(const char *cp, unsigned long size)
{
	int found = 0;

	for (;;) {
		char marker;

		/* Move up to the next '$', then step over it. */
		while (size && *cp != '$') {
			cp++;
			size--;
		}
		if (!size)
			break;
		cp++;
		size--;

		/* Too few bytes left for "ident" plus one more character. */
		if (size < 6)
			break;
		if (memcmp("ident", cp, 5))
			continue;

		marker = cp[5];
		cp += 6;
		size -= 6;

		if (marker == '$') {
			/* collapsed form: $ident$ */
			found++;
			continue;
		}
		if (marker != ':')
			continue;

		/*
		 * Expanded form "$ident: ... ": consume everything up to
		 * and including the closing dollar sign.
		 */
		while (size) {
			size--;
			if (*cp++ == '$') {
				found++;
				break;
			}
		}
	}

	return found;
}
256 | ||
/*
 * Collapse every expanded "$ident: ... $" keyword in src[0..*sizep)
 * back to "$ident$".
 *
 * Returns NULL, leaving *sizep alone, when "ident" is zero or
 * count_ident() finds no keyword.  Otherwise returns a buffer obtained
 * from xmalloc() and stores the length of the result in *sizep.  The
 * result is never longer than the input, so the input size is enough
 * for the allocation.
 *
 * "path" is not used here.
 */
static char *ident_to_git(const char *path, const char *src, unsigned long *sizep, int ident)
{
	int cnt;
	unsigned long size;
	char *dst, *buf;

	if (!ident)
		return NULL;
	size = *sizep;
	cnt = count_ident(src, size);
	if (!cnt)
		return NULL;
	buf = xmalloc(size);

	for (dst = buf; size; size--) {
		const char *cp;
		unsigned long rem;
		char ch = *src++;

		*dst++ = ch;

		/*
		 * "size" still counts the '$' that was just copied, so only
		 * size - 1 bytes are left at src.  Comparing six bytes
		 * against "ident:" therefore needs size >= 7; anything
		 * smaller would read past the end of the input.
		 */
		if (ch != '$' || size < 7 || memcmp("ident:", src, 6))
			continue;

		/*
		 * Look for the closing '$' among the size - 7 bytes that
		 * follow "ident:", never touching a byte beyond them.
		 */
		cp = src + 6;
		rem = size - 7;
		while (rem && *cp != '$') {
			cp++;
			rem--;
		}
		if (!rem)
			continue;	/* unterminated: copy it through verbatim */

		/* Emit the collapsed keyword and skip past the closing '$'. */
		memcpy(dst, "ident$", 6);
		dst += 6;
		cp++;
		size -= (cp - src);
		src = cp;
	}

	*sizep = dst - buf;
	return buf;
}
296 | ||
/*
 * Expand every "$ident$" (or re-expand every "$ident: ... $") keyword
 * in src[0..*sizep) to "$ident: <40 hex digits> $".  The hex digits
 * come from hash_sha1_file() run over the incoming buffer as a "blob".
 *
 * Returns NULL, leaving *sizep alone, when "ident" is zero or
 * count_ident() finds no keyword.  Otherwise returns a buffer obtained
 * from xmalloc() and stores the length of the result in *sizep.
 *
 * "path" is not used here.
 */
static char *ident_to_worktree(const char *path, const char *src, unsigned long *sizep, int ident)
{
	int cnt;
	unsigned long size;
	char *dst, *buf;
	unsigned char sha1[20];

	if (!ident)
		return NULL;

	size = *sizep;
	cnt = count_ident(src, size);
	if (!cnt)
		return NULL;

	hash_sha1_file(src, size, "blob", sha1);

	/*
	 * The largest growth per keyword is "$ident$" (7 bytes) becoming
	 * "$ident: " + 40 hex digits + " $" (50 bytes), i.e. 43 bytes.
	 */
	buf = xmalloc(size + cnt * 43);

	for (dst = buf; size; size--) {
		const char *cp;
		char ch = *src++;

		*dst++ = ch;

		/*
		 * "size" still counts the '$' that was just copied, so only
		 * size - 1 bytes are left at src.  Looking at src[5] needs
		 * six of them, hence size >= 7.
		 */
		if (ch != '$' || size < 7 || memcmp("ident", src, 5))
			continue;

		if (src[5] == ':') {
			/*
			 * Discard up to but not including the closing '$',
			 * searching only the size - 7 bytes after "ident:".
			 */
			unsigned long rem = size - 7;

			cp = src + 6;
			while (rem && *cp != '$') {
				cp++;
				rem--;
			}
			if (!rem)
				continue;	/* unterminated: leave as is */
		} else if (src[5] == '$')
			cp = src + 5;
		else
			continue;

		/* In both cases cp now points at the closing '$'. */
		memcpy(dst, "ident: ", 7);
		dst += 7;
		memcpy(dst, sha1_to_hex(sha1), 40);
		dst += 40;
		*dst++ = ' ';

		/*
		 * Account exactly once for the bytes skipped between src and
		 * the closing '$', then copy that '$' itself.  The opening
		 * '$' is accounted for by the loop's own size--.
		 */
		size -= (cp - src);
		src = cp;
		*dst++ = *src++;
		size--;
	}

	*sizep = dst - buf;
	return buf;
}
354 | ||
6073ee85 | 355 | static int git_path_check_crlf(const char *path, struct git_attr_check *check) |
35ebfd6a | 356 | { |
6073ee85 JH |
357 | const char *value = check->value; |
358 | ||
359 | if (ATTR_TRUE(value)) | |
360 | return CRLF_TEXT; | |
361 | else if (ATTR_FALSE(value)) | |
362 | return CRLF_BINARY; | |
363 | else if (ATTR_UNSET(value)) | |
364 | ; | |
365 | else if (!strcmp(value, "input")) | |
366 | return CRLF_INPUT; | |
163b9591 | 367 | return CRLF_GUESS; |
35ebfd6a JH |
368 | } |
369 | ||
3fed15f5 JH |
370 | static int git_path_check_ident(const char *path, struct git_attr_check *check) |
371 | { | |
372 | const char *value = check->value; | |
373 | ||
374 | return !!ATTR_TRUE(value); | |
375 | } | |
376 | ||
ac78e548 | 377 | char *convert_to_git(const char *path, const char *src, unsigned long *sizep) |
35ebfd6a | 378 | { |
3fed15f5 | 379 | struct git_attr_check check[2]; |
6073ee85 | 380 | int crlf = CRLF_GUESS; |
3fed15f5 JH |
381 | int ident = 0; |
382 | char *buf, *buf2; | |
6073ee85 JH |
383 | |
384 | setup_convert_check(check); | |
3fed15f5 JH |
385 | if (!git_checkattr(path, ARRAY_SIZE(check), check)) { |
386 | crlf = git_path_check_crlf(path, check + 0); | |
387 | ident = git_path_check_ident(path, check + 1); | |
388 | } | |
389 | ||
390 | buf = crlf_to_git(path, src, sizep, crlf); | |
391 | ||
392 | buf2 = ident_to_git(path, buf ? buf : src, sizep, ident); | |
393 | if (buf2) { | |
394 | free(buf); | |
395 | buf = buf2; | |
6073ee85 | 396 | } |
3fed15f5 JH |
397 | |
398 | return buf; | |
35ebfd6a JH |
399 | } |
400 | ||
ac78e548 | 401 | char *convert_to_working_tree(const char *path, const char *src, unsigned long *sizep) |
35ebfd6a | 402 | { |
3fed15f5 | 403 | struct git_attr_check check[2]; |
6073ee85 | 404 | int crlf = CRLF_GUESS; |
3fed15f5 JH |
405 | int ident = 0; |
406 | char *buf, *buf2; | |
6073ee85 JH |
407 | |
408 | setup_convert_check(check); | |
3fed15f5 JH |
409 | if (!git_checkattr(path, ARRAY_SIZE(check), check)) { |
410 | crlf = git_path_check_crlf(path, check + 0); | |
411 | ident = git_path_check_ident(path, check + 1); | |
6073ee85 | 412 | } |
3fed15f5 JH |
413 | |
414 | buf = ident_to_worktree(path, src, sizep, ident); | |
415 | ||
416 | buf2 = crlf_to_worktree(path, buf ? buf : src, sizep, crlf); | |
417 | if (buf2) { | |
418 | free(buf); | |
419 | buf = buf2; | |
420 | } | |
421 | ||
422 | return buf; | |
35ebfd6a | 423 | } |