]> git.ipfire.org Git - thirdparty/git.git/blame - convert.c
Split out mailmap handling out of shortlog
[thirdparty/git.git] / convert.c
CommitLineData
6c510bee 1#include "cache.h"
35ebfd6a
JH
2#include "attr.h"
3
6c510bee
LT
4/*
5 * convert.c - convert a file when checking it out and checking it in.
6 *
7 * This should use the pathname to decide on whether it wants to do some
8 * more interesting conversions (automatic gzip/unzip, general format
9 * conversions etc etc), but by default it just does automatic CRLF<->LF
10 * translation when the "auto_crlf" option is set.
11 */
12
163b9591
JH
/*
 * Conversion actions handed to crlf_to_git() and crlf_to_worktree().
 */
enum {
	/* no usable "crlf" attribute: fall back to auto_crlf and heuristics */
	CRLF_GUESS = -1,
	/* "crlf" attribute is false: never convert */
	CRLF_BINARY = 0,
	/* "crlf" attribute is true: convert without guessing */
	CRLF_TEXT = 1,
	/* "crlf" attribute is "input": convert towards git only */
	CRLF_INPUT = 2
};
17
6c510bee
LT
struct text_stat {
	/* How many CR bytes, LF bytes and CR-LF pairs were seen */
	unsigned cr, lf, crlf;

	/* Rough tallies only, not an exact classification */
	unsigned printable, nonprintable;
};

/*
 * Scan the first "size" bytes of "buf" and fill "*stats" with
 * line-ending and printable/non-printable byte counts.  "*stats"
 * is zeroed first, so the caller need not initialize it.
 */
static void gather_stats(const char *buf, unsigned long size, struct text_stat *stats)
{
	unsigned long pos;

	memset(stats, 0, sizeof(*stats));

	for (pos = 0; pos < size; pos++) {
		unsigned char ch = buf[pos];

		switch (ch) {
		case '\r':
			stats->cr++;
			/* a CR immediately followed by LF also counts as a pair */
			if (pos + 1 < size && buf[pos + 1] == '\n')
				stats->crlf++;
			break;
		case '\n':
			stats->lf++;
			break;
		/* BS, HT, ESC and FF are treated as text */
		case '\b': case '\t': case '\033': case '\014':
			stats->printable++;
			break;
		default:
			/* remaining control characters and DEL */
			if (ch < 32 || ch == 127)
				stats->nonprintable++;
			else
				stats->printable++;
			break;
		}
	}
}
61
62/*
63 * The same heuristics as diff.c::mmfile_is_binary()
64 */
65static int is_binary(unsigned long size, struct text_stat *stats)
66{
67
68 if ((stats->printable >> 7) < stats->nonprintable)
69 return 1;
70 /*
71 * Other heuristics? Average line length might be relevant,
72 * as might LF vs CR vs CRLF counts..
73 *
74 * NOTE! It might be normal to have a low ratio of CRLF to LF
75 * (somebody starts with a LF-only file and edits it with an editor
76 * that adds CRLF only to lines that are added..). But do we
77 * want to support CR-only? Probably not.
78 */
79 return 0;
80}
81
ac78e548 82static char *crlf_to_git(const char *path, const char *src, unsigned long *sizep, int action)
6c510bee 83{
ac78e548 84 char *buffer, *dst;
6c510bee
LT
85 unsigned long size, nsize;
86 struct text_stat stats;
87
163b9591 88 if ((action == CRLF_BINARY) || (action == CRLF_GUESS && !auto_crlf))
ac78e548 89 return NULL;
6c510bee
LT
90
91 size = *sizep;
92 if (!size)
ac78e548 93 return NULL;
6c510bee 94
ac78e548 95 gather_stats(src, size, &stats);
6c510bee
LT
96
97 /* No CR? Nothing to convert, regardless. */
98 if (!stats.cr)
ac78e548 99 return NULL;
6c510bee 100
163b9591 101 if (action == CRLF_GUESS) {
201ac8ef
JH
102 /*
103 * We're currently not going to even try to convert stuff
104 * that has bare CR characters. Does anybody do that crazy
105 * stuff?
106 */
107 if (stats.cr != stats.crlf)
ac78e548 108 return NULL;
201ac8ef
JH
109
110 /*
111 * And add some heuristics for binary vs text, of course...
112 */
113 if (is_binary(size, &stats))
ac78e548 114 return NULL;
201ac8ef 115 }
6c510bee
LT
116
117 /*
67e22ed5
AR
118 * Ok, allocate a new buffer, fill it in, and return it
119 * to let the caller know that we switched buffers.
6c510bee
LT
120 */
121 nsize = size - stats.crlf;
ac78e548 122 buffer = xmalloc(nsize);
6c510bee 123 *sizep = nsize;
201ac8ef 124
ac78e548 125 dst = buffer;
163b9591
JH
126 if (action == CRLF_GUESS) {
127 /*
128 * If we guessed, we already know we rejected a file with
129 * lone CR, and we can strip a CR without looking at what
130 * follow it.
131 */
201ac8ef 132 do {
ac78e548 133 unsigned char c = *src++;
201ac8ef 134 if (c != '\r')
ac78e548 135 *dst++ = c;
201ac8ef
JH
136 } while (--size);
137 } else {
138 do {
ac78e548 139 unsigned char c = *src++;
67e22ed5 140 if (! (c == '\r' && (1 < size && *src == '\n')))
ac78e548 141 *dst++ = c;
201ac8ef
JH
142 } while (--size);
143 }
6c510bee 144
ac78e548 145 return buffer;
6c510bee
LT
146}
147
ac78e548 148static char *crlf_to_worktree(const char *path, const char *src, unsigned long *sizep, int action)
6c510bee 149{
ac78e548 150 char *buffer, *dst;
6c510bee
LT
151 unsigned long size, nsize;
152 struct text_stat stats;
153 unsigned char last;
154
163b9591
JH
155 if ((action == CRLF_BINARY) || (action == CRLF_INPUT) ||
156 (action == CRLF_GUESS && auto_crlf <= 0))
ac78e548 157 return NULL;
6c510bee
LT
158
159 size = *sizep;
160 if (!size)
ac78e548 161 return NULL;
6c510bee 162
ac78e548 163 gather_stats(src, size, &stats);
6c510bee
LT
164
165 /* No LF? Nothing to convert, regardless. */
166 if (!stats.lf)
ac78e548 167 return NULL;
6c510bee
LT
168
169 /* Was it already in CRLF format? */
170 if (stats.lf == stats.crlf)
ac78e548 171 return NULL;
6c510bee 172
163b9591 173 if (action == CRLF_GUESS) {
201ac8ef
JH
174 /* If we have any bare CR characters, we're not going to touch it */
175 if (stats.cr != stats.crlf)
ac78e548 176 return NULL;
6c510bee 177
201ac8ef 178 if (is_binary(size, &stats))
ac78e548 179 return NULL;
201ac8ef 180 }
6c510bee
LT
181
182 /*
67e22ed5
AR
183 * Ok, allocate a new buffer, fill it in, and return it
184 * to let the caller know that we switched buffers.
6c510bee
LT
185 */
186 nsize = size + stats.lf - stats.crlf;
ac78e548 187 buffer = xmalloc(nsize);
6c510bee
LT
188 *sizep = nsize;
189 last = 0;
ac78e548
AR
190
191 dst = buffer;
6c510bee 192 do {
ac78e548 193 unsigned char c = *src++;
6c510bee 194 if (c == '\n' && last != '\r')
ac78e548
AR
195 *dst++ = '\r';
196 *dst++ = c;
6c510bee
LT
197 last = c;
198 } while (--size);
199
ac78e548 200 return buffer;
6c510bee 201}
35ebfd6a 202
6073ee85 203static void setup_convert_check(struct git_attr_check *check)
35ebfd6a
JH
204{
205 static struct git_attr *attr_crlf;
206
207 if (!attr_crlf)
208 attr_crlf = git_attr("crlf", 4);
209 check->attr = attr_crlf;
210}
211
6073ee85 212static int git_path_check_crlf(const char *path, struct git_attr_check *check)
35ebfd6a 213{
6073ee85
JH
214 const char *value = check->value;
215
216 if (ATTR_TRUE(value))
217 return CRLF_TEXT;
218 else if (ATTR_FALSE(value))
219 return CRLF_BINARY;
220 else if (ATTR_UNSET(value))
221 ;
222 else if (!strcmp(value, "input"))
223 return CRLF_INPUT;
163b9591 224 return CRLF_GUESS;
35ebfd6a
JH
225}
226
ac78e548 227char *convert_to_git(const char *path, const char *src, unsigned long *sizep)
35ebfd6a 228{
6073ee85
JH
229 struct git_attr_check check[1];
230 int crlf = CRLF_GUESS;
231
232 setup_convert_check(check);
233 if (!git_checkattr(path, 1, check)) {
234 crlf = git_path_check_crlf(path, check);
235 }
236 return crlf_to_git(path, src, sizep, crlf);
35ebfd6a
JH
237}
238
ac78e548 239char *convert_to_working_tree(const char *path, const char *src, unsigned long *sizep)
35ebfd6a 240{
6073ee85
JH
241 struct git_attr_check check[1];
242 int crlf = CRLF_GUESS;
243
244 setup_convert_check(check);
245 if (!git_checkattr(path, 1, check)) {
246 crlf = git_path_check_crlf(path, check);
247 }
248 return crlf_to_worktree(path, src, sizep, crlf);
35ebfd6a 249}