]>
Commit | Line | Data |
---|---|---|
2744b234 LT |
1 | /* |
2 | * Another stupid program, this one parsing the headers of an | |
3 | * email to figure out authorship and subject | |
4 | */ | |
f1f909e3 | 5 | #include "cache.h" |
34488e3c | 6 | #include "builtin.h" |
b45974a6 | 7 | #include "utf8.h" |
2744b234 | 8 | |
/* Streams: commit message output, patch output, mail input, info output. */
static FILE *cmitmsg;
static FILE *patchfile;
static FILE *fin;
static FILE *fout;

/* Nonzero: emit the Subject as-is instead of cleaning it up. */
static int keep_subject;
/* Charset the meta information is converted to; NULL disables conversion. */
static const char *metainfo_charset;

/* Current input line and the author identity extracted from "From". */
static char line[1000];
static char name[1000];
static char email[1000];

/* Content-Transfer-Encoding of the part currently being read. */
static enum {
	TE_DONTCARE,
	TE_QP,
	TE_BASE64,
} transfer_encoding;

/* Whether the current part was declared as some "text/" type. */
static enum {
	TYPE_TEXT,
	TYPE_OTHER,
} message_type;

/* Lower-cased charset= attribute of the current part ("" when absent). */
static char charset[256];
/* Number of chunks written to the patch file so far. */
static int patch_lines;
/* Header values taken from the mail header (p_) and from the body (s_). */
static char **p_hdr_data;
static char **s_hdr_data;

#define MAX_HDR_PARSED 10
#define MAX_BOUNDARIES 5
d4a9ce78 | 30 | |
2744b234 LT |
/*
 * Decide whether "name" is usable as an author name.
 *
 * Returns "name" when it is between 3 and 60 bytes long and contains
 * none of '@', '<' or '>'; otherwise returns "email" as the fallback.
 */
static char *sanity_check(char *name, char *email)
{
	static const char suspicious[] = "@<>";
	int len = strlen(name);

	if (len < 3 || len > 60)
		return email;
	return strpbrk(name, suspicious) ? email : name;
}
40 | ||
e0e3ba20 JH |
41 | static int bogus_from(char *line) |
42 | { | |
43 | /* John Doe <johndoe> */ | |
44 | char *bra, *ket, *dst, *cp; | |
45 | ||
46 | /* This is fallback, so do not bother if we already have an | |
47 | * e-mail address. | |
34488e3c | 48 | */ |
e0e3ba20 JH |
49 | if (*email) |
50 | return 0; | |
51 | ||
52 | bra = strchr(line, '<'); | |
53 | if (!bra) | |
54 | return 0; | |
55 | ket = strchr(bra, '>'); | |
56 | if (!ket) | |
57 | return 0; | |
58 | ||
59 | for (dst = email, cp = bra+1; cp < ket; ) | |
60 | *dst++ = *cp++; | |
61 | *dst = 0; | |
62 | for (cp = line; isspace(*cp); cp++) | |
63 | ; | |
64 | for (bra--; isspace(*bra); bra--) | |
65 | *bra = 0; | |
66 | cp = sanity_check(cp, email); | |
67 | strcpy(name, cp); | |
68 | return 1; | |
69 | } | |
70 | ||
2dec02b1 | 71 | static int handle_from(char *in_line) |
2744b234 | 72 | { |
2dec02b1 EB |
73 | char line[1000]; |
74 | char *at; | |
2744b234 LT |
75 | char *dst; |
76 | ||
2dec02b1 EB |
77 | strcpy(line, in_line); |
78 | at = strchr(line, '@'); | |
2744b234 | 79 | if (!at) |
e0e3ba20 | 80 | return bogus_from(line); |
2744b234 LT |
81 | |
82 | /* | |
83 | * If we already have one email, don't take any confusing lines | |
84 | */ | |
85 | if (*email && strchr(at+1, '@')) | |
86 | return 0; | |
87 | ||
d4a9ce78 JH |
88 | /* Pick up the string around '@', possibly delimited with <> |
89 | * pair; that is the email part. White them out while copying. | |
90 | */ | |
2744b234 LT |
91 | while (at > line) { |
92 | char c = at[-1]; | |
d4a9ce78 JH |
93 | if (isspace(c)) |
94 | break; | |
95 | if (c == '<') { | |
96 | at[-1] = ' '; | |
2744b234 | 97 | break; |
d4a9ce78 | 98 | } |
2744b234 LT |
99 | at--; |
100 | } | |
101 | dst = email; | |
102 | for (;;) { | |
103 | unsigned char c = *at; | |
d4a9ce78 JH |
104 | if (!c || c == '>' || isspace(c)) { |
105 | if (c == '>') | |
106 | *at = ' '; | |
2744b234 | 107 | break; |
d4a9ce78 | 108 | } |
2744b234 LT |
109 | *at++ = ' '; |
110 | *dst++ = c; | |
111 | } | |
112 | *dst++ = 0; | |
113 | ||
d4a9ce78 JH |
114 | /* The remainder is name. It could be "John Doe <john.doe@xz>" |
115 | * or "john.doe@xz (John Doe)", but we have whited out the | |
116 | * email part, so trim from both ends, possibly removing | |
117 | * the () pair at the end. | |
118 | */ | |
2744b234 LT |
119 | at = line + strlen(line); |
120 | while (at > line) { | |
121 | unsigned char c = *--at; | |
d4a9ce78 JH |
122 | if (!isspace(c)) { |
123 | at[(c == ')') ? 0 : 1] = 0; | |
2744b234 | 124 | break; |
d4a9ce78 | 125 | } |
2744b234 LT |
126 | } |
127 | ||
128 | at = line; | |
129 | for (;;) { | |
130 | unsigned char c = *at; | |
d4a9ce78 JH |
131 | if (!c || !isspace(c)) { |
132 | if (c == '(') | |
133 | at++; | |
2744b234 | 134 | break; |
d4a9ce78 | 135 | } |
2744b234 LT |
136 | at++; |
137 | } | |
2744b234 | 138 | at = sanity_check(at, email); |
2744b234 LT |
139 | strcpy(name, at); |
140 | return 1; | |
141 | } | |
142 | ||
/*
 * Copy the part of "line" that starts at byte offset "ofs" into "data".
 * Returns 0 after copying, or 1 (copying nothing) when either pointer
 * is NULL.  The caller must make "data" large enough.
 */
static int handle_header(char *line, char *data, int ofs)
{
	if (line && data) {
		strcpy(data, line + ofs);
		return 0;
	}
	return 1;
}
152 | ||
/* NOTE NOTE NOTE.  We do not claim we do full MIME.  We just attempt
 * to have enough heuristics to grok MIME encoded patches often found
 * on our mailing lists.  For example, header names are matched
 * case-insensitively, but the in-body ">From" and "[PATCH]" markers
 * are matched byte for byte.
 */
158 | ||
/*
 * Extract the value that follows "name" (e.g. "charset=") in "line".
 *
 * The name is located case-insensitively.  A value that starts with a
 * double quote runs up to the next double quote; an unquoted value
 * runs up to the first ';', space or tab.  The value is stored in
 * "attr" and 1 is returned; when the name is absent "attr" is set to
 * the empty string and 0 is returned.  "attr" must be large enough to
 * hold the value; no size is checked here.
 */
static int slurp_attr(const char *line, const char *name, char *attr)
{
	const char *value = strcasestr(line, name);
	const char *terminators = "; \t";
	size_t n;

	if (value == NULL) {
		attr[0] = 0;
		return 0;
	}

	value += strlen(name);
	if (*value == '"') {
		terminators = "\"";
		value++;
	}
	n = strcspn(value, terminators);
	memcpy(attr, value, n);
	attr[n] = 0;
	return 1;
}
180 | ||
87ab7992 DZ |
181 | struct content_type { |
182 | char *boundary; | |
183 | int boundary_len; | |
184 | }; | |
185 | ||
186 | static struct content_type content[MAX_BOUNDARIES]; | |
187 | ||
188 | static struct content_type *content_top = content; | |
189 | ||
190 | static int handle_content_type(char *line) | |
d4a9ce78 | 191 | { |
87ab7992 DZ |
192 | char boundary[256]; |
193 | ||
194 | if (strcasestr(line, "text/") == NULL) | |
195 | message_type = TYPE_OTHER; | |
196 | if (slurp_attr(line, "boundary=", boundary + 2)) { | |
197 | memcpy(boundary, "--", 2); | |
198 | if (content_top++ >= &content[MAX_BOUNDARIES]) { | |
199 | fprintf(stderr, "Too many boundaries to handle\n"); | |
200 | exit(1); | |
201 | } | |
202 | content_top->boundary_len = strlen(boundary); | |
203 | content_top->boundary = xmalloc(content_top->boundary_len+1); | |
204 | strcpy(content_top->boundary, boundary); | |
b893f091 | 205 | } |
87ab7992 | 206 | if (slurp_attr(line, "charset=", charset)) { |
d4a9ce78 JH |
207 | int i, c; |
208 | for (i = 0; (c = charset[i]) != 0; i++) | |
209 | charset[i] = tolower(c); | |
210 | } | |
211 | return 0; | |
212 | } | |
213 | ||
d4a9ce78 JH |
214 | static int handle_content_transfer_encoding(char *line) |
215 | { | |
216 | if (strcasestr(line, "base64")) | |
217 | transfer_encoding = TE_BASE64; | |
218 | else if (strcasestr(line, "quoted-printable")) | |
219 | transfer_encoding = TE_QP; | |
220 | else | |
221 | transfer_encoding = TE_DONTCARE; | |
222 | return 0; | |
2744b234 LT |
223 | } |
224 | ||
d4a9ce78 JH |
225 | static int is_multipart_boundary(const char *line) |
226 | { | |
87ab7992 | 227 | return (!memcmp(line, content_top->boundary, content_top->boundary_len)); |
d4a9ce78 JH |
228 | } |
229 | ||
/*
 * Strip trailing whitespace from "line" in place and return the
 * length of what is left.
 */
static int eatspace(char *line)
{
	char *end = line + strlen(line);

	while (end > line && isspace(end[-1]))
		*--end = 0;
	return (int)(end - line);
}
237 | ||
/*
 * Strip noise from the front of a subject line and return a pointer
 * to what remains (inside the same buffer).
 *
 * Repeatedly removes, from the left:
 *   - "re:" / "Re:" ('r' or 'R' followed by "e:"),
 *   - blanks, tabs and colons,
 *   - a "[...]" tag, provided the text up to ']' is no longer than
 *     twice what follows from ']' on; a '[' without any ']' is
 *     dropped on its own.
 * Trailing whitespace is removed once nothing more can be stripped.
 */
static char *cleanup_subject(char *subject)
{
	int stripped = 1;

	while (stripped) {
		char *close;
		int rest, skipped;

		stripped = 0;
		switch (*subject) {
		case 'r': case 'R':
			if (memcmp("e:", subject + 1, 2) == 0) {
				subject += 3;
				stripped = 1;
			}
			break;
		case ' ': case '\t': case ':':
			subject++;
			stripped = 1;
			break;
		case '[':
			close = strchr(subject, ']');
			if (close == NULL) {
				subject++;
				stripped = 1;
				break;
			}
			rest = strlen(close);
			skipped = close - subject;
			if (skipped <= rest * 2) {
				subject = close + 1;
				stripped = 1;
			}
			break;
		}
	}
	eatspace(subject);
	return subject;
}
2744b234 LT |
272 | |
/*
 * Collapse every run of whitespace in "buf" into a single blank,
 * in place.  Leading and trailing runs are reduced to one blank as
 * well, not removed.
 *
 * Done in one pass with a read and a write cursor; the write cursor
 * never overtakes the read cursor, so no shifting of the tail is
 * needed.
 */
static void cleanup_space(char *buf)
{
	char *src = buf;
	char *dst = buf;

	while (*src) {
		unsigned char c = *src++;
		if (isspace(c)) {
			*dst++ = ' ';
			while (isspace((unsigned char)*src))
				src++;
		} else {
			*dst++ = c;
		}
	}
	*dst = 0;
}
289 | ||
8dabdfcc | 290 | static void decode_header(char *it, unsigned itsize); |
538dfe73 | 291 | static const char *header[MAX_HDR_PARSED] = { |
87ab7992 | 292 | "From","Subject","Date", |
d4a9ce78 JH |
293 | }; |
294 | ||
8dabdfcc | 295 | static int check_header(char *line, unsigned linesize, char **hdr_data, int overwrite) |
d4a9ce78 JH |
296 | { |
297 | int i; | |
298 | ||
87ab7992 DZ |
299 | /* search for the interesting parts */ |
300 | for (i = 0; header[i]; i++) { | |
301 | int len = strlen(header[i]); | |
86747c13 | 302 | if ((!hdr_data[i] || overwrite) && |
87ab7992 | 303 | !strncasecmp(line, header[i], len) && |
d4a9ce78 | 304 | line[len] == ':' && isspace(line[len + 1])) { |
33504530 EB |
305 | /* Unwrap inline B and Q encoding, and optionally |
306 | * normalize the meta information to utf8. | |
307 | */ | |
8dabdfcc | 308 | decode_header(line + len + 2, linesize - len - 2); |
87ab7992 DZ |
309 | hdr_data[i] = xmalloc(1000 * sizeof(char)); |
310 | if (! handle_header(line, hdr_data[i], len + 2)) { | |
311 | return 1; | |
312 | } | |
d4a9ce78 JH |
313 | } |
314 | } | |
d4a9ce78 | 315 | |
87ab7992 DZ |
316 | /* Content stuff */ |
317 | if (!strncasecmp(line, "Content-Type", 12) && | |
318 | line[12] == ':' && isspace(line[12 + 1])) { | |
8dabdfcc | 319 | decode_header(line + 12 + 2, linesize - 12 - 2); |
87ab7992 DZ |
320 | if (! handle_content_type(line)) { |
321 | return 1; | |
322 | } | |
323 | } | |
324 | if (!strncasecmp(line, "Content-Transfer-Encoding", 25) && | |
325 | line[25] == ':' && isspace(line[25 + 1])) { | |
8dabdfcc | 326 | decode_header(line + 25 + 2, linesize - 25 - 2); |
87ab7992 DZ |
327 | if (! handle_content_transfer_encoding(line)) { |
328 | return 1; | |
329 | } | |
330 | } | |
331 | ||
332 | /* for inbody stuff */ | |
333 | if (!memcmp(">From", line, 5) && isspace(line[5])) | |
334 | return 1; | |
335 | if (!memcmp("[PATCH]", line, 7) && isspace(line[7])) { | |
336 | for (i = 0; header[i]; i++) { | |
e9fe804a LS |
337 | if (!memcmp("Subject", header[i], 7)) { |
338 | if (!hdr_data[i]) | |
339 | hdr_data[i] = xmalloc(linesize + 20); | |
87ab7992 DZ |
340 | if (! handle_header(line, hdr_data[i], 0)) { |
341 | return 1; | |
342 | } | |
343 | } | |
344 | } | |
345 | } | |
346 | ||
347 | /* no match */ | |
348 | return 0; | |
d4a9ce78 JH |
349 | } |
350 | ||
ef29c117 JH |
/*
 * Tell whether "line" looks like an RFC 2822 header field.
 *
 * mbox "From " / ">From " lines count as headers.  Otherwise the line
 * must reach a ':' after nothing but ftext characters (printable
 * US-ASCII 33..126, the colon itself excluded):
 *
 *   optional-field = field-name ":" unstructured CRLF
 *   field-name     = 1*ftext
 *   ftext          = %d33-57 / %59-126
 *
 * Note that a line starting with ':' (an empty field name) is
 * accepted as well.
 */
static int is_rfc2822_header(char *line)
{
	const char *cp;

	/* Count mbox From headers as headers */
	if (!memcmp(line, "From ", 5) || !memcmp(line, ">From ", 6))
		return 1;

	for (cp = line; *cp; cp++) {
		int ch = *cp;

		if (ch == ':')
			return 1;
		if (ch < 33 || 126 < ch)
			return 0;
	}
	return 0;
}
378 | ||
34fc5cef LT |
379 | /* |
380 | * sz is size of 'line' buffer in bytes. Must be reasonably | |
381 | * long enough to hold one physical real-world e-mail line. | |
382 | */ | |
d4a9ce78 JH |
/*
 * Read one logical header line from "in" into "line" (of "sz" bytes),
 * joining folded continuation lines.
 *
 * Returns 0 at end of input, or when the line read is empty or not a
 * header; in the latter case the line is left in "line" with its
 * newline put back.  Returns 1 with the unfolded header otherwise.
 * Each continuation line is appended with its first character replaced
 * by '\n'; continuation text that does not fit is dropped.
 */
static int read_one_header_line(char *line, int sz, FILE *in)
{
	static char continuation[1000];
	int len;

	/*
	 * Keep one byte in reserve so that the newline and NUL re-added
	 * below always fit.
	 */
	sz--;

	/* Get the first part of the line. */
	if (fgets(line, sz, in) == NULL)
		return 0;

	/* An empty line or a non-header ends the header block. */
	len = eatspace(line);
	if (len == 0 || !is_rfc2822_header(line)) {
		/* Re-add the newline */
		line[len] = '\n';
		line[len + 1] = '\0';
		return 0;
	}

	/* Swallow the continuation lines (2822 header "folding"). */
	while (1) {
		int next = fgetc(in);
		int extra;

		ungetc(next, in);
		if (next != ' ' && next != '\t')
			break;
		if (fgets(continuation, sizeof(continuation), in) == NULL)
			break;
		extra = eatspace(continuation);
		if (len >= sz - 1)
			continue;	/* no room left; discard */
		if (extra >= sz - len)
			extra = sz - len - 1;
		memcpy(line + len, continuation, extra);
		line[len] = '\n';
		len += extra;
	}
	line[len] = 0;

	return 1;
}
436 | ||
/*
 * Decode quoted-printable text.
 *
 * in      - NUL-terminated input; only bytes before "ep" are consumed
 * ot      - output buffer (may be the same memory as "in")
 * otsize  - size of the output buffer in bytes
 * ep      - end of the input segment
 * rfc2047 - nonzero to map '_' to a blank (RFC 2047 section 4.2 (2))
 *
 * Returns the number of bytes decoded, with "ot" NUL-terminated, or -1
 * when the output (including its terminating NUL) does not fit.  A
 * "=\n" soft line break, or an escape cut short by the end of the
 * string, ends the decoding.
 */
static int decode_q_segment(char *in, char *ot, unsigned otsize, char *ep, int rfc2047)
{
	char *otbegin = ot;
	char *otend = ot + otsize;
	int c;

	/* not even room for the terminating NUL */
	if (!otsize)
		return -1;

	while ((c = *in++) != 0 && (in <= ep)) {
		if (c == '=') {
			int hi = *in++;
			int lo;

			if (hi == '\n' || !hi)
				break; /* drop trailing newline */
			lo = *in;
			if (!lo)
				break; /* "=X" at end of string: do not read past the NUL */
			in++;
			/* cast: keep 8-bit input from becoming a negative value */
			c = (hexval((unsigned char)hi) << 4) |
			    hexval((unsigned char)lo);
		}
		else if (rfc2047 && c == '_') /* rfc2047 4.2 (2) */
			c = 0x20;

		/* always keep one byte free for the terminating NUL */
		if (otend - ot < 2) {
			*ot = '\0';
			return -1;
		}
		*ot++ = c;
	}
	*ot = 0;
	return (int)(ot - otbegin);
}
461 | ||
/*
 * Decode base64 text.
 *
 * in     - NUL-terminated input; only bytes before "ep" are consumed
 * ot     - output buffer (may be the same memory as "in")
 * otsize - size of the output buffer in bytes
 * ep     - end of the input segment
 *
 * Characters outside the base64 alphabet are skipped.  '=' padding is
 * treated as a zero sextet, so it can contribute NUL bytes to the
 * output.  Returns the number of bytes decoded, with "ot"
 * NUL-terminated, or -1 when the output (including its terminating
 * NUL) does not fit.
 */
static int decode_b_segment(char *in, char *ot, unsigned otsize, char *ep)
{
	/* Decode in..ep, possibly in-place to ot */
	int c, pos = 0, acc = 0;
	char *otbegin = ot;
	char *otend = ot + otsize;

	/* not even room for the terminating NUL */
	if (!otsize)
		return -1;

	while ((c = *in++) != 0 && (in <= ep)) {
		int byte;

		if (c == '+')
			c = 62;
		else if (c == '/')
			c = 63;
		else if ('A' <= c && c <= 'Z')
			c -= 'A';
		else if ('a' <= c && c <= 'z')
			c -= 'a' - 26;
		else if ('0' <= c && c <= '9')
			c -= '0' - 52;
		else if (c == '=') {
			/* padding is almost like (c == 0), except we do
			 * not output NUL resulting only from it;
			 * for now we just trust the data.
			 */
			c = 0;
		}
		else
			continue; /* garbage */

		switch (pos++) {
		case 0:
			/* first sextet of a group: nothing to emit yet */
			acc = (c << 2);
			continue;
		case 1:
			byte = (acc | (c >> 4));
			acc = (c & 15) << 4;
			break;
		case 2:
			byte = (acc | (c >> 2));
			acc = (c & 3) << 6;
			break;
		default:
			byte = (acc | c);
			acc = pos = 0;
			break;
		}

		/* always keep one byte free for the terminating NUL */
		if (otend - ot < 2) {
			*ot = '\0';
			return -1;
		}
		*ot++ = byte;
	}
	*ot = 0;
	return (int)(ot - otbegin);
}
514 | ||
b59d398b LT |
515 | /* |
516 | * When there is no known charset, guess. | |
517 | * | |
518 | * Right now we assume that if the target is UTF-8 (the default), | |
519 | * and it already looks like UTF-8 (which includes US-ASCII as its | |
520 | * subset, of course) then that is what it is and there is nothing | |
521 | * to do. | |
522 | * | |
523 | * Otherwise, we default to assuming it is Latin1 for historical | |
524 | * reasons. | |
525 | */ | |
/*
 * Guess the charset of "line" when none was declared.  Returns NULL
 * (meaning: leave the text alone) when the target charset is UTF-8 and
 * the text already passes as UTF-8; returns "latin1" in every other
 * case.
 */
static const char *guess_charset(const char *line, const char *target_charset)
{
	if (is_encoding_utf8(target_charset) && is_utf8(line))
		return NULL;
	return "latin1";
}
534 | ||
8dabdfcc | 535 | static void convert_to_utf8(char *line, unsigned linesize, const char *charset) |
d4a9ce78 | 536 | { |
b59d398b LT |
537 | char *out; |
538 | ||
539 | if (!charset || !*charset) { | |
540 | charset = guess_charset(line, metainfo_charset); | |
541 | if (!charset) | |
542 | return; | |
543 | } | |
b45974a6 | 544 | |
7296096c JS |
545 | if (!strcmp(metainfo_charset, charset)) |
546 | return; | |
b59d398b | 547 | out = reencode_string(line, metainfo_charset, charset); |
bb1091a4 JH |
548 | if (!out) |
549 | die("cannot convert from %s to %s\n", | |
b59d398b | 550 | charset, metainfo_charset); |
8dabdfcc | 551 | strlcpy(line, out, linesize); |
b45974a6 | 552 | free(out); |
d4a9ce78 JH |
553 | } |
554 | ||
8dabdfcc | 555 | static int decode_header_bq(char *it, unsigned itsize) |
d4a9ce78 JH |
556 | { |
557 | char *in, *out, *ep, *cp, *sp; | |
558 | char outbuf[1000]; | |
b75bf2c3 | 559 | int rfc2047 = 0; |
d4a9ce78 JH |
560 | |
561 | in = it; | |
562 | out = outbuf; | |
563 | while ((ep = strstr(in, "=?")) != NULL) { | |
564 | int sz, encoding; | |
565 | char charset_q[256], piecebuf[256]; | |
b75bf2c3 JH |
566 | rfc2047 = 1; |
567 | ||
d4a9ce78 JH |
568 | if (in != ep) { |
569 | sz = ep - in; | |
570 | memcpy(out, in, sz); | |
571 | out += sz; | |
572 | in += sz; | |
573 | } | |
574 | /* E.g. | |
575 | * ep : "=?iso-2022-jp?B?GyR...?= foo" | |
576 | * ep : "=?ISO-8859-1?Q?Foo=FCbar?= baz" | |
577 | */ | |
578 | ep += 2; | |
579 | cp = strchr(ep, '?'); | |
580 | if (!cp) | |
b75bf2c3 | 581 | return rfc2047; /* no munging */ |
d4a9ce78 JH |
582 | for (sp = ep; sp < cp; sp++) |
583 | charset_q[sp - ep] = tolower(*sp); | |
584 | charset_q[cp - ep] = 0; | |
585 | encoding = cp[1]; | |
586 | if (!encoding || cp[2] != '?') | |
b75bf2c3 | 587 | return rfc2047; /* no munging */ |
d4a9ce78 JH |
588 | ep = strstr(cp + 3, "?="); |
589 | if (!ep) | |
b75bf2c3 | 590 | return rfc2047; /* no munging */ |
d4a9ce78 JH |
591 | switch (tolower(encoding)) { |
592 | default: | |
b75bf2c3 | 593 | return rfc2047; /* no munging */ |
d4a9ce78 | 594 | case 'b': |
8dabdfcc | 595 | sz = decode_b_segment(cp + 3, piecebuf, sizeof(piecebuf), ep); |
d4a9ce78 JH |
596 | break; |
597 | case 'q': | |
8dabdfcc | 598 | sz = decode_q_segment(cp + 3, piecebuf, sizeof(piecebuf), ep, 1); |
d4a9ce78 JH |
599 | break; |
600 | } | |
601 | if (sz < 0) | |
b75bf2c3 | 602 | return rfc2047; |
650e4be5 | 603 | if (metainfo_charset) |
8dabdfcc AR |
604 | convert_to_utf8(piecebuf, sizeof(piecebuf), charset_q); |
605 | ||
606 | sz = strlen(piecebuf); | |
607 | if (outbuf + sizeof(outbuf) <= out + sz) | |
608 | return rfc2047; /* no munging */ | |
d4a9ce78 | 609 | strcpy(out, piecebuf); |
8dabdfcc | 610 | out += sz; |
d4a9ce78 JH |
611 | in = ep + 2; |
612 | } | |
613 | strcpy(out, in); | |
8dabdfcc | 614 | strlcpy(it, outbuf, itsize); |
b75bf2c3 JH |
615 | return rfc2047; |
616 | } | |
617 | ||
8dabdfcc | 618 | static void decode_header(char *it, unsigned itsize) |
b75bf2c3 JH |
619 | { |
620 | ||
8dabdfcc | 621 | if (decode_header_bq(it, itsize)) |
b75bf2c3 JH |
622 | return; |
623 | /* otherwise "it" is a straight copy of the input. | |
624 | * This can be binary guck but there is no charset specified. | |
625 | */ | |
626 | if (metainfo_charset) | |
8dabdfcc | 627 | convert_to_utf8(it, itsize, ""); |
d4a9ce78 JH |
628 | } |
629 | ||
9aa23094 | 630 | static int decode_transfer_encoding(char *line, unsigned linesize, int inputlen) |
d4a9ce78 JH |
631 | { |
632 | char *ep; | |
633 | ||
634 | switch (transfer_encoding) { | |
635 | case TE_QP: | |
9aa23094 JH |
636 | ep = line + inputlen; |
637 | return decode_q_segment(line, line, linesize, ep, 0); | |
d4a9ce78 | 638 | case TE_BASE64: |
9aa23094 JH |
639 | ep = line + inputlen; |
640 | return decode_b_segment(line, line, linesize, ep); | |
d4a9ce78 | 641 | case TE_DONTCARE: |
9aa23094 JH |
642 | default: |
643 | return inputlen; | |
d4a9ce78 JH |
644 | } |
645 | } | |
646 | ||
cce8d6fd | 647 | static int handle_filter(char *line, unsigned linesize, int linelen); |
87ab7992 DZ |
648 | |
649 | static int find_boundary(void) | |
2744b234 | 650 | { |
87ab7992 DZ |
651 | while(fgets(line, sizeof(line), fin) != NULL) { |
652 | if (is_multipart_boundary(line)) | |
653 | return 1; | |
654 | } | |
655 | return 0; | |
656 | } | |
657 | ||
658 | static int handle_boundary(void) | |
659 | { | |
86747c13 | 660 | char newline[]="\n"; |
87ab7992 DZ |
661 | again: |
662 | if (!memcmp(line+content_top->boundary_len, "--", 2)) { | |
663 | /* we hit an end boundary */ | |
664 | /* pop the current boundary off the stack */ | |
665 | free(content_top->boundary); | |
666 | ||
667 | /* technically won't happen as is_multipart_boundary() | |
668 | will fail first. But just in case.. | |
669 | */ | |
670 | if (content_top-- < content) { | |
671 | fprintf(stderr, "Detected mismatched boundaries, " | |
672 | "can't recover\n"); | |
673 | exit(1); | |
674 | } | |
cce8d6fd | 675 | handle_filter(newline, sizeof(newline), strlen(newline)); |
87ab7992 DZ |
676 | |
677 | /* skip to the next boundary */ | |
678 | if (!find_boundary()) | |
679 | return 0; | |
680 | goto again; | |
681 | } | |
682 | ||
683 | /* set some defaults */ | |
684 | transfer_encoding = TE_DONTCARE; | |
685 | charset[0] = 0; | |
686 | message_type = TYPE_TEXT; | |
d4a9ce78 | 687 | |
87ab7992 DZ |
688 | /* slurp in this section's info */ |
689 | while (read_one_header_line(line, sizeof(line), fin)) | |
8dabdfcc | 690 | check_header(line, sizeof(line), p_hdr_data, 0); |
2744b234 | 691 | |
87ab7992 DZ |
692 | /* eat the blank line after section info */ |
693 | return (fgets(line, sizeof(line), fin) != NULL); | |
d4a9ce78 JH |
694 | } |
695 | ||
f0658cf2 DZ |
/*
 * Return 1 when "line" marks the end of the commit message and the
 * start of the patch:
 *   - a "diff -" header,
 *   - a CVS "Index: " line,
 *   - "--- <filename>" (a patch without headers), or
 *   - "---" followed by nothing but whitespace up to the newline
 *     (a manual separator).
 * Return 0 for anything else.
 */
static inline int patchbreak(const char *line)
{
	const char *rest;

	if (memcmp("diff -", line, 6) == 0)
		return 1;
	if (memcmp("Index: ", line, 7) == 0)
		return 1;
	if (memcmp("---", line, 3) != 0)
		return 0;

	rest = line + 3;
	/* space followed by a filename? */
	if (rest[0] == ' ' && !isspace(rest[1]))
		return 1;
	/* Just whitespace? */
	for (;; rest++) {
		unsigned char c = *rest;

		if (c == '\n')
			return 1;
		if (!isspace(c))
			return 0;
	}
}
727 | ||
728 | ||
8dabdfcc | 729 | static int handle_commit_msg(char *line, unsigned linesize) |
d4a9ce78 | 730 | { |
87ab7992 | 731 | static int still_looking = 1; |
8dabdfcc | 732 | char *endline = line + linesize; |
87ab7992 | 733 | |
d4a9ce78 JH |
734 | if (!cmitmsg) |
735 | return 0; | |
2744b234 | 736 | |
87ab7992 DZ |
737 | if (still_looking) { |
738 | char *cp = line; | |
739 | if (isspace(*line)) { | |
740 | for (cp = line + 1; *cp; cp++) { | |
741 | if (!isspace(*cp)) | |
742 | break; | |
743 | } | |
744 | if (!*cp) | |
745 | return 0; | |
746 | } | |
8dabdfcc | 747 | if ((still_looking = check_header(cp, endline - cp, s_hdr_data, 0)) != 0) |
87ab7992 DZ |
748 | return 0; |
749 | } | |
8b4525fb | 750 | |
86747c13 DZ |
751 | /* normalize the log message to UTF-8. */ |
752 | if (metainfo_charset) | |
8dabdfcc | 753 | convert_to_utf8(line, endline - line, charset); |
86747c13 | 754 | |
f0658cf2 | 755 | if (patchbreak(line)) { |
87ab7992 DZ |
756 | fclose(cmitmsg); |
757 | cmitmsg = NULL; | |
758 | return 1; | |
759 | } | |
8b4525fb | 760 | |
87ab7992 | 761 | fputs(line, cmitmsg); |
d4a9ce78 | 762 | return 0; |
2744b234 LT |
763 | } |
764 | ||
cce8d6fd | 765 | static int handle_patch(char *line, int len) |
2744b234 | 766 | { |
cce8d6fd | 767 | fwrite(line, 1, len, patchfile); |
87ab7992 DZ |
768 | patch_lines++; |
769 | return 0; | |
2744b234 LT |
770 | } |
771 | ||
/*
 * Route one body chunk to the stage that is currently active: first
 * the commit message, then the patch.  A stage that returns nonzero is
 * finished, and the same chunk is passed on to the next stage.
 * Returns 1 once every stage is finished, 0 otherwise.
 */
static int handle_filter(char *line, unsigned linesize, int linelen)
{
	/* stage reached so far: 0 = commit message, 1 = patch, 2 = done */
	static int filter = 0;

	if (filter == 0) {
		if (!handle_commit_msg(line, linesize))
			return 0;
		filter = 1;
	}
	if (filter == 1) {
		if (!handle_patch(line, linelen))
			return 0;
		filter = 2;
	}
	return 1;
}
794 | ||
87ab7992 | 795 | static void handle_body(void) |
1d8fa411 | 796 | { |
87ab7992 DZ |
797 | int rc = 0; |
798 | static char newline[2000]; | |
799 | static char *np = newline; | |
cce8d6fd | 800 | int len = strlen(line); |
d4a9ce78 JH |
801 | |
802 | /* Skip up to the first boundary */ | |
87ab7992 DZ |
803 | if (content_top->boundary) { |
804 | if (!find_boundary()) | |
805 | return; | |
806 | } | |
807 | ||
808 | do { | |
809 | /* process any boundary lines */ | |
810 | if (content_top->boundary && is_multipart_boundary(line)) { | |
811 | /* flush any leftover */ | |
9aa23094 | 812 | if (np != newline) |
cce8d6fd | 813 | handle_filter(newline, sizeof(newline), |
9aa23094 | 814 | np - newline); |
87ab7992 DZ |
815 | if (!handle_boundary()) |
816 | return; | |
6fc2a25e | 817 | len = strlen(line); |
87ab7992 DZ |
818 | } |
819 | ||
86747c13 | 820 | /* Unwrap transfer encoding */ |
9aa23094 JH |
821 | len = decode_transfer_encoding(line, sizeof(line), len); |
822 | if (len < 0) { | |
823 | error("Malformed input line"); | |
824 | return; | |
825 | } | |
87ab7992 DZ |
826 | |
827 | switch (transfer_encoding) { | |
828 | case TE_BASE64: | |
87f1b884 | 829 | case TE_QP: |
87ab7992 DZ |
830 | { |
831 | char *op = line; | |
832 | ||
833 | /* binary data most likely doesn't have newlines */ | |
834 | if (message_type != TYPE_TEXT) { | |
cce8d6fd | 835 | rc = handle_filter(line, sizeof(line), len); |
87ab7992 DZ |
836 | break; |
837 | } | |
838 | ||
9aa23094 JH |
839 | /* |
840 | * This is a decoded line that may contain | |
87ab7992 DZ |
841 | * multiple new lines. Pass only one chunk |
842 | * at a time to handle_filter() | |
843 | */ | |
87ab7992 | 844 | do { |
9aa23094 | 845 | while (op < line + len && *op != '\n') |
87ab7992 DZ |
846 | *np++ = *op++; |
847 | *np = *op; | |
848 | if (*np != 0) { | |
849 | /* should be sitting on a new line */ | |
850 | *(++np) = 0; | |
851 | op++; | |
cce8d6fd | 852 | rc = handle_filter(newline, sizeof(newline), np - newline); |
87ab7992 DZ |
853 | np = newline; |
854 | } | |
9aa23094 JH |
855 | } while (op < line + len); |
856 | /* | |
857 | * The partial chunk is saved in newline and will be | |
858 | * appended by the next iteration of read_line_with_nul(). | |
87ab7992 | 859 | */ |
d4a9ce78 | 860 | break; |
1d8fa411 | 861 | } |
87ab7992 | 862 | default: |
cce8d6fd | 863 | rc = handle_filter(line, sizeof(line), len); |
d4a9ce78 | 864 | } |
87ab7992 DZ |
865 | if (rc) |
866 | /* nothing left to filter */ | |
867 | break; | |
cce8d6fd | 868 | } while ((len = read_line_with_nul(line, sizeof(line), fin))); |
87ab7992 DZ |
869 | |
870 | return; | |
1d8fa411 JH |
871 | } |
872 | ||
d7f6bae2 JH |
/*
 * Print "data" to "fout" as header "hdr", one "hdr: text" line for
 * each newline-separated piece of "data".  An empty "data" still
 * yields one line with an empty value.
 */
static void output_header_lines(FILE *fout, const char *hdr, char *data)
{
	char *eol;

	for (;;) {
		eol = strchr(data, '\n');
		if (eol == NULL) {
			fprintf(fout, "%s: %.*s\n", hdr, (int)strlen(data), data);
			return;
		}
		fprintf(fout, "%s: %.*s\n", hdr, (int)(eol - data), data);
		data = eol + 1;
	}
}
888 | ||
87ab7992 | 889 | static void handle_info(void) |
2744b234 | 890 | { |
87ab7992 DZ |
891 | char *sub; |
892 | char *hdr; | |
893 | int i; | |
894 | ||
895 | for (i = 0; header[i]; i++) { | |
896 | ||
897 | /* only print inbody headers if we output a patch file */ | |
898 | if (patch_lines && s_hdr_data[i]) | |
899 | hdr = s_hdr_data[i]; | |
900 | else if (p_hdr_data[i]) | |
901 | hdr = p_hdr_data[i]; | |
902 | else | |
903 | continue; | |
904 | ||
905 | if (!memcmp(header[i], "Subject", 7)) { | |
d7f6bae2 JH |
906 | if (keep_subject) |
907 | sub = hdr; | |
908 | else { | |
909 | sub = cleanup_subject(hdr); | |
910 | cleanup_space(sub); | |
911 | } | |
912 | output_header_lines(fout, "Subject", sub); | |
87ab7992 DZ |
913 | } else if (!memcmp(header[i], "From", 4)) { |
914 | handle_from(hdr); | |
915 | fprintf(fout, "Author: %s\n", name); | |
916 | fprintf(fout, "Email: %s\n", email); | |
917 | } else { | |
918 | cleanup_space(hdr); | |
919 | fprintf(fout, "%s: %s\n", header[i], hdr); | |
920 | } | |
d4a9ce78 | 921 | } |
87ab7992 | 922 | fprintf(fout, "\n"); |
2744b234 LT |
923 | } |
924 | ||
fcd056a6 JH |
925 | static int mailinfo(FILE *in, FILE *out, int ks, const char *encoding, |
926 | const char *msg, const char *patch) | |
34488e3c | 927 | { |
f88a545a | 928 | int peek; |
34488e3c LS |
929 | keep_subject = ks; |
930 | metainfo_charset = encoding; | |
931 | fin = in; | |
932 | fout = out; | |
933 | ||
934 | cmitmsg = fopen(msg, "w"); | |
935 | if (!cmitmsg) { | |
936 | perror(msg); | |
937 | return -1; | |
938 | } | |
939 | patchfile = fopen(patch, "w"); | |
940 | if (!patchfile) { | |
941 | perror(patch); | |
942 | fclose(cmitmsg); | |
943 | return -1; | |
944 | } | |
87ab7992 DZ |
945 | |
946 | p_hdr_data = xcalloc(MAX_HDR_PARSED, sizeof(char *)); | |
947 | s_hdr_data = xcalloc(MAX_HDR_PARSED, sizeof(char *)); | |
948 | ||
f88a545a SS |
949 | do { |
950 | peek = fgetc(in); | |
951 | } while (isspace(peek)); | |
952 | ungetc(peek, in); | |
953 | ||
87ab7992 DZ |
954 | /* process the email header */ |
955 | while (read_one_header_line(line, sizeof(line), fin)) | |
8dabdfcc | 956 | check_header(line, sizeof(line), p_hdr_data, 1); |
87ab7992 DZ |
957 | |
958 | handle_body(); | |
959 | handle_info(); | |
34488e3c LS |
960 | |
961 | return 0; | |
962 | } | |
963 | ||
6bff6a60 | 964 | static const char mailinfo_usage[] = |
b4958181 | 965 | "git-mailinfo [-k] [-u | --encoding=<encoding> | -n] msg patch <mail >info"; |
d4a9ce78 | 966 | |
a633fca0 | 967 | int cmd_mailinfo(int argc, const char **argv, const char *prefix) |
2744b234 | 968 | { |
bb1091a4 JH |
969 | const char *def_charset; |
970 | ||
f1f909e3 JH |
971 | /* NEEDSWORK: might want to do the optional .git/ directory |
972 | * discovery | |
973 | */ | |
ef90d6d4 | 974 | git_config(git_default_config, NULL); |
f1f909e3 | 975 | |
bb1091a4 JH |
976 | def_charset = (git_commit_encoding ? git_commit_encoding : "utf-8"); |
977 | metainfo_charset = def_charset; | |
978 | ||
6bff6a60 JH |
979 | while (1 < argc && argv[1][0] == '-') { |
980 | if (!strcmp(argv[1], "-k")) | |
981 | keep_subject = 1; | |
d4a9ce78 | 982 | else if (!strcmp(argv[1], "-u")) |
bb1091a4 JH |
983 | metainfo_charset = def_charset; |
984 | else if (!strcmp(argv[1], "-n")) | |
985 | metainfo_charset = NULL; | |
cc44c765 | 986 | else if (!prefixcmp(argv[1], "--encoding=")) |
9f63892b | 987 | metainfo_charset = argv[1] + 11; |
d4a9ce78 | 988 | else |
f1f909e3 | 989 | usage(mailinfo_usage); |
6bff6a60 JH |
990 | argc--; argv++; |
991 | } | |
992 | ||
a196d8d4 | 993 | if (argc != 3) |
f1f909e3 | 994 | usage(mailinfo_usage); |
34488e3c LS |
995 | |
996 | return !!mailinfo(stdin, stdout, keep_subject, metainfo_charset, argv[1], argv[2]); | |
2744b234 | 997 | } |