]>
Commit | Line | Data |
---|---|---|
2744b234 LT |
1 | /* |
2 | * Another stupid program, this one parsing the headers of an | |
3 | * email to figure out authorship and subject | |
4 | */ | |
f1f909e3 | 5 | #include "cache.h" |
34488e3c | 6 | #include "builtin.h" |
b45974a6 | 7 | #include "utf8.h" |
2744b234 | 8 | |
/* Output for the commit message, output for the patch, mail input, info output. */
static FILE *cmitmsg, *patchfile, *fin, *fout;

/* Non-zero: emit the Subject header as-is instead of cleaning it up. */
static int keep_subject;
/* Charset that meta information is converted to; NULL turns conversion off. */
static const char *metainfo_charset;
/* Shared input line buffer, and the author name / e-mail parsed from "From". */
static char line[1000];
static char name[1000];
static char email[1000];

/* Content-Transfer-Encoding of the part currently being read. */
static enum {
	TE_DONTCARE, TE_QP, TE_BASE64,
} transfer_encoding;
/* Whether the current part was declared as some "text/" type. */
static enum {
	TYPE_TEXT, TYPE_OTHER,
} message_type;

/* Lower-cased charset= attribute of the current part ("" when absent). */
static char charset[256];
/* Number of chunks written to the patch file so far. */
static int patch_lines;
/* Header values collected from the mail/part headers (p_) and from the body (s_). */
static char **p_hdr_data, **s_hdr_data;

#define MAX_HDR_PARSED 10
#define MAX_BOUNDARIES 5
d4a9ce78 | 30 | |
2744b234 LT |
/*
 * Pick the string to report as the author name.
 *
 * The candidate name is accepted only when it is between 3 and 60
 * bytes long and contains none of '@', '<' or '>'; otherwise the
 * e-mail address is returned in its place.
 */
static char *sanity_check(char *name, char *email)
{
	int len = strlen(name);

	if (len >= 3 && len <= 60 && !strpbrk(name, "@<>"))
		return name;
	return email;
}
40 | ||
e0e3ba20 JH |
41 | static int bogus_from(char *line) |
42 | { | |
43 | /* John Doe <johndoe> */ | |
44 | char *bra, *ket, *dst, *cp; | |
45 | ||
46 | /* This is fallback, so do not bother if we already have an | |
47 | * e-mail address. | |
34488e3c | 48 | */ |
e0e3ba20 JH |
49 | if (*email) |
50 | return 0; | |
51 | ||
52 | bra = strchr(line, '<'); | |
53 | if (!bra) | |
54 | return 0; | |
55 | ket = strchr(bra, '>'); | |
56 | if (!ket) | |
57 | return 0; | |
58 | ||
59 | for (dst = email, cp = bra+1; cp < ket; ) | |
60 | *dst++ = *cp++; | |
61 | *dst = 0; | |
62 | for (cp = line; isspace(*cp); cp++) | |
63 | ; | |
64 | for (bra--; isspace(*bra); bra--) | |
65 | *bra = 0; | |
66 | cp = sanity_check(cp, email); | |
67 | strcpy(name, cp); | |
68 | return 1; | |
69 | } | |
70 | ||
2dec02b1 | 71 | static int handle_from(char *in_line) |
2744b234 | 72 | { |
2dec02b1 EB |
73 | char line[1000]; |
74 | char *at; | |
2744b234 LT |
75 | char *dst; |
76 | ||
2dec02b1 EB |
77 | strcpy(line, in_line); |
78 | at = strchr(line, '@'); | |
2744b234 | 79 | if (!at) |
e0e3ba20 | 80 | return bogus_from(line); |
2744b234 LT |
81 | |
82 | /* | |
83 | * If we already have one email, don't take any confusing lines | |
84 | */ | |
85 | if (*email && strchr(at+1, '@')) | |
86 | return 0; | |
87 | ||
d4a9ce78 JH |
88 | /* Pick up the string around '@', possibly delimited with <> |
89 | * pair; that is the email part. White them out while copying. | |
90 | */ | |
2744b234 LT |
91 | while (at > line) { |
92 | char c = at[-1]; | |
d4a9ce78 JH |
93 | if (isspace(c)) |
94 | break; | |
95 | if (c == '<') { | |
96 | at[-1] = ' '; | |
2744b234 | 97 | break; |
d4a9ce78 | 98 | } |
2744b234 LT |
99 | at--; |
100 | } | |
101 | dst = email; | |
102 | for (;;) { | |
103 | unsigned char c = *at; | |
d4a9ce78 JH |
104 | if (!c || c == '>' || isspace(c)) { |
105 | if (c == '>') | |
106 | *at = ' '; | |
2744b234 | 107 | break; |
d4a9ce78 | 108 | } |
2744b234 LT |
109 | *at++ = ' '; |
110 | *dst++ = c; | |
111 | } | |
112 | *dst++ = 0; | |
113 | ||
d4a9ce78 JH |
114 | /* The remainder is name. It could be "John Doe <john.doe@xz>" |
115 | * or "john.doe@xz (John Doe)", but we have whited out the | |
116 | * email part, so trim from both ends, possibly removing | |
117 | * the () pair at the end. | |
118 | */ | |
2744b234 LT |
119 | at = line + strlen(line); |
120 | while (at > line) { | |
121 | unsigned char c = *--at; | |
d4a9ce78 JH |
122 | if (!isspace(c)) { |
123 | at[(c == ')') ? 0 : 1] = 0; | |
2744b234 | 124 | break; |
d4a9ce78 | 125 | } |
2744b234 LT |
126 | } |
127 | ||
128 | at = line; | |
129 | for (;;) { | |
130 | unsigned char c = *at; | |
d4a9ce78 JH |
131 | if (!c || !isspace(c)) { |
132 | if (c == '(') | |
133 | at++; | |
2744b234 | 134 | break; |
d4a9ce78 | 135 | } |
2744b234 LT |
136 | at++; |
137 | } | |
2744b234 | 138 | at = sanity_check(at, email); |
2744b234 LT |
139 | strcpy(name, at); |
140 | return 1; | |
141 | } | |
142 | ||
/*
 * Copy the header value that starts ofs bytes into line to data.
 * Returns 0 on success and 1 when either pointer is NULL.
 */
static int handle_header(char *line, char *data, int ofs)
{
	if (line && data) {
		strcpy(data, line + ofs);
		return 0;
	}
	return 1;
}
152 | ||
153 | /* NOTE NOTE NOTE. We do not claim we do full MIME. We just attempt | |
154 | * to have enough heuristics to grok MIME encoded patches often found | |
155 | * on our mailing lists. For example, we do not even treat header lines | |
156 | * case insensitively. | |
157 | */ | |
158 | ||
/*
 * Extract the value of attribute `name` (e.g. "charset=") from line.
 *
 * The name is searched for case-insensitively.  A value that starts
 * with a double quote runs up to the closing quote; any other value
 * ends at ';', space or tab.  The value is stored NUL-terminated in
 * attr, which the caller must make large enough.  Returns 1 when the
 * attribute was found, otherwise stores "" and returns 0.
 */
static int slurp_attr(const char *line, const char *name, char *attr)
{
	const char *value = strcasestr(line, name);
	const char *stop = "; \t";
	size_t n;

	if (value == NULL) {
		attr[0] = '\0';
		return 0;
	}

	value += strlen(name);
	if (value[0] == '"') {
		stop = "\"";
		value++;
	}
	n = strcspn(value, stop);
	memcpy(attr, value, n);
	attr[n] = '\0';
	return 1;
}
180 | ||
87ab7992 DZ |
181 | struct content_type { |
182 | char *boundary; | |
183 | int boundary_len; | |
184 | }; | |
185 | ||
186 | static struct content_type content[MAX_BOUNDARIES]; | |
187 | ||
188 | static struct content_type *content_top = content; | |
189 | ||
190 | static int handle_content_type(char *line) | |
d4a9ce78 | 191 | { |
87ab7992 DZ |
192 | char boundary[256]; |
193 | ||
194 | if (strcasestr(line, "text/") == NULL) | |
195 | message_type = TYPE_OTHER; | |
196 | if (slurp_attr(line, "boundary=", boundary + 2)) { | |
197 | memcpy(boundary, "--", 2); | |
198 | if (content_top++ >= &content[MAX_BOUNDARIES]) { | |
199 | fprintf(stderr, "Too many boundaries to handle\n"); | |
200 | exit(1); | |
201 | } | |
202 | content_top->boundary_len = strlen(boundary); | |
203 | content_top->boundary = xmalloc(content_top->boundary_len+1); | |
204 | strcpy(content_top->boundary, boundary); | |
b893f091 | 205 | } |
87ab7992 | 206 | if (slurp_attr(line, "charset=", charset)) { |
d4a9ce78 JH |
207 | int i, c; |
208 | for (i = 0; (c = charset[i]) != 0; i++) | |
209 | charset[i] = tolower(c); | |
210 | } | |
211 | return 0; | |
212 | } | |
213 | ||
d4a9ce78 JH |
214 | static int handle_content_transfer_encoding(char *line) |
215 | { | |
216 | if (strcasestr(line, "base64")) | |
217 | transfer_encoding = TE_BASE64; | |
218 | else if (strcasestr(line, "quoted-printable")) | |
219 | transfer_encoding = TE_QP; | |
220 | else | |
221 | transfer_encoding = TE_DONTCARE; | |
222 | return 0; | |
2744b234 LT |
223 | } |
224 | ||
d4a9ce78 JH |
225 | static int is_multipart_boundary(const char *line) |
226 | { | |
87ab7992 | 227 | return (!memcmp(line, content_top->boundary, content_top->boundary_len)); |
d4a9ce78 JH |
228 | } |
229 | ||
/*
 * Strip trailing whitespace from line in place and return the
 * resulting length.
 */
static int eatspace(char *line)
{
	char *end = line + strlen(line);

	while (end > line && isspace(end[-1]))
		*--end = 0;
	return end - line;
}
237 | ||
/*
 * Strip reply and list decorations from the front of a subject.
 *
 * Repeatedly removes a leading "Re:" (any letter case), blanks, tabs
 * and colons, and a leading "[...]" tag provided the tag is no more
 * than twice as long as the text from its closing bracket onwards.
 * A '[' without a matching ']' is dropped.  Trailing whitespace is
 * removed as well.  Returns a pointer into the original buffer.
 */
static char *cleanup_subject(char *subject)
{
	for (;;) {
		char *p;
		int len, remove;
		switch (*subject) {
		case 'r': case 'R':
			/* subject[2] is only looked at when subject[1] is not the NUL */
			if ((subject[1] == 'e' || subject[1] == 'E') &&
			    subject[2] == ':') {
				subject += 3;
				continue;
			}
			break;
		case ' ': case '\t': case ':':
			subject++;
			continue;

		case '[':
			p = strchr(subject, ']');
			if (!p) {
				subject++;
				continue;
			}
			len = strlen(p);
			remove = p - subject;
			if (remove <= len *2) {
				subject = p+1;
				continue;
			}
			break;
		}
		eatspace(subject);
		return subject;
	}
}
2744b234 LT |
272 | |
/*
 * Collapse every run of whitespace in buf into a single blank, in place.
 *
 * Done in one pass with separate read and write positions instead of
 * shifting the tail of the string for each removed character.
 */
static void cleanup_space(char *buf)
{
	char *dst = buf;
	const char *src = buf;

	while (*src) {
		unsigned char c = *src++;

		if (!isspace(c)) {
			*dst++ = c;
			continue;
		}
		*dst++ = ' ';
		while (isspace((unsigned char)*src))
			src++;
	}
	*dst = 0;
}
289 | ||
8dabdfcc | 290 | static void decode_header(char *it, unsigned itsize); |
538dfe73 | 291 | static const char *header[MAX_HDR_PARSED] = { |
87ab7992 | 292 | "From","Subject","Date", |
d4a9ce78 JH |
293 | }; |
294 | ||
8dabdfcc | 295 | static int check_header(char *line, unsigned linesize, char **hdr_data, int overwrite) |
d4a9ce78 JH |
296 | { |
297 | int i; | |
298 | ||
87ab7992 DZ |
299 | /* search for the interesting parts */ |
300 | for (i = 0; header[i]; i++) { | |
301 | int len = strlen(header[i]); | |
86747c13 | 302 | if ((!hdr_data[i] || overwrite) && |
87ab7992 | 303 | !strncasecmp(line, header[i], len) && |
d4a9ce78 | 304 | line[len] == ':' && isspace(line[len + 1])) { |
33504530 EB |
305 | /* Unwrap inline B and Q encoding, and optionally |
306 | * normalize the meta information to utf8. | |
307 | */ | |
8dabdfcc | 308 | decode_header(line + len + 2, linesize - len - 2); |
87ab7992 DZ |
309 | hdr_data[i] = xmalloc(1000 * sizeof(char)); |
310 | if (! handle_header(line, hdr_data[i], len + 2)) { | |
311 | return 1; | |
312 | } | |
d4a9ce78 JH |
313 | } |
314 | } | |
d4a9ce78 | 315 | |
87ab7992 DZ |
316 | /* Content stuff */ |
317 | if (!strncasecmp(line, "Content-Type", 12) && | |
318 | line[12] == ':' && isspace(line[12 + 1])) { | |
8dabdfcc | 319 | decode_header(line + 12 + 2, linesize - 12 - 2); |
87ab7992 DZ |
320 | if (! handle_content_type(line)) { |
321 | return 1; | |
322 | } | |
323 | } | |
324 | if (!strncasecmp(line, "Content-Transfer-Encoding", 25) && | |
325 | line[25] == ':' && isspace(line[25 + 1])) { | |
8dabdfcc | 326 | decode_header(line + 25 + 2, linesize - 25 - 2); |
87ab7992 DZ |
327 | if (! handle_content_transfer_encoding(line)) { |
328 | return 1; | |
329 | } | |
330 | } | |
331 | ||
332 | /* for inbody stuff */ | |
333 | if (!memcmp(">From", line, 5) && isspace(line[5])) | |
334 | return 1; | |
335 | if (!memcmp("[PATCH]", line, 7) && isspace(line[7])) { | |
336 | for (i = 0; header[i]; i++) { | |
337 | if (!memcmp("Subject: ", header[i], 9)) { | |
338 | if (! handle_header(line, hdr_data[i], 0)) { | |
339 | return 1; | |
340 | } | |
341 | } | |
342 | } | |
343 | } | |
344 | ||
345 | /* no match */ | |
346 | return 0; | |
d4a9ce78 JH |
347 | } |
348 | ||
ef29c117 JH |
/*
 * Does line look like an RFC 2822 header line?
 *
 * mbox "From " / ">From " lines count as headers.  Otherwise the line
 * must start with a non-empty field name followed by ':'.  Returns 1
 * for a header, 0 for anything else.
 */
static int is_rfc2822_header(char *line)
{
	/*
	 * The section that defines the loosest possible
	 * field name is "3.6.8 Optional fields".
	 *
	 * optional-field = field-name ":" unstructured CRLF
	 * field-name = 1*ftext
	 * ftext = %d33-57 / %59-126
	 */
	int ch;
	char *cp = line;

	/* Count mbox From headers as headers */
	if (!strncmp(line, "From ", 5) || !strncmp(line, ">From ", 6))
		return 1;

	while ((ch = *cp++)) {
		if (ch == ':')
			/* cp is already past the colon; the name must not be empty */
			return cp - 1 != line;
		if ((33 <= ch && ch <= 57) ||
		    (59 <= ch && ch <= 126))
			continue;
		break;
	}
	return 0;
}
376 | ||
34fc5cef LT |
377 | /* |
378 | * sz is size of 'line' buffer in bytes. Must be reasonably | |
379 | * long enough to hold one physical real-world e-mail line. | |
380 | */ | |
d4a9ce78 JH |
/*
 * Read one logical header line from in into line (sz bytes large).
 *
 * Folded continuation lines are appended, with a line feed taking the
 * place of the first whitespace character of each continuation.
 * Returns 1 when a header was read.  Returns 0 at end of input, and
 * also when the line read is empty or not a header; in that case the
 * line is left in the buffer with a newline put back after it.
 */
static int read_one_header_line(char *line, int sz, FILE *in)
{
	int len;

	/*
	 * Only sz-1 bytes are handed to fgets() below, which keeps
	 * enough slack to put a newline and a NUL back after the text.
	 */
	sz--;

	/* Get the first part of the line. */
	if (fgets(line, sz, in) == NULL)
		return 0;

	/* An empty line or a non-header ends the header block. */
	len = eatspace(line);
	if (len == 0 || !is_rfc2822_header(line)) {
		/* Re-add the newline */
		line[len] = '\n';
		line[len + 1] = '\0';
		return 0;
	}

	/* Swallow the continuation lines of a folded header. */
	for (;;) {
		static char continuation[1000];
		int next, extra;

		next = fgetc(in);
		ungetc(next, in);
		if (!(next == ' ' || next == '\t'))
			break;
		if (fgets(continuation, sizeof(continuation), in) == NULL)
			break;
		extra = eatspace(continuation);
		if (len >= sz - 1)
			continue;	/* no room left, drop the text */
		if (extra >= sz - len)
			extra = sz - len - 1;
		memcpy(line + len, continuation, extra);
		/* mark the fold: overwrites the leading blank just copied */
		line[len] = '\n';
		len += extra;
	}
	line[len] = 0;

	return 1;
}
434 | ||
/*
 * Decode quoted-printable text from in (up to, not including, ep)
 * into ot, which has room for otsize bytes; ot may be the same
 * buffer as in.
 *
 * With rfc2047 set, '_' decodes to a space.  A soft line break
 * ("=" followed by newline or end of text) ends decoding, and so does
 * an escape that is cut short by the end of the segment.
 *
 * The result is always NUL-terminated inside the buffer.  Returns the
 * number of bytes decoded, or -1 when the output does not fit.
 */
static int decode_q_segment(char *in, char *ot, unsigned otsize, char *ep, int rfc2047)
{
	char *otbegin = ot;
	char *otend = ot + otsize;
	int c;

	if (!otsize)
		return -1;	/* not even room for the terminator */

	while (in < ep && (c = *in++) != 0) {
		if (c == '=') {
			int d;

			if (in >= ep)
				break;
			d = *in++;
			if (d == '\n' || !d)
				break; /* drop trailing newline */
			if (in >= ep || !*in)
				break; /* second hex digit is missing */
			c = (hexval(d) << 4) | hexval(*in++);
		} else if (rfc2047 && c == '_') { /* rfc2047 4.2 (2) */
			c = 0x20;
		}
		/* keep one byte in reserve for the terminating NUL */
		if (ot + 1 >= otend) {
			*ot = '\0';
			return -1;
		}
		*ot++ = c;
	}
	*ot = 0;
	return (ot - otbegin);
}
459 | ||
/*
 * Decode base64 text from in (up to, not including, ep) into ot,
 * which has room for otsize bytes; ot may be the same buffer as in.
 *
 * Characters outside the base64 alphabet are skipped.  The result is
 * always NUL-terminated inside the buffer.  Returns the number of
 * bytes decoded, or -1 when the output does not fit.
 */
static int decode_b_segment(char *in, char *ot, unsigned otsize, char *ep)
{
	/* Decode in..ep, possibly in-place to ot */
	int c, pos = 0, acc = 0;
	char *otbegin = ot;
	char *otend = ot + otsize;

	if (!otsize)
		return -1;	/* not even room for the terminator */

	while (in < ep && (c = *in++) != 0) {
		if (c == '+')
			c = 62;
		else if (c == '/')
			c = 63;
		else if ('A' <= c && c <= 'Z')
			c -= 'A';
		else if ('a' <= c && c <= 'z')
			c -= 'a' - 26;
		else if ('0' <= c && c <= '9')
			c -= '0' - 52;
		else if (c == '=') {
			/* padding is almost like (c == 0), except we do
			 * not output NUL resulting only from it;
			 * for now we just trust the data.
			 */
			c = 0;
		}
		else
			continue; /* garbage */

		/*
		 * Every position but the first emits a byte; keep one
		 * byte in reserve for the terminating NUL.
		 */
		if (pos && ot + 1 >= otend) {
			*ot = '\0';
			return -1;
		}
		switch (pos++) {
		case 0:
			acc = (c << 2);
			break;
		case 1:
			*ot++ = (acc | (c >> 4));
			acc = (c & 15) << 4;
			break;
		case 2:
			*ot++ = (acc | (c >> 2));
			acc = (c & 3) << 6;
			break;
		case 3:
			*ot++ = (acc | c);
			acc = pos = 0;
			break;
		}
	}
	*ot = 0;
	return (ot - otbegin);
}
512 | ||
b59d398b LT |
513 | /* |
514 | * When there is no known charset, guess. | |
515 | * | |
516 | * Right now we assume that if the target is UTF-8 (the default), | |
517 | * and it already looks like UTF-8 (which includes US-ASCII as its | |
518 | * subset, of course) then that is what it is and there is nothing | |
519 | * to do. | |
520 | * | |
521 | * Otherwise, we default to assuming it is Latin1 for historical | |
522 | * reasons. | |
523 | */ | |
/*
 * Guess the charset of line when none was declared.
 *
 * Returns NULL (nothing to convert) when the target charset is UTF-8
 * and the text is accepted by is_utf8(); "latin1" in every other case.
 */
static const char *guess_charset(const char *line, const char *target_charset)
{
	if (is_encoding_utf8(target_charset) && is_utf8(line))
		return NULL;
	return "latin1";
}
532 | ||
8dabdfcc | 533 | static void convert_to_utf8(char *line, unsigned linesize, const char *charset) |
d4a9ce78 | 534 | { |
b59d398b LT |
535 | char *out; |
536 | ||
537 | if (!charset || !*charset) { | |
538 | charset = guess_charset(line, metainfo_charset); | |
539 | if (!charset) | |
540 | return; | |
541 | } | |
b45974a6 | 542 | |
7296096c JS |
543 | if (!strcmp(metainfo_charset, charset)) |
544 | return; | |
b59d398b | 545 | out = reencode_string(line, metainfo_charset, charset); |
bb1091a4 JH |
546 | if (!out) |
547 | die("cannot convert from %s to %s\n", | |
b59d398b | 548 | charset, metainfo_charset); |
8dabdfcc | 549 | strlcpy(line, out, linesize); |
b45974a6 | 550 | free(out); |
d4a9ce78 JH |
551 | } |
552 | ||
8dabdfcc | 553 | static int decode_header_bq(char *it, unsigned itsize) |
d4a9ce78 JH |
554 | { |
555 | char *in, *out, *ep, *cp, *sp; | |
556 | char outbuf[1000]; | |
b75bf2c3 | 557 | int rfc2047 = 0; |
d4a9ce78 JH |
558 | |
559 | in = it; | |
560 | out = outbuf; | |
561 | while ((ep = strstr(in, "=?")) != NULL) { | |
562 | int sz, encoding; | |
563 | char charset_q[256], piecebuf[256]; | |
b75bf2c3 JH |
564 | rfc2047 = 1; |
565 | ||
d4a9ce78 JH |
566 | if (in != ep) { |
567 | sz = ep - in; | |
568 | memcpy(out, in, sz); | |
569 | out += sz; | |
570 | in += sz; | |
571 | } | |
572 | /* E.g. | |
573 | * ep : "=?iso-2022-jp?B?GyR...?= foo" | |
574 | * ep : "=?ISO-8859-1?Q?Foo=FCbar?= baz" | |
575 | */ | |
576 | ep += 2; | |
577 | cp = strchr(ep, '?'); | |
578 | if (!cp) | |
b75bf2c3 | 579 | return rfc2047; /* no munging */ |
d4a9ce78 JH |
580 | for (sp = ep; sp < cp; sp++) |
581 | charset_q[sp - ep] = tolower(*sp); | |
582 | charset_q[cp - ep] = 0; | |
583 | encoding = cp[1]; | |
584 | if (!encoding || cp[2] != '?') | |
b75bf2c3 | 585 | return rfc2047; /* no munging */ |
d4a9ce78 JH |
586 | ep = strstr(cp + 3, "?="); |
587 | if (!ep) | |
b75bf2c3 | 588 | return rfc2047; /* no munging */ |
d4a9ce78 JH |
589 | switch (tolower(encoding)) { |
590 | default: | |
b75bf2c3 | 591 | return rfc2047; /* no munging */ |
d4a9ce78 | 592 | case 'b': |
8dabdfcc | 593 | sz = decode_b_segment(cp + 3, piecebuf, sizeof(piecebuf), ep); |
d4a9ce78 JH |
594 | break; |
595 | case 'q': | |
8dabdfcc | 596 | sz = decode_q_segment(cp + 3, piecebuf, sizeof(piecebuf), ep, 1); |
d4a9ce78 JH |
597 | break; |
598 | } | |
599 | if (sz < 0) | |
b75bf2c3 | 600 | return rfc2047; |
650e4be5 | 601 | if (metainfo_charset) |
8dabdfcc AR |
602 | convert_to_utf8(piecebuf, sizeof(piecebuf), charset_q); |
603 | ||
604 | sz = strlen(piecebuf); | |
605 | if (outbuf + sizeof(outbuf) <= out + sz) | |
606 | return rfc2047; /* no munging */ | |
d4a9ce78 | 607 | strcpy(out, piecebuf); |
8dabdfcc | 608 | out += sz; |
d4a9ce78 JH |
609 | in = ep + 2; |
610 | } | |
611 | strcpy(out, in); | |
8dabdfcc | 612 | strlcpy(it, outbuf, itsize); |
b75bf2c3 JH |
613 | return rfc2047; |
614 | } | |
615 | ||
8dabdfcc | 616 | static void decode_header(char *it, unsigned itsize) |
b75bf2c3 JH |
617 | { |
618 | ||
8dabdfcc | 619 | if (decode_header_bq(it, itsize)) |
b75bf2c3 JH |
620 | return; |
621 | /* otherwise "it" is a straight copy of the input. | |
622 | * This can be binary guck but there is no charset specified. | |
623 | */ | |
624 | if (metainfo_charset) | |
8dabdfcc | 625 | convert_to_utf8(it, itsize, ""); |
d4a9ce78 JH |
626 | } |
627 | ||
9aa23094 | 628 | static int decode_transfer_encoding(char *line, unsigned linesize, int inputlen) |
d4a9ce78 JH |
629 | { |
630 | char *ep; | |
631 | ||
632 | switch (transfer_encoding) { | |
633 | case TE_QP: | |
9aa23094 JH |
634 | ep = line + inputlen; |
635 | return decode_q_segment(line, line, linesize, ep, 0); | |
d4a9ce78 | 636 | case TE_BASE64: |
9aa23094 JH |
637 | ep = line + inputlen; |
638 | return decode_b_segment(line, line, linesize, ep); | |
d4a9ce78 | 639 | case TE_DONTCARE: |
9aa23094 JH |
640 | default: |
641 | return inputlen; | |
d4a9ce78 JH |
642 | } |
643 | } | |
644 | ||
cce8d6fd | 645 | static int handle_filter(char *line, unsigned linesize, int linelen); |
87ab7992 DZ |
646 | |
647 | static int find_boundary(void) | |
2744b234 | 648 | { |
87ab7992 DZ |
649 | while(fgets(line, sizeof(line), fin) != NULL) { |
650 | if (is_multipart_boundary(line)) | |
651 | return 1; | |
652 | } | |
653 | return 0; | |
654 | } | |
655 | ||
656 | static int handle_boundary(void) | |
657 | { | |
86747c13 | 658 | char newline[]="\n"; |
87ab7992 DZ |
659 | again: |
660 | if (!memcmp(line+content_top->boundary_len, "--", 2)) { | |
661 | /* we hit an end boundary */ | |
662 | /* pop the current boundary off the stack */ | |
663 | free(content_top->boundary); | |
664 | ||
665 | /* technically won't happen as is_multipart_boundary() | |
666 | will fail first. But just in case.. | |
667 | */ | |
668 | if (content_top-- < content) { | |
669 | fprintf(stderr, "Detected mismatched boundaries, " | |
670 | "can't recover\n"); | |
671 | exit(1); | |
672 | } | |
cce8d6fd | 673 | handle_filter(newline, sizeof(newline), strlen(newline)); |
87ab7992 DZ |
674 | |
675 | /* skip to the next boundary */ | |
676 | if (!find_boundary()) | |
677 | return 0; | |
678 | goto again; | |
679 | } | |
680 | ||
681 | /* set some defaults */ | |
682 | transfer_encoding = TE_DONTCARE; | |
683 | charset[0] = 0; | |
684 | message_type = TYPE_TEXT; | |
d4a9ce78 | 685 | |
87ab7992 DZ |
686 | /* slurp in this section's info */ |
687 | while (read_one_header_line(line, sizeof(line), fin)) | |
8dabdfcc | 688 | check_header(line, sizeof(line), p_hdr_data, 0); |
2744b234 | 689 | |
87ab7992 DZ |
690 | /* eat the blank line after section info */ |
691 | return (fgets(line, sizeof(line), fin) != NULL); | |
d4a9ce78 JH |
692 | } |
693 | ||
f0658cf2 DZ |
/*
 * Does line mark the end of the commit message and the start of the
 * patch?  Returns 1 for "diff -...", "Index: ...", "--- <filename>"
 * and a "---" line followed only by whitespace up to the newline;
 * 0 otherwise.  The prefix tests never read past the end of a short
 * line.
 */
static inline int patchbreak(const char *line)
{
	/* Beginning of a "diff -" header? */
	if (!strncmp(line, "diff -", 6))
		return 1;

	/* CVS "Index: " line? */
	if (!strncmp(line, "Index: ", 7))
		return 1;

	/*
	 * "--- <filename>" starts patches without headers
	 * "---<sp>*" is a manual separator
	 */
	if (!strncmp(line, "---", 3)) {
		line += 3;
		/* space followed by a filename? */
		if (line[0] == ' ' && !isspace(line[1]))
			return 1;
		/* Just whitespace? */
		for (;;) {
			unsigned char c = *line++;
			if (c == '\n')
				return 1;
			if (!isspace(c))
				break;
		}
		return 0;
	}
	return 0;
}
725 | ||
726 | ||
8dabdfcc | 727 | static int handle_commit_msg(char *line, unsigned linesize) |
d4a9ce78 | 728 | { |
87ab7992 | 729 | static int still_looking = 1; |
8dabdfcc | 730 | char *endline = line + linesize; |
87ab7992 | 731 | |
d4a9ce78 JH |
732 | if (!cmitmsg) |
733 | return 0; | |
2744b234 | 734 | |
87ab7992 DZ |
735 | if (still_looking) { |
736 | char *cp = line; | |
737 | if (isspace(*line)) { | |
738 | for (cp = line + 1; *cp; cp++) { | |
739 | if (!isspace(*cp)) | |
740 | break; | |
741 | } | |
742 | if (!*cp) | |
743 | return 0; | |
744 | } | |
8dabdfcc | 745 | if ((still_looking = check_header(cp, endline - cp, s_hdr_data, 0)) != 0) |
87ab7992 DZ |
746 | return 0; |
747 | } | |
8b4525fb | 748 | |
86747c13 DZ |
749 | /* normalize the log message to UTF-8. */ |
750 | if (metainfo_charset) | |
8dabdfcc | 751 | convert_to_utf8(line, endline - line, charset); |
86747c13 | 752 | |
f0658cf2 | 753 | if (patchbreak(line)) { |
87ab7992 DZ |
754 | fclose(cmitmsg); |
755 | cmitmsg = NULL; | |
756 | return 1; | |
757 | } | |
8b4525fb | 758 | |
87ab7992 | 759 | fputs(line, cmitmsg); |
d4a9ce78 | 760 | return 0; |
2744b234 LT |
761 | } |
762 | ||
cce8d6fd | 763 | static int handle_patch(char *line, int len) |
2744b234 | 764 | { |
cce8d6fd | 765 | fwrite(line, 1, len, patchfile); |
87ab7992 DZ |
766 | patch_lines++; |
767 | return 0; | |
2744b234 LT |
768 | } |
769 | ||
/*
 * Hand one chunk of body text to the current stage: first the commit
 * message, then the patch.
 *
 * A stage that returns non-zero is finished; the same chunk is then
 * offered to the next stage.  Returns 1 once every stage has
 * finished, otherwise 0.
 */
static int handle_filter(char *line, unsigned linesize, int linelen)
{
	/* remembers which stage we left off on */
	static int filter = 0;

	if (filter == 0) {
		if (!handle_commit_msg(line, linesize))
			return 0;
		filter++;
	}
	if (filter == 1) {
		if (!handle_patch(line, linelen))
			return 0;
		filter++;
	}
	return 1;
}
792 | ||
87ab7992 | 793 | static void handle_body(void) |
1d8fa411 | 794 | { |
87ab7992 DZ |
795 | int rc = 0; |
796 | static char newline[2000]; | |
797 | static char *np = newline; | |
cce8d6fd | 798 | int len = strlen(line); |
d4a9ce78 JH |
799 | |
800 | /* Skip up to the first boundary */ | |
87ab7992 DZ |
801 | if (content_top->boundary) { |
802 | if (!find_boundary()) | |
803 | return; | |
804 | } | |
805 | ||
806 | do { | |
807 | /* process any boundary lines */ | |
808 | if (content_top->boundary && is_multipart_boundary(line)) { | |
809 | /* flush any leftover */ | |
9aa23094 | 810 | if (np != newline) |
cce8d6fd | 811 | handle_filter(newline, sizeof(newline), |
9aa23094 | 812 | np - newline); |
87ab7992 DZ |
813 | if (!handle_boundary()) |
814 | return; | |
815 | } | |
816 | ||
86747c13 | 817 | /* Unwrap transfer encoding */ |
9aa23094 JH |
818 | len = decode_transfer_encoding(line, sizeof(line), len); |
819 | if (len < 0) { | |
820 | error("Malformed input line"); | |
821 | return; | |
822 | } | |
87ab7992 DZ |
823 | |
824 | switch (transfer_encoding) { | |
825 | case TE_BASE64: | |
87f1b884 | 826 | case TE_QP: |
87ab7992 DZ |
827 | { |
828 | char *op = line; | |
829 | ||
830 | /* binary data most likely doesn't have newlines */ | |
831 | if (message_type != TYPE_TEXT) { | |
cce8d6fd | 832 | rc = handle_filter(line, sizeof(line), len); |
87ab7992 DZ |
833 | break; |
834 | } | |
835 | ||
9aa23094 JH |
836 | /* |
837 | * This is a decoded line that may contain | |
87ab7992 DZ |
838 | * multiple new lines. Pass only one chunk |
839 | * at a time to handle_filter() | |
840 | */ | |
87ab7992 | 841 | do { |
9aa23094 | 842 | while (op < line + len && *op != '\n') |
87ab7992 DZ |
843 | *np++ = *op++; |
844 | *np = *op; | |
845 | if (*np != 0) { | |
846 | /* should be sitting on a new line */ | |
847 | *(++np) = 0; | |
848 | op++; | |
cce8d6fd | 849 | rc = handle_filter(newline, sizeof(newline), np - newline); |
87ab7992 DZ |
850 | np = newline; |
851 | } | |
9aa23094 JH |
852 | } while (op < line + len); |
853 | /* | |
854 | * The partial chunk is saved in newline and will be | |
855 | * appended by the next iteration of read_line_with_nul(). | |
87ab7992 | 856 | */ |
d4a9ce78 | 857 | break; |
1d8fa411 | 858 | } |
87ab7992 | 859 | default: |
cce8d6fd | 860 | rc = handle_filter(line, sizeof(line), len); |
d4a9ce78 | 861 | } |
87ab7992 DZ |
862 | if (rc) |
863 | /* nothing left to filter */ | |
864 | break; | |
cce8d6fd | 865 | } while ((len = read_line_with_nul(line, sizeof(line), fin))); |
87ab7992 DZ |
866 | |
867 | return; | |
1d8fa411 JH |
868 | } |
869 | ||
d7f6bae2 JH |
/*
 * Print data as one "<hdr>: <text>" line per line-feed separated
 * piece; a value without line feeds (even an empty one) yields
 * exactly one line.
 */
static void output_header_lines(FILE *fout, const char *hdr, char *data)
{
	for (;;) {
		int len = strcspn(data, "\n");

		fprintf(fout, "%s: %.*s\n", hdr, len, data);
		if (data[len] == '\0')
			break;
		data += len + 1;
	}
}
885 | ||
87ab7992 | 886 | static void handle_info(void) |
2744b234 | 887 | { |
87ab7992 DZ |
888 | char *sub; |
889 | char *hdr; | |
890 | int i; | |
891 | ||
892 | for (i = 0; header[i]; i++) { | |
893 | ||
894 | /* only print inbody headers if we output a patch file */ | |
895 | if (patch_lines && s_hdr_data[i]) | |
896 | hdr = s_hdr_data[i]; | |
897 | else if (p_hdr_data[i]) | |
898 | hdr = p_hdr_data[i]; | |
899 | else | |
900 | continue; | |
901 | ||
902 | if (!memcmp(header[i], "Subject", 7)) { | |
d7f6bae2 JH |
903 | if (keep_subject) |
904 | sub = hdr; | |
905 | else { | |
906 | sub = cleanup_subject(hdr); | |
907 | cleanup_space(sub); | |
908 | } | |
909 | output_header_lines(fout, "Subject", sub); | |
87ab7992 DZ |
910 | } else if (!memcmp(header[i], "From", 4)) { |
911 | handle_from(hdr); | |
912 | fprintf(fout, "Author: %s\n", name); | |
913 | fprintf(fout, "Email: %s\n", email); | |
914 | } else { | |
915 | cleanup_space(hdr); | |
916 | fprintf(fout, "%s: %s\n", header[i], hdr); | |
917 | } | |
d4a9ce78 | 918 | } |
87ab7992 | 919 | fprintf(fout, "\n"); |
2744b234 LT |
920 | } |
921 | ||
fcd056a6 JH |
922 | static int mailinfo(FILE *in, FILE *out, int ks, const char *encoding, |
923 | const char *msg, const char *patch) | |
34488e3c | 924 | { |
f88a545a | 925 | int peek; |
34488e3c LS |
926 | keep_subject = ks; |
927 | metainfo_charset = encoding; | |
928 | fin = in; | |
929 | fout = out; | |
930 | ||
931 | cmitmsg = fopen(msg, "w"); | |
932 | if (!cmitmsg) { | |
933 | perror(msg); | |
934 | return -1; | |
935 | } | |
936 | patchfile = fopen(patch, "w"); | |
937 | if (!patchfile) { | |
938 | perror(patch); | |
939 | fclose(cmitmsg); | |
940 | return -1; | |
941 | } | |
87ab7992 DZ |
942 | |
943 | p_hdr_data = xcalloc(MAX_HDR_PARSED, sizeof(char *)); | |
944 | s_hdr_data = xcalloc(MAX_HDR_PARSED, sizeof(char *)); | |
945 | ||
f88a545a SS |
946 | do { |
947 | peek = fgetc(in); | |
948 | } while (isspace(peek)); | |
949 | ungetc(peek, in); | |
950 | ||
87ab7992 DZ |
951 | /* process the email header */ |
952 | while (read_one_header_line(line, sizeof(line), fin)) | |
8dabdfcc | 953 | check_header(line, sizeof(line), p_hdr_data, 1); |
87ab7992 DZ |
954 | |
955 | handle_body(); | |
956 | handle_info(); | |
34488e3c LS |
957 | |
958 | return 0; | |
959 | } | |
960 | ||
/* Usage text; lists every option that cmd_mailinfo() accepts. */
static const char mailinfo_usage[] =
	"git-mailinfo [-k] [-u | --encoding=<encoding> | -n] msg patch <mail >info";
d4a9ce78 | 963 | |
a633fca0 | 964 | int cmd_mailinfo(int argc, const char **argv, const char *prefix) |
2744b234 | 965 | { |
bb1091a4 JH |
966 | const char *def_charset; |
967 | ||
f1f909e3 JH |
968 | /* NEEDSWORK: might want to do the optional .git/ directory |
969 | * discovery | |
970 | */ | |
ef90d6d4 | 971 | git_config(git_default_config, NULL); |
f1f909e3 | 972 | |
bb1091a4 JH |
973 | def_charset = (git_commit_encoding ? git_commit_encoding : "utf-8"); |
974 | metainfo_charset = def_charset; | |
975 | ||
6bff6a60 JH |
976 | while (1 < argc && argv[1][0] == '-') { |
977 | if (!strcmp(argv[1], "-k")) | |
978 | keep_subject = 1; | |
d4a9ce78 | 979 | else if (!strcmp(argv[1], "-u")) |
bb1091a4 JH |
980 | metainfo_charset = def_charset; |
981 | else if (!strcmp(argv[1], "-n")) | |
982 | metainfo_charset = NULL; | |
cc44c765 | 983 | else if (!prefixcmp(argv[1], "--encoding=")) |
9f63892b | 984 | metainfo_charset = argv[1] + 11; |
d4a9ce78 | 985 | else |
f1f909e3 | 986 | usage(mailinfo_usage); |
6bff6a60 JH |
987 | argc--; argv++; |
988 | } | |
989 | ||
a196d8d4 | 990 | if (argc != 3) |
f1f909e3 | 991 | usage(mailinfo_usage); |
34488e3c LS |
992 | |
993 | return !!mailinfo(stdin, stdout, keep_subject, metainfo_charset, argv[1], argv[2]); | |
2744b234 | 994 | } |