]>
Commit | Line | Data |
---|---|---|
2744b234 LT |
1 | /* |
2 | * Another stupid program, this one parsing the headers of an | |
3 | * email to figure out authorship and subject | |
4 | */ | |
f1f909e3 | 5 | #include "cache.h" |
34488e3c | 6 | #include "builtin.h" |
b45974a6 | 7 | #include "utf8.h" |
2744b234 | 8 | |
/* Streams used while splitting one mail: commit message, patch, input, info output. */
static FILE *cmitmsg;
static FILE *patchfile;
static FILE *fin;
static FILE *fout;

/* Options copied in by mailinfo()/cmd_mailinfo(). */
static int keep_subject;
static const char *metainfo_charset;

/* Shared line buffer plus the author name/address filled in by handle_from(). */
static char line[1000];
static char name[1000];
static char email[1000];

/* Content-Transfer-Encoding of the part currently being read. */
static enum {
	TE_DONTCARE,
	TE_QP,
	TE_BASE64,
} transfer_encoding;

/* Set to TYPE_OTHER when a Content-Type header lacks "text/". */
static enum {
	TYPE_TEXT,
	TYPE_OTHER,
} message_type;

/* Lower-cased charset= attribute of the current part. */
static char charset[256];
/* Number of lines written to the patch file so far. */
static int patch_lines;
/* Header values collected from the mail header (p_) and from the body (s_). */
static char **p_hdr_data;
static char **s_hdr_data;

#define MAX_HDR_PARSED 10
#define MAX_BOUNDARIES 5
d4a9ce78 | 30 | |
2744b234 LT |
/*
 * Decide whether 'name' is usable as an author name.
 *
 * Returns 'name' when it is 3 to 60 bytes long and contains none of
 * '@', '<' or '>'; otherwise returns 'email' as the fallback.
 */
static char *sanity_check(char *name, char *email)
{
	const int length = strlen(name);
	const int length_ok = (3 <= length && length <= 60);

	if (!length_ok)
		return email;
	/* Anything that still looks like an address is not a real name. */
	if (strchr(name, '@') != NULL)
		return email;
	if (strchr(name, '<') != NULL)
		return email;
	if (strchr(name, '>') != NULL)
		return email;
	return name;
}
40 | ||
e0e3ba20 JH |
41 | static int bogus_from(char *line) |
42 | { | |
43 | /* John Doe <johndoe> */ | |
44 | char *bra, *ket, *dst, *cp; | |
45 | ||
46 | /* This is fallback, so do not bother if we already have an | |
47 | * e-mail address. | |
34488e3c | 48 | */ |
e0e3ba20 JH |
49 | if (*email) |
50 | return 0; | |
51 | ||
52 | bra = strchr(line, '<'); | |
53 | if (!bra) | |
54 | return 0; | |
55 | ket = strchr(bra, '>'); | |
56 | if (!ket) | |
57 | return 0; | |
58 | ||
59 | for (dst = email, cp = bra+1; cp < ket; ) | |
60 | *dst++ = *cp++; | |
61 | *dst = 0; | |
62 | for (cp = line; isspace(*cp); cp++) | |
63 | ; | |
64 | for (bra--; isspace(*bra); bra--) | |
65 | *bra = 0; | |
66 | cp = sanity_check(cp, email); | |
67 | strcpy(name, cp); | |
68 | return 1; | |
69 | } | |
70 | ||
2dec02b1 | 71 | static int handle_from(char *in_line) |
2744b234 | 72 | { |
2dec02b1 EB |
73 | char line[1000]; |
74 | char *at; | |
2744b234 LT |
75 | char *dst; |
76 | ||
2dec02b1 EB |
77 | strcpy(line, in_line); |
78 | at = strchr(line, '@'); | |
2744b234 | 79 | if (!at) |
e0e3ba20 | 80 | return bogus_from(line); |
2744b234 LT |
81 | |
82 | /* | |
83 | * If we already have one email, don't take any confusing lines | |
84 | */ | |
85 | if (*email && strchr(at+1, '@')) | |
86 | return 0; | |
87 | ||
d4a9ce78 JH |
88 | /* Pick up the string around '@', possibly delimited with <> |
89 | * pair; that is the email part. White them out while copying. | |
90 | */ | |
2744b234 LT |
91 | while (at > line) { |
92 | char c = at[-1]; | |
d4a9ce78 JH |
93 | if (isspace(c)) |
94 | break; | |
95 | if (c == '<') { | |
96 | at[-1] = ' '; | |
2744b234 | 97 | break; |
d4a9ce78 | 98 | } |
2744b234 LT |
99 | at--; |
100 | } | |
101 | dst = email; | |
102 | for (;;) { | |
103 | unsigned char c = *at; | |
d4a9ce78 JH |
104 | if (!c || c == '>' || isspace(c)) { |
105 | if (c == '>') | |
106 | *at = ' '; | |
2744b234 | 107 | break; |
d4a9ce78 | 108 | } |
2744b234 LT |
109 | *at++ = ' '; |
110 | *dst++ = c; | |
111 | } | |
112 | *dst++ = 0; | |
113 | ||
d4a9ce78 JH |
114 | /* The remainder is name. It could be "John Doe <john.doe@xz>" |
115 | * or "john.doe@xz (John Doe)", but we have whited out the | |
116 | * email part, so trim from both ends, possibly removing | |
117 | * the () pair at the end. | |
118 | */ | |
2744b234 LT |
119 | at = line + strlen(line); |
120 | while (at > line) { | |
121 | unsigned char c = *--at; | |
d4a9ce78 JH |
122 | if (!isspace(c)) { |
123 | at[(c == ')') ? 0 : 1] = 0; | |
2744b234 | 124 | break; |
d4a9ce78 | 125 | } |
2744b234 LT |
126 | } |
127 | ||
128 | at = line; | |
129 | for (;;) { | |
130 | unsigned char c = *at; | |
d4a9ce78 JH |
131 | if (!c || !isspace(c)) { |
132 | if (c == '(') | |
133 | at++; | |
2744b234 | 134 | break; |
d4a9ce78 | 135 | } |
2744b234 LT |
136 | at++; |
137 | } | |
2744b234 | 138 | at = sanity_check(at, email); |
2744b234 LT |
139 | strcpy(name, at); |
140 | return 1; | |
141 | } | |
142 | ||
/*
 * Copy the part of 'line' that starts at byte offset 'ofs' into 'data'.
 *
 * Returns 1 without copying when either pointer is NULL, 0 after a copy.
 * The caller must provide a 'data' buffer large enough for the text.
 */
static int handle_header(char *line, char *data, int ofs)
{
	const int usable = (line != NULL) && (data != NULL);

	if (usable)
		strcpy(data, line + ofs);
	return usable ? 0 : 1;
}
152 | ||
153 | /* NOTE NOTE NOTE. We do not claim we do full MIME. We just attempt | |
154 | * to have enough heuristics to grok MIME encoded patches often found | |
155 | * on our mailing lists. For example, we do not even treat header lines | |
156 | * case insensitively. | |
157 | */ | |
158 | ||
/*
 * Extract the value that follows 'name' (e.g. "charset=") in 'line'.
 *
 * 'name' is located case-insensitively.  A value that starts with a double
 * quote runs up to the next double quote; otherwise it runs up to the next
 * ';', space or tab.  The value is stored NUL-terminated in 'attr' and 1 is
 * returned.  When 'name' is absent, 'attr' becomes the empty string and 0
 * is returned.  'attr' must be large enough; no bound is checked here.
 */
static int slurp_attr(const char *line, const char *name, char *attr)
{
	const char *value = strcasestr(line, name);
	const char *terminators = "; \t";
	size_t span;

	if (value == NULL) {
		attr[0] = '\0';
		return 0;
	}

	value += strlen(name);
	if (*value == '"') {
		terminators = "\"";
		value++;
	}

	span = strcspn(value, terminators);
	memcpy(attr, value, span);
	attr[span] = '\0';
	return 1;
}
180 | ||
87ab7992 DZ |
181 | struct content_type { |
182 | char *boundary; | |
183 | int boundary_len; | |
184 | }; | |
185 | ||
186 | static struct content_type content[MAX_BOUNDARIES]; | |
187 | ||
188 | static struct content_type *content_top = content; | |
189 | ||
190 | static int handle_content_type(char *line) | |
d4a9ce78 | 191 | { |
87ab7992 DZ |
192 | char boundary[256]; |
193 | ||
194 | if (strcasestr(line, "text/") == NULL) | |
195 | message_type = TYPE_OTHER; | |
196 | if (slurp_attr(line, "boundary=", boundary + 2)) { | |
197 | memcpy(boundary, "--", 2); | |
198 | if (content_top++ >= &content[MAX_BOUNDARIES]) { | |
199 | fprintf(stderr, "Too many boundaries to handle\n"); | |
200 | exit(1); | |
201 | } | |
202 | content_top->boundary_len = strlen(boundary); | |
203 | content_top->boundary = xmalloc(content_top->boundary_len+1); | |
204 | strcpy(content_top->boundary, boundary); | |
b893f091 | 205 | } |
87ab7992 | 206 | if (slurp_attr(line, "charset=", charset)) { |
d4a9ce78 JH |
207 | int i, c; |
208 | for (i = 0; (c = charset[i]) != 0; i++) | |
209 | charset[i] = tolower(c); | |
210 | } | |
211 | return 0; | |
212 | } | |
213 | ||
d4a9ce78 JH |
214 | static int handle_content_transfer_encoding(char *line) |
215 | { | |
216 | if (strcasestr(line, "base64")) | |
217 | transfer_encoding = TE_BASE64; | |
218 | else if (strcasestr(line, "quoted-printable")) | |
219 | transfer_encoding = TE_QP; | |
220 | else | |
221 | transfer_encoding = TE_DONTCARE; | |
222 | return 0; | |
2744b234 LT |
223 | } |
224 | ||
d4a9ce78 JH |
225 | static int is_multipart_boundary(const char *line) |
226 | { | |
87ab7992 | 227 | return (!memcmp(line, content_top->boundary, content_top->boundary_len)); |
d4a9ce78 JH |
228 | } |
229 | ||
/*
 * Strip trailing whitespace from 'line' in place and return the length of
 * what remains.
 */
static int eatspace(char *line)
{
	char *end = line + strlen(line);

	while (end > line && isspace(end[-1]))
		*--end = '\0';
	return (int)(end - line);
}
237 | ||
/*
 * Strip leading noise from a subject line and trailing whitespace from
 * what is left.
 *
 * Repeatedly removes, from the front: "re:" / "Re:" style prefixes,
 * blanks, tabs, colons, and "[...]" tags.  A bracketed tag is dropped only
 * when the distance to its ']' is at most twice the length of the text
 * from ']' onward; a '[' without any ']' is skipped by itself.  Returns a
 * pointer into 'subject'.
 */
static char *cleanup_subject(char *subject)
{
	for (;;) {
		const char first = *subject;

		if ((first == 'r' || first == 'R') &&
		    !memcmp("e:", subject + 1, 2)) {
			subject += 3;
			continue;
		}

		if (first == ' ' || first == '\t' || first == ':') {
			subject++;
			continue;
		}

		if (first == '[') {
			char *closing = strchr(subject, ']');
			int tail_len, tag_len;

			if (closing == NULL) {
				subject++;
				continue;
			}
			tail_len = strlen(closing);
			tag_len = closing - subject;
			if (tag_len <= tail_len * 2) {
				subject = closing + 1;
				continue;
			}
		}

		/* Nothing more to strip from the front. */
		eatspace(subject);
		return subject;
	}
}
2744b234 LT |
272 | |
/*
 * Collapse every run of whitespace in 'buf' into a single ' ', in place.
 *
 * Done in one pass with separate read and write positions, instead of
 * shifting the whole tail of the string once per removed character.
 */
static void cleanup_space(char *buf)
{
	char *src = buf;
	char *dst = buf;

	while (*src) {
		unsigned char c = *src++;
		if (isspace(c)) {
			*dst++ = ' ';
			/* swallow the rest of this whitespace run */
			while (isspace((unsigned char)*src))
				src++;
		}
		else
			*dst++ = c;
	}
	*dst = 0;
}
289 | ||
b75bf2c3 | 290 | static void decode_header(char *it); |
87ab7992 DZ |
291 | static char *header[MAX_HDR_PARSED] = { |
292 | "From","Subject","Date", | |
d4a9ce78 JH |
293 | }; |
294 | ||
86747c13 | 295 | static int check_header(char *line, char **hdr_data, int overwrite) |
d4a9ce78 JH |
296 | { |
297 | int i; | |
298 | ||
87ab7992 DZ |
299 | /* search for the interesting parts */ |
300 | for (i = 0; header[i]; i++) { | |
301 | int len = strlen(header[i]); | |
86747c13 | 302 | if ((!hdr_data[i] || overwrite) && |
87ab7992 | 303 | !strncasecmp(line, header[i], len) && |
d4a9ce78 | 304 | line[len] == ':' && isspace(line[len + 1])) { |
33504530 EB |
305 | /* Unwrap inline B and Q encoding, and optionally |
306 | * normalize the meta information to utf8. | |
307 | */ | |
b75bf2c3 | 308 | decode_header(line + len + 2); |
87ab7992 DZ |
309 | hdr_data[i] = xmalloc(1000 * sizeof(char)); |
310 | if (! handle_header(line, hdr_data[i], len + 2)) { | |
311 | return 1; | |
312 | } | |
d4a9ce78 JH |
313 | } |
314 | } | |
d4a9ce78 | 315 | |
87ab7992 DZ |
316 | /* Content stuff */ |
317 | if (!strncasecmp(line, "Content-Type", 12) && | |
318 | line[12] == ':' && isspace(line[12 + 1])) { | |
319 | decode_header(line + 12 + 2); | |
320 | if (! handle_content_type(line)) { | |
321 | return 1; | |
322 | } | |
323 | } | |
324 | if (!strncasecmp(line, "Content-Transfer-Encoding", 25) && | |
325 | line[25] == ':' && isspace(line[25 + 1])) { | |
326 | decode_header(line + 25 + 2); | |
327 | if (! handle_content_transfer_encoding(line)) { | |
328 | return 1; | |
329 | } | |
330 | } | |
331 | ||
332 | /* for inbody stuff */ | |
333 | if (!memcmp(">From", line, 5) && isspace(line[5])) | |
334 | return 1; | |
335 | if (!memcmp("[PATCH]", line, 7) && isspace(line[7])) { | |
336 | for (i = 0; header[i]; i++) { | |
337 | if (!memcmp("Subject: ", header[i], 9)) { | |
338 | if (! handle_header(line, hdr_data[i], 0)) { | |
339 | return 1; | |
340 | } | |
341 | } | |
342 | } | |
343 | } | |
344 | ||
345 | /* no match */ | |
346 | return 0; | |
d4a9ce78 JH |
347 | } |
348 | ||
ef29c117 JH |
/*
 * Return 1 when 'line' looks like a header line, 0 otherwise.
 *
 * mbox "From " / ">From " separator lines count as headers.  Anything
 * else must be a non-empty field name followed by ':'.
 */
static int is_rfc2822_header(char *line)
{
	/*
	 * The section that defines the loosest possible
	 * field name is "3.6.8 Optional fields".
	 *
	 * optional-field = field-name ":" unstructured CRLF
	 * field-name = 1*ftext
	 * ftext = %d33-57 / %59-126
	 */
	int ch;
	char *cp = line;

	/*
	 * Count mbox From headers as headers.  strncmp() stops at the end
	 * of a short line instead of comparing bytes behind its NUL.
	 */
	if (!strncmp(line, "From ", 5) || !strncmp(line, ">From ", 6))
		return 1;

	while ((ch = *cp++)) {
		if (ch == ':')
			/*
			 * cp already points past the colon; the field name
			 * is empty exactly when the colon is at 'line'.
			 */
			return cp - 1 != line;
		if ((33 <= ch && ch <= 57) ||
		    (59 <= ch && ch <= 126))
			continue;
		break;
	}
	return 0;
}
376 | ||
34fc5cef LT |
377 | /* |
378 | * sz is size of 'line' buffer in bytes. Must be reasonably | |
379 | * long enough to hold one physical real-world e-mail line. | |
380 | */ | |
d4a9ce78 JH |
/*
 * Read one logical header line, folded continuation lines included, from
 * 'in' into 'line' (a buffer of 'sz' bytes).
 *
 * Returns 0 at end of input, and also when the line just read is empty or
 * is not a header; in that second case the line is left in 'line' with its
 * newline restored.  Returns 1 for a header.  Each continuation line is
 * appended with its first character replaced by '\n'; text that does not
 * fit is dropped.
 */
static int read_one_header_line(char *line, int sz, FILE *in)
{
	int used;

	/*
	 * Reading one byte less than the buffer holds keeps line[used + 1]
	 * addressable when the newline is put back below.
	 */
	sz--;

	if (fgets(line, sz, in) == NULL)
		return 0;

	used = eatspace(line);
	if (used == 0 || !is_rfc2822_header(line)) {
		/* Not a header: hand the line back the way it was read. */
		line[used] = '\n';
		line[used + 1] = '\0';
		return 0;
	}

	/* Glue on folded continuation lines (they start with blank or tab). */
	while (1) {
		static char continuation[1000];
		int next, extra;

		next = fgetc(in);
		ungetc(next, in);
		if (next != ' ' && next != '\t')
			break;
		if (fgets(continuation, sizeof(continuation), in) == NULL)
			break;

		extra = eatspace(continuation);
		if (used >= sz - 1)
			continue;	/* buffer full: read and discard */
		if (extra >= sz - used)
			extra = sz - used - 1;
		memcpy(line + used, continuation, extra);
		/* the leading blank of the fold becomes a line break */
		line[used] = '\n';
		used += extra;
	}
	line[used] = '\0';

	return 1;
}
434 | ||
/*
 * Decode quoted-printable text from 'in' (up to, not including, 'ep')
 * into 'ot'.  'ot' may be the same buffer as 'in'.
 *
 * "=XY" becomes the byte with hex value XY; '=' followed by a newline or
 * by the end of the text stops decoding.  When 'rfc2047' is set, '_' is
 * decoded as a space.  The output is NUL-terminated.  Always returns 0.
 */
static int decode_q_segment(char *in, char *ot, char *ep, int rfc2047)
{
	int c;
	while ((c = *in++) != 0 && (in <= ep)) {
		if (c == '=') {
			int hi, lo;

			if (in >= ep)
				break; /* '=' was the last character */
			hi = *in++;
			if (hi == '\n' || !hi)
				break; /* drop trailing newline */
			/*
			 * A truncated escape ("=X" at the very end) must not
			 * pull in the terminating NUL or bytes behind 'ep'.
			 */
			if (in >= ep || !*in)
				break;
			lo = *in++;
			*ot++ = ((hexval(hi) << 4) | hexval(lo));
			continue;
		}
		if (rfc2047 && c == '_') /* rfc2047 4.2 (2) */
			c = 0x20;
		*ot++ = c;
	}
	*ot = 0;
	return 0;
}
453 | ||
/*
 * Map one character of base64 text to its 6-bit value.  The padding
 * character '=' maps to 0; characters outside the alphabet yield -1.
 */
static int b64_sextet(int c)
{
	if ('A' <= c && c <= 'Z')
		return c - 'A';
	if ('a' <= c && c <= 'z')
		return c - 'a' + 26;
	if ('0' <= c && c <= '9')
		return c - '0' + 52;
	if (c == '+')
		return 62;
	if (c == '/')
		return 63;
	if (c == '=')
		return 0;	/* padding is trusted and counted as zero bits */
	return -1;
}

/*
 * Decode base64 text from 'in' (up to, not including, 'ep') into 'ot',
 * which may be the same buffer as 'in'.  Characters outside the alphabet
 * are skipped.  The output is NUL-terminated.  Always returns 0.
 */
static int decode_b_segment(char *in, char *ot, char *ep)
{
	int phase = 0;	/* position inside the current group of four */
	int carry = 0;	/* bits left over from the previous character */
	int c;

	for (c = *in++; c != 0 && in <= ep; c = *in++) {
		const int bits = b64_sextet(c);

		if (bits < 0)
			continue; /* garbage */

		if (phase == 0) {
			carry = bits << 2;
		} else if (phase == 1) {
			*ot++ = (carry | (bits >> 4));
			carry = (bits & 15) << 4;
		} else if (phase == 2) {
			*ot++ = (carry | (bits >> 2));
			carry = (bits & 3) << 6;
		} else {
			*ot++ = (carry | bits);
			carry = 0;
		}
		phase = (phase + 1) & 3;
	}
	*ot = 0;
	return 0;
}
500 | ||
b59d398b LT |
501 | /* |
502 | * When there is no known charset, guess. | |
503 | * | |
504 | * Right now we assume that if the target is UTF-8 (the default), | |
505 | * and it already looks like UTF-8 (which includes US-ASCII as its | |
506 | * subset, of course) then that is what it is and there is nothing | |
507 | * to do. | |
508 | * | |
509 | * Otherwise, we default to assuming it is Latin1 for historical | |
510 | * reasons. | |
511 | */ | |
/*
 * Returns NULL when the target is UTF-8 and 'line' already passes
 * is_utf8(); returns "latin1" in every other case.
 */
static const char *guess_charset(const char *line, const char *target_charset)
{
	if (is_encoding_utf8(target_charset) && is_utf8(line))
		return NULL;
	return "latin1";
}
520 | ||
3a55602e | 521 | static void convert_to_utf8(char *line, const char *charset) |
d4a9ce78 | 522 | { |
b59d398b LT |
523 | char *out; |
524 | ||
525 | if (!charset || !*charset) { | |
526 | charset = guess_charset(line, metainfo_charset); | |
527 | if (!charset) | |
528 | return; | |
529 | } | |
b45974a6 | 530 | |
7296096c JS |
531 | if (!strcmp(metainfo_charset, charset)) |
532 | return; | |
b59d398b | 533 | out = reencode_string(line, metainfo_charset, charset); |
bb1091a4 JH |
534 | if (!out) |
535 | die("cannot convert from %s to %s\n", | |
b59d398b | 536 | charset, metainfo_charset); |
b45974a6 JH |
537 | strcpy(line, out); |
538 | free(out); | |
d4a9ce78 JH |
539 | } |
540 | ||
b75bf2c3 | 541 | static int decode_header_bq(char *it) |
d4a9ce78 JH |
542 | { |
543 | char *in, *out, *ep, *cp, *sp; | |
544 | char outbuf[1000]; | |
b75bf2c3 | 545 | int rfc2047 = 0; |
d4a9ce78 JH |
546 | |
547 | in = it; | |
548 | out = outbuf; | |
549 | while ((ep = strstr(in, "=?")) != NULL) { | |
550 | int sz, encoding; | |
551 | char charset_q[256], piecebuf[256]; | |
b75bf2c3 JH |
552 | rfc2047 = 1; |
553 | ||
d4a9ce78 JH |
554 | if (in != ep) { |
555 | sz = ep - in; | |
556 | memcpy(out, in, sz); | |
557 | out += sz; | |
558 | in += sz; | |
559 | } | |
560 | /* E.g. | |
561 | * ep : "=?iso-2022-jp?B?GyR...?= foo" | |
562 | * ep : "=?ISO-8859-1?Q?Foo=FCbar?= baz" | |
563 | */ | |
564 | ep += 2; | |
565 | cp = strchr(ep, '?'); | |
566 | if (!cp) | |
b75bf2c3 | 567 | return rfc2047; /* no munging */ |
d4a9ce78 JH |
568 | for (sp = ep; sp < cp; sp++) |
569 | charset_q[sp - ep] = tolower(*sp); | |
570 | charset_q[cp - ep] = 0; | |
571 | encoding = cp[1]; | |
572 | if (!encoding || cp[2] != '?') | |
b75bf2c3 | 573 | return rfc2047; /* no munging */ |
d4a9ce78 JH |
574 | ep = strstr(cp + 3, "?="); |
575 | if (!ep) | |
b75bf2c3 | 576 | return rfc2047; /* no munging */ |
d4a9ce78 JH |
577 | switch (tolower(encoding)) { |
578 | default: | |
b75bf2c3 | 579 | return rfc2047; /* no munging */ |
d4a9ce78 JH |
580 | case 'b': |
581 | sz = decode_b_segment(cp + 3, piecebuf, ep); | |
582 | break; | |
583 | case 'q': | |
75731930 | 584 | sz = decode_q_segment(cp + 3, piecebuf, ep, 1); |
d4a9ce78 JH |
585 | break; |
586 | } | |
587 | if (sz < 0) | |
b75bf2c3 | 588 | return rfc2047; |
650e4be5 | 589 | if (metainfo_charset) |
d4a9ce78 JH |
590 | convert_to_utf8(piecebuf, charset_q); |
591 | strcpy(out, piecebuf); | |
592 | out += strlen(out); | |
593 | in = ep + 2; | |
594 | } | |
595 | strcpy(out, in); | |
596 | strcpy(it, outbuf); | |
b75bf2c3 JH |
597 | return rfc2047; |
598 | } | |
599 | ||
600 | static void decode_header(char *it) | |
601 | { | |
602 | ||
603 | if (decode_header_bq(it)) | |
604 | return; | |
605 | /* otherwise "it" is a straight copy of the input. | |
606 | * This can be binary guck but there is no charset specified. | |
607 | */ | |
608 | if (metainfo_charset) | |
609 | convert_to_utf8(it, ""); | |
d4a9ce78 JH |
610 | } |
611 | ||
612 | static void decode_transfer_encoding(char *line) | |
613 | { | |
614 | char *ep; | |
615 | ||
616 | switch (transfer_encoding) { | |
617 | case TE_QP: | |
618 | ep = line + strlen(line); | |
75731930 | 619 | decode_q_segment(line, line, ep, 0); |
d4a9ce78 JH |
620 | break; |
621 | case TE_BASE64: | |
622 | ep = line + strlen(line); | |
623 | decode_b_segment(line, line, ep); | |
624 | break; | |
625 | case TE_DONTCARE: | |
626 | break; | |
627 | } | |
628 | } | |
629 | ||
87ab7992 DZ |
630 | static int handle_filter(char *line); |
631 | ||
632 | static int find_boundary(void) | |
2744b234 | 633 | { |
87ab7992 DZ |
634 | while(fgets(line, sizeof(line), fin) != NULL) { |
635 | if (is_multipart_boundary(line)) | |
636 | return 1; | |
637 | } | |
638 | return 0; | |
639 | } | |
640 | ||
641 | static int handle_boundary(void) | |
642 | { | |
86747c13 | 643 | char newline[]="\n"; |
87ab7992 DZ |
644 | again: |
645 | if (!memcmp(line+content_top->boundary_len, "--", 2)) { | |
646 | /* we hit an end boundary */ | |
647 | /* pop the current boundary off the stack */ | |
648 | free(content_top->boundary); | |
649 | ||
650 | /* technically won't happen as is_multipart_boundary() | |
651 | will fail first. But just in case.. | |
652 | */ | |
653 | if (content_top-- < content) { | |
654 | fprintf(stderr, "Detected mismatched boundaries, " | |
655 | "can't recover\n"); | |
656 | exit(1); | |
657 | } | |
86747c13 | 658 | handle_filter(newline); |
87ab7992 DZ |
659 | |
660 | /* skip to the next boundary */ | |
661 | if (!find_boundary()) | |
662 | return 0; | |
663 | goto again; | |
664 | } | |
665 | ||
666 | /* set some defaults */ | |
667 | transfer_encoding = TE_DONTCARE; | |
668 | charset[0] = 0; | |
669 | message_type = TYPE_TEXT; | |
d4a9ce78 | 670 | |
87ab7992 DZ |
671 | /* slurp in this section's info */ |
672 | while (read_one_header_line(line, sizeof(line), fin)) | |
86747c13 | 673 | check_header(line, p_hdr_data, 0); |
2744b234 | 674 | |
87ab7992 DZ |
675 | /* eat the blank line after section info */ |
676 | return (fgets(line, sizeof(line), fin) != NULL); | |
d4a9ce78 JH |
677 | } |
678 | ||
f0658cf2 DZ |
/*
 * Return 1 when 'line' marks the end of the commit message and the start
 * of the patch: a "diff -" header, a CVS "Index: " line, "--- <filename>",
 * or a "---" separator followed only by whitespace up to the newline.
 * Returns 0 for anything else.
 */
static inline int patchbreak(const char *line)
{
	const char *rest;

	if (memcmp("diff -", line, 6) == 0 ||
	    memcmp("Index: ", line, 7) == 0)
		return 1;

	if (memcmp("---", line, 3) != 0)
		return 0;

	rest = line + 3;

	/* "--- " directly followed by a filename */
	if (rest[0] == ' ' && !isspace(rest[1]))
		return 1;

	/* "---" with nothing but whitespace up to the newline */
	while (isspace((unsigned char)*rest)) {
		if (*rest++ == '\n')
			return 1;
	}
	return 0;
}
710 | ||
711 | ||
87ab7992 | 712 | static int handle_commit_msg(char *line) |
d4a9ce78 | 713 | { |
87ab7992 DZ |
714 | static int still_looking = 1; |
715 | ||
d4a9ce78 JH |
716 | if (!cmitmsg) |
717 | return 0; | |
2744b234 | 718 | |
87ab7992 DZ |
719 | if (still_looking) { |
720 | char *cp = line; | |
721 | if (isspace(*line)) { | |
722 | for (cp = line + 1; *cp; cp++) { | |
723 | if (!isspace(*cp)) | |
724 | break; | |
725 | } | |
726 | if (!*cp) | |
727 | return 0; | |
728 | } | |
86747c13 | 729 | if ((still_looking = check_header(cp, s_hdr_data, 0)) != 0) |
87ab7992 DZ |
730 | return 0; |
731 | } | |
8b4525fb | 732 | |
86747c13 DZ |
733 | /* normalize the log message to UTF-8. */ |
734 | if (metainfo_charset) | |
735 | convert_to_utf8(line, charset); | |
736 | ||
f0658cf2 | 737 | if (patchbreak(line)) { |
87ab7992 DZ |
738 | fclose(cmitmsg); |
739 | cmitmsg = NULL; | |
740 | return 1; | |
741 | } | |
8b4525fb | 742 | |
87ab7992 | 743 | fputs(line, cmitmsg); |
d4a9ce78 | 744 | return 0; |
2744b234 LT |
745 | } |
746 | ||
87ab7992 | 747 | static int handle_patch(char *line) |
2744b234 | 748 | { |
87ab7992 DZ |
749 | fputs(line, patchfile); |
750 | patch_lines++; | |
751 | return 0; | |
2744b234 LT |
752 | } |
753 | ||
/*
 * Route one body line to the stage that is currently active: the commit
 * message first, then the patch.
 *
 * A stage returning non-zero means it is finished; the same line is then
 * offered to the next stage.  Returns 0 while a stage accepted the line
 * and 1 once every stage is finished.
 */
static int handle_filter(char *line)
{
	static int filter = 0;	/* stage we left off on */

	if (filter == 0) {
		if (!handle_commit_msg(line))
			return 0;
		filter++;
	}
	if (filter == 1) {
		if (!handle_patch(line))
			return 0;
		filter++;
	}
	return 1;
}
776 | ||
87ab7992 | 777 | static void handle_body(void) |
1d8fa411 | 778 | { |
87ab7992 DZ |
779 | int rc = 0; |
780 | static char newline[2000]; | |
781 | static char *np = newline; | |
d4a9ce78 JH |
782 | |
783 | /* Skip up to the first boundary */ | |
87ab7992 DZ |
784 | if (content_top->boundary) { |
785 | if (!find_boundary()) | |
786 | return; | |
787 | } | |
788 | ||
789 | do { | |
790 | /* process any boundary lines */ | |
791 | if (content_top->boundary && is_multipart_boundary(line)) { | |
792 | /* flush any leftover */ | |
793 | if ((transfer_encoding == TE_BASE64) && | |
794 | (np != newline)) { | |
795 | handle_filter(newline); | |
796 | } | |
797 | if (!handle_boundary()) | |
798 | return; | |
799 | } | |
800 | ||
86747c13 | 801 | /* Unwrap transfer encoding */ |
87ab7992 | 802 | decode_transfer_encoding(line); |
87ab7992 DZ |
803 | |
804 | switch (transfer_encoding) { | |
805 | case TE_BASE64: | |
806 | { | |
807 | char *op = line; | |
808 | ||
809 | /* binary data most likely doesn't have newlines */ | |
810 | if (message_type != TYPE_TEXT) { | |
811 | rc = handle_filter(line); | |
812 | break; | |
813 | } | |
814 | ||
815 | /* this is a decoded line that may contain | |
816 | * multiple new lines. Pass only one chunk | |
817 | * at a time to handle_filter() | |
818 | */ | |
819 | ||
820 | do { | |
821 | while (*op != '\n' && *op != 0) | |
822 | *np++ = *op++; | |
823 | *np = *op; | |
824 | if (*np != 0) { | |
825 | /* should be sitting on a new line */ | |
826 | *(++np) = 0; | |
827 | op++; | |
828 | rc = handle_filter(newline); | |
829 | np = newline; | |
830 | } | |
831 | } while (*op != 0); | |
832 | /* the partial chunk is saved in newline and | |
833 | * will be appended by the next iteration of fgets | |
834 | */ | |
d4a9ce78 | 835 | break; |
1d8fa411 | 836 | } |
87ab7992 DZ |
837 | default: |
838 | rc = handle_filter(line); | |
d4a9ce78 | 839 | } |
87ab7992 DZ |
840 | if (rc) |
841 | /* nothing left to filter */ | |
842 | break; | |
843 | } while (fgets(line, sizeof(line), fin)); | |
844 | ||
845 | return; | |
1d8fa411 JH |
846 | } |
847 | ||
d7f6bae2 JH |
/*
 * Print 'data' to 'fout' as "<hdr>: <text>" lines, one output line for
 * each newline-separated piece of 'data'.  The piece after the last
 * newline is printed too, even when it is empty.
 */
static void output_header_lines(FILE *fout, const char *hdr, char *data)
{
	char *cursor = data;
	char *eol;

	for (eol = strchr(cursor, '\n'); eol != NULL; eol = strchr(cursor, '\n')) {
		fprintf(fout, "%s: %.*s\n", hdr, (int)(eol - cursor), cursor);
		cursor = eol + 1;
	}
	fprintf(fout, "%s: %.*s\n", hdr, (int)strlen(cursor), cursor);
}
863 | ||
87ab7992 | 864 | static void handle_info(void) |
2744b234 | 865 | { |
87ab7992 DZ |
866 | char *sub; |
867 | char *hdr; | |
868 | int i; | |
869 | ||
870 | for (i = 0; header[i]; i++) { | |
871 | ||
872 | /* only print inbody headers if we output a patch file */ | |
873 | if (patch_lines && s_hdr_data[i]) | |
874 | hdr = s_hdr_data[i]; | |
875 | else if (p_hdr_data[i]) | |
876 | hdr = p_hdr_data[i]; | |
877 | else | |
878 | continue; | |
879 | ||
880 | if (!memcmp(header[i], "Subject", 7)) { | |
d7f6bae2 JH |
881 | if (keep_subject) |
882 | sub = hdr; | |
883 | else { | |
884 | sub = cleanup_subject(hdr); | |
885 | cleanup_space(sub); | |
886 | } | |
887 | output_header_lines(fout, "Subject", sub); | |
87ab7992 DZ |
888 | } else if (!memcmp(header[i], "From", 4)) { |
889 | handle_from(hdr); | |
890 | fprintf(fout, "Author: %s\n", name); | |
891 | fprintf(fout, "Email: %s\n", email); | |
892 | } else { | |
893 | cleanup_space(hdr); | |
894 | fprintf(fout, "%s: %s\n", header[i], hdr); | |
895 | } | |
d4a9ce78 | 896 | } |
87ab7992 | 897 | fprintf(fout, "\n"); |
2744b234 LT |
898 | } |
899 | ||
fcd056a6 JH |
900 | static int mailinfo(FILE *in, FILE *out, int ks, const char *encoding, |
901 | const char *msg, const char *patch) | |
34488e3c LS |
902 | { |
903 | keep_subject = ks; | |
904 | metainfo_charset = encoding; | |
905 | fin = in; | |
906 | fout = out; | |
907 | ||
908 | cmitmsg = fopen(msg, "w"); | |
909 | if (!cmitmsg) { | |
910 | perror(msg); | |
911 | return -1; | |
912 | } | |
913 | patchfile = fopen(patch, "w"); | |
914 | if (!patchfile) { | |
915 | perror(patch); | |
916 | fclose(cmitmsg); | |
917 | return -1; | |
918 | } | |
87ab7992 DZ |
919 | |
920 | p_hdr_data = xcalloc(MAX_HDR_PARSED, sizeof(char *)); | |
921 | s_hdr_data = xcalloc(MAX_HDR_PARSED, sizeof(char *)); | |
922 | ||
923 | /* process the email header */ | |
924 | while (read_one_header_line(line, sizeof(line), fin)) | |
86747c13 | 925 | check_header(line, p_hdr_data, 1); |
87ab7992 DZ |
926 | |
927 | handle_body(); | |
928 | handle_info(); | |
34488e3c LS |
929 | |
930 | return 0; | |
931 | } | |
932 | ||
6bff6a60 | 933 | static const char mailinfo_usage[] = |
9f63892b | 934 | "git-mailinfo [-k] [-u | --encoding=<encoding>] msg patch <mail >info"; |
d4a9ce78 | 935 | |
a633fca0 | 936 | int cmd_mailinfo(int argc, const char **argv, const char *prefix) |
2744b234 | 937 | { |
bb1091a4 JH |
938 | const char *def_charset; |
939 | ||
f1f909e3 JH |
940 | /* NEEDSWORK: might want to do the optional .git/ directory |
941 | * discovery | |
942 | */ | |
943 | git_config(git_default_config); | |
944 | ||
bb1091a4 JH |
945 | def_charset = (git_commit_encoding ? git_commit_encoding : "utf-8"); |
946 | metainfo_charset = def_charset; | |
947 | ||
6bff6a60 JH |
948 | while (1 < argc && argv[1][0] == '-') { |
949 | if (!strcmp(argv[1], "-k")) | |
950 | keep_subject = 1; | |
d4a9ce78 | 951 | else if (!strcmp(argv[1], "-u")) |
bb1091a4 JH |
952 | metainfo_charset = def_charset; |
953 | else if (!strcmp(argv[1], "-n")) | |
954 | metainfo_charset = NULL; | |
cc44c765 | 955 | else if (!prefixcmp(argv[1], "--encoding=")) |
9f63892b | 956 | metainfo_charset = argv[1] + 11; |
d4a9ce78 | 957 | else |
f1f909e3 | 958 | usage(mailinfo_usage); |
6bff6a60 JH |
959 | argc--; argv++; |
960 | } | |
961 | ||
a196d8d4 | 962 | if (argc != 3) |
f1f909e3 | 963 | usage(mailinfo_usage); |
34488e3c LS |
964 | |
965 | return !!mailinfo(stdin, stdout, keep_subject, metainfo_charset, argv[1], argv[2]); | |
2744b234 | 966 | } |