]>
Commit | Line | Data |
---|---|---|
2744b234 LT |
1 | /* |
2 | * Another stupid program, this one parsing the headers of an | |
3 | * email to figure out authorship and subject | |
4 | */ | |
f1f909e3 | 5 | #include "cache.h" |
34488e3c | 6 | #include "builtin.h" |
b45974a6 | 7 | #include "utf8.h" |
2744b234 | 8 | |
/* Streams: commit-message output, patch output, mail input, info output. */
static FILE *cmitmsg;
static FILE *patchfile;
static FILE *fin;
static FILE *fout;

/* Non-zero: cleanup_subject() returns the subject untouched. */
static int keep_subject;
/* Target charset for header/log conversion; NULL disables conversion. */
static const char *metainfo_charset;

/* Shared line buffer, and the author name / e-mail picked from "From". */
static char line[1000];
static char name[1000];
static char email[1000];

/* Content-Transfer-Encoding of the part currently being read. */
static enum {
	TE_DONTCARE, TE_QP, TE_BASE64,
} transfer_encoding;
/* Whether the current part's Content-Type mentions "text/". */
static enum {
	TYPE_TEXT, TYPE_OTHER,
} message_type;

/* Lower-cased charset= attribute of the current part ("" when absent). */
static char charset[256];
/* Number of lines written to the patch file so far. */
static int patch_lines;
/* Header values taken from the mail header (p_) and from the body (s_). */
static char **p_hdr_data, **s_hdr_data;

#define MAX_HDR_PARSED 10
#define MAX_BOUNDARIES 5
d4a9ce78 | 30 | |
2744b234 LT |
/*
 * Pick the string to report as the author name.
 *
 * Returns `name` when it is 3..60 bytes long and contains none of
 * '@', '<' or '>'; otherwise returns `email` as the fallback.
 */
static char *sanity_check(char *name, char *email)
{
	int length = strlen(name);

	if (length < 3 || 60 < length)
		return email;
	if (strpbrk(name, "@<>") != NULL)
		return email;
	return name;
}
40 | ||
e0e3ba20 JH |
41 | static int bogus_from(char *line) |
42 | { | |
43 | /* John Doe <johndoe> */ | |
44 | char *bra, *ket, *dst, *cp; | |
45 | ||
46 | /* This is fallback, so do not bother if we already have an | |
47 | * e-mail address. | |
34488e3c | 48 | */ |
e0e3ba20 JH |
49 | if (*email) |
50 | return 0; | |
51 | ||
52 | bra = strchr(line, '<'); | |
53 | if (!bra) | |
54 | return 0; | |
55 | ket = strchr(bra, '>'); | |
56 | if (!ket) | |
57 | return 0; | |
58 | ||
59 | for (dst = email, cp = bra+1; cp < ket; ) | |
60 | *dst++ = *cp++; | |
61 | *dst = 0; | |
62 | for (cp = line; isspace(*cp); cp++) | |
63 | ; | |
64 | for (bra--; isspace(*bra); bra--) | |
65 | *bra = 0; | |
66 | cp = sanity_check(cp, email); | |
67 | strcpy(name, cp); | |
68 | return 1; | |
69 | } | |
70 | ||
2dec02b1 | 71 | static int handle_from(char *in_line) |
2744b234 | 72 | { |
2dec02b1 EB |
73 | char line[1000]; |
74 | char *at; | |
2744b234 LT |
75 | char *dst; |
76 | ||
2dec02b1 EB |
77 | strcpy(line, in_line); |
78 | at = strchr(line, '@'); | |
2744b234 | 79 | if (!at) |
e0e3ba20 | 80 | return bogus_from(line); |
2744b234 LT |
81 | |
82 | /* | |
83 | * If we already have one email, don't take any confusing lines | |
84 | */ | |
85 | if (*email && strchr(at+1, '@')) | |
86 | return 0; | |
87 | ||
d4a9ce78 JH |
88 | /* Pick up the string around '@', possibly delimited with <> |
89 | * pair; that is the email part. White them out while copying. | |
90 | */ | |
2744b234 LT |
91 | while (at > line) { |
92 | char c = at[-1]; | |
d4a9ce78 JH |
93 | if (isspace(c)) |
94 | break; | |
95 | if (c == '<') { | |
96 | at[-1] = ' '; | |
2744b234 | 97 | break; |
d4a9ce78 | 98 | } |
2744b234 LT |
99 | at--; |
100 | } | |
101 | dst = email; | |
102 | for (;;) { | |
103 | unsigned char c = *at; | |
d4a9ce78 JH |
104 | if (!c || c == '>' || isspace(c)) { |
105 | if (c == '>') | |
106 | *at = ' '; | |
2744b234 | 107 | break; |
d4a9ce78 | 108 | } |
2744b234 LT |
109 | *at++ = ' '; |
110 | *dst++ = c; | |
111 | } | |
112 | *dst++ = 0; | |
113 | ||
d4a9ce78 JH |
114 | /* The remainder is name. It could be "John Doe <john.doe@xz>" |
115 | * or "john.doe@xz (John Doe)", but we have whited out the | |
116 | * email part, so trim from both ends, possibly removing | |
117 | * the () pair at the end. | |
118 | */ | |
2744b234 LT |
119 | at = line + strlen(line); |
120 | while (at > line) { | |
121 | unsigned char c = *--at; | |
d4a9ce78 JH |
122 | if (!isspace(c)) { |
123 | at[(c == ')') ? 0 : 1] = 0; | |
2744b234 | 124 | break; |
d4a9ce78 | 125 | } |
2744b234 LT |
126 | } |
127 | ||
128 | at = line; | |
129 | for (;;) { | |
130 | unsigned char c = *at; | |
d4a9ce78 JH |
131 | if (!c || !isspace(c)) { |
132 | if (c == '(') | |
133 | at++; | |
2744b234 | 134 | break; |
d4a9ce78 | 135 | } |
2744b234 LT |
136 | at++; |
137 | } | |
2744b234 | 138 | at = sanity_check(at, email); |
2744b234 LT |
139 | strcpy(name, at); |
140 | return 1; | |
141 | } | |
142 | ||
/*
 * Copy the part of `line` starting at byte offset `ofs` into `data`.
 *
 * Returns 1 without copying when either pointer is NULL, 0 after a copy.
 * `data` must be large enough for the copied string; no size is checked.
 */
static int handle_header(char *line, char *data, int ofs)
{
	const char *value;

	if (line == NULL || data == NULL)
		return 1;

	value = &line[ofs];
	memcpy(data, value, strlen(value) + 1);
	return 0;
}
152 | ||
153 | /* NOTE NOTE NOTE. We do not claim we do full MIME. We just attempt | |
154 | * to have enough heuristics to grok MIME encoded patches often found | |
155 | * on our mailing lists. For example, we do not even treat header lines | |
156 | * case insensitively. | |
157 | */ | |
158 | ||
/*
 * Extract the value that follows `name` (for example "charset=") in `line`.
 *
 * The name is located case-insensitively.  A value that starts with a
 * double quote runs to the next double quote; an unquoted one runs to the
 * first ';', space or tab.  The value is stored NUL-terminated in `attr`,
 * which must be large enough (no size is checked).
 *
 * Returns 1 when the name was found, or 0 (with `attr` set to "") when not.
 */
static int slurp_attr(const char *line, const char *name, char *attr)
{
	const char *value = strcasestr(line, name);
	const char *terminators = "; \t";
	size_t length;

	if (value == NULL) {
		attr[0] = '\0';
		return 0;
	}

	value += strlen(name);
	if (value[0] == '"') {
		terminators = "\"";
		value++;
	}

	length = strcspn(value, terminators);
	memcpy(attr, value, length);
	attr[length] = '\0';
	return 1;
}
180 | ||
87ab7992 DZ |
181 | struct content_type { |
182 | char *boundary; | |
183 | int boundary_len; | |
184 | }; | |
185 | ||
186 | static struct content_type content[MAX_BOUNDARIES]; | |
187 | ||
188 | static struct content_type *content_top = content; | |
189 | ||
190 | static int handle_content_type(char *line) | |
d4a9ce78 | 191 | { |
87ab7992 DZ |
192 | char boundary[256]; |
193 | ||
194 | if (strcasestr(line, "text/") == NULL) | |
195 | message_type = TYPE_OTHER; | |
196 | if (slurp_attr(line, "boundary=", boundary + 2)) { | |
197 | memcpy(boundary, "--", 2); | |
198 | if (content_top++ >= &content[MAX_BOUNDARIES]) { | |
199 | fprintf(stderr, "Too many boundaries to handle\n"); | |
200 | exit(1); | |
201 | } | |
202 | content_top->boundary_len = strlen(boundary); | |
203 | content_top->boundary = xmalloc(content_top->boundary_len+1); | |
204 | strcpy(content_top->boundary, boundary); | |
b893f091 | 205 | } |
87ab7992 | 206 | if (slurp_attr(line, "charset=", charset)) { |
d4a9ce78 JH |
207 | int i, c; |
208 | for (i = 0; (c = charset[i]) != 0; i++) | |
209 | charset[i] = tolower(c); | |
210 | } | |
211 | return 0; | |
212 | } | |
213 | ||
d4a9ce78 JH |
214 | static int handle_content_transfer_encoding(char *line) |
215 | { | |
216 | if (strcasestr(line, "base64")) | |
217 | transfer_encoding = TE_BASE64; | |
218 | else if (strcasestr(line, "quoted-printable")) | |
219 | transfer_encoding = TE_QP; | |
220 | else | |
221 | transfer_encoding = TE_DONTCARE; | |
222 | return 0; | |
2744b234 LT |
223 | } |
224 | ||
d4a9ce78 JH |
225 | static int is_multipart_boundary(const char *line) |
226 | { | |
87ab7992 | 227 | return (!memcmp(line, content_top->boundary, content_top->boundary_len)); |
d4a9ce78 JH |
228 | } |
229 | ||
/*
 * Strip trailing whitespace from `line` in place.
 * Returns the length of what remains.
 */
static int eatspace(char *line)
{
	int len;

	for (len = strlen(line); len > 0; len--) {
		if (!isspace(line[len - 1]))
			break;
		line[len - 1] = 0;
	}
	return len;
}
237 | ||
d4a9ce78 | 238 | static char *cleanup_subject(char *subject) |
2744b234 | 239 | { |
6bff6a60 JH |
240 | if (keep_subject) |
241 | return subject; | |
2744b234 LT |
242 | for (;;) { |
243 | char *p; | |
244 | int len, remove; | |
245 | switch (*subject) { | |
246 | case 'r': case 'R': | |
247 | if (!memcmp("e:", subject+1, 2)) { | |
87ab7992 | 248 | subject += 3; |
2744b234 LT |
249 | continue; |
250 | } | |
251 | break; | |
252 | case ' ': case '\t': case ':': | |
253 | subject++; | |
254 | continue; | |
255 | ||
256 | case '[': | |
257 | p = strchr(subject, ']'); | |
258 | if (!p) { | |
259 | subject++; | |
260 | continue; | |
261 | } | |
262 | len = strlen(p); | |
263 | remove = p - subject; | |
264 | if (remove <= len *2) { | |
265 | subject = p+1; | |
266 | continue; | |
34488e3c | 267 | } |
2744b234 LT |
268 | break; |
269 | } | |
ae448e38 | 270 | eatspace(subject); |
2744b234 LT |
271 | return subject; |
272 | } | |
34488e3c | 273 | } |
2744b234 LT |
274 | |
/*
 * Collapse every run of whitespace in `buf` into one ' ', in place.
 *
 * Leading and trailing runs are collapsed too, not removed.  The string
 * is compacted in a single pass instead of shifting the whole tail once
 * per removed byte.
 */
static void cleanup_space(char *buf)
{
	char *src = buf;
	char *dst = buf;
	unsigned char c;

	while ((c = *src) != 0) {
		src++;
		if (!isspace(c)) {
			*dst++ = c;
			continue;
		}
		*dst++ = ' ';
		/* swallow the rest of this whitespace run */
		while (isspace((unsigned char)*src))
			src++;
	}
	*dst = 0;
}
291 | ||
b75bf2c3 | 292 | static void decode_header(char *it); |
87ab7992 DZ |
293 | static char *header[MAX_HDR_PARSED] = { |
294 | "From","Subject","Date", | |
d4a9ce78 JH |
295 | }; |
296 | ||
86747c13 | 297 | static int check_header(char *line, char **hdr_data, int overwrite) |
d4a9ce78 JH |
298 | { |
299 | int i; | |
300 | ||
87ab7992 DZ |
301 | /* search for the interesting parts */ |
302 | for (i = 0; header[i]; i++) { | |
303 | int len = strlen(header[i]); | |
86747c13 | 304 | if ((!hdr_data[i] || overwrite) && |
87ab7992 | 305 | !strncasecmp(line, header[i], len) && |
d4a9ce78 | 306 | line[len] == ':' && isspace(line[len + 1])) { |
33504530 EB |
307 | /* Unwrap inline B and Q encoding, and optionally |
308 | * normalize the meta information to utf8. | |
309 | */ | |
b75bf2c3 | 310 | decode_header(line + len + 2); |
87ab7992 DZ |
311 | hdr_data[i] = xmalloc(1000 * sizeof(char)); |
312 | if (! handle_header(line, hdr_data[i], len + 2)) { | |
313 | return 1; | |
314 | } | |
d4a9ce78 JH |
315 | } |
316 | } | |
d4a9ce78 | 317 | |
87ab7992 DZ |
318 | /* Content stuff */ |
319 | if (!strncasecmp(line, "Content-Type", 12) && | |
320 | line[12] == ':' && isspace(line[12 + 1])) { | |
321 | decode_header(line + 12 + 2); | |
322 | if (! handle_content_type(line)) { | |
323 | return 1; | |
324 | } | |
325 | } | |
326 | if (!strncasecmp(line, "Content-Transfer-Encoding", 25) && | |
327 | line[25] == ':' && isspace(line[25 + 1])) { | |
328 | decode_header(line + 25 + 2); | |
329 | if (! handle_content_transfer_encoding(line)) { | |
330 | return 1; | |
331 | } | |
332 | } | |
333 | ||
334 | /* for inbody stuff */ | |
335 | if (!memcmp(">From", line, 5) && isspace(line[5])) | |
336 | return 1; | |
337 | if (!memcmp("[PATCH]", line, 7) && isspace(line[7])) { | |
338 | for (i = 0; header[i]; i++) { | |
339 | if (!memcmp("Subject: ", header[i], 9)) { | |
340 | if (! handle_header(line, hdr_data[i], 0)) { | |
341 | return 1; | |
342 | } | |
343 | } | |
344 | } | |
345 | } | |
346 | ||
347 | /* no match */ | |
348 | return 0; | |
d4a9ce78 JH |
349 | } |
350 | ||
ef29c117 JH |
/*
 * Tell whether `line` looks like a header line.
 *
 * mbox "From " and ">From " lines count as headers.  Otherwise the line
 * must start with a field name followed by ':'.  The loosest field name
 * is the one of RFC 2822 section 3.6.8 "Optional fields":
 *
 *	optional-field = field-name ":" unstructured CRLF
 *	field-name     = 1*ftext
 *	ftext          = %d33-57 / %59-126
 *
 * so the name must hold at least one such character; a line that starts
 * with ':' is not a header.
 *
 * Returns 1 for a header line, 0 otherwise.
 */
static int is_rfc2822_header(char *line)
{
	int ch;
	char *cp = line;

	/* Count mbox From headers as headers */
	if (!strncmp(line, "From ", 5) || !strncmp(line, ">From ", 6))
		return 1;

	while ((ch = *cp++)) {
		if (ch == ':')
			/* cp is already past the ':'; require a non-empty name */
			return cp - 1 != line;
		if ((33 <= ch && ch <= 57) ||
		    (59 <= ch && ch <= 126))
			continue;
		break;
	}
	return 0;
}
378 | ||
34fc5cef LT |
/*
 * Read one logical header line from `in` into `line`, joining folded
 * continuation lines onto it.
 *
 * `sz` is the size of the `line` buffer in bytes; it must be long enough
 * to hold one physical real-world e-mail line.
 *
 * Returns 1 when a header was read (trailing whitespace stripped).
 * Returns 0 at end of input, or when the line is empty or not a header;
 * in that case the line is left in `line` with a single '\n' re-added.
 */
static int read_one_header_line(char *line, int sz, FILE *in)
{
	static char folded[1000];
	/*
	 * fgets() is handed one byte less than the buffer really has, so
	 * that the '\n' re-added below always fits together with its NUL.
	 */
	const int limit = sz - 1;
	int used;

	/* The first physical line. */
	if (fgets(line, limit, in) == NULL)
		return 0;

	/* Empty, or not a valid rfc2822 header: report "not a header". */
	used = eatspace(line);
	if (used == 0 || !is_rfc2822_header(line)) {
		line[used] = '\n';
		line[used + 1] = '\0';
		return 0;
	}

	/* Eat all continuation lines (rfc2822 header "folding"). */
	for (;;) {
		int next = fgetc(in);
		int extra;

		ungetc(next, in);
		if (next != ' ' && next != '\t')
			break;
		if (fgets(folded, sizeof(folded), in) == NULL)
			break;
		extra = eatspace(folded);

		/* Buffer already full: keep consuming, store nothing. */
		if (used >= limit - 1)
			continue;
		if (extra >= limit - used)
			extra = limit - used - 1;
		memcpy(line + used, folded, extra);
		used += extra;
	}
	line[used] = '\0';

	return 1;
}
435 | ||
/*
 * Decode quoted-printable text from `in` into `ot`, stopping at the NUL
 * or once the read position has moved past `ep`.
 *
 * "=XY" becomes the byte with hex value XY.  "=" followed by a newline or
 * by the end of the string ends decoding (soft line break).  With
 * `rfc2047` set, '_' decodes to a space (rfc2047 4.2 (2)).
 * The output is NUL-terminated.  `ot` may be the same buffer as `in`.
 *
 * Always returns 0.
 */
static int decode_q_segment(char *in, char *ot, char *ep, int rfc2047)
{
	int c;
	while ((c = *in++) != 0 && (in <= ep)) {
		if (c == '=') {
			int d = *in++;
			if (d == '\n' || !d)
				break; /* drop trailing newline */
			if (!*in)
				break; /* truncated "=X": do not read past the NUL */
			*ot++ = ((hexval(d) << 4) | hexval(*in++));
			continue;
		}
		if (rfc2047 && c == '_') /* rfc2047 4.2 (2) */
			c = 0x20;
		*ot++ = c;
	}
	*ot = 0;
	return 0;
}
454 | ||
/*
 * Decode base64 text from `in` into `ot`, stopping at the NUL or once the
 * read position has moved past `ep`.  `ot` may be the same buffer as `in`.
 *
 * Bytes outside the base64 alphabet are skipped.  '=' padding is treated
 * as a zero sextet, so it can produce NUL output bytes.
 * The output is NUL-terminated.  Always returns 0.
 */
static int decode_b_segment(char *in, char *ot, char *ep)
{
	int carry = 0;	/* bits left over from the previous sextet */
	int phase = 0;	/* position within the current group of four */

	for (;;) {
		int ch = *in++;
		int sextet;

		if (ch == 0 || in > ep)
			break;

		if ('A' <= ch && ch <= 'Z')
			sextet = ch - 'A';
		else if ('a' <= ch && ch <= 'z')
			sextet = ch - 'a' + 26;
		else if ('0' <= ch && ch <= '9')
			sextet = ch - '0' + 52;
		else if (ch == '+')
			sextet = 62;
		else if (ch == '/')
			sextet = 63;
		else if (ch == '=')
			sextet = 0;	/* padding; the data is trusted for now */
		else
			continue;	/* garbage */

		if (phase == 0) {
			carry = sextet << 2;
			phase = 1;
		} else if (phase == 1) {
			*ot++ = (carry | (sextet >> 4));
			carry = (sextet & 15) << 4;
			phase = 2;
		} else if (phase == 2) {
			*ot++ = (carry | (sextet >> 2));
			carry = (sextet & 3) << 6;
			phase = 3;
		} else {
			*ot++ = (carry | sextet);
			carry = 0;
			phase = 0;
		}
	}
	*ot = 0;
	return 0;
}
501 | ||
3a55602e | 502 | static void convert_to_utf8(char *line, const char *charset) |
d4a9ce78 | 503 | { |
3a55602e SP |
504 | static const char latin_one[] = "latin1"; |
505 | const char *input_charset = *charset ? charset : latin_one; | |
b45974a6 JH |
506 | char *out = reencode_string(line, metainfo_charset, input_charset); |
507 | ||
bb1091a4 JH |
508 | if (!out) |
509 | die("cannot convert from %s to %s\n", | |
510 | input_charset, metainfo_charset); | |
b45974a6 JH |
511 | strcpy(line, out); |
512 | free(out); | |
d4a9ce78 JH |
513 | } |
514 | ||
b75bf2c3 | 515 | static int decode_header_bq(char *it) |
d4a9ce78 JH |
516 | { |
517 | char *in, *out, *ep, *cp, *sp; | |
518 | char outbuf[1000]; | |
b75bf2c3 | 519 | int rfc2047 = 0; |
d4a9ce78 JH |
520 | |
521 | in = it; | |
522 | out = outbuf; | |
523 | while ((ep = strstr(in, "=?")) != NULL) { | |
524 | int sz, encoding; | |
525 | char charset_q[256], piecebuf[256]; | |
b75bf2c3 JH |
526 | rfc2047 = 1; |
527 | ||
d4a9ce78 JH |
528 | if (in != ep) { |
529 | sz = ep - in; | |
530 | memcpy(out, in, sz); | |
531 | out += sz; | |
532 | in += sz; | |
533 | } | |
534 | /* E.g. | |
535 | * ep : "=?iso-2022-jp?B?GyR...?= foo" | |
536 | * ep : "=?ISO-8859-1?Q?Foo=FCbar?= baz" | |
537 | */ | |
538 | ep += 2; | |
539 | cp = strchr(ep, '?'); | |
540 | if (!cp) | |
b75bf2c3 | 541 | return rfc2047; /* no munging */ |
d4a9ce78 JH |
542 | for (sp = ep; sp < cp; sp++) |
543 | charset_q[sp - ep] = tolower(*sp); | |
544 | charset_q[cp - ep] = 0; | |
545 | encoding = cp[1]; | |
546 | if (!encoding || cp[2] != '?') | |
b75bf2c3 | 547 | return rfc2047; /* no munging */ |
d4a9ce78 JH |
548 | ep = strstr(cp + 3, "?="); |
549 | if (!ep) | |
b75bf2c3 | 550 | return rfc2047; /* no munging */ |
d4a9ce78 JH |
551 | switch (tolower(encoding)) { |
552 | default: | |
b75bf2c3 | 553 | return rfc2047; /* no munging */ |
d4a9ce78 JH |
554 | case 'b': |
555 | sz = decode_b_segment(cp + 3, piecebuf, ep); | |
556 | break; | |
557 | case 'q': | |
75731930 | 558 | sz = decode_q_segment(cp + 3, piecebuf, ep, 1); |
d4a9ce78 JH |
559 | break; |
560 | } | |
561 | if (sz < 0) | |
b75bf2c3 | 562 | return rfc2047; |
650e4be5 | 563 | if (metainfo_charset) |
d4a9ce78 JH |
564 | convert_to_utf8(piecebuf, charset_q); |
565 | strcpy(out, piecebuf); | |
566 | out += strlen(out); | |
567 | in = ep + 2; | |
568 | } | |
569 | strcpy(out, in); | |
570 | strcpy(it, outbuf); | |
b75bf2c3 JH |
571 | return rfc2047; |
572 | } | |
573 | ||
574 | static void decode_header(char *it) | |
575 | { | |
576 | ||
577 | if (decode_header_bq(it)) | |
578 | return; | |
579 | /* otherwise "it" is a straight copy of the input. | |
580 | * This can be binary guck but there is no charset specified. | |
581 | */ | |
582 | if (metainfo_charset) | |
583 | convert_to_utf8(it, ""); | |
d4a9ce78 JH |
584 | } |
585 | ||
586 | static void decode_transfer_encoding(char *line) | |
587 | { | |
588 | char *ep; | |
589 | ||
590 | switch (transfer_encoding) { | |
591 | case TE_QP: | |
592 | ep = line + strlen(line); | |
75731930 | 593 | decode_q_segment(line, line, ep, 0); |
d4a9ce78 JH |
594 | break; |
595 | case TE_BASE64: | |
596 | ep = line + strlen(line); | |
597 | decode_b_segment(line, line, ep); | |
598 | break; | |
599 | case TE_DONTCARE: | |
600 | break; | |
601 | } | |
602 | } | |
603 | ||
87ab7992 DZ |
604 | static int handle_filter(char *line); |
605 | ||
606 | static int find_boundary(void) | |
2744b234 | 607 | { |
87ab7992 DZ |
608 | while(fgets(line, sizeof(line), fin) != NULL) { |
609 | if (is_multipart_boundary(line)) | |
610 | return 1; | |
611 | } | |
612 | return 0; | |
613 | } | |
614 | ||
615 | static int handle_boundary(void) | |
616 | { | |
86747c13 | 617 | char newline[]="\n"; |
87ab7992 DZ |
618 | again: |
619 | if (!memcmp(line+content_top->boundary_len, "--", 2)) { | |
620 | /* we hit an end boundary */ | |
621 | /* pop the current boundary off the stack */ | |
622 | free(content_top->boundary); | |
623 | ||
624 | /* technically won't happen as is_multipart_boundary() | |
625 | will fail first. But just in case.. | |
626 | */ | |
627 | if (content_top-- < content) { | |
628 | fprintf(stderr, "Detected mismatched boundaries, " | |
629 | "can't recover\n"); | |
630 | exit(1); | |
631 | } | |
86747c13 | 632 | handle_filter(newline); |
87ab7992 DZ |
633 | |
634 | /* skip to the next boundary */ | |
635 | if (!find_boundary()) | |
636 | return 0; | |
637 | goto again; | |
638 | } | |
639 | ||
640 | /* set some defaults */ | |
641 | transfer_encoding = TE_DONTCARE; | |
642 | charset[0] = 0; | |
643 | message_type = TYPE_TEXT; | |
d4a9ce78 | 644 | |
87ab7992 DZ |
645 | /* slurp in this section's info */ |
646 | while (read_one_header_line(line, sizeof(line), fin)) | |
86747c13 | 647 | check_header(line, p_hdr_data, 0); |
2744b234 | 648 | |
87ab7992 DZ |
649 | /* eat the blank line after section info */ |
650 | return (fgets(line, sizeof(line), fin) != NULL); | |
d4a9ce78 JH |
651 | } |
652 | ||
f0658cf2 DZ |
653 | static inline int patchbreak(const char *line) |
654 | { | |
655 | /* Beginning of a "diff -" header? */ | |
656 | if (!memcmp("diff -", line, 6)) | |
657 | return 1; | |
658 | ||
659 | /* CVS "Index: " line? */ | |
660 | if (!memcmp("Index: ", line, 7)) | |
661 | return 1; | |
662 | ||
663 | /* | |
664 | * "--- <filename>" starts patches without headers | |
665 | * "---<sp>*" is a manual separator | |
666 | */ | |
667 | if (!memcmp("---", line, 3)) { | |
668 | line += 3; | |
669 | /* space followed by a filename? */ | |
670 | if (line[0] == ' ' && !isspace(line[1])) | |
671 | return 1; | |
672 | /* Just whitespace? */ | |
673 | for (;;) { | |
674 | unsigned char c = *line++; | |
675 | if (c == '\n') | |
676 | return 1; | |
677 | if (!isspace(c)) | |
678 | break; | |
679 | } | |
680 | return 0; | |
681 | } | |
682 | return 0; | |
683 | } | |
684 | ||
685 | ||
87ab7992 | 686 | static int handle_commit_msg(char *line) |
d4a9ce78 | 687 | { |
87ab7992 DZ |
688 | static int still_looking = 1; |
689 | ||
d4a9ce78 JH |
690 | if (!cmitmsg) |
691 | return 0; | |
2744b234 | 692 | |
87ab7992 DZ |
693 | if (still_looking) { |
694 | char *cp = line; | |
695 | if (isspace(*line)) { | |
696 | for (cp = line + 1; *cp; cp++) { | |
697 | if (!isspace(*cp)) | |
698 | break; | |
699 | } | |
700 | if (!*cp) | |
701 | return 0; | |
702 | } | |
86747c13 | 703 | if ((still_looking = check_header(cp, s_hdr_data, 0)) != 0) |
87ab7992 DZ |
704 | return 0; |
705 | } | |
8b4525fb | 706 | |
86747c13 DZ |
707 | /* normalize the log message to UTF-8. */ |
708 | if (metainfo_charset) | |
709 | convert_to_utf8(line, charset); | |
710 | ||
f0658cf2 | 711 | if (patchbreak(line)) { |
87ab7992 DZ |
712 | fclose(cmitmsg); |
713 | cmitmsg = NULL; | |
714 | return 1; | |
715 | } | |
8b4525fb | 716 | |
87ab7992 | 717 | fputs(line, cmitmsg); |
d4a9ce78 | 718 | return 0; |
2744b234 LT |
719 | } |
720 | ||
87ab7992 | 721 | static int handle_patch(char *line) |
2744b234 | 722 | { |
87ab7992 DZ |
723 | fputs(line, patchfile); |
724 | patch_lines++; | |
725 | return 0; | |
2744b234 LT |
726 | } |
727 | ||
/*
 * Route one body line to the current stage: stage 0 is the commit
 * message, stage 1 the patch.  The stage is remembered between calls.
 * When a stage reports (non-zero) that the line is not its own, the
 * same line is passed on to the next stage.
 *
 * Returns 0 when a stage took the line, 1 once every stage is done.
 */
static int handle_filter(char *line)
{
	static int filter = 0;

	if (filter == 0) {
		if (!handle_commit_msg(line))
			return 0;
		filter++;
	}
	if (filter == 1) {
		if (!handle_patch(line))
			return 0;
		filter++;
	}
	return 1;
}
750 | ||
87ab7992 | 751 | static void handle_body(void) |
1d8fa411 | 752 | { |
87ab7992 DZ |
753 | int rc = 0; |
754 | static char newline[2000]; | |
755 | static char *np = newline; | |
d4a9ce78 JH |
756 | |
757 | /* Skip up to the first boundary */ | |
87ab7992 DZ |
758 | if (content_top->boundary) { |
759 | if (!find_boundary()) | |
760 | return; | |
761 | } | |
762 | ||
763 | do { | |
764 | /* process any boundary lines */ | |
765 | if (content_top->boundary && is_multipart_boundary(line)) { | |
766 | /* flush any leftover */ | |
767 | if ((transfer_encoding == TE_BASE64) && | |
768 | (np != newline)) { | |
769 | handle_filter(newline); | |
770 | } | |
771 | if (!handle_boundary()) | |
772 | return; | |
773 | } | |
774 | ||
86747c13 | 775 | /* Unwrap transfer encoding */ |
87ab7992 | 776 | decode_transfer_encoding(line); |
87ab7992 DZ |
777 | |
778 | switch (transfer_encoding) { | |
779 | case TE_BASE64: | |
780 | { | |
781 | char *op = line; | |
782 | ||
783 | /* binary data most likely doesn't have newlines */ | |
784 | if (message_type != TYPE_TEXT) { | |
785 | rc = handle_filter(line); | |
786 | break; | |
787 | } | |
788 | ||
789 | /* this is a decoded line that may contain | |
790 | * multiple new lines. Pass only one chunk | |
791 | * at a time to handle_filter() | |
792 | */ | |
793 | ||
794 | do { | |
795 | while (*op != '\n' && *op != 0) | |
796 | *np++ = *op++; | |
797 | *np = *op; | |
798 | if (*np != 0) { | |
799 | /* should be sitting on a new line */ | |
800 | *(++np) = 0; | |
801 | op++; | |
802 | rc = handle_filter(newline); | |
803 | np = newline; | |
804 | } | |
805 | } while (*op != 0); | |
806 | /* the partial chunk is saved in newline and | |
807 | * will be appended by the next iteration of fgets | |
808 | */ | |
d4a9ce78 | 809 | break; |
1d8fa411 | 810 | } |
87ab7992 DZ |
811 | default: |
812 | rc = handle_filter(line); | |
d4a9ce78 | 813 | } |
87ab7992 DZ |
814 | if (rc) |
815 | /* nothing left to filter */ | |
816 | break; | |
817 | } while (fgets(line, sizeof(line), fin)); | |
818 | ||
819 | return; | |
1d8fa411 JH |
820 | } |
821 | ||
87ab7992 | 822 | static void handle_info(void) |
2744b234 | 823 | { |
87ab7992 DZ |
824 | char *sub; |
825 | char *hdr; | |
826 | int i; | |
827 | ||
828 | for (i = 0; header[i]; i++) { | |
829 | ||
830 | /* only print inbody headers if we output a patch file */ | |
831 | if (patch_lines && s_hdr_data[i]) | |
832 | hdr = s_hdr_data[i]; | |
833 | else if (p_hdr_data[i]) | |
834 | hdr = p_hdr_data[i]; | |
835 | else | |
836 | continue; | |
837 | ||
838 | if (!memcmp(header[i], "Subject", 7)) { | |
839 | sub = cleanup_subject(hdr); | |
840 | cleanup_space(sub); | |
841 | fprintf(fout, "Subject: %s\n", sub); | |
842 | } else if (!memcmp(header[i], "From", 4)) { | |
843 | handle_from(hdr); | |
844 | fprintf(fout, "Author: %s\n", name); | |
845 | fprintf(fout, "Email: %s\n", email); | |
846 | } else { | |
847 | cleanup_space(hdr); | |
848 | fprintf(fout, "%s: %s\n", header[i], hdr); | |
849 | } | |
d4a9ce78 | 850 | } |
87ab7992 | 851 | fprintf(fout, "\n"); |
2744b234 LT |
852 | } |
853 | ||
34488e3c LS |
854 | int mailinfo(FILE *in, FILE *out, int ks, const char *encoding, |
855 | const char *msg, const char *patch) | |
856 | { | |
857 | keep_subject = ks; | |
858 | metainfo_charset = encoding; | |
859 | fin = in; | |
860 | fout = out; | |
861 | ||
862 | cmitmsg = fopen(msg, "w"); | |
863 | if (!cmitmsg) { | |
864 | perror(msg); | |
865 | return -1; | |
866 | } | |
867 | patchfile = fopen(patch, "w"); | |
868 | if (!patchfile) { | |
869 | perror(patch); | |
870 | fclose(cmitmsg); | |
871 | return -1; | |
872 | } | |
87ab7992 DZ |
873 | |
874 | p_hdr_data = xcalloc(MAX_HDR_PARSED, sizeof(char *)); | |
875 | s_hdr_data = xcalloc(MAX_HDR_PARSED, sizeof(char *)); | |
876 | ||
877 | /* process the email header */ | |
878 | while (read_one_header_line(line, sizeof(line), fin)) | |
86747c13 | 879 | check_header(line, p_hdr_data, 1); |
87ab7992 DZ |
880 | |
881 | handle_body(); | |
882 | handle_info(); | |
34488e3c LS |
883 | |
884 | return 0; | |
885 | } | |
886 | ||
6bff6a60 | 887 | static const char mailinfo_usage[] = |
9f63892b | 888 | "git-mailinfo [-k] [-u | --encoding=<encoding>] msg patch <mail >info"; |
d4a9ce78 | 889 | |
a633fca0 | 890 | int cmd_mailinfo(int argc, const char **argv, const char *prefix) |
2744b234 | 891 | { |
bb1091a4 JH |
892 | const char *def_charset; |
893 | ||
f1f909e3 JH |
894 | /* NEEDSWORK: might want to do the optional .git/ directory |
895 | * discovery | |
896 | */ | |
897 | git_config(git_default_config); | |
898 | ||
bb1091a4 JH |
899 | def_charset = (git_commit_encoding ? git_commit_encoding : "utf-8"); |
900 | metainfo_charset = def_charset; | |
901 | ||
6bff6a60 JH |
902 | while (1 < argc && argv[1][0] == '-') { |
903 | if (!strcmp(argv[1], "-k")) | |
904 | keep_subject = 1; | |
d4a9ce78 | 905 | else if (!strcmp(argv[1], "-u")) |
bb1091a4 JH |
906 | metainfo_charset = def_charset; |
907 | else if (!strcmp(argv[1], "-n")) | |
908 | metainfo_charset = NULL; | |
cc44c765 | 909 | else if (!prefixcmp(argv[1], "--encoding=")) |
9f63892b | 910 | metainfo_charset = argv[1] + 11; |
d4a9ce78 | 911 | else |
f1f909e3 | 912 | usage(mailinfo_usage); |
6bff6a60 JH |
913 | argc--; argv++; |
914 | } | |
915 | ||
a196d8d4 | 916 | if (argc != 3) |
f1f909e3 | 917 | usage(mailinfo_usage); |
34488e3c LS |
918 | |
919 | return !!mailinfo(stdin, stdout, keep_subject, metainfo_charset, argv[1], argv[2]); | |
2744b234 | 920 | } |