]>
Commit | Line | Data |
---|---|---|
2744b234 LT |
1 | /* |
2 | * Another stupid program, this one parsing the headers of an | |
3 | * email to figure out authorship and subject | |
4 | */ | |
f1f909e3 | 5 | #include "cache.h" |
34488e3c | 6 | #include "builtin.h" |
b45974a6 | 7 | #include "utf8.h" |
2744b234 | 8 | |
/*
 * cmitmsg/patchfile are the two output files opened by mailinfo();
 * fin/fout are the input mail and the info output streams it is given.
 */
static FILE *cmitmsg, *patchfile, *fin, *fout;

static int keep_subject;		/* set by -k / mailinfo()'s ks argument */
static const char *metainfo_charset;	/* target charset; NULL turns re-encoding off */
static char line[1000];			/* shared buffer for the line being processed */
static char name[1000];			/* author name filled in by handle_from()/bogus_from() */
static char email[1000];		/* author e-mail filled in by handle_from()/bogus_from() */

/* Content-Transfer-Encoding of the part currently being read. */
static enum {
	TE_DONTCARE, TE_QP, TE_BASE64,
} transfer_encoding;
/* TYPE_OTHER once a Content-Type without "text/" has been seen for the part. */
static enum {
	TYPE_TEXT, TYPE_OTHER,
} message_type;

static char charset[256];	/* lower-cased charset= attribute of the current part */
static int patch_lines;		/* number of lines handed to handle_patch() */
/* Header values indexed like header[]: p_ filled from mail/part headers, s_ from in-body headers. */
static char **p_hdr_data, **s_hdr_data;

#define MAX_HDR_PARSED 10	/* capacity of header[] and the *_hdr_data arrays */
#define MAX_BOUNDARIES 5	/* capacity of the multipart boundary stack */
d4a9ce78 | 30 | |
2744b234 LT |
/*
 * Pick what to report as the author name.
 *
 * Returns `name` when it is 3..60 bytes long and contains none of the
 * characters '@', '<' or '>'; otherwise returns `email` as the fallback.
 */
static char *sanity_check(char *name, char *email)
{
	static const char forbidden[] = "@<>";
	const char *bad;
	int name_len = strlen(name);

	if (name_len < 3 || 60 < name_len)
		return email;

	for (bad = forbidden; *bad; bad++) {
		if (strchr(name, *bad))
			return email;
	}
	return name;
}
40 | ||
e0e3ba20 JH |
41 | static int bogus_from(char *line) |
42 | { | |
43 | /* John Doe <johndoe> */ | |
44 | char *bra, *ket, *dst, *cp; | |
45 | ||
46 | /* This is fallback, so do not bother if we already have an | |
47 | * e-mail address. | |
34488e3c | 48 | */ |
e0e3ba20 JH |
49 | if (*email) |
50 | return 0; | |
51 | ||
52 | bra = strchr(line, '<'); | |
53 | if (!bra) | |
54 | return 0; | |
55 | ket = strchr(bra, '>'); | |
56 | if (!ket) | |
57 | return 0; | |
58 | ||
59 | for (dst = email, cp = bra+1; cp < ket; ) | |
60 | *dst++ = *cp++; | |
61 | *dst = 0; | |
62 | for (cp = line; isspace(*cp); cp++) | |
63 | ; | |
64 | for (bra--; isspace(*bra); bra--) | |
65 | *bra = 0; | |
66 | cp = sanity_check(cp, email); | |
67 | strcpy(name, cp); | |
68 | return 1; | |
69 | } | |
70 | ||
2dec02b1 | 71 | static int handle_from(char *in_line) |
2744b234 | 72 | { |
2dec02b1 EB |
73 | char line[1000]; |
74 | char *at; | |
2744b234 LT |
75 | char *dst; |
76 | ||
2dec02b1 EB |
77 | strcpy(line, in_line); |
78 | at = strchr(line, '@'); | |
2744b234 | 79 | if (!at) |
e0e3ba20 | 80 | return bogus_from(line); |
2744b234 LT |
81 | |
82 | /* | |
83 | * If we already have one email, don't take any confusing lines | |
84 | */ | |
85 | if (*email && strchr(at+1, '@')) | |
86 | return 0; | |
87 | ||
d4a9ce78 JH |
88 | /* Pick up the string around '@', possibly delimited with <> |
89 | * pair; that is the email part. White them out while copying. | |
90 | */ | |
2744b234 LT |
91 | while (at > line) { |
92 | char c = at[-1]; | |
d4a9ce78 JH |
93 | if (isspace(c)) |
94 | break; | |
95 | if (c == '<') { | |
96 | at[-1] = ' '; | |
2744b234 | 97 | break; |
d4a9ce78 | 98 | } |
2744b234 LT |
99 | at--; |
100 | } | |
101 | dst = email; | |
102 | for (;;) { | |
103 | unsigned char c = *at; | |
d4a9ce78 JH |
104 | if (!c || c == '>' || isspace(c)) { |
105 | if (c == '>') | |
106 | *at = ' '; | |
2744b234 | 107 | break; |
d4a9ce78 | 108 | } |
2744b234 LT |
109 | *at++ = ' '; |
110 | *dst++ = c; | |
111 | } | |
112 | *dst++ = 0; | |
113 | ||
d4a9ce78 JH |
114 | /* The remainder is name. It could be "John Doe <john.doe@xz>" |
115 | * or "john.doe@xz (John Doe)", but we have whited out the | |
116 | * email part, so trim from both ends, possibly removing | |
117 | * the () pair at the end. | |
118 | */ | |
2744b234 LT |
119 | at = line + strlen(line); |
120 | while (at > line) { | |
121 | unsigned char c = *--at; | |
d4a9ce78 JH |
122 | if (!isspace(c)) { |
123 | at[(c == ')') ? 0 : 1] = 0; | |
2744b234 | 124 | break; |
d4a9ce78 | 125 | } |
2744b234 LT |
126 | } |
127 | ||
128 | at = line; | |
129 | for (;;) { | |
130 | unsigned char c = *at; | |
d4a9ce78 JH |
131 | if (!c || !isspace(c)) { |
132 | if (c == '(') | |
133 | at++; | |
2744b234 | 134 | break; |
d4a9ce78 | 135 | } |
2744b234 LT |
136 | at++; |
137 | } | |
2744b234 | 138 | at = sanity_check(at, email); |
2744b234 LT |
139 | strcpy(name, at); |
140 | return 1; | |
141 | } | |
142 | ||
/*
 * Copy the value part of a header line into `data`.
 *
 * `ofs` is the offset in `line` at which the value starts.  Returns 0
 * after copying, or 1 (and copies nothing) when either pointer is NULL.
 * The caller must make sure `data` is large enough.
 */
static int handle_header(char *line, char *data, int ofs)
{
	if (line && data) {
		strcpy(data, line + ofs);
		return 0;
	}
	return 1;
}
152 | ||
/* NOTE NOTE NOTE.  We do not claim we do full MIME.  We just attempt
 * to have enough heuristics to grok MIME encoded patches often found
 * on our mailing lists.  Header names and attribute names are matched
 * case-insensitively, but attributes are located with a plain substring
 * search rather than by parsing the header properly.
 */
158 | ||
/*
 * Extract the value of an attribute such as "charset=" from a header line.
 *
 * `name` (including the '=') is searched for case-insensitively anywhere
 * in `line`.  A value that starts with a double quote runs to the next
 * double quote; otherwise it runs to the next ';', space or tab.
 * The value is stored NUL-terminated in `attr` and 1 is returned; when
 * the attribute is absent `attr` becomes the empty string and 0 is
 * returned.  No bound is applied to the copy, so `attr` must be able to
 * hold the rest of the line.
 */
static int slurp_attr(const char *line, const char *name, char *attr)
{
	const char *value = strcasestr(line, name);
	const char *terminators = "; \t";
	size_t value_len;

	if (value == NULL) {
		attr[0] = '\0';
		return 0;
	}

	value += strlen(name);
	if (value[0] == '"') {
		terminators = "\"";
		value++;
	}

	value_len = strcspn(value, terminators);
	memcpy(attr, value, value_len);
	attr[value_len] = '\0';
	return 1;
}
180 | ||
87ab7992 DZ |
/* One multipart boundary marker ("--" followed by the boundary= value). */
struct content_type {
	char *boundary;		/* heap-allocated marker, NULL when unset */
	int boundary_len;	/* strlen(boundary) */
};

/* Stack of boundaries, one entry per nesting level of multipart content. */
static struct content_type content[MAX_BOUNDARIES];

/* Entry currently in effect; starts at content[0], which has no boundary. */
static struct content_type *content_top = content;
189 | ||
190 | static int handle_content_type(char *line) | |
d4a9ce78 | 191 | { |
87ab7992 DZ |
192 | char boundary[256]; |
193 | ||
194 | if (strcasestr(line, "text/") == NULL) | |
195 | message_type = TYPE_OTHER; | |
196 | if (slurp_attr(line, "boundary=", boundary + 2)) { | |
197 | memcpy(boundary, "--", 2); | |
198 | if (content_top++ >= &content[MAX_BOUNDARIES]) { | |
199 | fprintf(stderr, "Too many boundaries to handle\n"); | |
200 | exit(1); | |
201 | } | |
202 | content_top->boundary_len = strlen(boundary); | |
203 | content_top->boundary = xmalloc(content_top->boundary_len+1); | |
204 | strcpy(content_top->boundary, boundary); | |
b893f091 | 205 | } |
87ab7992 | 206 | if (slurp_attr(line, "charset=", charset)) { |
d4a9ce78 JH |
207 | int i, c; |
208 | for (i = 0; (c = charset[i]) != 0; i++) | |
209 | charset[i] = tolower(c); | |
210 | } | |
211 | return 0; | |
212 | } | |
213 | ||
d4a9ce78 JH |
214 | static int handle_content_transfer_encoding(char *line) |
215 | { | |
216 | if (strcasestr(line, "base64")) | |
217 | transfer_encoding = TE_BASE64; | |
218 | else if (strcasestr(line, "quoted-printable")) | |
219 | transfer_encoding = TE_QP; | |
220 | else | |
221 | transfer_encoding = TE_DONTCARE; | |
222 | return 0; | |
2744b234 LT |
223 | } |
224 | ||
d4a9ce78 JH |
225 | static int is_multipart_boundary(const char *line) |
226 | { | |
87ab7992 | 227 | return (!memcmp(line, content_top->boundary, content_top->boundary_len)); |
d4a9ce78 JH |
228 | } |
229 | ||
/*
 * Strip trailing whitespace from `line` in place.
 * Returns the length of what is left.
 */
static int eatspace(char *line)
{
	char *end = line + strlen(line);

	while (end > line && isspace(end[-1]))
		*--end = '\0';
	return end - line;
}
237 | ||
/*
 * Strip reply markers and bracketed tags from the front of a subject.
 *
 * Repeatedly removes a leading "Re:" (in any letter case), leading
 * blanks, tabs and colons, and a leading "[...]" tag as long as the tag
 * is not more than twice as long as what follows it.  An unmatched '['
 * is dropped on its own.  Trailing whitespace is trimmed in place.
 * Returns a pointer into `subject` at the first character kept.
 */
static char *cleanup_subject(char *subject)
{
	for (;;) {
		char *p;
		int len, remove;
		switch (*subject) {
		case 'r': case 'R':
			/*
			 * Compare byte by byte: the short-circuit keeps us
			 * from looking beyond the terminating NUL of a
			 * subject that is just "R" or "Re".
			 */
			if ((subject[1] == 'e' || subject[1] == 'E') &&
			    subject[2] == ':') {
				subject += 3;
				continue;
			}
			break;
		case ' ': case '\t': case ':':
			subject++;
			continue;

		case '[':
			p = strchr(subject, ']');
			if (!p) {
				subject++;
				continue;
			}
			len = strlen(p);
			remove = p - subject;
			/* only drop tags that are short relative to the rest */
			if (remove <= len *2) {
				subject = p+1;
				continue;
			}
			break;
		}
		eatspace(subject);
		return subject;
	}
}
2744b234 LT |
272 | |
/*
 * Collapse every run of whitespace in `buf` into a single blank,
 * in place.  Leading and trailing runs are collapsed too, not removed.
 *
 * Done in one pass with a read and a write cursor instead of shifting
 * the tail of the string once per removed character.
 */
static void cleanup_space(char *buf)
{
	char *dst = buf;
	const char *src;
	int in_run = 0;

	for (src = buf; *src; src++) {
		unsigned char c = *src;
		if (isspace(c)) {
			/* only the first character of a run produces output */
			if (!in_run)
				*dst++ = ' ';
			in_run = 1;
		} else {
			*dst++ = c;
			in_run = 0;
		}
	}
	*dst = 0;
}
289 | ||
8dabdfcc | 290 | static void decode_header(char *it, unsigned itsize); |
538dfe73 | 291 | static const char *header[MAX_HDR_PARSED] = { |
87ab7992 | 292 | "From","Subject","Date", |
d4a9ce78 JH |
293 | }; |
294 | ||
8dabdfcc | 295 | static int check_header(char *line, unsigned linesize, char **hdr_data, int overwrite) |
d4a9ce78 JH |
296 | { |
297 | int i; | |
298 | ||
87ab7992 DZ |
299 | /* search for the interesting parts */ |
300 | for (i = 0; header[i]; i++) { | |
301 | int len = strlen(header[i]); | |
86747c13 | 302 | if ((!hdr_data[i] || overwrite) && |
87ab7992 | 303 | !strncasecmp(line, header[i], len) && |
d4a9ce78 | 304 | line[len] == ':' && isspace(line[len + 1])) { |
33504530 EB |
305 | /* Unwrap inline B and Q encoding, and optionally |
306 | * normalize the meta information to utf8. | |
307 | */ | |
8dabdfcc | 308 | decode_header(line + len + 2, linesize - len - 2); |
87ab7992 DZ |
309 | hdr_data[i] = xmalloc(1000 * sizeof(char)); |
310 | if (! handle_header(line, hdr_data[i], len + 2)) { | |
311 | return 1; | |
312 | } | |
d4a9ce78 JH |
313 | } |
314 | } | |
d4a9ce78 | 315 | |
87ab7992 DZ |
316 | /* Content stuff */ |
317 | if (!strncasecmp(line, "Content-Type", 12) && | |
318 | line[12] == ':' && isspace(line[12 + 1])) { | |
8dabdfcc | 319 | decode_header(line + 12 + 2, linesize - 12 - 2); |
87ab7992 DZ |
320 | if (! handle_content_type(line)) { |
321 | return 1; | |
322 | } | |
323 | } | |
324 | if (!strncasecmp(line, "Content-Transfer-Encoding", 25) && | |
325 | line[25] == ':' && isspace(line[25 + 1])) { | |
8dabdfcc | 326 | decode_header(line + 25 + 2, linesize - 25 - 2); |
87ab7992 DZ |
327 | if (! handle_content_transfer_encoding(line)) { |
328 | return 1; | |
329 | } | |
330 | } | |
331 | ||
332 | /* for inbody stuff */ | |
333 | if (!memcmp(">From", line, 5) && isspace(line[5])) | |
334 | return 1; | |
335 | if (!memcmp("[PATCH]", line, 7) && isspace(line[7])) { | |
336 | for (i = 0; header[i]; i++) { | |
337 | if (!memcmp("Subject: ", header[i], 9)) { | |
338 | if (! handle_header(line, hdr_data[i], 0)) { | |
339 | return 1; | |
340 | } | |
341 | } | |
342 | } | |
343 | } | |
344 | ||
345 | /* no match */ | |
346 | return 0; | |
d4a9ce78 JH |
347 | } |
348 | ||
ef29c117 JH |
/*
 * Does `line` look like an RFC 2822 header field?
 *
 * The section that defines the loosest possible
 * field name is "3.6.8 Optional fields".
 *
 * optional-field = field-name ":" unstructured CRLF
 * field-name = 1*ftext
 * ftext = %d33-57 / %59-126
 *
 * mbox "From " and ">From " lines count as headers as well.
 * Returns 1 for a header, 0 otherwise; a line that starts with ':'
 * has an empty field name and is therefore not a header.
 */
static int is_rfc2822_header(char *line)
{
	char *cp;

	/* Count mbox From headers as headers */
	if (!strncmp(line, "From ", 5) || !strncmp(line, ">From ", 6))
		return 1;

	for (cp = line; *cp; cp++) {
		int ch = *cp;
		if (ch == ':')
			/* cp still points at the colon: require 1*ftext before it */
			return cp != line;
		if ((33 <= ch && ch <= 57) ||
		    (59 <= ch && ch <= 126))
			continue;
		break;
	}
	return 0;
}
376 | ||
34fc5cef LT |
/*
 * Read one logical header line, joining folded continuation lines.
 *
 * sz is size of 'line' buffer in bytes.  Must be reasonably
 * long enough to hold one physical real-world e-mail line.
 *
 * Returns 1 with the unfolded header in `line` (continuations are joined
 * with '\n' in place of their leading blank, trailing whitespace removed).
 * Returns 0 at end of input, or when the line read is empty or not a
 * header; in the latter case the line is left in `line` with a single
 * newline re-appended.
 */
static int read_one_header_line(char *line, int sz, FILE *in)
{
	static char folded[1000];
	int used;
	/*
	 * We will read at most (limit-1) bytes and then potentially
	 * re-add NUL after it.  Accessing line[limit] after this is safe
	 * and we can allow `used` to grow up to and including limit.
	 */
	int limit = sz - 1;

	/* Get the first part of the line. */
	if (fgets(line, limit, in) == NULL)
		return 0;

	/*
	 * Is it an empty line or not a valid rfc2822 header?
	 * If so, stop here, and return false ("not a header")
	 */
	used = eatspace(line);
	if (used == 0 || !is_rfc2822_header(line)) {
		/* Re-add the newline */
		line[used] = '\n';
		line[used + 1] = '\0';
		return 0;
	}

	/*
	 * Now we need to eat all the continuation lines..
	 * Yuck, 2822 header "folding"
	 */
	while (1) {
		int extra;
		int next = fgetc(in);

		ungetc(next, in);
		if (!(next == ' ' || next == '\t'))
			break;
		if (fgets(folded, sizeof(folded), in) == NULL)
			break;
		extra = eatspace(folded);

		/* no room left: the continuation is read but dropped */
		if (used >= limit - 1)
			continue;
		if (extra >= limit - used)
			extra = limit - used - 1;
		memcpy(line + used, folded, extra);
		/* the leading blank of the continuation becomes the separator */
		line[used] = '\n';
		used += extra;
	}
	line[used] = '\0';

	return 1;
}
434 | ||
/*
 * Decode quoted-printable text.
 *
 * in      - NUL-terminated input; decoding stops at `ep` or at the NUL
 * ot      - output buffer; may be the same buffer as `in`
 * otsize  - size of the output buffer in bytes, terminator included
 * ep      - end of the region of `in` to decode
 * rfc2047 - non-zero to map '_' to a blank (RFC 2047 "Q" encoding)
 *
 * A '=' followed by a newline or by the end of the string ends decoding.
 * Returns 0 on success, or -1 when the output would not fit; in that
 * case `ot` holds the NUL-terminated part that did fit.
 */
static int decode_q_segment(char *in, char *ot, unsigned otsize, char *ep, int rfc2047)
{
	char *otend = ot + otsize;
	int c;

	if (!otsize)
		return -1;

	while ((c = *in++) != 0 && (in <= ep)) {
		if (c == '=') {
			int d = *in++;
			if (d == '\n' || !d)
				break; /* drop trailing newline */
			c = ((hexval(d) << 4) | hexval(*in++));
		}
		else if (rfc2047 && c == '_') /* rfc2047 4.2 (2) */
			c = 0x20;

		/* always keep one byte in reserve for the terminator */
		if (otend - ot < 2) {
			*ot = '\0';
			return -1;
		}
		*ot++ = c;
	}
	*ot = 0;
	return 0;
}
458 | ||
/*
 * Decode base64 text.
 *
 * in     - NUL-terminated input; decoding stops at `ep` or at the NUL
 * ot     - output buffer; may be the same buffer as `in`
 * otsize - size of the output buffer in bytes, terminator included
 * ep     - end of the region of `in` to decode
 *
 * Characters outside the base64 alphabet are skipped.  Padding ('=') is
 * decoded as zero bits, so it can leave NUL bytes in the output.
 * Returns 0 on success, or -1 when the output would not fit; in that
 * case `ot` holds the NUL-terminated part that did fit.
 */
static int decode_b_segment(char *in, char *ot, unsigned otsize, char *ep)
{
	/* Decode in..ep, possibly in-place to ot */
	int c, pos = 0, acc = 0;
	char *otend = ot + otsize;

	if (!otsize)
		return -1;

	while ((c = *in++) != 0 && (in <= ep)) {
		if (c == '+')
			c = 62;
		else if (c == '/')
			c = 63;
		else if ('A' <= c && c <= 'Z')
			c -= 'A';
		else if ('a' <= c && c <= 'z')
			c -= 'a' - 26;
		else if ('0' <= c && c <= '9')
			c -= '0' - 52;
		else if (c == '=') {
			/* padding is almost like (c == 0), except we do
			 * not output NUL resulting only from it;
			 * for now we just trust the data.
			 */
			c = 0;
		}
		else
			continue; /* garbage */

		/* the first sextet of a group produces no output yet */
		if (pos == 0) {
			acc = (c << 2);
			pos = 1;
			continue;
		}

		/*
		 * Every other sextet emits one byte.  Keep one byte in
		 * reserve so the terminator below always has a place.
		 */
		if (otend - ot < 2) {
			*ot = '\0';
			return -1;
		}
		switch (pos) {
		case 1:
			*ot++ = (acc | (c >> 4));
			acc = (c & 15) << 4;
			pos = 2;
			break;
		case 2:
			*ot++ = (acc | (c >> 2));
			acc = (c & 3) << 6;
			pos = 3;
			break;
		default:
			*ot++ = (acc | c);
			acc = pos = 0;
			break;
		}
	}
	*ot = 0;
	return 0;
}
510 | ||
b59d398b LT |
/*
 * Pick a source charset for text that does not declare one.
 *
 * If the target is UTF-8 (the default) and the text already looks like
 * UTF-8 (US-ASCII included, being a subset), nothing needs converting
 * and NULL is returned.
 *
 * In every other case the text is assumed to be Latin1, for historical
 * reasons, and "latin1" is returned.
 */
static const char *guess_charset(const char *line, const char *target_charset)
{
	int needs_no_conversion =
		is_encoding_utf8(target_charset) && is_utf8(line);

	return needs_no_conversion ? NULL : "latin1";
}
530 | ||
8dabdfcc | 531 | static void convert_to_utf8(char *line, unsigned linesize, const char *charset) |
d4a9ce78 | 532 | { |
b59d398b LT |
533 | char *out; |
534 | ||
535 | if (!charset || !*charset) { | |
536 | charset = guess_charset(line, metainfo_charset); | |
537 | if (!charset) | |
538 | return; | |
539 | } | |
b45974a6 | 540 | |
7296096c JS |
541 | if (!strcmp(metainfo_charset, charset)) |
542 | return; | |
b59d398b | 543 | out = reencode_string(line, metainfo_charset, charset); |
bb1091a4 JH |
544 | if (!out) |
545 | die("cannot convert from %s to %s\n", | |
b59d398b | 546 | charset, metainfo_charset); |
8dabdfcc | 547 | strlcpy(line, out, linesize); |
b45974a6 | 548 | free(out); |
d4a9ce78 JH |
549 | } |
550 | ||
8dabdfcc | 551 | static int decode_header_bq(char *it, unsigned itsize) |
d4a9ce78 JH |
552 | { |
553 | char *in, *out, *ep, *cp, *sp; | |
554 | char outbuf[1000]; | |
b75bf2c3 | 555 | int rfc2047 = 0; |
d4a9ce78 JH |
556 | |
557 | in = it; | |
558 | out = outbuf; | |
559 | while ((ep = strstr(in, "=?")) != NULL) { | |
560 | int sz, encoding; | |
561 | char charset_q[256], piecebuf[256]; | |
b75bf2c3 JH |
562 | rfc2047 = 1; |
563 | ||
d4a9ce78 JH |
564 | if (in != ep) { |
565 | sz = ep - in; | |
566 | memcpy(out, in, sz); | |
567 | out += sz; | |
568 | in += sz; | |
569 | } | |
570 | /* E.g. | |
571 | * ep : "=?iso-2022-jp?B?GyR...?= foo" | |
572 | * ep : "=?ISO-8859-1?Q?Foo=FCbar?= baz" | |
573 | */ | |
574 | ep += 2; | |
575 | cp = strchr(ep, '?'); | |
576 | if (!cp) | |
b75bf2c3 | 577 | return rfc2047; /* no munging */ |
d4a9ce78 JH |
578 | for (sp = ep; sp < cp; sp++) |
579 | charset_q[sp - ep] = tolower(*sp); | |
580 | charset_q[cp - ep] = 0; | |
581 | encoding = cp[1]; | |
582 | if (!encoding || cp[2] != '?') | |
b75bf2c3 | 583 | return rfc2047; /* no munging */ |
d4a9ce78 JH |
584 | ep = strstr(cp + 3, "?="); |
585 | if (!ep) | |
b75bf2c3 | 586 | return rfc2047; /* no munging */ |
d4a9ce78 JH |
587 | switch (tolower(encoding)) { |
588 | default: | |
b75bf2c3 | 589 | return rfc2047; /* no munging */ |
d4a9ce78 | 590 | case 'b': |
8dabdfcc | 591 | sz = decode_b_segment(cp + 3, piecebuf, sizeof(piecebuf), ep); |
d4a9ce78 JH |
592 | break; |
593 | case 'q': | |
8dabdfcc | 594 | sz = decode_q_segment(cp + 3, piecebuf, sizeof(piecebuf), ep, 1); |
d4a9ce78 JH |
595 | break; |
596 | } | |
597 | if (sz < 0) | |
b75bf2c3 | 598 | return rfc2047; |
650e4be5 | 599 | if (metainfo_charset) |
8dabdfcc AR |
600 | convert_to_utf8(piecebuf, sizeof(piecebuf), charset_q); |
601 | ||
602 | sz = strlen(piecebuf); | |
603 | if (outbuf + sizeof(outbuf) <= out + sz) | |
604 | return rfc2047; /* no munging */ | |
d4a9ce78 | 605 | strcpy(out, piecebuf); |
8dabdfcc | 606 | out += sz; |
d4a9ce78 JH |
607 | in = ep + 2; |
608 | } | |
609 | strcpy(out, in); | |
8dabdfcc | 610 | strlcpy(it, outbuf, itsize); |
b75bf2c3 JH |
611 | return rfc2047; |
612 | } | |
613 | ||
8dabdfcc | 614 | static void decode_header(char *it, unsigned itsize) |
b75bf2c3 JH |
615 | { |
616 | ||
8dabdfcc | 617 | if (decode_header_bq(it, itsize)) |
b75bf2c3 JH |
618 | return; |
619 | /* otherwise "it" is a straight copy of the input. | |
620 | * This can be binary guck but there is no charset specified. | |
621 | */ | |
622 | if (metainfo_charset) | |
8dabdfcc | 623 | convert_to_utf8(it, itsize, ""); |
d4a9ce78 JH |
624 | } |
625 | ||
8dabdfcc | 626 | static void decode_transfer_encoding(char *line, unsigned linesize) |
d4a9ce78 JH |
627 | { |
628 | char *ep; | |
629 | ||
630 | switch (transfer_encoding) { | |
631 | case TE_QP: | |
632 | ep = line + strlen(line); | |
8dabdfcc | 633 | decode_q_segment(line, line, linesize, ep, 0); |
d4a9ce78 JH |
634 | break; |
635 | case TE_BASE64: | |
636 | ep = line + strlen(line); | |
8dabdfcc | 637 | decode_b_segment(line, line, linesize, ep); |
d4a9ce78 JH |
638 | break; |
639 | case TE_DONTCARE: | |
640 | break; | |
641 | } | |
642 | } | |
643 | ||
8dabdfcc | 644 | static int handle_filter(char *line, unsigned linesize); |
87ab7992 DZ |
645 | |
646 | static int find_boundary(void) | |
2744b234 | 647 | { |
87ab7992 DZ |
648 | while(fgets(line, sizeof(line), fin) != NULL) { |
649 | if (is_multipart_boundary(line)) | |
650 | return 1; | |
651 | } | |
652 | return 0; | |
653 | } | |
654 | ||
655 | static int handle_boundary(void) | |
656 | { | |
86747c13 | 657 | char newline[]="\n"; |
87ab7992 DZ |
658 | again: |
659 | if (!memcmp(line+content_top->boundary_len, "--", 2)) { | |
660 | /* we hit an end boundary */ | |
661 | /* pop the current boundary off the stack */ | |
662 | free(content_top->boundary); | |
663 | ||
664 | /* technically won't happen as is_multipart_boundary() | |
665 | will fail first. But just in case.. | |
666 | */ | |
667 | if (content_top-- < content) { | |
668 | fprintf(stderr, "Detected mismatched boundaries, " | |
669 | "can't recover\n"); | |
670 | exit(1); | |
671 | } | |
8dabdfcc | 672 | handle_filter(newline, sizeof(newline)); |
87ab7992 DZ |
673 | |
674 | /* skip to the next boundary */ | |
675 | if (!find_boundary()) | |
676 | return 0; | |
677 | goto again; | |
678 | } | |
679 | ||
680 | /* set some defaults */ | |
681 | transfer_encoding = TE_DONTCARE; | |
682 | charset[0] = 0; | |
683 | message_type = TYPE_TEXT; | |
d4a9ce78 | 684 | |
87ab7992 DZ |
685 | /* slurp in this section's info */ |
686 | while (read_one_header_line(line, sizeof(line), fin)) | |
8dabdfcc | 687 | check_header(line, sizeof(line), p_hdr_data, 0); |
2744b234 | 688 | |
87ab7992 DZ |
689 | /* eat the blank line after section info */ |
690 | return (fgets(line, sizeof(line), fin) != NULL); | |
d4a9ce78 JH |
691 | } |
692 | ||
f0658cf2 DZ |
/*
 * Does `line` mark the end of the commit message and the start of the
 * patch?  Returns 1 for a "diff -" header, a CVS "Index: " line, a
 * "--- <filename>" line, or a "---" separator followed only by
 * whitespace up to the newline; 0 otherwise.
 *
 * The prefixes are tested with strncmp() so that a line shorter than the
 * prefix is never read beyond its terminator.
 */
static inline int patchbreak(const char *line)
{
	/* Beginning of a "diff -" header? */
	if (!strncmp("diff -", line, 6))
		return 1;

	/* CVS "Index: " line? */
	if (!strncmp("Index: ", line, 7))
		return 1;

	/*
	 * "--- <filename>" starts patches without headers
	 * "---<sp>*" is a manual separator
	 */
	if (!strncmp("---", line, 3)) {
		line += 3;
		/* space followed by a filename? */
		if (line[0] == ' ' && !isspace((unsigned char)line[1]))
			return 1;
		/* Just whitespace? */
		for (;;) {
			unsigned char c = *line++;
			if (c == '\n')
				return 1;
			/* this also stops at the terminating NUL */
			if (!isspace(c))
				break;
		}
		return 0;
	}
	return 0;
}
724 | ||
725 | ||
8dabdfcc | 726 | static int handle_commit_msg(char *line, unsigned linesize) |
d4a9ce78 | 727 | { |
87ab7992 | 728 | static int still_looking = 1; |
8dabdfcc | 729 | char *endline = line + linesize; |
87ab7992 | 730 | |
d4a9ce78 JH |
731 | if (!cmitmsg) |
732 | return 0; | |
2744b234 | 733 | |
87ab7992 DZ |
734 | if (still_looking) { |
735 | char *cp = line; | |
736 | if (isspace(*line)) { | |
737 | for (cp = line + 1; *cp; cp++) { | |
738 | if (!isspace(*cp)) | |
739 | break; | |
740 | } | |
741 | if (!*cp) | |
742 | return 0; | |
743 | } | |
8dabdfcc | 744 | if ((still_looking = check_header(cp, endline - cp, s_hdr_data, 0)) != 0) |
87ab7992 DZ |
745 | return 0; |
746 | } | |
8b4525fb | 747 | |
86747c13 DZ |
748 | /* normalize the log message to UTF-8. */ |
749 | if (metainfo_charset) | |
8dabdfcc | 750 | convert_to_utf8(line, endline - line, charset); |
86747c13 | 751 | |
f0658cf2 | 752 | if (patchbreak(line)) { |
87ab7992 DZ |
753 | fclose(cmitmsg); |
754 | cmitmsg = NULL; | |
755 | return 1; | |
756 | } | |
8b4525fb | 757 | |
87ab7992 | 758 | fputs(line, cmitmsg); |
d4a9ce78 | 759 | return 0; |
2744b234 LT |
760 | } |
761 | ||
87ab7992 | 762 | static int handle_patch(char *line) |
2744b234 | 763 | { |
87ab7992 DZ |
764 | fputs(line, patchfile); |
765 | patch_lines++; | |
766 | return 0; | |
2744b234 LT |
767 | } |
768 | ||
/*
 * Route one body line to the stage that is currently active: first the
 * commit message, then the patch.
 *
 * The stage is remembered between calls.  A stage that returns non-zero
 * is finished, and the same line is handed on to the next one.
 * Returns 1 once every stage has finished, 0 otherwise.
 */
static int handle_filter(char *line, unsigned linesize)
{
	static int filter = 0;

	if (filter == 0) {
		if (!handle_commit_msg(line, linesize))
			return 0;
		filter = 1;
	}
	if (filter == 1) {
		if (!handle_patch(line))
			return 0;
		filter = 2;
	}
	return 1;
}
791 | ||
87ab7992 | 792 | static void handle_body(void) |
1d8fa411 | 793 | { |
87ab7992 DZ |
794 | int rc = 0; |
795 | static char newline[2000]; | |
796 | static char *np = newline; | |
d4a9ce78 JH |
797 | |
798 | /* Skip up to the first boundary */ | |
87ab7992 DZ |
799 | if (content_top->boundary) { |
800 | if (!find_boundary()) | |
801 | return; | |
802 | } | |
803 | ||
804 | do { | |
805 | /* process any boundary lines */ | |
806 | if (content_top->boundary && is_multipart_boundary(line)) { | |
807 | /* flush any leftover */ | |
808 | if ((transfer_encoding == TE_BASE64) && | |
809 | (np != newline)) { | |
8dabdfcc | 810 | handle_filter(newline, sizeof(newline)); |
87ab7992 DZ |
811 | } |
812 | if (!handle_boundary()) | |
813 | return; | |
814 | } | |
815 | ||
86747c13 | 816 | /* Unwrap transfer encoding */ |
8dabdfcc | 817 | decode_transfer_encoding(line, sizeof(line)); |
87ab7992 DZ |
818 | |
819 | switch (transfer_encoding) { | |
820 | case TE_BASE64: | |
821 | { | |
822 | char *op = line; | |
823 | ||
824 | /* binary data most likely doesn't have newlines */ | |
825 | if (message_type != TYPE_TEXT) { | |
8dabdfcc | 826 | rc = handle_filter(line, sizeof(newline)); |
87ab7992 DZ |
827 | break; |
828 | } | |
829 | ||
830 | /* this is a decoded line that may contain | |
831 | * multiple new lines. Pass only one chunk | |
832 | * at a time to handle_filter() | |
833 | */ | |
834 | ||
835 | do { | |
836 | while (*op != '\n' && *op != 0) | |
837 | *np++ = *op++; | |
838 | *np = *op; | |
839 | if (*np != 0) { | |
840 | /* should be sitting on a new line */ | |
841 | *(++np) = 0; | |
842 | op++; | |
8dabdfcc | 843 | rc = handle_filter(newline, sizeof(newline)); |
87ab7992 DZ |
844 | np = newline; |
845 | } | |
846 | } while (*op != 0); | |
847 | /* the partial chunk is saved in newline and | |
848 | * will be appended by the next iteration of fgets | |
849 | */ | |
d4a9ce78 | 850 | break; |
1d8fa411 | 851 | } |
87ab7992 | 852 | default: |
8dabdfcc | 853 | rc = handle_filter(line, sizeof(newline)); |
d4a9ce78 | 854 | } |
87ab7992 DZ |
855 | if (rc) |
856 | /* nothing left to filter */ | |
857 | break; | |
858 | } while (fgets(line, sizeof(line), fin)); | |
859 | ||
860 | return; | |
1d8fa411 JH |
861 | } |
862 | ||
d7f6bae2 JH |
/*
 * Print a possibly multi-line header value.
 *
 * `data` is split at every '\n' and each piece is written to `fout` as
 * "<hdr>: <piece>\n".  The piece after the last newline is always
 * printed, even when it is empty.
 */
static void output_header_lines(FILE *fout, const char *hdr, char *data)
{
	char *eol;

	for (; (eol = strchr(data, '\n')) != NULL; data = eol + 1)
		fprintf(fout, "%s: %.*s\n", hdr, (int)(eol - data), data);

	fprintf(fout, "%s: %.*s\n", hdr, (int)strlen(data), data);
}
878 | ||
87ab7992 | 879 | static void handle_info(void) |
2744b234 | 880 | { |
87ab7992 DZ |
881 | char *sub; |
882 | char *hdr; | |
883 | int i; | |
884 | ||
885 | for (i = 0; header[i]; i++) { | |
886 | ||
887 | /* only print inbody headers if we output a patch file */ | |
888 | if (patch_lines && s_hdr_data[i]) | |
889 | hdr = s_hdr_data[i]; | |
890 | else if (p_hdr_data[i]) | |
891 | hdr = p_hdr_data[i]; | |
892 | else | |
893 | continue; | |
894 | ||
895 | if (!memcmp(header[i], "Subject", 7)) { | |
d7f6bae2 JH |
896 | if (keep_subject) |
897 | sub = hdr; | |
898 | else { | |
899 | sub = cleanup_subject(hdr); | |
900 | cleanup_space(sub); | |
901 | } | |
902 | output_header_lines(fout, "Subject", sub); | |
87ab7992 DZ |
903 | } else if (!memcmp(header[i], "From", 4)) { |
904 | handle_from(hdr); | |
905 | fprintf(fout, "Author: %s\n", name); | |
906 | fprintf(fout, "Email: %s\n", email); | |
907 | } else { | |
908 | cleanup_space(hdr); | |
909 | fprintf(fout, "%s: %s\n", header[i], hdr); | |
910 | } | |
d4a9ce78 | 911 | } |
87ab7992 | 912 | fprintf(fout, "\n"); |
2744b234 LT |
913 | } |
914 | ||
fcd056a6 JH |
915 | static int mailinfo(FILE *in, FILE *out, int ks, const char *encoding, |
916 | const char *msg, const char *patch) | |
34488e3c | 917 | { |
f88a545a | 918 | int peek; |
34488e3c LS |
919 | keep_subject = ks; |
920 | metainfo_charset = encoding; | |
921 | fin = in; | |
922 | fout = out; | |
923 | ||
924 | cmitmsg = fopen(msg, "w"); | |
925 | if (!cmitmsg) { | |
926 | perror(msg); | |
927 | return -1; | |
928 | } | |
929 | patchfile = fopen(patch, "w"); | |
930 | if (!patchfile) { | |
931 | perror(patch); | |
932 | fclose(cmitmsg); | |
933 | return -1; | |
934 | } | |
87ab7992 DZ |
935 | |
936 | p_hdr_data = xcalloc(MAX_HDR_PARSED, sizeof(char *)); | |
937 | s_hdr_data = xcalloc(MAX_HDR_PARSED, sizeof(char *)); | |
938 | ||
f88a545a SS |
939 | do { |
940 | peek = fgetc(in); | |
941 | } while (isspace(peek)); | |
942 | ungetc(peek, in); | |
943 | ||
87ab7992 DZ |
944 | /* process the email header */ |
945 | while (read_one_header_line(line, sizeof(line), fin)) | |
8dabdfcc | 946 | check_header(line, sizeof(line), p_hdr_data, 1); |
87ab7992 DZ |
947 | |
948 | handle_body(); | |
949 | handle_info(); | |
34488e3c LS |
950 | |
951 | return 0; | |
952 | } | |
953 | ||
6bff6a60 | 954 | static const char mailinfo_usage[] = |
9f63892b | 955 | "git-mailinfo [-k] [-u | --encoding=<encoding>] msg patch <mail >info"; |
d4a9ce78 | 956 | |
a633fca0 | 957 | int cmd_mailinfo(int argc, const char **argv, const char *prefix) |
2744b234 | 958 | { |
bb1091a4 JH |
959 | const char *def_charset; |
960 | ||
f1f909e3 JH |
961 | /* NEEDSWORK: might want to do the optional .git/ directory |
962 | * discovery | |
963 | */ | |
964 | git_config(git_default_config); | |
965 | ||
bb1091a4 JH |
966 | def_charset = (git_commit_encoding ? git_commit_encoding : "utf-8"); |
967 | metainfo_charset = def_charset; | |
968 | ||
6bff6a60 JH |
969 | while (1 < argc && argv[1][0] == '-') { |
970 | if (!strcmp(argv[1], "-k")) | |
971 | keep_subject = 1; | |
d4a9ce78 | 972 | else if (!strcmp(argv[1], "-u")) |
bb1091a4 JH |
973 | metainfo_charset = def_charset; |
974 | else if (!strcmp(argv[1], "-n")) | |
975 | metainfo_charset = NULL; | |
cc44c765 | 976 | else if (!prefixcmp(argv[1], "--encoding=")) |
9f63892b | 977 | metainfo_charset = argv[1] + 11; |
d4a9ce78 | 978 | else |
f1f909e3 | 979 | usage(mailinfo_usage); |
6bff6a60 JH |
980 | argc--; argv++; |
981 | } | |
982 | ||
a196d8d4 | 983 | if (argc != 3) |
f1f909e3 | 984 | usage(mailinfo_usage); |
34488e3c LS |
985 | |
986 | return !!mailinfo(stdin, stdout, keep_subject, metainfo_charset, argv[1], argv[2]); | |
2744b234 | 987 | } |