]>
Commit | Line | Data |
---|---|---|
2744b234 LT |
/*
 * Totally braindamaged mbox splitter program.
 *
 * It just splits an mbox into a list of files: "0001" "0002" ..
 * so you can process them further from there.
 */
8b73edf4 | 7 | #include "cache.h" |
e690e843 | 8 | #include "builtin.h" |
f394e093 | 9 | #include "gettext.h" |
c455c87c | 10 | #include "string-list.h" |
c8f373a5 | 11 | #include "strbuf.h" |
2744b234 | 12 | |
/*
 * One-line synopsis printed by usage().  It lists every option that the
 * parser in cmd_mailsplit() accepts, including --mboxrd and the "--"
 * end-of-options marker.
 */
static const char git_mailsplit_usage[] =
"git mailsplit [-d<prec>] [-f<n>] [-b] [--keep-cr] [--mboxrd] -o<directory> [--] [(<mbox>|<Maildir>)...]";
2744b234 LT |
15 | |
/*
 * Heuristically decide whether line[0..len) is an mbox "From " separator.
 *
 * The line must be at least 20 bytes long and start with "From ".  The
 * last ':' found between the "From " prefix and the third-from-last byte
 * must be surrounded by digits in the positions checked below, and the
 * number that follows those digits (the year) must be greater than 90.
 *
 * Returns 1 when the line looks like a separator, 0 otherwise.
 */
static int is_from_line(const char *line, int len)
{
	const char *first;
	const char *p;

	if (len < 20)
		return 0;
	if (memcmp("From ", line, 5) != 0)
		return 0;

	/* Walk backwards, starting just before the last two bytes. */
	first = line + 5;
	for (p = line + len - 3; p >= first && *p != ':'; p--)
		; /* keep looking */
	if (p < first)
		return 0;

	if (!(isdigit(p[-4]) &&
	      isdigit(p[-2]) &&
	      isdigit(p[-1]) &&
	      isdigit(p[1]) &&
	      isdigit(p[2])))
		return 0;

	/* year: anything above 90 is considered close enough */
	return strtol(p + 3, NULL, 10) > 90;
}
46 | ||
c8f373a5 | 47 | static struct strbuf buf = STRBUF_INIT; |
c2ca1d79 | 48 | static int keep_cr; |
c88098d7 EW |
49 | static int mboxrd; |
50 | ||
51 | static int is_gtfrom(const struct strbuf *buf) | |
52 | { | |
53 | size_t min = strlen(">From "); | |
54 | size_t ngt; | |
55 | ||
56 | if (buf->len < min) | |
57 | return 0; | |
58 | ||
59 | ngt = strspn(buf->buf, ">"); | |
60 | return ngt && starts_with(buf->buf + ngt, "From "); | |
61 | } | |
8b73edf4 JH |
62 | |
63 | /* Called with the first line (potentially partial) | |
64 | * already in buf[] -- normally that should begin with | |
65 | * the Unix "From " line. Write it into the specified | |
66 | * file. | |
67 | */ | |
b3f041fb | 68 | static int split_one(FILE *mbox, const char *name, int allow_bare) |
2744b234 | 69 | { |
13b08125 | 70 | FILE *output; |
8b73edf4 JH |
71 | int fd; |
72 | int status = 0; | |
c8f373a5 | 73 | int is_bare = !is_from_line(buf.buf, buf.len); |
2744b234 | 74 | |
13b08125 | 75 | if (is_bare && !allow_bare) { |
13b08125 SB |
76 | fprintf(stderr, "corrupt mailbox\n"); |
77 | exit(1); | |
78 | } | |
66e905b7 | 79 | fd = xopen(name, O_WRONLY | O_CREAT | O_EXCL, 0666); |
41698375 | 80 | output = xfdopen(fd, "w"); |
2744b234 | 81 | |
8b73edf4 JH |
82 | /* Copy it out, while searching for a line that begins with |
83 | * "From " and having something that looks like a date format. | |
2744b234 | 84 | */ |
8b73edf4 | 85 | for (;;) { |
c2ca1d79 JH |
86 | if (!keep_cr && buf.len > 1 && buf.buf[buf.len-1] == '\n' && |
87 | buf.buf[buf.len-2] == '\r') { | |
88 | strbuf_setlen(&buf, buf.len-2); | |
89 | strbuf_addch(&buf, '\n'); | |
90 | } | |
91 | ||
c88098d7 EW |
92 | if (mboxrd && is_gtfrom(&buf)) |
93 | strbuf_remove(&buf, 0, 1); | |
94 | ||
c8f373a5 | 95 | if (fwrite(buf.buf, 1, buf.len, output) != buf.len) |
0721c314 | 96 | die_errno("cannot write output"); |
8b73edf4 | 97 | |
c8f373a5 | 98 | if (strbuf_getwholeline(&buf, mbox, '\n')) { |
8b73edf4 JH |
99 | if (feof(mbox)) { |
100 | status = 1; | |
101 | break; | |
102 | } | |
0721c314 | 103 | die_errno("cannot read mbox"); |
8b73edf4 | 104 | } |
c8f373a5 | 105 | if (!is_bare && is_from_line(buf.buf, buf.len)) |
8b73edf4 JH |
106 | break; /* done with one message */ |
107 | } | |
108 | fclose(output); | |
109 | return status; | |
2744b234 LT |
110 | } |
111 | ||
/*
 * Record every message file found under the "cur" and "new"
 * subdirectories of the Maildir at path.
 *
 * Each entry whose name does not start with '.' is inserted into list as
 * "<subdir>/<name>", i.e. relative to path.  A subdirectory that does not
 * exist (ENOENT) is silently skipped.  The temporary name is freed here,
 * so list is expected to keep its own copy of each string (the caller in
 * this file initializes it with STRING_LIST_INIT_DUP).
 *
 * Returns 0 on success, or -1 after reporting an error if a subdirectory
 * could not be opened for any other reason.
 */
static int populate_maildir_list(struct string_list *list, const char *path)
{
	static const char *const subdirs[] = { "cur", "new" };
	char *scratch = NULL;
	int result = 0;
	size_t i;

	for (i = 0; i < sizeof(subdirs) / sizeof(subdirs[0]); i++) {
		const char *subdir = subdirs[i];
		struct dirent *entry;
		DIR *handle;

		free(scratch);
		scratch = xstrfmt("%s/%s", path, subdir);
		handle = opendir(scratch);
		if (!handle) {
			if (errno == ENOENT)
				continue;
			error_errno("cannot opendir %s", scratch);
			result = -1;
			break;
		}

		while ((entry = readdir(handle)) != NULL) {
			/* skips ".", ".." and any other dot-file */
			if (entry->d_name[0] == '.')
				continue;
			free(scratch);
			scratch = xstrfmt("%s/%s", subdir, entry->d_name);
			string_list_insert(list, scratch);
		}

		closedir(handle);
	}

	free(scratch);
	return result;
}
148 | ||
18505c34 JK |
/*
 * Order two Maildir file names so that embedded numbers sort by value.
 *
 * The names are walked in parallel.  Where both currently point at a
 * digit, the two digit runs are parsed with strtol() and compared as
 * numbers; everywhere else single bytes are compared as unsigned char.
 *
 * Returns a negative value, zero, or a positive value when a sorts
 * before, equal to, or after b.
 */
static int maildir_filename_cmp(const char *a, const char *b)
{
	while (*a && *b) {
		if (isdigit((unsigned char)*a) && isdigit((unsigned char)*b)) {
			char *end_a, *end_b;
			long na = strtol(a, &end_a, 10);
			long nb = strtol(b, &end_b, 10);

			/*
			 * Compare instead of subtracting: the difference of
			 * two longs does not fit in the int return value and
			 * could come out with the wrong sign, or as 0.
			 */
			if (na != nb)
				return na < nb ? -1 : 1;

			/* equal numbers: continue after both digit runs */
			a = end_a;
			b = end_b;
		} else {
			if (*a != *b)
				return (unsigned char)*a - (unsigned char)*b;
			a++;
			b++;
		}
	}
	return (unsigned char)*a - (unsigned char)*b;
}
169 | ||
d63bd9a2 FP |
170 | static int split_maildir(const char *maildir, const char *dir, |
171 | int nr_prec, int skip) | |
172 | { | |
1d895f19 | 173 | char *file = NULL; |
d270d7b7 | 174 | FILE *f = NULL; |
e690e843 | 175 | int ret = -1; |
d63bd9a2 | 176 | int i; |
183113a5 | 177 | struct string_list list = STRING_LIST_INIT_DUP; |
e690e843 | 178 | |
18505c34 JK |
179 | list.cmp = maildir_filename_cmp; |
180 | ||
d50a4bc4 | 181 | if (populate_maildir_list(&list, maildir) < 0) |
d63bd9a2 | 182 | goto out; |
e690e843 | 183 | |
d63bd9a2 | 184 | for (i = 0; i < list.nr; i++) { |
1d895f19 JK |
185 | char *name; |
186 | ||
187 | free(file); | |
188 | file = xstrfmt("%s/%s", maildir, list.items[i].string); | |
189 | ||
d63bd9a2 FP |
190 | f = fopen(file, "r"); |
191 | if (!f) { | |
880c0aef | 192 | error_errno("cannot open mail %s", file); |
e690e843 LS |
193 | goto out; |
194 | } | |
195 | ||
c8f373a5 | 196 | if (strbuf_getwholeline(&buf, f, '\n')) { |
880c0aef | 197 | error_errno("cannot read mail %s", file); |
e690e843 LS |
198 | goto out; |
199 | } | |
200 | ||
1d895f19 | 201 | name = xstrfmt("%s/%0*d", dir, nr_prec, ++skip); |
d63bd9a2 | 202 | split_one(f, name, 1); |
1d895f19 | 203 | free(name); |
d63bd9a2 FP |
204 | |
205 | fclose(f); | |
d270d7b7 | 206 | f = NULL; |
d63bd9a2 FP |
207 | } |
208 | ||
d63bd9a2 FP |
209 | ret = skip; |
210 | out: | |
d270d7b7 JK |
211 | if (f) |
212 | fclose(f); | |
1d895f19 | 213 | free(file); |
c455c87c | 214 | string_list_clear(&list, 1); |
d63bd9a2 FP |
215 | return ret; |
216 | } | |
217 | ||
fcd056a6 JH |
218 | static int split_mbox(const char *file, const char *dir, int allow_bare, |
219 | int nr_prec, int skip) | |
d63bd9a2 | 220 | { |
d63bd9a2 | 221 | int ret = -1; |
f88a545a | 222 | int peek; |
d63bd9a2 FP |
223 | |
224 | FILE *f = !strcmp(file, "-") ? stdin : fopen(file, "r"); | |
225 | int file_done = 0; | |
226 | ||
7b20af6a JH |
227 | if (isatty(fileno(f))) |
228 | warning(_("reading patches from stdin/tty...")); | |
229 | ||
d63bd9a2 | 230 | if (!f) { |
880c0aef | 231 | error_errno("cannot open mbox %s", file); |
d63bd9a2 FP |
232 | goto out; |
233 | } | |
234 | ||
f88a545a SS |
235 | do { |
236 | peek = fgetc(f); | |
f0733c13 JS |
237 | if (peek == EOF) { |
238 | if (f == stdin) | |
239 | /* empty stdin is OK */ | |
240 | ret = skip; | |
241 | else { | |
242 | fclose(f); | |
243 | error(_("empty mbox: '%s'"), file); | |
244 | } | |
245 | goto out; | |
246 | } | |
f88a545a SS |
247 | } while (isspace(peek)); |
248 | ungetc(peek, f); | |
249 | ||
c8f373a5 | 250 | if (strbuf_getwholeline(&buf, f, '\n')) { |
d63bd9a2 FP |
251 | /* empty stdin is OK */ |
252 | if (f != stdin) { | |
253 | error("cannot read mbox %s", file); | |
254 | goto out; | |
e690e843 | 255 | } |
d63bd9a2 FP |
256 | file_done = 1; |
257 | } | |
e690e843 | 258 | |
d63bd9a2 | 259 | while (!file_done) { |
1d895f19 | 260 | char *name = xstrfmt("%s/%0*d", dir, nr_prec, ++skip); |
d63bd9a2 | 261 | file_done = split_one(f, name, allow_bare); |
1d895f19 | 262 | free(name); |
e690e843 | 263 | } |
d63bd9a2 FP |
264 | |
265 | if (f != stdin) | |
266 | fclose(f); | |
267 | ||
e690e843 LS |
268 | ret = skip; |
269 | out: | |
e690e843 LS |
270 | return ret; |
271 | } | |
d63bd9a2 | 272 | |
a633fca0 | 273 | int cmd_mailsplit(int argc, const char **argv, const char *prefix) |
e690e843 | 274 | { |
d63bd9a2 | 275 | int nr = 0, nr_prec = 4, num = 0; |
b3f041fb PA |
276 | int allow_bare = 0; |
277 | const char *dir = NULL; | |
278 | const char **argp; | |
279 | static const char *stdin_only[] = { "-", NULL }; | |
8b73edf4 | 280 | |
b3f041fb PA |
281 | for (argp = argv+1; *argp; argp++) { |
282 | const char *arg = *argp; | |
8b73edf4 JH |
283 | |
284 | if (arg[0] != '-') | |
285 | break; | |
286 | /* do flags here */ | |
b3f041fb PA |
287 | if ( arg[1] == 'd' ) { |
288 | nr_prec = strtol(arg+2, NULL, 10); | |
8b73edf4 JH |
289 | if (nr_prec < 3 || 10 <= nr_prec) |
290 | usage(git_mailsplit_usage); | |
291 | continue; | |
b3f041fb PA |
292 | } else if ( arg[1] == 'f' ) { |
293 | nr = strtol(arg+2, NULL, 10); | |
aa481d38 JN |
294 | } else if ( arg[1] == 'h' ) { |
295 | usage(git_mailsplit_usage); | |
b3f041fb PA |
296 | } else if ( arg[1] == 'b' && !arg[2] ) { |
297 | allow_bare = 1; | |
c2ca1d79 JH |
298 | } else if (!strcmp(arg, "--keep-cr")) { |
299 | keep_cr = 1; | |
b3f041fb PA |
300 | } else if ( arg[1] == 'o' && arg[2] ) { |
301 | dir = arg+2; | |
c88098d7 EW |
302 | } else if (!strcmp(arg, "--mboxrd")) { |
303 | mboxrd = 1; | |
b3f041fb PA |
304 | } else if ( arg[1] == '-' && !arg[2] ) { |
305 | argp++; /* -- marks end of options */ | |
306 | break; | |
307 | } else { | |
308 | die("unknown option: %s", arg); | |
8b73edf4 | 309 | } |
2744b234 | 310 | } |
8b73edf4 | 311 | |
b3f041fb PA |
312 | if ( !dir ) { |
313 | /* Backwards compatibility: if no -o specified, accept | |
314 | <mbox> <dir> or just <dir> */ | |
315 | switch (argc - (argp-argv)) { | |
316 | case 1: | |
317 | dir = argp[0]; | |
318 | argp = stdin_only; | |
319 | break; | |
320 | case 2: | |
321 | stdin_only[0] = argp[0]; | |
322 | dir = argp[1]; | |
323 | argp = stdin_only; | |
324 | break; | |
325 | default: | |
326 | usage(git_mailsplit_usage); | |
327 | } | |
328 | } else { | |
329 | /* New usage: if no more argument, parse stdin */ | |
330 | if ( !*argp ) | |
331 | argp = stdin_only; | |
2744b234 | 332 | } |
8b73edf4 | 333 | |
d63bd9a2 FP |
334 | while (*argp) { |
335 | const char *arg = *argp++; | |
336 | struct stat argstat; | |
337 | int ret = 0; | |
338 | ||
339 | if (arg[0] == '-' && arg[1] == 0) { | |
340 | ret = split_mbox(arg, dir, allow_bare, nr_prec, nr); | |
341 | if (ret < 0) { | |
342 | error("cannot split patches from stdin"); | |
343 | return 1; | |
344 | } | |
b3327180 JH |
345 | num += (ret - nr); |
346 | nr = ret; | |
d63bd9a2 FP |
347 | continue; |
348 | } | |
349 | ||
350 | if (stat(arg, &argstat) == -1) { | |
880c0aef | 351 | error_errno("cannot stat %s", arg); |
d63bd9a2 FP |
352 | return 1; |
353 | } | |
354 | ||
355 | if (S_ISDIR(argstat.st_mode)) | |
356 | ret = split_maildir(arg, dir, nr_prec, nr); | |
357 | else | |
358 | ret = split_mbox(arg, dir, allow_bare, nr_prec, nr); | |
359 | ||
360 | if (ret < 0) { | |
361 | error("cannot split patches from %s", arg); | |
362 | return 1; | |
363 | } | |
b3327180 JH |
364 | num += (ret - nr); |
365 | nr = ret; | |
d63bd9a2 FP |
366 | } |
367 | ||
368 | printf("%d\n", num); | |
b3f041fb | 369 | |
d63bd9a2 | 370 | return 0; |
2744b234 | 371 | } |