]>
Commit | Line | Data |
---|---|---|
1 | /* | |
2 | * Totally braindamaged mbox splitter program. | |
3 | * | |
4 | * It just splits a mbox into a list of files: "0001" "0002" .. | |
5 | * so you can process them further from there. | |
6 | */ | |
7 | #include "cache.h" | |
8 | #include "builtin.h" | |
9 | #include "string-list.h" | |
10 | #include "strbuf.h" | |
11 | ||
/*
 * Usage synopsis handed to usage(): printed for -h, for a -d precision
 * outside the accepted range, and when the legacy (no -o) form is given
 * the wrong number of arguments.
 */
static const char git_mailsplit_usage[] =
"git mailsplit [-d<prec>] [-f<n>] [-b] [--keep-cr] -o<directory> [(<mbox>|<Maildir>)...]";
14 | ||
/*
 * Heuristically decide whether line[0..len) is an mbox "From " separator.
 *
 * The line must be at least 20 bytes long and start with "From ".  The
 * last ':' found when scanning backwards from the end of the line must
 * look like the minutes/seconds separator of a timestamp (digits at
 * offsets -4, -2, -1, +1 and +2), and the number that follows the seconds
 * field (taken to be the year) must be greater than 90.
 *
 * Returns 1 when the line looks like a separator, 0 otherwise.
 */
static int is_from_line(const char *line, int len)
{
	static const int digit_offsets[] = { -4, -2, -1, 1, 2 };
	const char *colon;
	unsigned int i;

	if (len < 20)
		return 0;
	if (memcmp("From ", line, 5) != 0)
		return 0;

	/* Walk backwards from just before the line terminator to the last ':' */
	colon = line + len - 2;
	line += 5;
	do {
		if (colon < line)
			return 0;
		colon--;
	} while (*colon != ':');

	/* Expect "d?:dd:dd"-like digits around the colon */
	for (i = 0; i < sizeof(digit_offsets) / sizeof(digit_offsets[0]); i++) {
		if (!isdigit(colon[digit_offsets[i]]))
			return 0;
	}

	/* year */
	if (strtol(colon + 3, NULL, 10) <= 90)
		return 0;

	/* Ok, close enough */
	return 1;
}
45 | ||
46 | static struct strbuf buf = STRBUF_INIT; | |
47 | static int keep_cr; | |
48 | ||
49 | /* Called with the first line (potentially partial) | |
50 | * already in buf[] -- normally that should begin with | |
51 | * the Unix "From " line. Write it into the specified | |
52 | * file. | |
53 | */ | |
54 | static int split_one(FILE *mbox, const char *name, int allow_bare) | |
55 | { | |
56 | FILE *output = NULL; | |
57 | int fd; | |
58 | int status = 0; | |
59 | int is_bare = !is_from_line(buf.buf, buf.len); | |
60 | ||
61 | if (is_bare && !allow_bare) | |
62 | goto corrupt; | |
63 | ||
64 | fd = open(name, O_WRONLY | O_CREAT | O_EXCL, 0666); | |
65 | if (fd < 0) | |
66 | die_errno("cannot open output file '%s'", name); | |
67 | output = xfdopen(fd, "w"); | |
68 | ||
69 | /* Copy it out, while searching for a line that begins with | |
70 | * "From " and having something that looks like a date format. | |
71 | */ | |
72 | for (;;) { | |
73 | if (!keep_cr && buf.len > 1 && buf.buf[buf.len-1] == '\n' && | |
74 | buf.buf[buf.len-2] == '\r') { | |
75 | strbuf_setlen(&buf, buf.len-2); | |
76 | strbuf_addch(&buf, '\n'); | |
77 | } | |
78 | ||
79 | if (fwrite(buf.buf, 1, buf.len, output) != buf.len) | |
80 | die_errno("cannot write output"); | |
81 | ||
82 | if (strbuf_getwholeline(&buf, mbox, '\n')) { | |
83 | if (feof(mbox)) { | |
84 | status = 1; | |
85 | break; | |
86 | } | |
87 | die_errno("cannot read mbox"); | |
88 | } | |
89 | if (!is_bare && is_from_line(buf.buf, buf.len)) | |
90 | break; /* done with one message */ | |
91 | } | |
92 | fclose(output); | |
93 | return status; | |
94 | ||
95 | corrupt: | |
96 | if (output) | |
97 | fclose(output); | |
98 | unlink(name); | |
99 | fprintf(stderr, "corrupt mailbox\n"); | |
100 | exit(1); | |
101 | } | |
102 | ||
103 | static int populate_maildir_list(struct string_list *list, const char *path) | |
104 | { | |
105 | DIR *dir; | |
106 | struct dirent *dent; | |
107 | char name[PATH_MAX]; | |
108 | char *subs[] = { "cur", "new", NULL }; | |
109 | char **sub; | |
110 | ||
111 | for (sub = subs; *sub; ++sub) { | |
112 | snprintf(name, sizeof(name), "%s/%s", path, *sub); | |
113 | if ((dir = opendir(name)) == NULL) { | |
114 | if (errno == ENOENT) | |
115 | continue; | |
116 | error("cannot opendir %s (%s)", name, strerror(errno)); | |
117 | return -1; | |
118 | } | |
119 | ||
120 | while ((dent = readdir(dir)) != NULL) { | |
121 | if (dent->d_name[0] == '.') | |
122 | continue; | |
123 | snprintf(name, sizeof(name), "%s/%s", *sub, dent->d_name); | |
124 | string_list_insert(list, name); | |
125 | } | |
126 | ||
127 | closedir(dir); | |
128 | } | |
129 | ||
130 | return 0; | |
131 | } | |
132 | ||
133 | static int split_maildir(const char *maildir, const char *dir, | |
134 | int nr_prec, int skip) | |
135 | { | |
136 | char file[PATH_MAX]; | |
137 | char name[PATH_MAX]; | |
138 | int ret = -1; | |
139 | int i; | |
140 | struct string_list list = STRING_LIST_INIT_DUP; | |
141 | ||
142 | if (populate_maildir_list(&list, maildir) < 0) | |
143 | goto out; | |
144 | ||
145 | for (i = 0; i < list.nr; i++) { | |
146 | FILE *f; | |
147 | snprintf(file, sizeof(file), "%s/%s", maildir, list.items[i].string); | |
148 | f = fopen(file, "r"); | |
149 | if (!f) { | |
150 | error("cannot open mail %s (%s)", file, strerror(errno)); | |
151 | goto out; | |
152 | } | |
153 | ||
154 | if (strbuf_getwholeline(&buf, f, '\n')) { | |
155 | error("cannot read mail %s (%s)", file, strerror(errno)); | |
156 | goto out; | |
157 | } | |
158 | ||
159 | sprintf(name, "%s/%0*d", dir, nr_prec, ++skip); | |
160 | split_one(f, name, 1); | |
161 | ||
162 | fclose(f); | |
163 | } | |
164 | ||
165 | ret = skip; | |
166 | out: | |
167 | string_list_clear(&list, 1); | |
168 | return ret; | |
169 | } | |
170 | ||
171 | static int split_mbox(const char *file, const char *dir, int allow_bare, | |
172 | int nr_prec, int skip) | |
173 | { | |
174 | char name[PATH_MAX]; | |
175 | int ret = -1; | |
176 | int peek; | |
177 | ||
178 | FILE *f = !strcmp(file, "-") ? stdin : fopen(file, "r"); | |
179 | int file_done = 0; | |
180 | ||
181 | if (!f) { | |
182 | error("cannot open mbox %s", file); | |
183 | goto out; | |
184 | } | |
185 | ||
186 | do { | |
187 | peek = fgetc(f); | |
188 | } while (isspace(peek)); | |
189 | ungetc(peek, f); | |
190 | ||
191 | if (strbuf_getwholeline(&buf, f, '\n')) { | |
192 | /* empty stdin is OK */ | |
193 | if (f != stdin) { | |
194 | error("cannot read mbox %s", file); | |
195 | goto out; | |
196 | } | |
197 | file_done = 1; | |
198 | } | |
199 | ||
200 | while (!file_done) { | |
201 | sprintf(name, "%s/%0*d", dir, nr_prec, ++skip); | |
202 | file_done = split_one(f, name, allow_bare); | |
203 | } | |
204 | ||
205 | if (f != stdin) | |
206 | fclose(f); | |
207 | ||
208 | ret = skip; | |
209 | out: | |
210 | return ret; | |
211 | } | |
212 | ||
213 | int cmd_mailsplit(int argc, const char **argv, const char *prefix) | |
214 | { | |
215 | int nr = 0, nr_prec = 4, num = 0; | |
216 | int allow_bare = 0; | |
217 | const char *dir = NULL; | |
218 | const char **argp; | |
219 | static const char *stdin_only[] = { "-", NULL }; | |
220 | ||
221 | for (argp = argv+1; *argp; argp++) { | |
222 | const char *arg = *argp; | |
223 | ||
224 | if (arg[0] != '-') | |
225 | break; | |
226 | /* do flags here */ | |
227 | if ( arg[1] == 'd' ) { | |
228 | nr_prec = strtol(arg+2, NULL, 10); | |
229 | if (nr_prec < 3 || 10 <= nr_prec) | |
230 | usage(git_mailsplit_usage); | |
231 | continue; | |
232 | } else if ( arg[1] == 'f' ) { | |
233 | nr = strtol(arg+2, NULL, 10); | |
234 | } else if ( arg[1] == 'h' ) { | |
235 | usage(git_mailsplit_usage); | |
236 | } else if ( arg[1] == 'b' && !arg[2] ) { | |
237 | allow_bare = 1; | |
238 | } else if (!strcmp(arg, "--keep-cr")) { | |
239 | keep_cr = 1; | |
240 | } else if ( arg[1] == 'o' && arg[2] ) { | |
241 | dir = arg+2; | |
242 | } else if ( arg[1] == '-' && !arg[2] ) { | |
243 | argp++; /* -- marks end of options */ | |
244 | break; | |
245 | } else { | |
246 | die("unknown option: %s", arg); | |
247 | } | |
248 | } | |
249 | ||
250 | if ( !dir ) { | |
251 | /* Backwards compatibility: if no -o specified, accept | |
252 | <mbox> <dir> or just <dir> */ | |
253 | switch (argc - (argp-argv)) { | |
254 | case 1: | |
255 | dir = argp[0]; | |
256 | argp = stdin_only; | |
257 | break; | |
258 | case 2: | |
259 | stdin_only[0] = argp[0]; | |
260 | dir = argp[1]; | |
261 | argp = stdin_only; | |
262 | break; | |
263 | default: | |
264 | usage(git_mailsplit_usage); | |
265 | } | |
266 | } else { | |
267 | /* New usage: if no more argument, parse stdin */ | |
268 | if ( !*argp ) | |
269 | argp = stdin_only; | |
270 | } | |
271 | ||
272 | while (*argp) { | |
273 | const char *arg = *argp++; | |
274 | struct stat argstat; | |
275 | int ret = 0; | |
276 | ||
277 | if (arg[0] == '-' && arg[1] == 0) { | |
278 | ret = split_mbox(arg, dir, allow_bare, nr_prec, nr); | |
279 | if (ret < 0) { | |
280 | error("cannot split patches from stdin"); | |
281 | return 1; | |
282 | } | |
283 | num += (ret - nr); | |
284 | nr = ret; | |
285 | continue; | |
286 | } | |
287 | ||
288 | if (stat(arg, &argstat) == -1) { | |
289 | error("cannot stat %s (%s)", arg, strerror(errno)); | |
290 | return 1; | |
291 | } | |
292 | ||
293 | if (S_ISDIR(argstat.st_mode)) | |
294 | ret = split_maildir(arg, dir, nr_prec, nr); | |
295 | else | |
296 | ret = split_mbox(arg, dir, allow_bare, nr_prec, nr); | |
297 | ||
298 | if (ret < 0) { | |
299 | error("cannot split patches from %s", arg); | |
300 | return 1; | |
301 | } | |
302 | num += (ret - nr); | |
303 | nr = ret; | |
304 | } | |
305 | ||
306 | printf("%d\n", num); | |
307 | ||
308 | return 0; | |
309 | } |