]>
Commit | Line | Data |
---|---|---|
2744b234 LT |
1 | /* |
2 | * Totally braindamaged mbox splitter program. | |
3 | * | |
4 | * It just splits a mbox into a list of files: "0001" "0002" .. | |
5 | * so you can process them further from there. | |
6 | */ | |
7 | #include <unistd.h> | |
8 | #include <stdlib.h> | |
9 | #include <fcntl.h> | |
10 | #include <sys/types.h> | |
11 | #include <sys/stat.h> | |
2744b234 LT |
12 | #include <string.h> |
13 | #include <stdio.h> | |
8b73edf4 | 14 | #include "cache.h" |
e690e843 | 15 | #include "builtin.h" |
2744b234 | 16 | |
/* Synopsis handed to usage() when the command line cannot be parsed. */
static const char git_mailsplit_usage[] =
	"git-mailsplit"
	" [-d<prec>] [-f<n>] [-b]"
	" -o<directory> <mbox>...";
2744b234 LT |
19 | |
/*
 * Heuristically decide whether a line is a Unix mbox "From " separator.
 *
 * line: start of the line. It is expected to be NUL-terminated, because
 *       the number after the time field is parsed with strtol().
 * len:  length of the line in bytes.
 *
 * The line qualifies when it is at least 20 bytes long, begins with
 * "From ", and the last ':' found at or before line[len-3] has digits at
 * offsets -4, -2, -1, +1 and +2 around it (the tail of an hh:mm:ss
 * timestamp) and is followed, three bytes later, by a number greater
 * than 90 (the year).
 *
 * Returns 1 if the line looks like a separator, 0 otherwise.
 */
static int is_from_line(const char *line, int len)
{
	const char *start;
	const char *colon;

	if (len < 20 || memcmp("From ", line, 5))
		return 0;

	/* Walk backwards over everything after "From " looking for ':' */
	start = line + 5;
	for (colon = line + len - 3; colon >= start; colon--)
		if (*colon == ':')
			break;
	if (colon < start)
		return 0;

	/*
	 * The <ctype.h> classifiers are only defined for EOF and values
	 * representable as unsigned char, so never hand them a plain char
	 * that may be negative (8-bit mail content).
	 */
	if (!isdigit((unsigned char)colon[-4]) ||
	    !isdigit((unsigned char)colon[-2]) ||
	    !isdigit((unsigned char)colon[-1]) ||
	    !isdigit((unsigned char)colon[1]) ||
	    !isdigit((unsigned char)colon[2]))
		return 0;

	/* year */
	if (strtol(colon + 3, NULL, 10) <= 90)
		return 0;

	/* Ok, close enough */
	return 1;
}
50 | ||
8b73edf4 JH |
/*
 * Line buffer shared by the splitter functions in this file.
 * 64 bytes would already be enough to hold a Unix "From " line.
 */
enum { MAILSPLIT_BUF_SIZE = 4096 };
static char buf[MAILSPLIT_BUF_SIZE];
53 | ||
54 | /* Called with the first line (potentially partial) | |
55 | * already in buf[] -- normally that should begin with | |
56 | * the Unix "From " line. Write it into the specified | |
57 | * file. | |
58 | */ | |
b3f041fb | 59 | static int split_one(FILE *mbox, const char *name, int allow_bare) |
2744b234 | 60 | { |
8b73edf4 JH |
61 | FILE *output = NULL; |
62 | int len = strlen(buf); | |
63 | int fd; | |
64 | int status = 0; | |
b3f041fb | 65 | int is_bare = !is_from_line(buf, len); |
2744b234 | 66 | |
b3f041fb | 67 | if (is_bare && !allow_bare) |
2744b234 LT |
68 | goto corrupt; |
69 | ||
8b73edf4 JH |
70 | fd = open(name, O_WRONLY | O_CREAT | O_EXCL, 0666); |
71 | if (fd < 0) | |
72 | die("cannot open output file %s", name); | |
73 | output = fdopen(fd, "w"); | |
2744b234 | 74 | |
8b73edf4 JH |
75 | /* Copy it out, while searching for a line that begins with |
76 | * "From " and having something that looks like a date format. | |
2744b234 | 77 | */ |
8b73edf4 JH |
78 | for (;;) { |
79 | int is_partial = (buf[len-1] != '\n'); | |
80 | ||
81 | if (fputs(buf, output) == EOF) | |
82 | die("cannot write output"); | |
83 | ||
84 | if (fgets(buf, sizeof(buf), mbox) == NULL) { | |
85 | if (feof(mbox)) { | |
86 | status = 1; | |
87 | break; | |
88 | } | |
89 | die("cannot read mbox"); | |
90 | } | |
91 | len = strlen(buf); | |
b3f041fb | 92 | if (!is_partial && !is_bare && is_from_line(buf, len)) |
8b73edf4 JH |
93 | break; /* done with one message */ |
94 | } | |
95 | fclose(output); | |
96 | return status; | |
97 | ||
98 | corrupt: | |
99 | if (output) | |
100 | fclose(output); | |
101 | unlink(name); | |
2744b234 LT |
102 | fprintf(stderr, "corrupt mailbox\n"); |
103 | exit(1); | |
104 | } | |
105 | ||
e690e843 | 106 | int split_mbox(const char **mbox, const char *dir, int allow_bare, int nr_prec, int skip) |
2744b234 | 107 | { |
e690e843 LS |
108 | char *name = xmalloc(strlen(dir) + 2 + 3 * sizeof(skip)); |
109 | int ret = -1; | |
110 | ||
111 | while (*mbox) { | |
112 | const char *file = *mbox++; | |
113 | FILE *f = !strcmp(file, "-") ? stdin : fopen(file, "r"); | |
114 | int file_done = 0; | |
115 | ||
116 | if ( !f ) { | |
117 | error("cannot open mbox %s", file); | |
118 | goto out; | |
119 | } | |
120 | ||
121 | if (fgets(buf, sizeof(buf), f) == NULL) { | |
122 | if (f == stdin) | |
123 | break; /* empty stdin is OK */ | |
124 | error("cannot read mbox %s", file); | |
125 | goto out; | |
126 | } | |
127 | ||
128 | while (!file_done) { | |
129 | sprintf(name, "%s/%0*d", dir, nr_prec, ++skip); | |
130 | file_done = split_one(f, name, allow_bare); | |
131 | } | |
132 | ||
133 | if (f != stdin) | |
134 | fclose(f); | |
135 | } | |
136 | ret = skip; | |
137 | out: | |
138 | free(name); | |
139 | return ret; | |
140 | } | |
a633fca0 | 141 | int cmd_mailsplit(int argc, const char **argv, const char *prefix) |
e690e843 LS |
142 | { |
143 | int nr = 0, nr_prec = 4, ret; | |
b3f041fb PA |
144 | int allow_bare = 0; |
145 | const char *dir = NULL; | |
146 | const char **argp; | |
147 | static const char *stdin_only[] = { "-", NULL }; | |
8b73edf4 | 148 | |
b3f041fb PA |
149 | for (argp = argv+1; *argp; argp++) { |
150 | const char *arg = *argp; | |
8b73edf4 JH |
151 | |
152 | if (arg[0] != '-') | |
153 | break; | |
154 | /* do flags here */ | |
b3f041fb PA |
155 | if ( arg[1] == 'd' ) { |
156 | nr_prec = strtol(arg+2, NULL, 10); | |
8b73edf4 JH |
157 | if (nr_prec < 3 || 10 <= nr_prec) |
158 | usage(git_mailsplit_usage); | |
159 | continue; | |
b3f041fb PA |
160 | } else if ( arg[1] == 'f' ) { |
161 | nr = strtol(arg+2, NULL, 10); | |
162 | } else if ( arg[1] == 'b' && !arg[2] ) { | |
163 | allow_bare = 1; | |
164 | } else if ( arg[1] == 'o' && arg[2] ) { | |
165 | dir = arg+2; | |
166 | } else if ( arg[1] == '-' && !arg[2] ) { | |
167 | argp++; /* -- marks end of options */ | |
168 | break; | |
169 | } else { | |
170 | die("unknown option: %s", arg); | |
8b73edf4 | 171 | } |
2744b234 | 172 | } |
8b73edf4 | 173 | |
b3f041fb PA |
174 | if ( !dir ) { |
175 | /* Backwards compatibility: if no -o specified, accept | |
176 | <mbox> <dir> or just <dir> */ | |
177 | switch (argc - (argp-argv)) { | |
178 | case 1: | |
179 | dir = argp[0]; | |
180 | argp = stdin_only; | |
181 | break; | |
182 | case 2: | |
183 | stdin_only[0] = argp[0]; | |
184 | dir = argp[1]; | |
185 | argp = stdin_only; | |
186 | break; | |
187 | default: | |
188 | usage(git_mailsplit_usage); | |
189 | } | |
190 | } else { | |
191 | /* New usage: if no more argument, parse stdin */ | |
192 | if ( !*argp ) | |
193 | argp = stdin_only; | |
2744b234 | 194 | } |
8b73edf4 | 195 | |
e690e843 LS |
196 | ret = split_mbox(argp, dir, allow_bare, nr_prec, nr); |
197 | if (ret != -1) | |
198 | printf("%d\n", ret); | |
b3f041fb | 199 | |
e690e843 | 200 | return ret == -1; |
2744b234 | 201 | } |