]>
Commit | Line | Data |
---|---|---|
2744b234 LT |
1 | /* |
2 | * Totally braindamaged mbox splitter program. | |
3 | * | |
4 | * It just splits a mbox into a list of files: "0001" "0002" .. | |
5 | * so you can process them further from there. | |
6 | */ | |
7 | #include <unistd.h> | |
8 | #include <stdlib.h> | |
9 | #include <fcntl.h> | |
10 | #include <sys/types.h> | |
11 | #include <sys/stat.h> | |
12 | #include <sys/mman.h> | |
13 | #include <string.h> | |
14 | #include <stdio.h> | |
15 | #include <ctype.h> | |
16 | #include <assert.h> | |
17 | ||
/*
 * Print the command-line synopsis on stderr and terminate the program
 * with exit status 1. Declared to return int, but control never returns
 * to the caller.
 */
static int usage(void)
{
	fputs("mailsplit <mbox> <directory>\n", stderr);
	exit(1);
}
23 | ||
/*
 * Return the length of the first line in the buffer at "map".
 *
 * The terminating '\n' is counted when there is one. If the first "size"
 * bytes contain no newline, the whole of "size" is the line. An empty
 * buffer (size == 0) yields 0 and is never dereferenced.
 */
static int linelen(const char *map, unsigned long size)
{
	const char *newline = memchr(map, '\n', size);

	if (!newline)
		return (int)size;
	return (int)(newline - map) + 1;
}
36 | ||
/*
 * Decide whether the "len" bytes starting at "line" look like an mbox
 * "From " separator line.
 *
 * This is a heuristic. The line must be at least 20 bytes long, start
 * with "From ", and its last ':' (with two bytes to spare after it) must
 * sit in a "digit, any byte, two digits, ':', two digits" pattern that is
 * followed by a number greater than 90 (taken to be the year).
 *
 * Returns 1 on a match and 0 otherwise. Nothing outside line[0..len-1]
 * is read, so the buffer does not have to be NUL-terminated.
 */
static int is_from_line(const char *line, int len)
{
	const char *end, *body, *colon, *p;
	long year = 0;

	if (len < 20 || memcmp("From ", line, 5))
		return 0;

	end = line + len;
	body = line + 5;

	/* Scan backwards for the last ':' after the "From " prefix. */
	for (colon = end - 3; colon >= body && *colon != ':'; colon--)
		;
	if (colon < body)
		return 0;

	/* <ctype.h> functions need unsigned char values; mail may hold 8-bit bytes. */
	if (!isdigit((unsigned char)colon[-4]) ||
	    !isdigit((unsigned char)colon[-2]) ||
	    !isdigit((unsigned char)colon[-1]) ||
	    !isdigit((unsigned char)colon[ 1]) ||
	    !isdigit((unsigned char)colon[ 2]))
		return 0;

	/*
	 * year: parsed by hand (optional whitespace, optional sign, decimal
	 * digits) so that the scan stops at the end of this line instead of
	 * running on into the following bytes.
	 */
	p = colon + 3;
	while (p < end && isspace((unsigned char)*p))
		p++;
	if (p < end && *p == '-')
		return 0;	/* a negative number can never exceed 90 */
	if (p < end && *p == '+')
		p++;
	for (; p < end && isdigit((unsigned char)*p); p++) {
		year = year * 10 + (*p - '0');
		if (year > 90)
			return 1;	/* Ok, close enough */
	}
	return 0;
}
67 | ||
/*
 * Measure the first message in an mbox image.
 *
 * "map" points at "size" bytes that must begin with "From ". Anything
 * else is reported as "corrupt mailbox" on stderr and the program exits
 * with status 1.
 *
 * Returns the number of bytes up to, but not including, the next line
 * that is_from_line() accepts, or everything that is left when no such
 * line exists.
 * NOTE(review): the offset is tracked as unsigned long but narrowed to
 * int on return, so a single message is assumed to fit in an int.
 */
static int parse_email(const void *map, unsigned long size)
{
	/* ISO C has no arithmetic on void pointers; walk the bytes as char. */
	const char *cursor = map;
	unsigned long offset;

	if (size < 6 || memcmp("From ", cursor, 5)) {
		fprintf(stderr, "corrupt mailbox\n");
		exit(1);
	}

	/* Make sure we don't trigger on this first line */
	cursor++;
	size--;
	offset = 1;

	/*
	 * Search for a line beginning with "From ", and
	 * having something that looks like a date format.
	 */
	do {
		int len = linelen(cursor, size);

		if (is_from_line(cursor, len))
			break;
		cursor += len;
		size -= len;
		offset += len;
	} while (size);

	return offset;
}
96 | ||
/*
 * Write all "len" bytes at "buf" to "fd", continuing after short writes.
 * "name" is only used to label the short-write diagnostic. Exits with
 * status 1 on failure.
 */
static void write_fully(int fd, const char *name, const char *buf,
			unsigned long len)
{
	while (len) {
		ssize_t n = write(fd, buf, len);

		if (n < 0) {
			perror("write");
			exit(1);
		}
		if (!n) {
			/* errno is not meaningful here, so don't use perror */
			fprintf(stderr, "%s: short write\n", name);
			exit(1);
		}
		buf += n;
		len -= n;
	}
}

/*
 * mailsplit <mbox> <directory>
 *
 * Maps the mbox file read-only, changes into the target directory and
 * writes each message found by parse_email() to a newly created file
 * named "0001", "0002", ... (mode 0600; an already existing file is an
 * error because of O_EXCL). Returns 0 on success; every failure exits
 * with status 1.
 */
int main(int argc, char **argv)
{
	int fd, nr;
	struct stat st;
	unsigned long size;
	void *base;
	const char *map;

	if (argc != 3)
		usage();
	fd = open(argv[1], O_RDONLY);
	if (fd < 0) {
		perror(argv[1]);
		exit(1);
	}
	if (chdir(argv[2]) < 0)
		usage();
	if (fstat(fd, &st) < 0) {
		perror("stat");
		exit(1);
	}
	size = st.st_size;
	base = mmap(NULL, size, PROT_READ, MAP_PRIVATE, fd, 0);
	if (base == MAP_FAILED) {
		perror("mmap");
		close(fd);
		exit(1);
	}
	/* The mapping stays valid after the descriptor is closed. */
	close(fd);

	/* Step through the mapping as bytes; ISO C has no void * arithmetic. */
	map = base;
	nr = 0;
	do {
		char name[16];	/* room for any int plus the terminating NUL */
		unsigned long len = parse_email(map, size);

		assert(len <= size);
		snprintf(name, sizeof(name), "%04d", ++nr);
		fd = open(name, O_WRONLY | O_CREAT | O_EXCL, 0600);
		if (fd < 0) {
			perror(name);
			exit(1);
		}
		write_fully(fd, name, map, len);
		/* Some write errors are only reported when the file is closed. */
		if (close(fd) < 0) {
			perror(name);
			exit(1);
		}
		map += len;
		size -= len;
	} while (size > 0);
	return 0;
}