]>
Commit | Line | Data |
---|---|---|
db5e523f SP |
1 | #include "builtin.h" |
2 | #include "cache.h" | |
3 | #include "object.h" | |
4 | #include "blob.h" | |
5 | #include "delta.h" | |
6 | #include "pack.h" | |
7 | #include "csum-file.h" | |
8 | ||
9 | static int max_depth = 10; | |
10 | static unsigned long object_count; | |
8bcce301 SP |
11 | static unsigned long duplicate_count; |
12 | static unsigned long packoff; | |
13 | static unsigned long overflow_count; | |
db5e523f SP |
14 | static int packfd; |
15 | static int current_depth; | |
16 | static void *lastdat; | |
17 | static unsigned long lastdatlen; | |
18 | static unsigned char lastsha1[20]; | |
8bcce301 SP |
19 | static unsigned char packsha1[20]; |
20 | ||
21 | struct object_entry | |
22 | { | |
23 | struct object_entry *next; | |
24 | unsigned long offset; | |
25 | unsigned char sha1[20]; | |
26 | }; | |
27 | ||
28 | struct overflow_object_entry | |
29 | { | |
30 | struct overflow_object_entry *next; | |
31 | struct object_entry oe; | |
32 | }; | |
33 | ||
34 | struct object_entry *pool_start; | |
35 | struct object_entry *pool_next; | |
36 | struct object_entry *pool_end; | |
37 | struct overflow_object_entry *overflow; | |
38 | struct object_entry *table[1 << 16]; | |
39 | ||
40 | static struct object_entry* new_object(unsigned char *sha1) | |
41 | { | |
42 | if (pool_next != pool_end) { | |
43 | struct object_entry *e = pool_next++; | |
44 | memcpy(e->sha1, sha1, sizeof(e->sha1)); | |
45 | return e; | |
46 | } else { | |
47 | struct overflow_object_entry *e; | |
48 | ||
49 | e = xmalloc(sizeof(struct overflow_object_entry)); | |
50 | e->next = overflow; | |
51 | memcpy(e->oe.sha1, sha1, sizeof(e->oe.sha1)); | |
52 | overflow = e; | |
53 | overflow_count++; | |
54 | return &e->oe; | |
55 | } | |
56 | } | |
57 | ||
58 | static struct object_entry* insert_object(unsigned char *sha1) | |
59 | { | |
60 | unsigned int h = sha1[0] << 8 | sha1[1]; | |
61 | struct object_entry *e = table[h]; | |
62 | struct object_entry *p = 0; | |
63 | ||
64 | while (e) { | |
65 | if (!memcmp(sha1, e->sha1, sizeof(e->sha1))) | |
66 | return e; | |
67 | p = e; | |
68 | e = e->next; | |
69 | } | |
70 | ||
71 | e = new_object(sha1); | |
72 | e->next = 0; | |
73 | e->offset = 0; | |
74 | if (p) | |
75 | p->next = e; | |
76 | else | |
77 | table[h] = e; | |
78 | return e; | |
79 | } | |
db5e523f SP |
80 | |
81 | static ssize_t yread(int fd, void *buffer, size_t length) | |
82 | { | |
83 | ssize_t ret = 0; | |
84 | while (ret < length) { | |
85 | ssize_t size = xread(fd, (char *) buffer + ret, length - ret); | |
86 | if (size < 0) { | |
87 | return size; | |
88 | } | |
89 | if (size == 0) { | |
90 | return ret; | |
91 | } | |
92 | ret += size; | |
93 | } | |
94 | return ret; | |
95 | } | |
96 | ||
97 | static ssize_t ywrite(int fd, void *buffer, size_t length) | |
98 | { | |
99 | ssize_t ret = 0; | |
100 | while (ret < length) { | |
101 | ssize_t size = xwrite(fd, (char *) buffer + ret, length - ret); | |
102 | if (size < 0) { | |
103 | return size; | |
104 | } | |
105 | if (size == 0) { | |
106 | return ret; | |
107 | } | |
108 | ret += size; | |
109 | } | |
110 | return ret; | |
111 | } | |
112 | ||
113 | static unsigned long encode_header(enum object_type type, unsigned long size, unsigned char *hdr) | |
114 | { | |
115 | int n = 1; | |
116 | unsigned char c; | |
117 | ||
118 | if (type < OBJ_COMMIT || type > OBJ_DELTA) | |
119 | die("bad type %d", type); | |
120 | ||
121 | c = (type << 4) | (size & 15); | |
122 | size >>= 4; | |
123 | while (size) { | |
124 | *hdr++ = c | 0x80; | |
125 | c = size & 0x7f; | |
126 | size >>= 7; | |
127 | n++; | |
128 | } | |
129 | *hdr = c; | |
130 | return n; | |
131 | } | |
132 | ||
8bcce301 | 133 | static void write_blob(void *dat, unsigned long datlen) |
db5e523f SP |
134 | { |
135 | z_stream s; | |
136 | void *out, *delta; | |
137 | unsigned char hdr[64]; | |
138 | unsigned long hdrlen, deltalen; | |
139 | ||
140 | if (lastdat && current_depth < max_depth) { | |
141 | delta = diff_delta(lastdat, lastdatlen, | |
142 | dat, datlen, | |
143 | &deltalen, 0); | |
144 | } else | |
145 | delta = 0; | |
146 | ||
147 | memset(&s, 0, sizeof(s)); | |
148 | deflateInit(&s, zlib_compression_level); | |
149 | ||
150 | if (delta) { | |
151 | current_depth++; | |
152 | s.next_in = delta; | |
153 | s.avail_in = deltalen; | |
154 | hdrlen = encode_header(OBJ_DELTA, deltalen, hdr); | |
155 | if (ywrite(packfd, hdr, hdrlen) != hdrlen) | |
156 | die("Can't write object header: %s", strerror(errno)); | |
157 | if (ywrite(packfd, lastsha1, sizeof(lastsha1)) != sizeof(lastsha1)) | |
158 | die("Can't write object base: %s", strerror(errno)); | |
8bcce301 | 159 | packoff += hdrlen + sizeof(lastsha1); |
db5e523f SP |
160 | } else { |
161 | current_depth = 0; | |
162 | s.next_in = dat; | |
163 | s.avail_in = datlen; | |
164 | hdrlen = encode_header(OBJ_BLOB, datlen, hdr); | |
165 | if (ywrite(packfd, hdr, hdrlen) != hdrlen) | |
166 | die("Can't write object header: %s", strerror(errno)); | |
8bcce301 | 167 | packoff += hdrlen; |
db5e523f SP |
168 | } |
169 | ||
170 | s.avail_out = deflateBound(&s, s.avail_in); | |
171 | s.next_out = out = xmalloc(s.avail_out); | |
172 | while (deflate(&s, Z_FINISH) == Z_OK) | |
173 | /* nothing */; | |
174 | deflateEnd(&s); | |
175 | ||
176 | if (ywrite(packfd, out, s.total_out) != s.total_out) | |
177 | die("Failed writing compressed data %s", strerror(errno)); | |
8bcce301 | 178 | packoff += s.total_out; |
db5e523f SP |
179 | |
180 | free(out); | |
181 | if (delta) | |
182 | free(delta); | |
183 | } | |
184 | ||
8bcce301 | 185 | static void init_pack_header() |
db5e523f SP |
186 | { |
187 | const char* magic = "PACK"; | |
188 | unsigned long version = 2; | |
189 | unsigned long zero = 0; | |
190 | ||
191 | version = htonl(version); | |
192 | ||
193 | if (ywrite(packfd, (char*)magic, 4) != 4) | |
194 | die("Can't write pack magic: %s", strerror(errno)); | |
195 | if (ywrite(packfd, &version, 4) != 4) | |
196 | die("Can't write pack version: %s", strerror(errno)); | |
197 | if (ywrite(packfd, &zero, 4) != 4) | |
198 | die("Can't write 0 object count: %s", strerror(errno)); | |
8bcce301 | 199 | packoff = 4 * 3; |
db5e523f SP |
200 | } |
201 | ||
8bcce301 | 202 | static void fixup_header_footer() |
db5e523f SP |
203 | { |
204 | SHA_CTX c; | |
205 | char hdr[8]; | |
db5e523f SP |
206 | unsigned long cnt; |
207 | char *buf; | |
208 | size_t n; | |
209 | ||
210 | if (lseek(packfd, 0, SEEK_SET) != 0) | |
211 | die("Failed seeking to start: %s", strerror(errno)); | |
212 | ||
213 | SHA1_Init(&c); | |
214 | if (yread(packfd, hdr, 8) != 8) | |
215 | die("Failed reading header: %s", strerror(errno)); | |
216 | SHA1_Update(&c, hdr, 8); | |
217 | ||
db5e523f SP |
218 | cnt = htonl(object_count); |
219 | SHA1_Update(&c, &cnt, 4); | |
220 | if (ywrite(packfd, &cnt, 4) != 4) | |
221 | die("Failed writing object count: %s", strerror(errno)); | |
222 | ||
223 | buf = xmalloc(128 * 1024); | |
224 | for (;;) { | |
225 | n = xread(packfd, buf, 128 * 1024); | |
226 | if (n <= 0) | |
227 | break; | |
228 | SHA1_Update(&c, buf, n); | |
229 | } | |
230 | free(buf); | |
231 | ||
8bcce301 SP |
232 | SHA1_Final(packsha1, &c); |
233 | if (ywrite(packfd, packsha1, sizeof(packsha1)) != sizeof(packsha1)) | |
db5e523f SP |
234 | die("Failed writing pack checksum: %s", strerror(errno)); |
235 | } | |
236 | ||
8bcce301 | 237 | static int oecmp (const void *_a, const void *_b) |
db5e523f | 238 | { |
8bcce301 SP |
239 | struct object_entry *a = *((struct object_entry**)_a); |
240 | struct object_entry *b = *((struct object_entry**)_b); | |
241 | return memcmp(a->sha1, b->sha1, sizeof(a->sha1)); | |
242 | } | |
243 | ||
244 | static void write_index(const char *idx_name) | |
245 | { | |
246 | struct sha1file *f; | |
247 | struct object_entry **idx, **c, **last; | |
248 | struct object_entry *e; | |
249 | struct overflow_object_entry *o; | |
250 | unsigned int array[256]; | |
251 | int i; | |
252 | ||
253 | /* Build the sorted table of object IDs. */ | |
254 | idx = xmalloc(object_count * sizeof(struct object_entry*)); | |
255 | c = idx; | |
256 | for (e = pool_start; e != pool_next; e++) | |
257 | *c++ = e; | |
258 | for (o = overflow; o; o = o->next) | |
259 | *c++ = &o->oe; | |
260 | last = idx + object_count; | |
261 | qsort(idx, object_count, sizeof(struct object_entry*), oecmp); | |
262 | ||
263 | /* Generate the fan-out array. */ | |
264 | c = idx; | |
265 | for (i = 0; i < 256; i++) { | |
266 | struct object_entry **next = c;; | |
267 | while (next < last) { | |
268 | if ((*next)->sha1[0] != i) | |
269 | break; | |
270 | next++; | |
271 | } | |
272 | array[i] = htonl(next - idx); | |
273 | c = next; | |
274 | } | |
275 | ||
276 | f = sha1create("%s", idx_name); | |
277 | sha1write(f, array, 256 * sizeof(int)); | |
278 | for (c = idx; c != last; c++) { | |
279 | unsigned int offset = htonl((*c)->offset); | |
280 | sha1write(f, &offset, 4); | |
281 | sha1write(f, (*c)->sha1, sizeof((*c)->sha1)); | |
282 | } | |
283 | sha1write(f, packsha1, sizeof(packsha1)); | |
284 | sha1close(f, NULL, 1); | |
285 | free(idx); | |
286 | } | |
287 | ||
288 | int main(int argc, const char **argv) | |
289 | { | |
290 | const char *base_name = argv[1]; | |
291 | int est_obj_cnt = atoi(argv[2]); | |
292 | char *pack_name; | |
293 | char *idx_name; | |
294 | ||
295 | pack_name = xmalloc(strlen(base_name) + 6); | |
296 | sprintf(pack_name, "%s.pack", base_name); | |
297 | idx_name = xmalloc(strlen(base_name) + 5); | |
298 | sprintf(idx_name, "%s.idx", base_name); | |
299 | ||
300 | packfd = open(pack_name, O_RDWR|O_CREAT|O_TRUNC, 0666); | |
db5e523f | 301 | if (packfd < 0) |
8bcce301 SP |
302 | die("Can't create pack file %s: %s", pack_name, strerror(errno)); |
303 | ||
304 | pool_start = xmalloc(est_obj_cnt * sizeof(struct object_entry)); | |
305 | pool_next = pool_start; | |
306 | pool_end = pool_start + est_obj_cnt; | |
db5e523f SP |
307 | |
308 | init_pack_header(); | |
309 | for (;;) { | |
310 | unsigned long datlen; | |
311 | int hdrlen; | |
312 | void *dat; | |
313 | char hdr[128]; | |
314 | unsigned char sha1[20]; | |
315 | SHA_CTX c; | |
8bcce301 | 316 | struct object_entry *e; |
db5e523f SP |
317 | |
318 | if (yread(0, &datlen, 4) != 4) | |
8bcce301 | 319 | |
db5e523f SP |
320 | break; |
321 | ||
322 | dat = xmalloc(datlen); | |
323 | if (yread(0, dat, datlen) != datlen) | |
324 | break; | |
325 | ||
326 | hdrlen = sprintf(hdr, "blob %lu", datlen) + 1; | |
327 | SHA1_Init(&c); | |
328 | SHA1_Update(&c, hdr, hdrlen); | |
329 | SHA1_Update(&c, dat, datlen); | |
330 | SHA1_Final(sha1, &c); | |
331 | ||
8bcce301 SP |
332 | e = insert_object(sha1); |
333 | if (!e->offset) { | |
334 | e->offset = packoff; | |
335 | write_blob(dat, datlen); | |
336 | object_count++; | |
337 | printf("%s\n", sha1_to_hex(sha1)); | |
338 | fflush(stdout); | |
db5e523f | 339 | |
8bcce301 SP |
340 | if (lastdat) |
341 | free(lastdat); | |
342 | lastdat = dat; | |
343 | lastdatlen = datlen; | |
344 | memcpy(lastsha1, sha1, sizeof(sha1)); | |
345 | } else { | |
346 | duplicate_count++; | |
347 | free(dat); | |
348 | } | |
db5e523f SP |
349 | } |
350 | fixup_header_footer(); | |
351 | close(packfd); | |
8bcce301 SP |
352 | write_index(idx_name); |
353 | ||
354 | fprintf(stderr, "%lu objects, %lu duplicates, %lu pool overflow\n", | |
355 | object_count, duplicate_count, overflow_count); | |
db5e523f SP |
356 | |
357 | return 0; | |
358 | } |