]>
Commit | Line | Data |
---|---|---|
db5e523f SP |
1 | #include "builtin.h" |
2 | #include "cache.h" | |
3 | #include "object.h" | |
4 | #include "blob.h" | |
5 | #include "delta.h" | |
6 | #include "pack.h" | |
7 | #include "csum-file.h" | |
8 | ||
27d6d290 SP |
9 | struct object_entry |
10 | { | |
11 | struct object_entry *next; | |
12 | unsigned long offset; | |
13 | unsigned char sha1[20]; | |
14 | }; | |
15 | ||
16 | struct object_entry_block | |
17 | { | |
18 | struct object_entry_block *next_block; | |
19 | struct object_entry *next_free; | |
20 | struct object_entry *end; | |
ac47a738 | 21 | struct object_entry entries[FLEX_ARRAY]; /* more */ |
27d6d290 SP |
22 | }; |
23 | ||
ac47a738 SP |
24 | struct last_object |
25 | { | |
26 | void *data; | |
27 | unsigned long len; | |
28 | int depth; | |
29 | unsigned char sha1[20]; | |
30 | }; | |
31 | ||
32 | /* Stats and misc. counters. */ | |
db5e523f | 33 | static int max_depth = 10; |
27d6d290 | 34 | static unsigned long alloc_count; |
db5e523f | 35 | static unsigned long object_count; |
8bcce301 | 36 | static unsigned long duplicate_count; |
6143f064 SP |
37 | static unsigned long object_count_by_type[9]; |
38 | static unsigned long duplicate_count_by_type[9]; | |
ac47a738 SP |
39 | |
40 | /* The .pack file */ | |
41 | static int pack_fd; | |
42 | static unsigned long pack_offset; | |
43 | static unsigned char pack_sha1[20]; | |
44 | ||
45 | /* Table of objects we've written. */ | |
27d6d290 | 46 | struct object_entry_block *blocks; |
ac47a738 SP |
47 | struct object_entry *object_table[1 << 16]; |
48 | ||
49 | /* Our last blob */ | |
50 | struct last_object last_blob; | |
8bcce301 | 51 | |
27d6d290 | 52 | static void alloc_objects(int cnt) |
8bcce301 | 53 | { |
27d6d290 SP |
54 | struct object_entry_block *b; |
55 | ||
56 | b = xmalloc(sizeof(struct object_entry_block) | |
57 | + cnt * sizeof(struct object_entry)); | |
58 | b->next_block = blocks; | |
59 | b->next_free = b->entries; | |
60 | b->end = b->entries + cnt; | |
61 | blocks = b; | |
62 | alloc_count += cnt; | |
63 | } | |
8bcce301 | 64 | |
27d6d290 | 65 | static struct object_entry* new_object(unsigned char *sha1) |
8bcce301 | 66 | { |
27d6d290 | 67 | struct object_entry *e; |
8bcce301 | 68 | |
27d6d290 SP |
69 | if (blocks->next_free == blocks->end) |
70 | alloc_objects(1000); | |
8bcce301 | 71 | |
27d6d290 SP |
72 | e = blocks->next_free++; |
73 | memcpy(e->sha1, sha1, sizeof(e->sha1)); | |
74 | return e; | |
8bcce301 SP |
75 | } |
76 | ||
77 | static struct object_entry* insert_object(unsigned char *sha1) | |
78 | { | |
79 | unsigned int h = sha1[0] << 8 | sha1[1]; | |
27d6d290 | 80 | struct object_entry *e = object_table[h]; |
8bcce301 SP |
81 | struct object_entry *p = 0; |
82 | ||
83 | while (e) { | |
84 | if (!memcmp(sha1, e->sha1, sizeof(e->sha1))) | |
85 | return e; | |
86 | p = e; | |
87 | e = e->next; | |
88 | } | |
89 | ||
90 | e = new_object(sha1); | |
91 | e->next = 0; | |
92 | e->offset = 0; | |
93 | if (p) | |
94 | p->next = e; | |
95 | else | |
27d6d290 | 96 | object_table[h] = e; |
8bcce301 SP |
97 | return e; |
98 | } | |
db5e523f SP |
99 | |
100 | static ssize_t yread(int fd, void *buffer, size_t length) | |
101 | { | |
102 | ssize_t ret = 0; | |
103 | while (ret < length) { | |
104 | ssize_t size = xread(fd, (char *) buffer + ret, length - ret); | |
105 | if (size < 0) { | |
106 | return size; | |
107 | } | |
108 | if (size == 0) { | |
109 | return ret; | |
110 | } | |
111 | ret += size; | |
112 | } | |
113 | return ret; | |
114 | } | |
115 | ||
116 | static ssize_t ywrite(int fd, void *buffer, size_t length) | |
117 | { | |
118 | ssize_t ret = 0; | |
119 | while (ret < length) { | |
120 | ssize_t size = xwrite(fd, (char *) buffer + ret, length - ret); | |
121 | if (size < 0) { | |
122 | return size; | |
123 | } | |
124 | if (size == 0) { | |
125 | return ret; | |
126 | } | |
127 | ret += size; | |
128 | } | |
129 | return ret; | |
130 | } | |
131 | ||
ac47a738 SP |
132 | static unsigned long encode_header( |
133 | enum object_type type, | |
134 | unsigned long size, | |
135 | unsigned char *hdr) | |
db5e523f SP |
136 | { |
137 | int n = 1; | |
138 | unsigned char c; | |
139 | ||
140 | if (type < OBJ_COMMIT || type > OBJ_DELTA) | |
141 | die("bad type %d", type); | |
142 | ||
143 | c = (type << 4) | (size & 15); | |
144 | size >>= 4; | |
145 | while (size) { | |
146 | *hdr++ = c | 0x80; | |
147 | c = size & 0x7f; | |
148 | size >>= 7; | |
149 | n++; | |
150 | } | |
151 | *hdr = c; | |
152 | return n; | |
153 | } | |
154 | ||
ac47a738 SP |
155 | static int store_object( |
156 | enum object_type type, | |
157 | void *dat, | |
158 | unsigned long datlen, | |
159 | struct last_object *last) | |
db5e523f | 160 | { |
db5e523f | 161 | void *out, *delta; |
ac47a738 SP |
162 | struct object_entry *e; |
163 | unsigned char hdr[96]; | |
164 | unsigned char sha1[20]; | |
db5e523f | 165 | unsigned long hdrlen, deltalen; |
ac47a738 SP |
166 | SHA_CTX c; |
167 | z_stream s; | |
168 | ||
169 | hdrlen = sprintf((char*)hdr,"%s %lu",type_names[type],datlen) + 1; | |
170 | SHA1_Init(&c); | |
171 | SHA1_Update(&c, hdr, hdrlen); | |
172 | SHA1_Update(&c, dat, datlen); | |
173 | SHA1_Final(sha1, &c); | |
174 | ||
175 | e = insert_object(sha1); | |
176 | if (e->offset) { | |
177 | duplicate_count++; | |
6143f064 | 178 | duplicate_count_by_type[type]++; |
ac47a738 SP |
179 | return 0; |
180 | } | |
181 | e->offset = pack_offset; | |
182 | object_count++; | |
6143f064 | 183 | object_count_by_type[type]++; |
db5e523f | 184 | |
ac47a738 SP |
185 | if (last->data && last->depth < max_depth) |
186 | delta = diff_delta(last->data, last->len, | |
db5e523f SP |
187 | dat, datlen, |
188 | &deltalen, 0); | |
ac47a738 | 189 | else |
db5e523f SP |
190 | delta = 0; |
191 | ||
192 | memset(&s, 0, sizeof(s)); | |
193 | deflateInit(&s, zlib_compression_level); | |
194 | ||
195 | if (delta) { | |
ac47a738 | 196 | last->depth++; |
db5e523f SP |
197 | s.next_in = delta; |
198 | s.avail_in = deltalen; | |
199 | hdrlen = encode_header(OBJ_DELTA, deltalen, hdr); | |
ac47a738 | 200 | if (ywrite(pack_fd, hdr, hdrlen) != hdrlen) |
db5e523f | 201 | die("Can't write object header: %s", strerror(errno)); |
ac47a738 | 202 | if (ywrite(pack_fd, last->sha1, sizeof(sha1)) != sizeof(sha1)) |
db5e523f | 203 | die("Can't write object base: %s", strerror(errno)); |
ac47a738 | 204 | pack_offset += hdrlen + sizeof(sha1); |
db5e523f | 205 | } else { |
ac47a738 | 206 | last->depth = 0; |
db5e523f SP |
207 | s.next_in = dat; |
208 | s.avail_in = datlen; | |
ac47a738 SP |
209 | hdrlen = encode_header(type, datlen, hdr); |
210 | if (ywrite(pack_fd, hdr, hdrlen) != hdrlen) | |
db5e523f | 211 | die("Can't write object header: %s", strerror(errno)); |
ac47a738 | 212 | pack_offset += hdrlen; |
db5e523f SP |
213 | } |
214 | ||
215 | s.avail_out = deflateBound(&s, s.avail_in); | |
216 | s.next_out = out = xmalloc(s.avail_out); | |
217 | while (deflate(&s, Z_FINISH) == Z_OK) | |
218 | /* nothing */; | |
219 | deflateEnd(&s); | |
220 | ||
ac47a738 | 221 | if (ywrite(pack_fd, out, s.total_out) != s.total_out) |
db5e523f | 222 | die("Failed writing compressed data %s", strerror(errno)); |
ac47a738 | 223 | pack_offset += s.total_out; |
db5e523f SP |
224 | |
225 | free(out); | |
226 | if (delta) | |
227 | free(delta); | |
ac47a738 SP |
228 | if (last->data) |
229 | free(last->data); | |
230 | last->data = dat; | |
231 | last->len = datlen; | |
232 | memcpy(last->sha1, sha1, sizeof(sha1)); | |
233 | return 1; | |
db5e523f SP |
234 | } |
235 | ||
8bcce301 | 236 | static void init_pack_header() |
db5e523f SP |
237 | { |
238 | const char* magic = "PACK"; | |
6143f064 | 239 | unsigned long version = 3; |
db5e523f SP |
240 | unsigned long zero = 0; |
241 | ||
242 | version = htonl(version); | |
243 | ||
ac47a738 | 244 | if (ywrite(pack_fd, (char*)magic, 4) != 4) |
db5e523f | 245 | die("Can't write pack magic: %s", strerror(errno)); |
ac47a738 | 246 | if (ywrite(pack_fd, &version, 4) != 4) |
db5e523f | 247 | die("Can't write pack version: %s", strerror(errno)); |
ac47a738 | 248 | if (ywrite(pack_fd, &zero, 4) != 4) |
db5e523f | 249 | die("Can't write 0 object count: %s", strerror(errno)); |
ac47a738 | 250 | pack_offset = 4 * 3; |
db5e523f SP |
251 | } |
252 | ||
8bcce301 | 253 | static void fixup_header_footer() |
db5e523f SP |
254 | { |
255 | SHA_CTX c; | |
256 | char hdr[8]; | |
db5e523f SP |
257 | unsigned long cnt; |
258 | char *buf; | |
259 | size_t n; | |
260 | ||
ac47a738 | 261 | if (lseek(pack_fd, 0, SEEK_SET) != 0) |
db5e523f SP |
262 | die("Failed seeking to start: %s", strerror(errno)); |
263 | ||
264 | SHA1_Init(&c); | |
ac47a738 | 265 | if (yread(pack_fd, hdr, 8) != 8) |
db5e523f SP |
266 | die("Failed reading header: %s", strerror(errno)); |
267 | SHA1_Update(&c, hdr, 8); | |
268 | ||
db5e523f SP |
269 | cnt = htonl(object_count); |
270 | SHA1_Update(&c, &cnt, 4); | |
ac47a738 | 271 | if (ywrite(pack_fd, &cnt, 4) != 4) |
db5e523f SP |
272 | die("Failed writing object count: %s", strerror(errno)); |
273 | ||
274 | buf = xmalloc(128 * 1024); | |
275 | for (;;) { | |
ac47a738 | 276 | n = xread(pack_fd, buf, 128 * 1024); |
db5e523f SP |
277 | if (n <= 0) |
278 | break; | |
279 | SHA1_Update(&c, buf, n); | |
280 | } | |
281 | free(buf); | |
282 | ||
ac47a738 SP |
283 | SHA1_Final(pack_sha1, &c); |
284 | if (ywrite(pack_fd, pack_sha1, sizeof(pack_sha1)) != sizeof(pack_sha1)) | |
db5e523f SP |
285 | die("Failed writing pack checksum: %s", strerror(errno)); |
286 | } | |
287 | ||
8bcce301 | 288 | static int oecmp (const void *_a, const void *_b) |
db5e523f | 289 | { |
8bcce301 SP |
290 | struct object_entry *a = *((struct object_entry**)_a); |
291 | struct object_entry *b = *((struct object_entry**)_b); | |
292 | return memcmp(a->sha1, b->sha1, sizeof(a->sha1)); | |
293 | } | |
294 | ||
295 | static void write_index(const char *idx_name) | |
296 | { | |
297 | struct sha1file *f; | |
298 | struct object_entry **idx, **c, **last; | |
299 | struct object_entry *e; | |
27d6d290 | 300 | struct object_entry_block *o; |
8bcce301 SP |
301 | unsigned int array[256]; |
302 | int i; | |
303 | ||
304 | /* Build the sorted table of object IDs. */ | |
305 | idx = xmalloc(object_count * sizeof(struct object_entry*)); | |
306 | c = idx; | |
27d6d290 SP |
307 | for (o = blocks; o; o = o->next_block) |
308 | for (e = o->entries; e != o->next_free; e++) | |
309 | *c++ = e; | |
8bcce301 SP |
310 | last = idx + object_count; |
311 | qsort(idx, object_count, sizeof(struct object_entry*), oecmp); | |
312 | ||
313 | /* Generate the fan-out array. */ | |
314 | c = idx; | |
315 | for (i = 0; i < 256; i++) { | |
316 | struct object_entry **next = c;; | |
317 | while (next < last) { | |
318 | if ((*next)->sha1[0] != i) | |
319 | break; | |
320 | next++; | |
321 | } | |
322 | array[i] = htonl(next - idx); | |
323 | c = next; | |
324 | } | |
325 | ||
326 | f = sha1create("%s", idx_name); | |
327 | sha1write(f, array, 256 * sizeof(int)); | |
328 | for (c = idx; c != last; c++) { | |
329 | unsigned int offset = htonl((*c)->offset); | |
330 | sha1write(f, &offset, 4); | |
331 | sha1write(f, (*c)->sha1, sizeof((*c)->sha1)); | |
332 | } | |
ac47a738 | 333 | sha1write(f, pack_sha1, sizeof(pack_sha1)); |
8bcce301 SP |
334 | sha1close(f, NULL, 1); |
335 | free(idx); | |
336 | } | |
337 | ||
6143f064 SP |
338 | static void new_blob() |
339 | { | |
340 | unsigned long datlen; | |
341 | void *dat; | |
342 | ||
343 | if (yread(0, &datlen, 4) != 4) | |
344 | die("Can't obtain blob length"); | |
345 | ||
346 | dat = xmalloc(datlen); | |
347 | if (yread(0, dat, datlen) != datlen) | |
348 | die("Con't obtain %lu bytes of blob data", datlen); | |
349 | ||
350 | if (!store_object(OBJ_BLOB, dat, datlen, &last_blob)) | |
351 | free(dat); | |
352 | } | |
353 | ||
8bcce301 SP |
354 | int main(int argc, const char **argv) |
355 | { | |
356 | const char *base_name = argv[1]; | |
357 | int est_obj_cnt = atoi(argv[2]); | |
358 | char *pack_name; | |
359 | char *idx_name; | |
6143f064 | 360 | struct stat sb; |
8bcce301 SP |
361 | |
362 | pack_name = xmalloc(strlen(base_name) + 6); | |
363 | sprintf(pack_name, "%s.pack", base_name); | |
364 | idx_name = xmalloc(strlen(base_name) + 5); | |
365 | sprintf(idx_name, "%s.idx", base_name); | |
366 | ||
ac47a738 SP |
367 | pack_fd = open(pack_name, O_RDWR|O_CREAT|O_EXCL, 0666); |
368 | if (pack_fd < 0) | |
6143f064 | 369 | die("Can't create %s: %s", pack_name, strerror(errno)); |
8bcce301 | 370 | |
27d6d290 | 371 | alloc_objects(est_obj_cnt); |
db5e523f SP |
372 | init_pack_header(); |
373 | for (;;) { | |
6143f064 SP |
374 | unsigned long cmd; |
375 | if (yread(0, &cmd, 4) != 4) | |
db5e523f SP |
376 | break; |
377 | ||
6143f064 SP |
378 | switch (cmd) { |
379 | case 'blob': new_blob(); break; | |
380 | default: | |
381 | die("Invalid command %lu", cmd); | |
382 | } | |
db5e523f SP |
383 | } |
384 | fixup_header_footer(); | |
ac47a738 | 385 | close(pack_fd); |
8bcce301 SP |
386 | write_index(idx_name); |
387 | ||
6143f064 SP |
388 | fprintf(stderr, "%s statistics:\n", argv[0]); |
389 | fprintf(stderr, "---------------------------------------------------\n"); | |
390 | fprintf(stderr, "Alloc'd objects: %10lu (%10lu overflow )\n", alloc_count, alloc_count - est_obj_cnt); | |
391 | fprintf(stderr, "Total objects: %10lu (%10lu duplicates)\n", object_count, duplicate_count); | |
392 | fprintf(stderr, " blobs : %10lu (%10lu duplicates)\n", object_count_by_type[OBJ_BLOB], duplicate_count_by_type[OBJ_BLOB]); | |
393 | fprintf(stderr, " trees : %10lu (%10lu duplicates)\n", object_count_by_type[OBJ_TREE], duplicate_count_by_type[OBJ_TREE]); | |
394 | fprintf(stderr, " commits: %10lu (%10lu duplicates)\n", object_count_by_type[OBJ_COMMIT], duplicate_count_by_type[OBJ_COMMIT]); | |
395 | fprintf(stderr, " tags : %10lu (%10lu duplicates)\n", object_count_by_type[OBJ_TAG], duplicate_count_by_type[OBJ_TAG]); | |
396 | fprintf(stderr, "---------------------------------------------------\n"); | |
397 | ||
398 | stat(pack_name, &sb); | |
399 | fprintf(stderr, "Pack size: %10lu KiB\n", (unsigned long)(sb.st_size/1024)); | |
400 | stat(idx_name, &sb); | |
401 | fprintf(stderr, "Index size: %10lu KiB\n", (unsigned long)(sb.st_size/1024)); | |
402 | ||
403 | fprintf(stderr, "\n"); | |
db5e523f SP |
404 | |
405 | return 0; | |
406 | } |