]>
Commit | Line | Data |
---|---|---|
27225f2e | 1 | #include <ctype.h> |
c323ac7d LT |
2 | #include "cache.h" |
3 | #include "object.h" | |
4 | #include "delta.h" | |
c38138cd | 5 | #include "csum-file.h" |
c323ac7d | 6 | |
d116a45a | 7 | static const char pack_usage[] = "git-pack-objects [--window=N] [--depth=N] base-name < object-list"; |
c323ac7d LT |
8 | |
9 | enum object_type { | |
10 | OBJ_NONE, | |
11 | OBJ_COMMIT, | |
12 | OBJ_TREE, | |
13 | OBJ_BLOB, | |
c4584ae3 | 14 | OBJ_DELTA |
c323ac7d LT |
15 | }; |
16 | ||
17 | struct object_entry { | |
18 | unsigned char sha1[20]; | |
19 | unsigned long size; | |
20 | unsigned long offset; | |
21 | unsigned int depth; | |
27225f2e | 22 | unsigned int hash; |
c323ac7d LT |
23 | enum object_type type; |
24 | unsigned long delta_size; | |
25 | struct object_entry *delta; | |
26 | }; | |
27 | ||
28 | static struct object_entry **sorted_by_sha, **sorted_by_type; | |
29 | static struct object_entry *objects = NULL; | |
30 | static int nr_objects = 0, nr_alloc = 0; | |
c323ac7d | 31 | static const char *base_name; |
e1808845 | 32 | static unsigned char pack_file_sha1[20]; |
c323ac7d | 33 | |
c323ac7d LT |
34 | static void *delta_against(void *buf, unsigned long size, struct object_entry *entry) |
35 | { | |
36 | unsigned long othersize, delta_size; | |
37 | char type[10]; | |
38 | void *otherbuf = read_sha1_file(entry->delta->sha1, type, &othersize); | |
39 | void *delta_buf; | |
40 | ||
41 | if (!otherbuf) | |
42 | die("unable to read %s", sha1_to_hex(entry->delta->sha1)); | |
8ee378a0 JH |
43 | delta_buf = diff_delta(otherbuf, othersize, |
44 | buf, size, &delta_size, ~0UL); | |
75c42d8c | 45 | if (!delta_buf || delta_size != entry->delta_size) |
c323ac7d LT |
46 | die("delta size changed"); |
47 | free(buf); | |
48 | free(otherbuf); | |
49 | return delta_buf; | |
50 | } | |
51 | ||
c38138cd | 52 | static unsigned long write_object(struct sha1file *f, struct object_entry *entry) |
c323ac7d LT |
53 | { |
54 | unsigned long size; | |
55 | char type[10]; | |
56 | void *buf = read_sha1_file(entry->sha1, type, &size); | |
c4fb06c0 | 57 | char header[25]; |
c323ac7d LT |
58 | unsigned hdrlen, datalen; |
59 | ||
60 | if (!buf) | |
61 | die("unable to read %s", sha1_to_hex(entry->sha1)); | |
62 | if (size != entry->size) | |
63 | die("object %s size inconsistency (%lu vs %lu)", sha1_to_hex(entry->sha1), size, entry->size); | |
64 | ||
65 | /* | |
66 | * The object header is a byte of 'type' followed by four bytes of | |
67 | * length, except for deltas that has the 20 bytes of delta sha | |
68 | * instead. | |
69 | */ | |
70 | header[0] = ".CTB"[entry->type]; | |
c323ac7d LT |
71 | hdrlen = 5; |
72 | if (entry->delta) { | |
d38c3721 | 73 | header[0] = 'D'; |
c4fb06c0 | 74 | memcpy(header+5, entry->delta, 20); |
c323ac7d LT |
75 | buf = delta_against(buf, size, entry); |
76 | size = entry->delta_size; | |
c4fb06c0 | 77 | hdrlen = 25; |
c323ac7d | 78 | } |
c4fb06c0 LT |
79 | datalen = htonl(size); |
80 | memcpy(header+1, &datalen, 4); | |
c38138cd LT |
81 | sha1write(f, header, hdrlen); |
82 | datalen = sha1write_compressed(f, buf, size); | |
c323ac7d LT |
83 | free(buf); |
84 | return hdrlen + datalen; | |
85 | } | |
86 | ||
87 | static void write_pack_file(void) | |
88 | { | |
89 | int i; | |
c38138cd | 90 | struct sha1file *f = sha1create("%s.%s", base_name, "pack"); |
c323ac7d LT |
91 | unsigned long offset = 0; |
92 | unsigned long mb; | |
93 | ||
94 | for (i = 0; i < nr_objects; i++) { | |
95 | struct object_entry *entry = objects + i; | |
96 | entry->offset = offset; | |
97 | offset += write_object(f, entry); | |
98 | } | |
e1808845 | 99 | sha1close(f, pack_file_sha1, 1); |
c323ac7d LT |
100 | mb = offset >> 20; |
101 | offset &= 0xfffff; | |
102 | } | |
103 | ||
104 | static void write_index_file(void) | |
105 | { | |
106 | int i; | |
c38138cd | 107 | struct sha1file *f = sha1create("%s.%s", base_name, "idx"); |
c323ac7d LT |
108 | struct object_entry **list = sorted_by_sha; |
109 | struct object_entry **last = list + nr_objects; | |
110 | unsigned int array[256]; | |
111 | ||
112 | /* | |
113 | * Write the first-level table (the list is sorted, | |
114 | * but we use a 256-entry lookup to be able to avoid | |
e1808845 | 115 | * having to do eight extra binary search iterations). |
c323ac7d LT |
116 | */ |
117 | for (i = 0; i < 256; i++) { | |
118 | struct object_entry **next = list; | |
119 | while (next < last) { | |
120 | struct object_entry *entry = *next; | |
121 | if (entry->sha1[0] != i) | |
122 | break; | |
123 | next++; | |
124 | } | |
125 | array[i] = htonl(next - sorted_by_sha); | |
126 | list = next; | |
127 | } | |
c38138cd | 128 | sha1write(f, array, 256 * sizeof(int)); |
c323ac7d LT |
129 | |
130 | /* | |
131 | * Write the actual SHA1 entries.. | |
132 | */ | |
133 | list = sorted_by_sha; | |
49397104 | 134 | for (i = 0; i < nr_objects; i++) { |
c323ac7d LT |
135 | struct object_entry *entry = *list++; |
136 | unsigned int offset = htonl(entry->offset); | |
c38138cd LT |
137 | sha1write(f, &offset, 4); |
138 | sha1write(f, entry->sha1, 20); | |
c323ac7d | 139 | } |
e1808845 LT |
140 | sha1write(f, pack_file_sha1, 20); |
141 | sha1close(f, NULL, 1); | |
c323ac7d LT |
142 | } |
143 | ||
27225f2e | 144 | static void add_object_entry(unsigned char *sha1, unsigned int hash) |
c323ac7d LT |
145 | { |
146 | unsigned int idx = nr_objects; | |
147 | struct object_entry *entry; | |
148 | ||
149 | if (idx >= nr_alloc) { | |
150 | unsigned int needed = (idx + 1024) * 3 / 2; | |
151 | objects = xrealloc(objects, needed * sizeof(*entry)); | |
152 | nr_alloc = needed; | |
153 | } | |
154 | entry = objects + idx; | |
155 | memset(entry, 0, sizeof(*entry)); | |
156 | memcpy(entry->sha1, sha1, 20); | |
27225f2e | 157 | entry->hash = hash; |
c323ac7d LT |
158 | nr_objects = idx+1; |
159 | } | |
160 | ||
161 | static void check_object(struct object_entry *entry) | |
162 | { | |
36e4d74a JH |
163 | char type[20]; |
164 | ||
165 | if (!sha1_object_info(entry->sha1, type, &entry->size)) { | |
166 | if (!strcmp(type, "commit")) { | |
167 | entry->type = OBJ_COMMIT; | |
168 | } else if (!strcmp(type, "tree")) { | |
169 | entry->type = OBJ_TREE; | |
170 | } else if (!strcmp(type, "blob")) { | |
171 | entry->type = OBJ_BLOB; | |
172 | } else | |
173 | die("unable to pack object %s of type %s", | |
174 | sha1_to_hex(entry->sha1), type); | |
175 | } | |
176 | else | |
177 | die("unable to get type of object %s", | |
178 | sha1_to_hex(entry->sha1)); | |
c323ac7d LT |
179 | } |
180 | ||
181 | static void get_object_details(void) | |
182 | { | |
183 | int i; | |
184 | struct object_entry *entry = objects; | |
185 | ||
186 | for (i = 0; i < nr_objects; i++) | |
187 | check_object(entry++); | |
188 | } | |
189 | ||
190 | typedef int (*entry_sort_t)(const struct object_entry *, const struct object_entry *); | |
191 | ||
192 | static entry_sort_t current_sort; | |
193 | ||
194 | static int sort_comparator(const void *_a, const void *_b) | |
195 | { | |
196 | struct object_entry *a = *(struct object_entry **)_a; | |
197 | struct object_entry *b = *(struct object_entry **)_b; | |
198 | return current_sort(a,b); | |
199 | } | |
200 | ||
201 | static struct object_entry **create_sorted_list(entry_sort_t sort) | |
202 | { | |
203 | struct object_entry **list = xmalloc(nr_objects * sizeof(struct object_entry *)); | |
204 | int i; | |
205 | ||
206 | for (i = 0; i < nr_objects; i++) | |
207 | list[i] = objects + i; | |
208 | current_sort = sort; | |
209 | qsort(list, nr_objects, sizeof(struct object_entry *), sort_comparator); | |
210 | return list; | |
211 | } | |
212 | ||
213 | static int sha1_sort(const struct object_entry *a, const struct object_entry *b) | |
214 | { | |
215 | return memcmp(a->sha1, b->sha1, 20); | |
216 | } | |
217 | ||
218 | static int type_size_sort(const struct object_entry *a, const struct object_entry *b) | |
219 | { | |
220 | if (a->type < b->type) | |
221 | return -1; | |
222 | if (a->type > b->type) | |
223 | return 1; | |
27225f2e LT |
224 | if (a->hash < b->hash) |
225 | return -1; | |
226 | if (a->hash > b->hash) | |
227 | return 1; | |
c323ac7d LT |
228 | if (a->size < b->size) |
229 | return -1; | |
230 | if (a->size > b->size) | |
231 | return 1; | |
232 | return a < b ? -1 : (a > b); | |
233 | } | |
234 | ||
235 | struct unpacked { | |
236 | struct object_entry *entry; | |
237 | void *data; | |
238 | }; | |
239 | ||
240 | /* | |
521a4f4c LT |
241 | * We search for deltas _backwards_ in a list sorted by type and |
242 | * by size, so that we see progressively smaller and smaller files. | |
243 | * That's because we prefer deltas to be from the bigger file | |
244 | * to the smaller - deletes are potentially cheaper, but perhaps | |
245 | * more importantly, the bigger file is likely the more recent | |
246 | * one. | |
c323ac7d | 247 | */ |
d116a45a | 248 | static int try_delta(struct unpacked *cur, struct unpacked *old, unsigned max_depth) |
c323ac7d LT |
249 | { |
250 | struct object_entry *cur_entry = cur->entry; | |
251 | struct object_entry *old_entry = old->entry; | |
521a4f4c | 252 | unsigned long size, oldsize, delta_size, sizediff; |
75c42d8c | 253 | long max_size; |
c323ac7d LT |
254 | void *delta_buf; |
255 | ||
256 | /* Don't bother doing diffs between different types */ | |
257 | if (cur_entry->type != old_entry->type) | |
258 | return -1; | |
259 | ||
c323ac7d | 260 | size = cur_entry->size; |
75c42d8c LT |
261 | if (size < 50) |
262 | return -1; | |
c323ac7d | 263 | oldsize = old_entry->size; |
521a4f4c LT |
264 | sizediff = oldsize > size ? oldsize - size : size - oldsize; |
265 | if (sizediff > size / 8) | |
c323ac7d | 266 | return -1; |
d116a45a LT |
267 | if (old_entry->depth >= max_depth) |
268 | return 0; | |
c323ac7d LT |
269 | |
270 | /* | |
271 | * NOTE! | |
272 | * | |
273 | * We always delta from the bigger to the smaller, since that's | |
274 | * more space-efficient (deletes don't have to say _what_ they | |
275 | * delete). | |
276 | */ | |
75c42d8c LT |
277 | max_size = size / 2 - 20; |
278 | if (cur_entry->delta) | |
279 | max_size = cur_entry->delta_size-1; | |
27225f2e LT |
280 | if (sizediff >= max_size) |
281 | return -1; | |
8ee378a0 JH |
282 | delta_buf = diff_delta(old->data, oldsize, |
283 | cur->data, size, &delta_size, max_size); | |
c323ac7d | 284 | if (!delta_buf) |
75c42d8c LT |
285 | return 0; |
286 | cur_entry->delta = old_entry; | |
287 | cur_entry->delta_size = delta_size; | |
d116a45a | 288 | cur_entry->depth = old_entry->depth + 1; |
c323ac7d | 289 | free(delta_buf); |
eb41ab11 | 290 | return 0; |
c323ac7d LT |
291 | } |
292 | ||
d116a45a | 293 | static void find_deltas(struct object_entry **list, int window, int depth) |
c323ac7d | 294 | { |
521a4f4c | 295 | int i, idx; |
c323ac7d LT |
296 | unsigned int array_size = window * sizeof(struct unpacked); |
297 | struct unpacked *array = xmalloc(array_size); | |
298 | ||
299 | memset(array, 0, array_size); | |
521a4f4c LT |
300 | i = nr_objects; |
301 | idx = 0; | |
302 | while (--i >= 0) { | |
c323ac7d LT |
303 | struct object_entry *entry = list[i]; |
304 | struct unpacked *n = array + idx; | |
305 | unsigned long size; | |
306 | char type[10]; | |
307 | int j; | |
308 | ||
309 | free(n->data); | |
310 | n->entry = entry; | |
311 | n->data = read_sha1_file(entry->sha1, type, &size); | |
312 | if (size != entry->size) | |
313 | die("object %s inconsistent object length (%lu vs %lu)", sha1_to_hex(entry->sha1), size, entry->size); | |
78817c15 LT |
314 | j = window; |
315 | while (--j > 0) { | |
316 | unsigned int other_idx = idx + j; | |
c323ac7d | 317 | struct unpacked *m; |
78817c15 LT |
318 | if (other_idx >= window) |
319 | other_idx -= window; | |
c323ac7d LT |
320 | m = array + other_idx; |
321 | if (!m->entry) | |
322 | break; | |
d116a45a | 323 | if (try_delta(n, m, depth) < 0) |
c323ac7d LT |
324 | break; |
325 | } | |
521a4f4c LT |
326 | idx++; |
327 | if (idx >= window) | |
328 | idx = 0; | |
c323ac7d LT |
329 | } |
330 | } | |
331 | ||
332 | int main(int argc, char **argv) | |
333 | { | |
27225f2e | 334 | char line[PATH_MAX + 20]; |
d116a45a | 335 | int window = 10, depth = 10; |
c323ac7d LT |
336 | int i; |
337 | ||
338 | for (i = 1; i < argc; i++) { | |
339 | const char *arg = argv[i]; | |
340 | ||
341 | if (*arg == '-') { | |
342 | if (!strncmp("--window=", arg, 9)) { | |
343 | char *end; | |
f846bbff | 344 | window = strtoul(arg+9, &end, 0); |
c323ac7d LT |
345 | if (!arg[9] || *end) |
346 | usage(pack_usage); | |
347 | continue; | |
348 | } | |
d116a45a LT |
349 | if (!strncmp("--depth=", arg, 8)) { |
350 | char *end; | |
351 | depth = strtoul(arg+8, &end, 0); | |
352 | if (!arg[8] || *end) | |
353 | usage(pack_usage); | |
354 | continue; | |
355 | } | |
c323ac7d LT |
356 | usage(pack_usage); |
357 | } | |
358 | if (base_name) | |
359 | usage(pack_usage); | |
360 | base_name = arg; | |
361 | } | |
362 | ||
363 | if (!base_name) | |
364 | usage(pack_usage); | |
365 | ||
366 | while (fgets(line, sizeof(line), stdin) != NULL) { | |
27225f2e LT |
367 | unsigned int hash; |
368 | char *p; | |
c323ac7d | 369 | unsigned char sha1[20]; |
27225f2e | 370 | |
c323ac7d LT |
371 | if (get_sha1_hex(line, sha1)) |
372 | die("expected sha1, got garbage"); | |
27225f2e LT |
373 | hash = 0; |
374 | p = line+40; | |
375 | while (*p) { | |
376 | unsigned char c = *p++; | |
377 | if (isspace(c)) | |
378 | continue; | |
379 | hash = hash * 11 + c; | |
380 | } | |
381 | add_object_entry(sha1, hash); | |
c323ac7d LT |
382 | } |
383 | get_object_details(); | |
384 | ||
385 | printf("Packing %d objects\n", nr_objects); | |
386 | ||
387 | sorted_by_sha = create_sorted_list(sha1_sort); | |
388 | sorted_by_type = create_sorted_list(type_size_sort); | |
d116a45a LT |
389 | if (window && depth) |
390 | find_deltas(sorted_by_type, window+1, depth); | |
c323ac7d LT |
391 | |
392 | write_pack_file(); | |
393 | write_index_file(); | |
394 | return 0; | |
395 | } |