]> git.ipfire.org Git - thirdparty/git.git/blame - pack-bitmap-write.c
pack-bitmap-write: pass ownership of intermediate bitmaps
[thirdparty/git.git] / pack-bitmap-write.c
CommitLineData
7cc8f971 1#include "cache.h"
cbd53a21 2#include "object-store.h"
7cc8f971
VM
3#include "commit.h"
4#include "tag.h"
5#include "diff.h"
6#include "revision.h"
7#include "list-objects.h"
8#include "progress.h"
9#include "pack-revindex.h"
10#include "pack.h"
11#include "pack-bitmap.h"
12#include "sha1-lookup.h"
13#include "pack-objects.h"
64043556 14#include "commit-reach.h"
7cc8f971
VM
15
16struct bitmapped_commit {
17 struct commit *commit;
18 struct ewah_bitmap *bitmap;
19 struct ewah_bitmap *write_as;
20 int flags;
21 int xor_offset;
22 uint32_t commit_pos;
23};
24
25struct bitmap_writer {
26 struct ewah_bitmap *commits;
27 struct ewah_bitmap *trees;
28 struct ewah_bitmap *blobs;
29 struct ewah_bitmap *tags;
30
d2bc62b1
JK
31 kh_oid_map_t *bitmaps;
32 kh_oid_map_t *reused;
7cc8f971
VM
33 struct packing_data *to_pack;
34
35 struct bitmapped_commit *selected;
36 unsigned int selected_nr, selected_alloc;
37
38 struct progress *progress;
39 int show_progress;
825544a3 40 unsigned char pack_checksum[GIT_MAX_RAWSZ];
7cc8f971
VM
41};
42
43static struct bitmap_writer writer;
44
45void bitmap_writer_show_progress(int show)
46{
47 writer.show_progress = show;
48}
49
50/**
51 * Build the initial type index for the packfile
52 */
06af3bba
NTND
53void bitmap_writer_build_type_index(struct packing_data *to_pack,
54 struct pack_idx_entry **index,
7cc8f971
VM
55 uint32_t index_nr)
56{
57 uint32_t i;
58
59 writer.commits = ewah_new();
60 writer.trees = ewah_new();
61 writer.blobs = ewah_new();
62 writer.tags = ewah_new();
06af3bba 63 ALLOC_ARRAY(to_pack->in_pack_pos, to_pack->nr_objects);
7cc8f971
VM
64
65 for (i = 0; i < index_nr; ++i) {
66 struct object_entry *entry = (struct object_entry *)index[i];
67 enum object_type real_type;
68
06af3bba 69 oe_set_in_pack_pos(to_pack, entry, i);
7cc8f971 70
fd9b1bae 71 switch (oe_type(entry)) {
7cc8f971
VM
72 case OBJ_COMMIT:
73 case OBJ_TREE:
74 case OBJ_BLOB:
75 case OBJ_TAG:
fd9b1bae 76 real_type = oe_type(entry);
7cc8f971
VM
77 break;
78
79 default:
7c141127 80 real_type = oid_object_info(to_pack->repo,
0df8e965 81 &entry->idx.oid, NULL);
7cc8f971
VM
82 break;
83 }
84
85 switch (real_type) {
86 case OBJ_COMMIT:
87 ewah_set(writer.commits, i);
88 break;
89
90 case OBJ_TREE:
91 ewah_set(writer.trees, i);
92 break;
93
94 case OBJ_BLOB:
95 ewah_set(writer.blobs, i);
96 break;
97
98 case OBJ_TAG:
99 ewah_set(writer.tags, i);
100 break;
101
102 default:
103 die("Missing type information for %s (%d/%d)",
e6a492b7 104 oid_to_hex(&entry->idx.oid), real_type,
fd9b1bae 105 oe_type(entry));
7cc8f971
VM
106 }
107 }
108}
109
110/**
111 * Compute the actual bitmaps
112 */
7cc8f971
VM
113
114static inline void push_bitmapped_commit(struct commit *commit, struct ewah_bitmap *reused)
115{
116 if (writer.selected_nr >= writer.selected_alloc) {
117 writer.selected_alloc = (writer.selected_alloc + 32) * 2;
2756ca43 118 REALLOC_ARRAY(writer.selected, writer.selected_alloc);
7cc8f971
VM
119 }
120
121 writer.selected[writer.selected_nr].commit = commit;
122 writer.selected[writer.selected_nr].bitmap = reused;
123 writer.selected[writer.selected_nr].flags = 0;
124
125 writer.selected_nr++;
126}
127
05805d74 128static uint32_t find_object_pos(const struct object_id *oid)
7cc8f971 129{
3a37876b 130 struct object_entry *entry = packlist_find(writer.to_pack, oid);
7cc8f971
VM
131
132 if (!entry) {
133 die("Failed to write bitmap index. Packfile doesn't have full closure "
05805d74 134 "(object %s is missing)", oid_to_hex(oid));
7cc8f971
VM
135 }
136
06af3bba 137 return oe_in_pack_pos(writer.to_pack, entry);
7cc8f971
VM
138}
139
7cc8f971
VM
140static void compute_xor_offsets(void)
141{
142 static const int MAX_XOR_OFFSET_SEARCH = 10;
143
144 int i, next = 0;
145
146 while (next < writer.selected_nr) {
147 struct bitmapped_commit *stored = &writer.selected[next];
148
149 int best_offset = 0;
150 struct ewah_bitmap *best_bitmap = stored->bitmap;
151 struct ewah_bitmap *test_xor;
152
153 for (i = 1; i <= MAX_XOR_OFFSET_SEARCH; ++i) {
154 int curr = next - i;
155
156 if (curr < 0)
157 break;
158
159 test_xor = ewah_pool_new();
160 ewah_xor(writer.selected[curr].bitmap, stored->bitmap, test_xor);
161
162 if (test_xor->buffer_size < best_bitmap->buffer_size) {
163 if (best_bitmap != stored->bitmap)
164 ewah_pool_free(best_bitmap);
165
166 best_bitmap = test_xor;
167 best_offset = i;
168 } else {
169 ewah_pool_free(test_xor);
170 }
171 }
172
173 stored->xor_offset = best_offset;
174 stored->write_as = best_bitmap;
175
176 next++;
177 }
178}
179
4a9c5817
JK
180struct bb_commit {
181 struct commit_list *children;
182 struct bitmap *bitmap;
183 unsigned selected:1;
184 unsigned idx; /* within selected array */
185};
7cc8f971 186
4a9c5817 187define_commit_slab(bb_data, struct bb_commit);
7cc8f971 188
4a9c5817
JK
189struct bitmap_builder {
190 struct bb_data data;
191 struct commit **commits;
192 size_t commits_nr, commits_alloc;
193};
7cc8f971 194
4a9c5817
JK
195static void bitmap_builder_init(struct bitmap_builder *bb,
196 struct bitmap_writer *writer)
197{
198 struct rev_info revs;
199 struct commit *commit;
200 unsigned int i;
7cc8f971 201
4a9c5817
JK
202 memset(bb, 0, sizeof(*bb));
203 init_bb_data(&bb->data);
7cc8f971 204
7cc8f971 205 reset_revision_walk();
4a9c5817
JK
206 repo_init_revisions(writer->to_pack->repo, &revs, NULL);
207 revs.topo_order = 1;
208
209 for (i = 0; i < writer->selected_nr; i++) {
210 struct commit *c = writer->selected[i].commit;
211 struct bb_commit *ent = bb_data_at(&bb->data, c);
212 ent->selected = 1;
213 ent->idx = i;
214 add_pending_object(&revs, &c->object, "");
215 }
7cc8f971 216
4a9c5817
JK
217 if (prepare_revision_walk(&revs))
218 die("revision walk setup failed");
7cc8f971 219
4a9c5817
JK
220 while ((commit = get_revision(&revs))) {
221 struct commit_list *p;
7cc8f971 222
4a9c5817 223 parse_commit_or_die(commit);
7cc8f971 224
4a9c5817
JK
225 ALLOC_GROW(bb->commits, bb->commits_nr + 1, bb->commits_alloc);
226 bb->commits[bb->commits_nr++] = commit;
7cc8f971 227
4a9c5817
JK
228 for (p = commit->parents; p; p = p->next) {
229 struct bb_commit *ent = bb_data_at(&bb->data, p->item);
230 commit_list_insert(commit, &ent->children);
231 }
232 }
233}
234
235static void bitmap_builder_clear(struct bitmap_builder *bb)
236{
237 clear_bb_data(&bb->data);
238 free(bb->commits);
239 bb->commits_nr = bb->commits_alloc = 0;
240}
241
242static void fill_bitmap_tree(struct bitmap *bitmap,
243 struct tree *tree)
244{
245 uint32_t pos;
246 struct tree_desc desc;
247 struct name_entry entry;
248
249 /*
250 * If our bit is already set, then there is nothing to do. Both this
251 * tree and all of its children will be set.
252 */
253 pos = find_object_pos(&tree->object.oid);
254 if (bitmap_get(bitmap, pos))
255 return;
256 bitmap_set(bitmap, pos);
257
258 if (parse_tree(tree) < 0)
259 die("unable to load tree object %s",
260 oid_to_hex(&tree->object.oid));
261 init_tree_desc(&desc, tree->buffer, tree->size);
262
263 while (tree_entry(&desc, &entry)) {
264 switch (object_type(entry.mode)) {
265 case OBJ_TREE:
266 fill_bitmap_tree(bitmap,
267 lookup_tree(the_repository, &entry.oid));
268 break;
269 case OBJ_BLOB:
270 bitmap_set(bitmap, find_object_pos(&entry.oid));
271 break;
272 default:
273 /* Gitlink, etc; not reachable */
274 break;
275 }
276 }
7cc8f971 277
4a9c5817
JK
278 free_tree_buffer(tree);
279}
7cc8f971 280
4a9c5817
JK
281static void fill_bitmap_commit(struct bb_commit *ent,
282 struct commit *commit)
283{
284 if (!ent->bitmap)
285 ent->bitmap = bitmap_new();
286
287 /*
288 * mark ourselves, but do not bother with parents; their values
289 * will already have been propagated to us
290 */
291 bitmap_set(ent->bitmap, find_object_pos(&commit->object.oid));
292 fill_bitmap_tree(ent->bitmap, get_commit_tree(commit));
293}
7cc8f971 294
4a9c5817
JK
295static void store_selected(struct bb_commit *ent, struct commit *commit)
296{
297 struct bitmapped_commit *stored = &writer.selected[ent->idx];
298 khiter_t hash_pos;
299 int hash_ret;
300
301 /*
302 * the "reuse bitmaps" phase may have stored something here, but
303 * our new algorithm doesn't use it. Drop it.
304 */
305 if (stored->bitmap)
306 ewah_free(stored->bitmap);
307
308 stored->bitmap = bitmap_to_ewah(ent->bitmap);
309
310 hash_pos = kh_put_oid_map(writer.bitmaps, commit->object.oid, &hash_ret);
311 if (hash_ret == 0)
312 die("Duplicate entry when writing index: %s",
313 oid_to_hex(&commit->object.oid));
314 kh_value(writer.bitmaps, hash_pos) = stored;
315}
7cc8f971 316
4a9c5817
JK
317void bitmap_writer_build(struct packing_data *to_pack)
318{
319 struct bitmap_builder bb;
320 size_t i;
321 int nr_stored = 0; /* for progress */
7cc8f971 322
4a9c5817
JK
323 writer.bitmaps = kh_init_oid_map();
324 writer.to_pack = to_pack;
325
326 if (writer.show_progress)
327 writer.progress = start_progress("Building bitmaps", writer.selected_nr);
328 trace2_region_enter("pack-bitmap-write", "building_bitmaps_total",
329 the_repository);
7cc8f971 330
4a9c5817
JK
331 bitmap_builder_init(&bb, &writer);
332 for (i = bb.commits_nr; i > 0; i--) {
333 struct commit *commit = bb.commits[i-1];
334 struct bb_commit *ent = bb_data_at(&bb.data, commit);
335 struct commit *child;
010e5eac 336 int reused = 0;
7cc8f971 337
4a9c5817 338 fill_bitmap_commit(ent, commit);
7cc8f971 339
4a9c5817
JK
340 if (ent->selected) {
341 store_selected(ent, commit);
342 nr_stored++;
343 display_progress(writer.progress, nr_stored);
344 }
345
346 while ((child = pop_commit(&ent->children))) {
347 struct bb_commit *child_ent =
348 bb_data_at(&bb.data, child);
349
350 if (child_ent->bitmap)
351 bitmap_or(child_ent->bitmap, ent->bitmap);
010e5eac 352 else if (reused)
4a9c5817 353 child_ent->bitmap = bitmap_dup(ent->bitmap);
010e5eac
JK
354 else {
355 child_ent->bitmap = ent->bitmap;
356 reused = 1;
357 }
4a9c5817 358 }
010e5eac
JK
359 if (!reused)
360 bitmap_free(ent->bitmap);
4a9c5817 361 ent->bitmap = NULL;
7cc8f971 362 }
4a9c5817
JK
363 bitmap_builder_clear(&bb);
364
365 trace2_region_leave("pack-bitmap-write", "building_bitmaps_total",
366 the_repository);
7cc8f971 367
7cc8f971
VM
368 stop_progress(&writer.progress);
369
370 compute_xor_offsets();
371}
372
373/**
374 * Select the commits that will be bitmapped
375 */
376static inline unsigned int next_commit_index(unsigned int idx)
377{
378 static const unsigned int MIN_COMMITS = 100;
379 static const unsigned int MAX_COMMITS = 5000;
380
381 static const unsigned int MUST_REGION = 100;
382 static const unsigned int MIN_REGION = 20000;
383
384 unsigned int offset, next;
385
386 if (idx <= MUST_REGION)
387 return 0;
388
389 if (idx <= MIN_REGION) {
390 offset = idx - MUST_REGION;
391 return (offset < MIN_COMMITS) ? offset : MIN_COMMITS;
392 }
393
394 offset = idx - MIN_REGION;
395 next = (offset < MAX_COMMITS) ? offset : MAX_COMMITS;
396
397 return (next > MIN_COMMITS) ? next : MIN_COMMITS;
398}
399
400static int date_compare(const void *_a, const void *_b)
401{
402 struct commit *a = *(struct commit **)_a;
403 struct commit *b = *(struct commit **)_b;
404 return (long)b->date - (long)a->date;
405}
406
407void bitmap_writer_reuse_bitmaps(struct packing_data *to_pack)
408{
3ae5fa07 409 struct bitmap_index *bitmap_git;
7c141127 410 if (!(bitmap_git = prepare_bitmap_git(to_pack->repo)))
7cc8f971
VM
411 return;
412
d2bc62b1 413 writer.reused = kh_init_oid_map();
3ae5fa07
JT
414 rebuild_existing_bitmaps(bitmap_git, to_pack, writer.reused,
415 writer.show_progress);
f3c23db2
JT
416 /*
417 * NEEDSWORK: rebuild_existing_bitmaps() makes writer.reused reference
418 * some bitmaps in bitmap_git, so we can't free the latter.
419 */
7cc8f971
VM
420}
421
05805d74 422static struct ewah_bitmap *find_reused_bitmap(const struct object_id *oid)
7cc8f971
VM
423{
424 khiter_t hash_pos;
425
426 if (!writer.reused)
427 return NULL;
428
d2bc62b1 429 hash_pos = kh_get_oid_map(writer.reused, *oid);
7cc8f971
VM
430 if (hash_pos >= kh_end(writer.reused))
431 return NULL;
432
433 return kh_value(writer.reused, hash_pos);
434}
435
436void bitmap_writer_select_commits(struct commit **indexed_commits,
437 unsigned int indexed_commits_nr,
438 int max_bitmaps)
439{
440 unsigned int i = 0, j, next;
441
9ed0d8d6 442 QSORT(indexed_commits, indexed_commits_nr, date_compare);
7cc8f971
VM
443
444 if (writer.show_progress)
445 writer.progress = start_progress("Selecting bitmap commits", 0);
446
447 if (indexed_commits_nr < 100) {
448 for (i = 0; i < indexed_commits_nr; ++i)
449 push_bitmapped_commit(indexed_commits[i], NULL);
450 return;
451 }
452
453 for (;;) {
454 struct ewah_bitmap *reused_bitmap = NULL;
455 struct commit *chosen = NULL;
456
457 next = next_commit_index(i);
458
459 if (i + next >= indexed_commits_nr)
460 break;
461
462 if (max_bitmaps > 0 && writer.selected_nr >= max_bitmaps) {
463 writer.selected_nr = max_bitmaps;
464 break;
465 }
466
467 if (next == 0) {
468 chosen = indexed_commits[i];
05805d74 469 reused_bitmap = find_reused_bitmap(&chosen->object.oid);
7cc8f971
VM
470 } else {
471 chosen = indexed_commits[i + next];
472
473 for (j = 0; j <= next; ++j) {
474 struct commit *cm = indexed_commits[i + j];
475
05805d74 476 reused_bitmap = find_reused_bitmap(&cm->object.oid);
7cc8f971
VM
477 if (reused_bitmap || (cm->object.flags & NEEDS_BITMAP) != 0) {
478 chosen = cm;
479 break;
480 }
481
482 if (cm->parents && cm->parents->next)
483 chosen = cm;
484 }
485 }
486
487 push_bitmapped_commit(chosen, reused_bitmap);
488
489 i += next + 1;
490 display_progress(writer.progress, i);
491 }
492
493 stop_progress(&writer.progress);
494}
495
496
98a3beab 497static int hashwrite_ewah_helper(void *f, const void *buf, size_t len)
7cc8f971 498{
98a3beab 499 /* hashwrite will die on error */
500 hashwrite(f, buf, len);
7cc8f971
VM
501 return len;
502}
503
504/**
505 * Write the bitmap index to disk
506 */
98a3beab 507static inline void dump_bitmap(struct hashfile *f, struct ewah_bitmap *bitmap)
7cc8f971 508{
98a3beab 509 if (ewah_serialize_to(bitmap, hashwrite_ewah_helper, f) < 0)
7cc8f971
VM
510 die("Failed to write bitmap index");
511}
512
513static const unsigned char *sha1_access(size_t pos, void *table)
514{
515 struct pack_idx_entry **index = table;
e6a492b7 516 return index[pos]->oid.hash;
7cc8f971
VM
517}
518
98a3beab 519static void write_selected_commits_v1(struct hashfile *f,
7cc8f971
VM
520 struct pack_idx_entry **index,
521 uint32_t index_nr)
522{
523 int i;
524
525 for (i = 0; i < writer.selected_nr; ++i) {
526 struct bitmapped_commit *stored = &writer.selected[i];
7cc8f971
VM
527
528 int commit_pos =
ed1c9977 529 sha1_pos(stored->commit->object.oid.hash, index, index_nr, sha1_access);
7cc8f971
VM
530
531 if (commit_pos < 0)
033abf97 532 BUG("trying to write commit not in index");
7cc8f971 533
98a3beab 534 hashwrite_be32(f, commit_pos);
535 hashwrite_u8(f, stored->xor_offset);
536 hashwrite_u8(f, stored->flags);
7cc8f971 537
7cc8f971
VM
538 dump_bitmap(f, stored->write_as);
539 }
540}
541
98a3beab 542static void write_hash_cache(struct hashfile *f,
ae4f07fb
VM
543 struct pack_idx_entry **index,
544 uint32_t index_nr)
545{
546 uint32_t i;
547
548 for (i = 0; i < index_nr; ++i) {
549 struct object_entry *entry = (struct object_entry *)index[i];
7744a5d6 550 hashwrite_be32(f, entry->hash);
ae4f07fb
VM
551 }
552}
553
7cc8f971
VM
554void bitmap_writer_set_checksum(unsigned char *sha1)
555{
556 hashcpy(writer.pack_checksum, sha1);
557}
558
559void bitmap_writer_finish(struct pack_idx_entry **index,
560 uint32_t index_nr,
ae4f07fb
VM
561 const char *filename,
562 uint16_t options)
7cc8f971 563{
7cc8f971
VM
564 static uint16_t default_version = 1;
565 static uint16_t flags = BITMAP_OPT_FULL_DAG;
594fa999 566 struct strbuf tmp_file = STRBUF_INIT;
98a3beab 567 struct hashfile *f;
7cc8f971
VM
568
569 struct bitmap_disk_header header;
570
594fa999 571 int fd = odb_mkstemp(&tmp_file, "pack/tmp_bitmap_XXXXXX");
7cc8f971 572
98a3beab 573 f = hashfd(fd, tmp_file.buf);
7cc8f971
VM
574
575 memcpy(header.magic, BITMAP_IDX_SIGNATURE, sizeof(BITMAP_IDX_SIGNATURE));
576 header.version = htons(default_version);
ae4f07fb 577 header.options = htons(flags | options);
7cc8f971 578 header.entry_count = htonl(writer.selected_nr);
50546b15 579 hashcpy(header.checksum, writer.pack_checksum);
7cc8f971 580
0f4d6cad 581 hashwrite(f, &header, sizeof(header) - GIT_MAX_RAWSZ + the_hash_algo->rawsz);
7cc8f971
VM
582 dump_bitmap(f, writer.commits);
583 dump_bitmap(f, writer.trees);
584 dump_bitmap(f, writer.blobs);
585 dump_bitmap(f, writer.tags);
586 write_selected_commits_v1(f, index, index_nr);
587
ae4f07fb
VM
588 if (options & BITMAP_OPT_HASH_CACHE)
589 write_hash_cache(f, index, index_nr);
590
cfe83216 591 finalize_hashfile(f, NULL, CSUM_HASH_IN_STREAM | CSUM_FSYNC | CSUM_CLOSE);
7cc8f971 592
594fa999 593 if (adjust_shared_perm(tmp_file.buf))
7cc8f971
VM
594 die_errno("unable to make temporary bitmap file readable");
595
594fa999 596 if (rename(tmp_file.buf, filename))
7cc8f971 597 die_errno("unable to rename temporary bitmap file to '%s'", filename);
594fa999
JK
598
599 strbuf_release(&tmp_file);
7cc8f971 600}