]> git.ipfire.org Git - thirdparty/git.git/blob - commit-graph.c
commit-graph: fix regression when computing Bloom filters
[thirdparty/git.git] / commit-graph.c
1 #include "git-compat-util.h"
2 #include "config.h"
3 #include "lockfile.h"
4 #include "pack.h"
5 #include "packfile.h"
6 #include "commit.h"
7 #include "object.h"
8 #include "refs.h"
9 #include "revision.h"
10 #include "sha1-lookup.h"
11 #include "commit-graph.h"
12 #include "object-store.h"
13 #include "alloc.h"
14 #include "hashmap.h"
15 #include "replace-object.h"
16 #include "progress.h"
17 #include "bloom.h"
18 #include "commit-slab.h"
19 #include "shallow.h"
20 #include "json-writer.h"
21 #include "trace2.h"
22
23 void git_test_write_commit_graph_or_die(void)
24 {
25 int flags = 0;
26 if (!git_env_bool(GIT_TEST_COMMIT_GRAPH, 0))
27 return;
28
29 if (git_env_bool(GIT_TEST_COMMIT_GRAPH_CHANGED_PATHS, 0))
30 flags = COMMIT_GRAPH_WRITE_BLOOM_FILTERS;
31
32 if (write_commit_graph_reachable(the_repository->objects->odb,
33 flags, NULL))
34 die("failed to write commit-graph under GIT_TEST_COMMIT_GRAPH");
35 }
36
37 #define GRAPH_SIGNATURE 0x43475048 /* "CGPH" */
38 #define GRAPH_CHUNKID_OIDFANOUT 0x4f494446 /* "OIDF" */
39 #define GRAPH_CHUNKID_OIDLOOKUP 0x4f49444c /* "OIDL" */
40 #define GRAPH_CHUNKID_DATA 0x43444154 /* "CDAT" */
41 #define GRAPH_CHUNKID_EXTRAEDGES 0x45444745 /* "EDGE" */
42 #define GRAPH_CHUNKID_BLOOMINDEXES 0x42494458 /* "BIDX" */
43 #define GRAPH_CHUNKID_BLOOMDATA 0x42444154 /* "BDAT" */
44 #define GRAPH_CHUNKID_BASE 0x42415345 /* "BASE" */
45 #define MAX_NUM_CHUNKS 7
46
47 #define GRAPH_DATA_WIDTH (the_hash_algo->rawsz + 16)
48
49 #define GRAPH_VERSION_1 0x1
50 #define GRAPH_VERSION GRAPH_VERSION_1
51
52 #define GRAPH_EXTRA_EDGES_NEEDED 0x80000000
53 #define GRAPH_EDGE_LAST_MASK 0x7fffffff
54 #define GRAPH_PARENT_NONE 0x70000000
55
56 #define GRAPH_LAST_EDGE 0x80000000
57
58 #define GRAPH_HEADER_SIZE 8
59 #define GRAPH_FANOUT_SIZE (4 * 256)
60 #define GRAPH_CHUNKLOOKUP_WIDTH 12
61 #define GRAPH_MIN_SIZE (GRAPH_HEADER_SIZE + 4 * GRAPH_CHUNKLOOKUP_WIDTH \
62 + GRAPH_FANOUT_SIZE + the_hash_algo->rawsz)
63
64 /* Remember to update object flag allocation in object.h */
65 #define REACHABLE (1u<<15)
66
67 /* Keep track of the order in which commits are added to our list. */
68 define_commit_slab(commit_pos, int);
69 static struct commit_pos commit_pos = COMMIT_SLAB_INIT(1, commit_pos);
70
71 static void set_commit_pos(struct repository *r, const struct object_id *oid)
72 {
73 static int32_t max_pos;
74 struct commit *commit = lookup_commit(r, oid);
75
76 if (!commit)
77 return; /* should never happen, but be lenient */
78
79 *commit_pos_at(&commit_pos, commit) = max_pos++;
80 }
81
82 static int commit_pos_cmp(const void *va, const void *vb)
83 {
84 const struct commit *a = *(const struct commit **)va;
85 const struct commit *b = *(const struct commit **)vb;
86 return commit_pos_at(&commit_pos, a) -
87 commit_pos_at(&commit_pos, b);
88 }
89
90 define_commit_slab(commit_graph_data_slab, struct commit_graph_data);
91 static struct commit_graph_data_slab commit_graph_data_slab =
92 COMMIT_SLAB_INIT(1, commit_graph_data_slab);
93
94 uint32_t commit_graph_position(const struct commit *c)
95 {
96 struct commit_graph_data *data =
97 commit_graph_data_slab_peek(&commit_graph_data_slab, c);
98
99 return data ? data->graph_pos : COMMIT_NOT_FROM_GRAPH;
100 }
101
102 uint32_t commit_graph_generation(const struct commit *c)
103 {
104 struct commit_graph_data *data =
105 commit_graph_data_slab_peek(&commit_graph_data_slab, c);
106
107 if (!data)
108 return GENERATION_NUMBER_INFINITY;
109 else if (data->graph_pos == COMMIT_NOT_FROM_GRAPH)
110 return GENERATION_NUMBER_INFINITY;
111
112 return data->generation;
113 }
114
115 static struct commit_graph_data *commit_graph_data_at(const struct commit *c)
116 {
117 unsigned int i, nth_slab;
118 struct commit_graph_data *data =
119 commit_graph_data_slab_peek(&commit_graph_data_slab, c);
120
121 if (data)
122 return data;
123
124 nth_slab = c->index / commit_graph_data_slab.slab_size;
125 data = commit_graph_data_slab_at(&commit_graph_data_slab, c);
126
127 /*
128 * commit-slab initializes elements with zero, overwrite this with
129 * COMMIT_NOT_FROM_GRAPH for graph_pos.
130 *
131 * We avoid initializing generation with checking if graph position
132 * is not COMMIT_NOT_FROM_GRAPH.
133 */
134 for (i = 0; i < commit_graph_data_slab.slab_size; i++) {
135 commit_graph_data_slab.slab[nth_slab][i].graph_pos =
136 COMMIT_NOT_FROM_GRAPH;
137 }
138
139 return data;
140 }
141
142 /*
143 * Should be used only while writing commit-graph as it compares
144 * generation value of commits by directly accessing commit-slab.
145 */
146 static int commit_gen_cmp(const void *va, const void *vb)
147 {
148 const struct commit *a = *(const struct commit **)va;
149 const struct commit *b = *(const struct commit **)vb;
150
151 uint32_t generation_a = commit_graph_data_at(a)->generation;
152 uint32_t generation_b = commit_graph_data_at(b)->generation;
153 /* lower generation commits first */
154 if (generation_a < generation_b)
155 return -1;
156 else if (generation_a > generation_b)
157 return 1;
158
159 /* use date as a heuristic when generations are equal */
160 if (a->date < b->date)
161 return -1;
162 else if (a->date > b->date)
163 return 1;
164 return 0;
165 }
166
167 char *get_commit_graph_filename(struct object_directory *obj_dir)
168 {
169 return xstrfmt("%s/info/commit-graph", obj_dir->path);
170 }
171
172 static char *get_split_graph_filename(struct object_directory *odb,
173 const char *oid_hex)
174 {
175 return xstrfmt("%s/info/commit-graphs/graph-%s.graph", odb->path,
176 oid_hex);
177 }
178
179 char *get_commit_graph_chain_filename(struct object_directory *odb)
180 {
181 return xstrfmt("%s/info/commit-graphs/commit-graph-chain", odb->path);
182 }
183
184 static uint8_t oid_version(void)
185 {
186 switch (hash_algo_by_ptr(the_hash_algo)) {
187 case GIT_HASH_SHA1:
188 return 1;
189 case GIT_HASH_SHA256:
190 return 2;
191 default:
192 die(_("invalid hash version"));
193 }
194 }
195
196 static struct commit_graph *alloc_commit_graph(void)
197 {
198 struct commit_graph *g = xcalloc(1, sizeof(*g));
199
200 return g;
201 }
202
203 extern int read_replace_refs;
204
205 static int commit_graph_compatible(struct repository *r)
206 {
207 if (!r->gitdir)
208 return 0;
209
210 if (read_replace_refs) {
211 prepare_replace_object(r);
212 if (hashmap_get_size(&r->objects->replace_map->map))
213 return 0;
214 }
215
216 prepare_commit_graft(r);
217 if (r->parsed_objects &&
218 (r->parsed_objects->grafts_nr || r->parsed_objects->substituted_parent))
219 return 0;
220 if (is_repository_shallow(r))
221 return 0;
222
223 return 1;
224 }
225
226 int open_commit_graph(const char *graph_file, int *fd, struct stat *st)
227 {
228 *fd = git_open(graph_file);
229 if (*fd < 0)
230 return 0;
231 if (fstat(*fd, st)) {
232 close(*fd);
233 return 0;
234 }
235 return 1;
236 }
237
238 struct commit_graph *load_commit_graph_one_fd_st(struct repository *r,
239 int fd, struct stat *st,
240 struct object_directory *odb)
241 {
242 void *graph_map;
243 size_t graph_size;
244 struct commit_graph *ret;
245
246 graph_size = xsize_t(st->st_size);
247
248 if (graph_size < GRAPH_MIN_SIZE) {
249 close(fd);
250 error(_("commit-graph file is too small"));
251 return NULL;
252 }
253 graph_map = xmmap(NULL, graph_size, PROT_READ, MAP_PRIVATE, fd, 0);
254 close(fd);
255 ret = parse_commit_graph(r, graph_map, graph_size);
256
257 if (ret)
258 ret->odb = odb;
259 else
260 munmap(graph_map, graph_size);
261
262 return ret;
263 }
264
265 static int verify_commit_graph_lite(struct commit_graph *g)
266 {
267 /*
268 * Basic validation shared between parse_commit_graph()
269 * which'll be called every time the graph is used, and the
270 * much more expensive verify_commit_graph() used by
271 * "commit-graph verify".
272 *
273 * There should only be very basic checks here to ensure that
274 * we don't e.g. segfault in fill_commit_in_graph(), but
275 * because this is a very hot codepath nothing that e.g. loops
276 * over g->num_commits, or runs a checksum on the commit-graph
277 * itself.
278 */
279 if (!g->chunk_oid_fanout) {
280 error("commit-graph is missing the OID Fanout chunk");
281 return 1;
282 }
283 if (!g->chunk_oid_lookup) {
284 error("commit-graph is missing the OID Lookup chunk");
285 return 1;
286 }
287 if (!g->chunk_commit_data) {
288 error("commit-graph is missing the Commit Data chunk");
289 return 1;
290 }
291
292 return 0;
293 }
294
295 struct commit_graph *parse_commit_graph(struct repository *r,
296 void *graph_map, size_t graph_size)
297 {
298 const unsigned char *data, *chunk_lookup;
299 uint32_t i;
300 struct commit_graph *graph;
301 uint64_t next_chunk_offset;
302 uint32_t graph_signature;
303 unsigned char graph_version, hash_version;
304
305 if (!graph_map)
306 return NULL;
307
308 if (graph_size < GRAPH_MIN_SIZE)
309 return NULL;
310
311 data = (const unsigned char *)graph_map;
312
313 graph_signature = get_be32(data);
314 if (graph_signature != GRAPH_SIGNATURE) {
315 error(_("commit-graph signature %X does not match signature %X"),
316 graph_signature, GRAPH_SIGNATURE);
317 return NULL;
318 }
319
320 graph_version = *(unsigned char*)(data + 4);
321 if (graph_version != GRAPH_VERSION) {
322 error(_("commit-graph version %X does not match version %X"),
323 graph_version, GRAPH_VERSION);
324 return NULL;
325 }
326
327 hash_version = *(unsigned char*)(data + 5);
328 if (hash_version != oid_version()) {
329 error(_("commit-graph hash version %X does not match version %X"),
330 hash_version, oid_version());
331 return NULL;
332 }
333
334 prepare_repo_settings(r);
335
336 graph = alloc_commit_graph();
337
338 graph->hash_len = the_hash_algo->rawsz;
339 graph->num_chunks = *(unsigned char*)(data + 6);
340 graph->data = graph_map;
341 graph->data_len = graph_size;
342
343 if (graph_size < GRAPH_HEADER_SIZE +
344 (graph->num_chunks + 1) * GRAPH_CHUNKLOOKUP_WIDTH +
345 GRAPH_FANOUT_SIZE + the_hash_algo->rawsz) {
346 error(_("commit-graph file is too small to hold %u chunks"),
347 graph->num_chunks);
348 free(graph);
349 return NULL;
350 }
351
352 chunk_lookup = data + 8;
353 next_chunk_offset = get_be64(chunk_lookup + 4);
354 for (i = 0; i < graph->num_chunks; i++) {
355 uint32_t chunk_id;
356 uint64_t chunk_offset = next_chunk_offset;
357 int chunk_repeated = 0;
358
359 chunk_id = get_be32(chunk_lookup + 0);
360
361 chunk_lookup += GRAPH_CHUNKLOOKUP_WIDTH;
362 next_chunk_offset = get_be64(chunk_lookup + 4);
363
364 if (chunk_offset > graph_size - the_hash_algo->rawsz) {
365 error(_("commit-graph improper chunk offset %08x%08x"), (uint32_t)(chunk_offset >> 32),
366 (uint32_t)chunk_offset);
367 goto free_and_return;
368 }
369
370 switch (chunk_id) {
371 case GRAPH_CHUNKID_OIDFANOUT:
372 if (graph->chunk_oid_fanout)
373 chunk_repeated = 1;
374 else
375 graph->chunk_oid_fanout = (uint32_t*)(data + chunk_offset);
376 break;
377
378 case GRAPH_CHUNKID_OIDLOOKUP:
379 if (graph->chunk_oid_lookup)
380 chunk_repeated = 1;
381 else {
382 graph->chunk_oid_lookup = data + chunk_offset;
383 graph->num_commits = (next_chunk_offset - chunk_offset)
384 / graph->hash_len;
385 }
386 break;
387
388 case GRAPH_CHUNKID_DATA:
389 if (graph->chunk_commit_data)
390 chunk_repeated = 1;
391 else
392 graph->chunk_commit_data = data + chunk_offset;
393 break;
394
395 case GRAPH_CHUNKID_EXTRAEDGES:
396 if (graph->chunk_extra_edges)
397 chunk_repeated = 1;
398 else
399 graph->chunk_extra_edges = data + chunk_offset;
400 break;
401
402 case GRAPH_CHUNKID_BASE:
403 if (graph->chunk_base_graphs)
404 chunk_repeated = 1;
405 else
406 graph->chunk_base_graphs = data + chunk_offset;
407 break;
408
409 case GRAPH_CHUNKID_BLOOMINDEXES:
410 if (graph->chunk_bloom_indexes)
411 chunk_repeated = 1;
412 else if (r->settings.commit_graph_read_changed_paths)
413 graph->chunk_bloom_indexes = data + chunk_offset;
414 break;
415
416 case GRAPH_CHUNKID_BLOOMDATA:
417 if (graph->chunk_bloom_data)
418 chunk_repeated = 1;
419 else if (r->settings.commit_graph_read_changed_paths) {
420 uint32_t hash_version;
421 graph->chunk_bloom_data = data + chunk_offset;
422 hash_version = get_be32(data + chunk_offset);
423
424 if (hash_version != 1)
425 break;
426
427 graph->bloom_filter_settings = xmalloc(sizeof(struct bloom_filter_settings));
428 graph->bloom_filter_settings->hash_version = hash_version;
429 graph->bloom_filter_settings->num_hashes = get_be32(data + chunk_offset + 4);
430 graph->bloom_filter_settings->bits_per_entry = get_be32(data + chunk_offset + 8);
431 graph->bloom_filter_settings->max_changed_paths = DEFAULT_BLOOM_MAX_CHANGES;
432 }
433 break;
434 }
435
436 if (chunk_repeated) {
437 error(_("commit-graph chunk id %08x appears multiple times"), chunk_id);
438 goto free_and_return;
439 }
440 }
441
442 if (graph->chunk_bloom_indexes && graph->chunk_bloom_data) {
443 init_bloom_filters();
444 } else {
445 /* We need both the bloom chunks to exist together. Else ignore the data */
446 graph->chunk_bloom_indexes = NULL;
447 graph->chunk_bloom_data = NULL;
448 FREE_AND_NULL(graph->bloom_filter_settings);
449 }
450
451 hashcpy(graph->oid.hash, graph->data + graph->data_len - graph->hash_len);
452
453 if (verify_commit_graph_lite(graph))
454 goto free_and_return;
455
456 return graph;
457
458 free_and_return:
459 free(graph->bloom_filter_settings);
460 free(graph);
461 return NULL;
462 }
463
464 static struct commit_graph *load_commit_graph_one(struct repository *r,
465 const char *graph_file,
466 struct object_directory *odb)
467 {
468
469 struct stat st;
470 int fd;
471 struct commit_graph *g;
472 int open_ok = open_commit_graph(graph_file, &fd, &st);
473
474 if (!open_ok)
475 return NULL;
476
477 g = load_commit_graph_one_fd_st(r, fd, &st, odb);
478
479 if (g)
480 g->filename = xstrdup(graph_file);
481
482 return g;
483 }
484
485 static struct commit_graph *load_commit_graph_v1(struct repository *r,
486 struct object_directory *odb)
487 {
488 char *graph_name = get_commit_graph_filename(odb);
489 struct commit_graph *g = load_commit_graph_one(r, graph_name, odb);
490 free(graph_name);
491
492 return g;
493 }
494
495 static int add_graph_to_chain(struct commit_graph *g,
496 struct commit_graph *chain,
497 struct object_id *oids,
498 int n)
499 {
500 struct commit_graph *cur_g = chain;
501
502 if (n && !g->chunk_base_graphs) {
503 warning(_("commit-graph has no base graphs chunk"));
504 return 0;
505 }
506
507 while (n) {
508 n--;
509
510 if (!cur_g ||
511 !oideq(&oids[n], &cur_g->oid) ||
512 !hasheq(oids[n].hash, g->chunk_base_graphs + g->hash_len * n)) {
513 warning(_("commit-graph chain does not match"));
514 return 0;
515 }
516
517 cur_g = cur_g->base_graph;
518 }
519
520 g->base_graph = chain;
521
522 if (chain)
523 g->num_commits_in_base = chain->num_commits + chain->num_commits_in_base;
524
525 return 1;
526 }
527
528 static struct commit_graph *load_commit_graph_chain(struct repository *r,
529 struct object_directory *odb)
530 {
531 struct commit_graph *graph_chain = NULL;
532 struct strbuf line = STRBUF_INIT;
533 struct stat st;
534 struct object_id *oids;
535 int i = 0, valid = 1, count;
536 char *chain_name = get_commit_graph_chain_filename(odb);
537 FILE *fp;
538 int stat_res;
539
540 fp = fopen(chain_name, "r");
541 stat_res = stat(chain_name, &st);
542 free(chain_name);
543
544 if (!fp ||
545 stat_res ||
546 st.st_size <= the_hash_algo->hexsz)
547 return NULL;
548
549 count = st.st_size / (the_hash_algo->hexsz + 1);
550 oids = xcalloc(count, sizeof(struct object_id));
551
552 prepare_alt_odb(r);
553
554 for (i = 0; i < count; i++) {
555 struct object_directory *odb;
556
557 if (strbuf_getline_lf(&line, fp) == EOF)
558 break;
559
560 if (get_oid_hex(line.buf, &oids[i])) {
561 warning(_("invalid commit-graph chain: line '%s' not a hash"),
562 line.buf);
563 valid = 0;
564 break;
565 }
566
567 valid = 0;
568 for (odb = r->objects->odb; odb; odb = odb->next) {
569 char *graph_name = get_split_graph_filename(odb, line.buf);
570 struct commit_graph *g = load_commit_graph_one(r, graph_name, odb);
571
572 free(graph_name);
573
574 if (g) {
575 if (add_graph_to_chain(g, graph_chain, oids, i)) {
576 graph_chain = g;
577 valid = 1;
578 }
579
580 break;
581 }
582 }
583
584 if (!valid) {
585 warning(_("unable to find all commit-graph files"));
586 break;
587 }
588 }
589
590 free(oids);
591 fclose(fp);
592 strbuf_release(&line);
593
594 return graph_chain;
595 }
596
597 struct commit_graph *read_commit_graph_one(struct repository *r,
598 struct object_directory *odb)
599 {
600 struct commit_graph *g = load_commit_graph_v1(r, odb);
601
602 if (!g)
603 g = load_commit_graph_chain(r, odb);
604
605 return g;
606 }
607
608 static void prepare_commit_graph_one(struct repository *r,
609 struct object_directory *odb)
610 {
611
612 if (r->objects->commit_graph)
613 return;
614
615 r->objects->commit_graph = read_commit_graph_one(r, odb);
616 }
617
618 /*
619 * Return 1 if commit_graph is non-NULL, and 0 otherwise.
620 *
621 * On the first invocation, this function attempts to load the commit
622 * graph if the_repository is configured to have one.
623 */
624 static int prepare_commit_graph(struct repository *r)
625 {
626 struct object_directory *odb;
627
628 /*
629 * This must come before the "already attempted?" check below, because
630 * we want to disable even an already-loaded graph file.
631 */
632 if (r->commit_graph_disabled)
633 return 0;
634
635 if (r->objects->commit_graph_attempted)
636 return !!r->objects->commit_graph;
637 r->objects->commit_graph_attempted = 1;
638
639 prepare_repo_settings(r);
640
641 if (!git_env_bool(GIT_TEST_COMMIT_GRAPH, 0) &&
642 r->settings.core_commit_graph != 1)
643 /*
644 * This repository is not configured to use commit graphs, so
645 * do not load one. (But report commit_graph_attempted anyway
646 * so that commit graph loading is not attempted again for this
647 * repository.)
648 */
649 return 0;
650
651 if (!commit_graph_compatible(r))
652 return 0;
653
654 prepare_alt_odb(r);
655 for (odb = r->objects->odb;
656 !r->objects->commit_graph && odb;
657 odb = odb->next)
658 prepare_commit_graph_one(r, odb);
659 return !!r->objects->commit_graph;
660 }
661
662 int generation_numbers_enabled(struct repository *r)
663 {
664 uint32_t first_generation;
665 struct commit_graph *g;
666 if (!prepare_commit_graph(r))
667 return 0;
668
669 g = r->objects->commit_graph;
670
671 if (!g->num_commits)
672 return 0;
673
674 first_generation = get_be32(g->chunk_commit_data +
675 g->hash_len + 8) >> 2;
676
677 return !!first_generation;
678 }
679
680 struct bloom_filter_settings *get_bloom_filter_settings(struct repository *r)
681 {
682 struct commit_graph *g = r->objects->commit_graph;
683 while (g) {
684 if (g->bloom_filter_settings)
685 return g->bloom_filter_settings;
686 g = g->base_graph;
687 }
688 return NULL;
689 }
690
691 static void close_commit_graph_one(struct commit_graph *g)
692 {
693 if (!g)
694 return;
695
696 close_commit_graph_one(g->base_graph);
697 free_commit_graph(g);
698 }
699
700 void close_commit_graph(struct raw_object_store *o)
701 {
702 close_commit_graph_one(o->commit_graph);
703 o->commit_graph = NULL;
704 }
705
706 static int bsearch_graph(struct commit_graph *g, struct object_id *oid, uint32_t *pos)
707 {
708 return bsearch_hash(oid->hash, g->chunk_oid_fanout,
709 g->chunk_oid_lookup, g->hash_len, pos);
710 }
711
712 static void load_oid_from_graph(struct commit_graph *g,
713 uint32_t pos,
714 struct object_id *oid)
715 {
716 uint32_t lex_index;
717
718 while (g && pos < g->num_commits_in_base)
719 g = g->base_graph;
720
721 if (!g)
722 BUG("NULL commit-graph");
723
724 if (pos >= g->num_commits + g->num_commits_in_base)
725 die(_("invalid commit position. commit-graph is likely corrupt"));
726
727 lex_index = pos - g->num_commits_in_base;
728
729 hashcpy(oid->hash, g->chunk_oid_lookup + g->hash_len * lex_index);
730 }
731
732 static struct commit_list **insert_parent_or_die(struct repository *r,
733 struct commit_graph *g,
734 uint32_t pos,
735 struct commit_list **pptr)
736 {
737 struct commit *c;
738 struct object_id oid;
739
740 if (pos >= g->num_commits + g->num_commits_in_base)
741 die("invalid parent position %"PRIu32, pos);
742
743 load_oid_from_graph(g, pos, &oid);
744 c = lookup_commit(r, &oid);
745 if (!c)
746 die(_("could not find commit %s"), oid_to_hex(&oid));
747 commit_graph_data_at(c)->graph_pos = pos;
748 return &commit_list_insert(c, pptr)->next;
749 }
750
751 static void fill_commit_graph_info(struct commit *item, struct commit_graph *g, uint32_t pos)
752 {
753 const unsigned char *commit_data;
754 struct commit_graph_data *graph_data;
755 uint32_t lex_index;
756
757 while (pos < g->num_commits_in_base)
758 g = g->base_graph;
759
760 lex_index = pos - g->num_commits_in_base;
761 commit_data = g->chunk_commit_data + GRAPH_DATA_WIDTH * lex_index;
762
763 graph_data = commit_graph_data_at(item);
764 graph_data->graph_pos = pos;
765 graph_data->generation = get_be32(commit_data + g->hash_len + 8) >> 2;
766 }
767
768 static inline void set_commit_tree(struct commit *c, struct tree *t)
769 {
770 c->maybe_tree = t;
771 }
772
773 static int fill_commit_in_graph(struct repository *r,
774 struct commit *item,
775 struct commit_graph *g, uint32_t pos)
776 {
777 uint32_t edge_value;
778 uint32_t *parent_data_ptr;
779 uint64_t date_low, date_high;
780 struct commit_list **pptr;
781 struct commit_graph_data *graph_data;
782 const unsigned char *commit_data;
783 uint32_t lex_index;
784
785 while (pos < g->num_commits_in_base)
786 g = g->base_graph;
787
788 if (pos >= g->num_commits + g->num_commits_in_base)
789 die(_("invalid commit position. commit-graph is likely corrupt"));
790
791 /*
792 * Store the "full" position, but then use the
793 * "local" position for the rest of the calculation.
794 */
795 graph_data = commit_graph_data_at(item);
796 graph_data->graph_pos = pos;
797 lex_index = pos - g->num_commits_in_base;
798
799 commit_data = g->chunk_commit_data + (g->hash_len + 16) * lex_index;
800
801 item->object.parsed = 1;
802
803 set_commit_tree(item, NULL);
804
805 date_high = get_be32(commit_data + g->hash_len + 8) & 0x3;
806 date_low = get_be32(commit_data + g->hash_len + 12);
807 item->date = (timestamp_t)((date_high << 32) | date_low);
808
809 graph_data->generation = get_be32(commit_data + g->hash_len + 8) >> 2;
810
811 pptr = &item->parents;
812
813 edge_value = get_be32(commit_data + g->hash_len);
814 if (edge_value == GRAPH_PARENT_NONE)
815 return 1;
816 pptr = insert_parent_or_die(r, g, edge_value, pptr);
817
818 edge_value = get_be32(commit_data + g->hash_len + 4);
819 if (edge_value == GRAPH_PARENT_NONE)
820 return 1;
821 if (!(edge_value & GRAPH_EXTRA_EDGES_NEEDED)) {
822 pptr = insert_parent_or_die(r, g, edge_value, pptr);
823 return 1;
824 }
825
826 parent_data_ptr = (uint32_t*)(g->chunk_extra_edges +
827 4 * (uint64_t)(edge_value & GRAPH_EDGE_LAST_MASK));
828 do {
829 edge_value = get_be32(parent_data_ptr);
830 pptr = insert_parent_or_die(r, g,
831 edge_value & GRAPH_EDGE_LAST_MASK,
832 pptr);
833 parent_data_ptr++;
834 } while (!(edge_value & GRAPH_LAST_EDGE));
835
836 return 1;
837 }
838
839 static int find_commit_in_graph(struct commit *item, struct commit_graph *g, uint32_t *pos)
840 {
841 uint32_t graph_pos = commit_graph_position(item);
842 if (graph_pos != COMMIT_NOT_FROM_GRAPH) {
843 *pos = graph_pos;
844 return 1;
845 } else {
846 struct commit_graph *cur_g = g;
847 uint32_t lex_index;
848
849 while (cur_g && !bsearch_graph(cur_g, &(item->object.oid), &lex_index))
850 cur_g = cur_g->base_graph;
851
852 if (cur_g) {
853 *pos = lex_index + cur_g->num_commits_in_base;
854 return 1;
855 }
856
857 return 0;
858 }
859 }
860
861 static int parse_commit_in_graph_one(struct repository *r,
862 struct commit_graph *g,
863 struct commit *item)
864 {
865 uint32_t pos;
866
867 if (item->object.parsed)
868 return 1;
869
870 if (find_commit_in_graph(item, g, &pos))
871 return fill_commit_in_graph(r, item, g, pos);
872
873 return 0;
874 }
875
876 int parse_commit_in_graph(struct repository *r, struct commit *item)
877 {
878 static int checked_env = 0;
879
880 if (!checked_env &&
881 git_env_bool(GIT_TEST_COMMIT_GRAPH_DIE_ON_PARSE, 0))
882 die("dying as requested by the '%s' variable on commit-graph parse!",
883 GIT_TEST_COMMIT_GRAPH_DIE_ON_PARSE);
884 checked_env = 1;
885
886 if (!prepare_commit_graph(r))
887 return 0;
888 return parse_commit_in_graph_one(r, r->objects->commit_graph, item);
889 }
890
891 void load_commit_graph_info(struct repository *r, struct commit *item)
892 {
893 uint32_t pos;
894 if (!prepare_commit_graph(r))
895 return;
896 if (find_commit_in_graph(item, r->objects->commit_graph, &pos))
897 fill_commit_graph_info(item, r->objects->commit_graph, pos);
898 }
899
900 static struct tree *load_tree_for_commit(struct repository *r,
901 struct commit_graph *g,
902 struct commit *c)
903 {
904 struct object_id oid;
905 const unsigned char *commit_data;
906 uint32_t graph_pos = commit_graph_position(c);
907
908 while (graph_pos < g->num_commits_in_base)
909 g = g->base_graph;
910
911 commit_data = g->chunk_commit_data +
912 GRAPH_DATA_WIDTH * (graph_pos - g->num_commits_in_base);
913
914 hashcpy(oid.hash, commit_data);
915 set_commit_tree(c, lookup_tree(r, &oid));
916
917 return c->maybe_tree;
918 }
919
920 static struct tree *get_commit_tree_in_graph_one(struct repository *r,
921 struct commit_graph *g,
922 const struct commit *c)
923 {
924 if (c->maybe_tree)
925 return c->maybe_tree;
926 if (commit_graph_position(c) == COMMIT_NOT_FROM_GRAPH)
927 BUG("get_commit_tree_in_graph_one called from non-commit-graph commit");
928
929 return load_tree_for_commit(r, g, (struct commit *)c);
930 }
931
932 struct tree *get_commit_tree_in_graph(struct repository *r, const struct commit *c)
933 {
934 return get_commit_tree_in_graph_one(r, r->objects->commit_graph, c);
935 }
936
937 struct packed_commit_list {
938 struct commit **list;
939 size_t nr;
940 size_t alloc;
941 };
942
943 struct write_commit_graph_context {
944 struct repository *r;
945 struct object_directory *odb;
946 char *graph_name;
947 struct oid_array oids;
948 struct packed_commit_list commits;
949 int num_extra_edges;
950 unsigned long approx_nr_objects;
951 struct progress *progress;
952 int progress_done;
953 uint64_t progress_cnt;
954
955 char *base_graph_name;
956 int num_commit_graphs_before;
957 int num_commit_graphs_after;
958 char **commit_graph_filenames_before;
959 char **commit_graph_filenames_after;
960 char **commit_graph_hash_after;
961 uint32_t new_num_commits_in_base;
962 struct commit_graph *new_base_graph;
963
964 unsigned append:1,
965 report_progress:1,
966 split:1,
967 changed_paths:1,
968 order_by_pack:1;
969
970 const struct commit_graph_opts *opts;
971 size_t total_bloom_filter_data_size;
972 const struct bloom_filter_settings *bloom_settings;
973
974 int count_bloom_filter_computed;
975 int count_bloom_filter_not_computed;
976 int count_bloom_filter_trunc_empty;
977 int count_bloom_filter_trunc_large;
978 };
979
980 static int write_graph_chunk_fanout(struct hashfile *f,
981 struct write_commit_graph_context *ctx)
982 {
983 int i, count = 0;
984 struct commit **list = ctx->commits.list;
985
986 /*
987 * Write the first-level table (the list is sorted,
988 * but we use a 256-entry lookup to be able to avoid
989 * having to do eight extra binary search iterations).
990 */
991 for (i = 0; i < 256; i++) {
992 while (count < ctx->commits.nr) {
993 if ((*list)->object.oid.hash[0] != i)
994 break;
995 display_progress(ctx->progress, ++ctx->progress_cnt);
996 count++;
997 list++;
998 }
999
1000 hashwrite_be32(f, count);
1001 }
1002
1003 return 0;
1004 }
1005
1006 static int write_graph_chunk_oids(struct hashfile *f,
1007 struct write_commit_graph_context *ctx)
1008 {
1009 struct commit **list = ctx->commits.list;
1010 int count;
1011 for (count = 0; count < ctx->commits.nr; count++, list++) {
1012 display_progress(ctx->progress, ++ctx->progress_cnt);
1013 hashwrite(f, (*list)->object.oid.hash, the_hash_algo->rawsz);
1014 }
1015
1016 return 0;
1017 }
1018
1019 static const unsigned char *commit_to_sha1(size_t index, void *table)
1020 {
1021 struct commit **commits = table;
1022 return commits[index]->object.oid.hash;
1023 }
1024
1025 static int write_graph_chunk_data(struct hashfile *f,
1026 struct write_commit_graph_context *ctx)
1027 {
1028 struct commit **list = ctx->commits.list;
1029 struct commit **last = ctx->commits.list + ctx->commits.nr;
1030 uint32_t num_extra_edges = 0;
1031
1032 while (list < last) {
1033 struct commit_list *parent;
1034 struct object_id *tree;
1035 int edge_value;
1036 uint32_t packedDate[2];
1037 display_progress(ctx->progress, ++ctx->progress_cnt);
1038
1039 if (parse_commit_no_graph(*list))
1040 die(_("unable to parse commit %s"),
1041 oid_to_hex(&(*list)->object.oid));
1042 tree = get_commit_tree_oid(*list);
1043 hashwrite(f, tree->hash, the_hash_algo->rawsz);
1044
1045 parent = (*list)->parents;
1046
1047 if (!parent)
1048 edge_value = GRAPH_PARENT_NONE;
1049 else {
1050 edge_value = sha1_pos(parent->item->object.oid.hash,
1051 ctx->commits.list,
1052 ctx->commits.nr,
1053 commit_to_sha1);
1054
1055 if (edge_value >= 0)
1056 edge_value += ctx->new_num_commits_in_base;
1057 else if (ctx->new_base_graph) {
1058 uint32_t pos;
1059 if (find_commit_in_graph(parent->item,
1060 ctx->new_base_graph,
1061 &pos))
1062 edge_value = pos;
1063 }
1064
1065 if (edge_value < 0)
1066 BUG("missing parent %s for commit %s",
1067 oid_to_hex(&parent->item->object.oid),
1068 oid_to_hex(&(*list)->object.oid));
1069 }
1070
1071 hashwrite_be32(f, edge_value);
1072
1073 if (parent)
1074 parent = parent->next;
1075
1076 if (!parent)
1077 edge_value = GRAPH_PARENT_NONE;
1078 else if (parent->next)
1079 edge_value = GRAPH_EXTRA_EDGES_NEEDED | num_extra_edges;
1080 else {
1081 edge_value = sha1_pos(parent->item->object.oid.hash,
1082 ctx->commits.list,
1083 ctx->commits.nr,
1084 commit_to_sha1);
1085
1086 if (edge_value >= 0)
1087 edge_value += ctx->new_num_commits_in_base;
1088 else if (ctx->new_base_graph) {
1089 uint32_t pos;
1090 if (find_commit_in_graph(parent->item,
1091 ctx->new_base_graph,
1092 &pos))
1093 edge_value = pos;
1094 }
1095
1096 if (edge_value < 0)
1097 BUG("missing parent %s for commit %s",
1098 oid_to_hex(&parent->item->object.oid),
1099 oid_to_hex(&(*list)->object.oid));
1100 }
1101
1102 hashwrite_be32(f, edge_value);
1103
1104 if (edge_value & GRAPH_EXTRA_EDGES_NEEDED) {
1105 do {
1106 num_extra_edges++;
1107 parent = parent->next;
1108 } while (parent);
1109 }
1110
1111 if (sizeof((*list)->date) > 4)
1112 packedDate[0] = htonl(((*list)->date >> 32) & 0x3);
1113 else
1114 packedDate[0] = 0;
1115
1116 packedDate[0] |= htonl(commit_graph_data_at(*list)->generation << 2);
1117
1118 packedDate[1] = htonl((*list)->date);
1119 hashwrite(f, packedDate, 8);
1120
1121 list++;
1122 }
1123
1124 return 0;
1125 }
1126
1127 static int write_graph_chunk_extra_edges(struct hashfile *f,
1128 struct write_commit_graph_context *ctx)
1129 {
1130 struct commit **list = ctx->commits.list;
1131 struct commit **last = ctx->commits.list + ctx->commits.nr;
1132 struct commit_list *parent;
1133
1134 while (list < last) {
1135 int num_parents = 0;
1136
1137 display_progress(ctx->progress, ++ctx->progress_cnt);
1138
1139 for (parent = (*list)->parents; num_parents < 3 && parent;
1140 parent = parent->next)
1141 num_parents++;
1142
1143 if (num_parents <= 2) {
1144 list++;
1145 continue;
1146 }
1147
1148 /* Since num_parents > 2, this initializer is safe. */
1149 for (parent = (*list)->parents->next; parent; parent = parent->next) {
1150 int edge_value = sha1_pos(parent->item->object.oid.hash,
1151 ctx->commits.list,
1152 ctx->commits.nr,
1153 commit_to_sha1);
1154
1155 if (edge_value >= 0)
1156 edge_value += ctx->new_num_commits_in_base;
1157 else if (ctx->new_base_graph) {
1158 uint32_t pos;
1159 if (find_commit_in_graph(parent->item,
1160 ctx->new_base_graph,
1161 &pos))
1162 edge_value = pos;
1163 }
1164
1165 if (edge_value < 0)
1166 BUG("missing parent %s for commit %s",
1167 oid_to_hex(&parent->item->object.oid),
1168 oid_to_hex(&(*list)->object.oid));
1169 else if (!parent->next)
1170 edge_value |= GRAPH_LAST_EDGE;
1171
1172 hashwrite_be32(f, edge_value);
1173 }
1174
1175 list++;
1176 }
1177
1178 return 0;
1179 }
1180
1181 static int write_graph_chunk_bloom_indexes(struct hashfile *f,
1182 struct write_commit_graph_context *ctx)
1183 {
1184 struct commit **list = ctx->commits.list;
1185 struct commit **last = ctx->commits.list + ctx->commits.nr;
1186 uint32_t cur_pos = 0;
1187
1188 while (list < last) {
1189 struct bloom_filter *filter = get_bloom_filter(ctx->r, *list);
1190 size_t len = filter ? filter->len : 0;
1191 cur_pos += len;
1192 display_progress(ctx->progress, ++ctx->progress_cnt);
1193 hashwrite_be32(f, cur_pos);
1194 list++;
1195 }
1196
1197 return 0;
1198 }
1199
1200 static void trace2_bloom_filter_settings(struct write_commit_graph_context *ctx)
1201 {
1202 struct json_writer jw = JSON_WRITER_INIT;
1203
1204 jw_object_begin(&jw, 0);
1205 jw_object_intmax(&jw, "hash_version", ctx->bloom_settings->hash_version);
1206 jw_object_intmax(&jw, "num_hashes", ctx->bloom_settings->num_hashes);
1207 jw_object_intmax(&jw, "bits_per_entry", ctx->bloom_settings->bits_per_entry);
1208 jw_object_intmax(&jw, "max_changed_paths", ctx->bloom_settings->max_changed_paths);
1209 jw_end(&jw);
1210
1211 trace2_data_json("bloom", ctx->r, "settings", &jw);
1212
1213 jw_release(&jw);
1214 }
1215
1216 static int write_graph_chunk_bloom_data(struct hashfile *f,
1217 struct write_commit_graph_context *ctx)
1218 {
1219 struct commit **list = ctx->commits.list;
1220 struct commit **last = ctx->commits.list + ctx->commits.nr;
1221
1222 trace2_bloom_filter_settings(ctx);
1223
1224 hashwrite_be32(f, ctx->bloom_settings->hash_version);
1225 hashwrite_be32(f, ctx->bloom_settings->num_hashes);
1226 hashwrite_be32(f, ctx->bloom_settings->bits_per_entry);
1227
1228 while (list < last) {
1229 struct bloom_filter *filter = get_bloom_filter(ctx->r, *list);
1230 size_t len = filter ? filter->len : 0;
1231
1232 display_progress(ctx->progress, ++ctx->progress_cnt);
1233 if (len)
1234 hashwrite(f, filter->data, len * sizeof(unsigned char));
1235 list++;
1236 }
1237
1238 return 0;
1239 }
1240
1241 static int add_packed_commits(const struct object_id *oid,
1242 struct packed_git *pack,
1243 uint32_t pos,
1244 void *data)
1245 {
1246 struct write_commit_graph_context *ctx = (struct write_commit_graph_context*)data;
1247 enum object_type type;
1248 off_t offset = nth_packed_object_offset(pack, pos);
1249 struct object_info oi = OBJECT_INFO_INIT;
1250
1251 if (ctx->progress)
1252 display_progress(ctx->progress, ++ctx->progress_done);
1253
1254 oi.typep = &type;
1255 if (packed_object_info(ctx->r, pack, offset, &oi) < 0)
1256 die(_("unable to get type of object %s"), oid_to_hex(oid));
1257
1258 if (type != OBJ_COMMIT)
1259 return 0;
1260
1261 oid_array_append(&ctx->oids, oid);
1262 set_commit_pos(ctx->r, oid);
1263
1264 return 0;
1265 }
1266
1267 static void add_missing_parents(struct write_commit_graph_context *ctx, struct commit *commit)
1268 {
1269 struct commit_list *parent;
1270 for (parent = commit->parents; parent; parent = parent->next) {
1271 if (!(parent->item->object.flags & REACHABLE)) {
1272 oid_array_append(&ctx->oids, &parent->item->object.oid);
1273 parent->item->object.flags |= REACHABLE;
1274 }
1275 }
1276 }
1277
1278 static void close_reachable(struct write_commit_graph_context *ctx)
1279 {
1280 int i;
1281 struct commit *commit;
1282 enum commit_graph_split_flags flags = ctx->opts ?
1283 ctx->opts->split_flags : COMMIT_GRAPH_SPLIT_UNSPECIFIED;
1284
1285 if (ctx->report_progress)
1286 ctx->progress = start_delayed_progress(
1287 _("Loading known commits in commit graph"),
1288 ctx->oids.nr);
1289 for (i = 0; i < ctx->oids.nr; i++) {
1290 display_progress(ctx->progress, i + 1);
1291 commit = lookup_commit(ctx->r, &ctx->oids.oid[i]);
1292 if (commit)
1293 commit->object.flags |= REACHABLE;
1294 }
1295 stop_progress(&ctx->progress);
1296
1297 /*
1298 * As this loop runs, ctx->oids.nr may grow, but not more
1299 * than the number of missing commits in the reachable
1300 * closure.
1301 */
1302 if (ctx->report_progress)
1303 ctx->progress = start_delayed_progress(
1304 _("Expanding reachable commits in commit graph"),
1305 0);
1306 for (i = 0; i < ctx->oids.nr; i++) {
1307 display_progress(ctx->progress, i + 1);
1308 commit = lookup_commit(ctx->r, &ctx->oids.oid[i]);
1309
1310 if (!commit)
1311 continue;
1312 if (ctx->split) {
1313 if ((!parse_commit(commit) &&
1314 commit_graph_position(commit) == COMMIT_NOT_FROM_GRAPH) ||
1315 flags == COMMIT_GRAPH_SPLIT_REPLACE)
1316 add_missing_parents(ctx, commit);
1317 } else if (!parse_commit_no_graph(commit))
1318 add_missing_parents(ctx, commit);
1319 }
1320 stop_progress(&ctx->progress);
1321
1322 if (ctx->report_progress)
1323 ctx->progress = start_delayed_progress(
1324 _("Clearing commit marks in commit graph"),
1325 ctx->oids.nr);
1326 for (i = 0; i < ctx->oids.nr; i++) {
1327 display_progress(ctx->progress, i + 1);
1328 commit = lookup_commit(ctx->r, &ctx->oids.oid[i]);
1329
1330 if (commit)
1331 commit->object.flags &= ~REACHABLE;
1332 }
1333 stop_progress(&ctx->progress);
1334 }
1335
1336 static void compute_generation_numbers(struct write_commit_graph_context *ctx)
1337 {
1338 int i;
1339 struct commit_list *list = NULL;
1340
1341 if (ctx->report_progress)
1342 ctx->progress = start_delayed_progress(
1343 _("Computing commit graph generation numbers"),
1344 ctx->commits.nr);
1345 for (i = 0; i < ctx->commits.nr; i++) {
1346 uint32_t generation = commit_graph_data_at(ctx->commits.list[i])->generation;
1347
1348 display_progress(ctx->progress, i + 1);
1349 if (generation != GENERATION_NUMBER_INFINITY &&
1350 generation != GENERATION_NUMBER_ZERO)
1351 continue;
1352
1353 commit_list_insert(ctx->commits.list[i], &list);
1354 while (list) {
1355 struct commit *current = list->item;
1356 struct commit_list *parent;
1357 int all_parents_computed = 1;
1358 uint32_t max_generation = 0;
1359
1360 for (parent = current->parents; parent; parent = parent->next) {
1361 generation = commit_graph_data_at(parent->item)->generation;
1362
1363 if (generation == GENERATION_NUMBER_INFINITY ||
1364 generation == GENERATION_NUMBER_ZERO) {
1365 all_parents_computed = 0;
1366 commit_list_insert(parent->item, &list);
1367 break;
1368 } else if (generation > max_generation) {
1369 max_generation = generation;
1370 }
1371 }
1372
1373 if (all_parents_computed) {
1374 struct commit_graph_data *data = commit_graph_data_at(current);
1375
1376 data->generation = max_generation + 1;
1377 pop_commit(&list);
1378
1379 if (data->generation > GENERATION_NUMBER_MAX)
1380 data->generation = GENERATION_NUMBER_MAX;
1381 }
1382 }
1383 }
1384 stop_progress(&ctx->progress);
1385 }
1386
1387 static void trace2_bloom_filter_write_statistics(struct write_commit_graph_context *ctx)
1388 {
1389 trace2_data_intmax("commit-graph", ctx->r, "filter-computed",
1390 ctx->count_bloom_filter_computed);
1391 trace2_data_intmax("commit-graph", ctx->r, "filter-not-computed",
1392 ctx->count_bloom_filter_not_computed);
1393 trace2_data_intmax("commit-graph", ctx->r, "filter-trunc-empty",
1394 ctx->count_bloom_filter_trunc_empty);
1395 trace2_data_intmax("commit-graph", ctx->r, "filter-trunc-large",
1396 ctx->count_bloom_filter_trunc_large);
1397 }
1398
1399 static void compute_bloom_filters(struct write_commit_graph_context *ctx)
1400 {
1401 int i;
1402 struct progress *progress = NULL;
1403 struct commit **sorted_commits;
1404 int max_new_filters;
1405
1406 init_bloom_filters();
1407
1408 if (ctx->report_progress)
1409 progress = start_delayed_progress(
1410 _("Computing commit changed paths Bloom filters"),
1411 ctx->commits.nr);
1412
1413 ALLOC_ARRAY(sorted_commits, ctx->commits.nr);
1414 COPY_ARRAY(sorted_commits, ctx->commits.list, ctx->commits.nr);
1415
1416 if (ctx->order_by_pack)
1417 QSORT(sorted_commits, ctx->commits.nr, commit_pos_cmp);
1418 else
1419 QSORT(sorted_commits, ctx->commits.nr, commit_gen_cmp);
1420
1421 max_new_filters = ctx->opts && ctx->opts->max_new_filters >= 0 ?
1422 ctx->opts->max_new_filters : ctx->commits.nr;
1423
1424 for (i = 0; i < ctx->commits.nr; i++) {
1425 enum bloom_filter_computed computed = 0;
1426 struct commit *c = sorted_commits[i];
1427 struct bloom_filter *filter = get_or_compute_bloom_filter(
1428 ctx->r,
1429 c,
1430 ctx->count_bloom_filter_computed < max_new_filters,
1431 ctx->bloom_settings,
1432 &computed);
1433 if (computed & BLOOM_COMPUTED) {
1434 ctx->count_bloom_filter_computed++;
1435 if (computed & BLOOM_TRUNC_EMPTY)
1436 ctx->count_bloom_filter_trunc_empty++;
1437 if (computed & BLOOM_TRUNC_LARGE)
1438 ctx->count_bloom_filter_trunc_large++;
1439 } else if (computed & BLOOM_NOT_COMPUTED)
1440 ctx->count_bloom_filter_not_computed++;
1441 ctx->total_bloom_filter_data_size += filter
1442 ? sizeof(unsigned char) * filter->len : 0;
1443 display_progress(progress, i + 1);
1444 }
1445
1446 if (trace2_is_enabled())
1447 trace2_bloom_filter_write_statistics(ctx);
1448
1449 free(sorted_commits);
1450 stop_progress(&progress);
1451 }
1452
1453 struct refs_cb_data {
1454 struct oidset *commits;
1455 struct progress *progress;
1456 };
1457
1458 static int add_ref_to_set(const char *refname,
1459 const struct object_id *oid,
1460 int flags, void *cb_data)
1461 {
1462 struct object_id peeled;
1463 struct refs_cb_data *data = (struct refs_cb_data *)cb_data;
1464
1465 if (!peel_ref(refname, &peeled))
1466 oid = &peeled;
1467 if (oid_object_info(the_repository, oid, NULL) == OBJ_COMMIT)
1468 oidset_insert(data->commits, oid);
1469
1470 display_progress(data->progress, oidset_size(data->commits));
1471
1472 return 0;
1473 }
1474
1475 int write_commit_graph_reachable(struct object_directory *odb,
1476 enum commit_graph_write_flags flags,
1477 const struct commit_graph_opts *opts)
1478 {
1479 struct oidset commits = OIDSET_INIT;
1480 struct refs_cb_data data;
1481 int result;
1482
1483 memset(&data, 0, sizeof(data));
1484 data.commits = &commits;
1485 if (flags & COMMIT_GRAPH_WRITE_PROGRESS)
1486 data.progress = start_delayed_progress(
1487 _("Collecting referenced commits"), 0);
1488
1489 for_each_ref(add_ref_to_set, &data);
1490
1491 stop_progress(&data.progress);
1492
1493 result = write_commit_graph(odb, NULL, &commits,
1494 flags, opts);
1495
1496 oidset_clear(&commits);
1497 return result;
1498 }
1499
1500 static int fill_oids_from_packs(struct write_commit_graph_context *ctx,
1501 struct string_list *pack_indexes)
1502 {
1503 uint32_t i;
1504 struct strbuf progress_title = STRBUF_INIT;
1505 struct strbuf packname = STRBUF_INIT;
1506 int dirlen;
1507
1508 strbuf_addf(&packname, "%s/pack/", ctx->odb->path);
1509 dirlen = packname.len;
1510 if (ctx->report_progress) {
1511 strbuf_addf(&progress_title,
1512 Q_("Finding commits for commit graph in %d pack",
1513 "Finding commits for commit graph in %d packs",
1514 pack_indexes->nr),
1515 pack_indexes->nr);
1516 ctx->progress = start_delayed_progress(progress_title.buf, 0);
1517 ctx->progress_done = 0;
1518 }
1519 for (i = 0; i < pack_indexes->nr; i++) {
1520 struct packed_git *p;
1521 strbuf_setlen(&packname, dirlen);
1522 strbuf_addstr(&packname, pack_indexes->items[i].string);
1523 p = add_packed_git(packname.buf, packname.len, 1);
1524 if (!p) {
1525 error(_("error adding pack %s"), packname.buf);
1526 return -1;
1527 }
1528 if (open_pack_index(p)) {
1529 error(_("error opening index for %s"), packname.buf);
1530 return -1;
1531 }
1532 for_each_object_in_pack(p, add_packed_commits, ctx,
1533 FOR_EACH_OBJECT_PACK_ORDER);
1534 close_pack(p);
1535 free(p);
1536 }
1537
1538 stop_progress(&ctx->progress);
1539 strbuf_release(&progress_title);
1540 strbuf_release(&packname);
1541
1542 return 0;
1543 }
1544
1545 static int fill_oids_from_commits(struct write_commit_graph_context *ctx,
1546 struct oidset *commits)
1547 {
1548 struct oidset_iter iter;
1549 struct object_id *oid;
1550
1551 if (!oidset_size(commits))
1552 return 0;
1553
1554 oidset_iter_init(commits, &iter);
1555 while ((oid = oidset_iter_next(&iter))) {
1556 oid_array_append(&ctx->oids, oid);
1557 }
1558
1559 return 0;
1560 }
1561
1562 static void fill_oids_from_all_packs(struct write_commit_graph_context *ctx)
1563 {
1564 if (ctx->report_progress)
1565 ctx->progress = start_delayed_progress(
1566 _("Finding commits for commit graph among packed objects"),
1567 ctx->approx_nr_objects);
1568 for_each_packed_object(add_packed_commits, ctx,
1569 FOR_EACH_OBJECT_PACK_ORDER);
1570 if (ctx->progress_done < ctx->approx_nr_objects)
1571 display_progress(ctx->progress, ctx->approx_nr_objects);
1572 stop_progress(&ctx->progress);
1573 }
1574
1575 static void copy_oids_to_commits(struct write_commit_graph_context *ctx)
1576 {
1577 uint32_t i;
1578 enum commit_graph_split_flags flags = ctx->opts ?
1579 ctx->opts->split_flags : COMMIT_GRAPH_SPLIT_UNSPECIFIED;
1580
1581 ctx->num_extra_edges = 0;
1582 if (ctx->report_progress)
1583 ctx->progress = start_delayed_progress(
1584 _("Finding extra edges in commit graph"),
1585 ctx->oids.nr);
1586 oid_array_sort(&ctx->oids);
1587 for (i = 0; i < ctx->oids.nr; i = oid_array_next_unique(&ctx->oids, i)) {
1588 unsigned int num_parents;
1589
1590 display_progress(ctx->progress, i + 1);
1591
1592 ALLOC_GROW(ctx->commits.list, ctx->commits.nr + 1, ctx->commits.alloc);
1593 ctx->commits.list[ctx->commits.nr] = lookup_commit(ctx->r, &ctx->oids.oid[i]);
1594
1595 if (ctx->split && flags != COMMIT_GRAPH_SPLIT_REPLACE &&
1596 commit_graph_position(ctx->commits.list[ctx->commits.nr]) != COMMIT_NOT_FROM_GRAPH)
1597 continue;
1598
1599 if (ctx->split && flags == COMMIT_GRAPH_SPLIT_REPLACE)
1600 parse_commit(ctx->commits.list[ctx->commits.nr]);
1601 else
1602 parse_commit_no_graph(ctx->commits.list[ctx->commits.nr]);
1603
1604 num_parents = commit_list_count(ctx->commits.list[ctx->commits.nr]->parents);
1605 if (num_parents > 2)
1606 ctx->num_extra_edges += num_parents - 1;
1607
1608 ctx->commits.nr++;
1609 }
1610 stop_progress(&ctx->progress);
1611 }
1612
1613 static int write_graph_chunk_base_1(struct hashfile *f,
1614 struct commit_graph *g)
1615 {
1616 int num = 0;
1617
1618 if (!g)
1619 return 0;
1620
1621 num = write_graph_chunk_base_1(f, g->base_graph);
1622 hashwrite(f, g->oid.hash, the_hash_algo->rawsz);
1623 return num + 1;
1624 }
1625
1626 static int write_graph_chunk_base(struct hashfile *f,
1627 struct write_commit_graph_context *ctx)
1628 {
1629 int num = write_graph_chunk_base_1(f, ctx->new_base_graph);
1630
1631 if (num != ctx->num_commit_graphs_after - 1) {
1632 error(_("failed to write correct number of base graph ids"));
1633 return -1;
1634 }
1635
1636 return 0;
1637 }
1638
1639 typedef int (*chunk_write_fn)(struct hashfile *f,
1640 struct write_commit_graph_context *ctx);
1641
1642 struct chunk_info {
1643 uint32_t id;
1644 uint64_t size;
1645 chunk_write_fn write_fn;
1646 };
1647
1648 static int write_commit_graph_file(struct write_commit_graph_context *ctx)
1649 {
1650 uint32_t i;
1651 int fd;
1652 struct hashfile *f;
1653 struct lock_file lk = LOCK_INIT;
1654 struct chunk_info chunks[MAX_NUM_CHUNKS + 1];
1655 const unsigned hashsz = the_hash_algo->rawsz;
1656 struct strbuf progress_title = STRBUF_INIT;
1657 int num_chunks = 3;
1658 uint64_t chunk_offset;
1659 struct object_id file_hash;
1660
1661 if (ctx->split) {
1662 struct strbuf tmp_file = STRBUF_INIT;
1663
1664 strbuf_addf(&tmp_file,
1665 "%s/info/commit-graphs/tmp_graph_XXXXXX",
1666 ctx->odb->path);
1667 ctx->graph_name = strbuf_detach(&tmp_file, NULL);
1668 } else {
1669 ctx->graph_name = get_commit_graph_filename(ctx->odb);
1670 }
1671
1672 if (safe_create_leading_directories(ctx->graph_name)) {
1673 UNLEAK(ctx->graph_name);
1674 error(_("unable to create leading directories of %s"),
1675 ctx->graph_name);
1676 return -1;
1677 }
1678
1679 if (ctx->split) {
1680 char *lock_name = get_commit_graph_chain_filename(ctx->odb);
1681
1682 hold_lock_file_for_update_mode(&lk, lock_name,
1683 LOCK_DIE_ON_ERROR, 0444);
1684
1685 fd = git_mkstemp_mode(ctx->graph_name, 0444);
1686 if (fd < 0) {
1687 error(_("unable to create temporary graph layer"));
1688 return -1;
1689 }
1690
1691 if (adjust_shared_perm(ctx->graph_name)) {
1692 error(_("unable to adjust shared permissions for '%s'"),
1693 ctx->graph_name);
1694 return -1;
1695 }
1696
1697 f = hashfd(fd, ctx->graph_name);
1698 } else {
1699 hold_lock_file_for_update_mode(&lk, ctx->graph_name,
1700 LOCK_DIE_ON_ERROR, 0444);
1701 fd = lk.tempfile->fd;
1702 f = hashfd(lk.tempfile->fd, lk.tempfile->filename.buf);
1703 }
1704
1705 chunks[0].id = GRAPH_CHUNKID_OIDFANOUT;
1706 chunks[0].size = GRAPH_FANOUT_SIZE;
1707 chunks[0].write_fn = write_graph_chunk_fanout;
1708 chunks[1].id = GRAPH_CHUNKID_OIDLOOKUP;
1709 chunks[1].size = hashsz * ctx->commits.nr;
1710 chunks[1].write_fn = write_graph_chunk_oids;
1711 chunks[2].id = GRAPH_CHUNKID_DATA;
1712 chunks[2].size = (hashsz + 16) * ctx->commits.nr;
1713 chunks[2].write_fn = write_graph_chunk_data;
1714 if (ctx->num_extra_edges) {
1715 chunks[num_chunks].id = GRAPH_CHUNKID_EXTRAEDGES;
1716 chunks[num_chunks].size = 4 * ctx->num_extra_edges;
1717 chunks[num_chunks].write_fn = write_graph_chunk_extra_edges;
1718 num_chunks++;
1719 }
1720 if (ctx->changed_paths) {
1721 chunks[num_chunks].id = GRAPH_CHUNKID_BLOOMINDEXES;
1722 chunks[num_chunks].size = sizeof(uint32_t) * ctx->commits.nr;
1723 chunks[num_chunks].write_fn = write_graph_chunk_bloom_indexes;
1724 num_chunks++;
1725 chunks[num_chunks].id = GRAPH_CHUNKID_BLOOMDATA;
1726 chunks[num_chunks].size = sizeof(uint32_t) * 3
1727 + ctx->total_bloom_filter_data_size;
1728 chunks[num_chunks].write_fn = write_graph_chunk_bloom_data;
1729 num_chunks++;
1730 }
1731 if (ctx->num_commit_graphs_after > 1) {
1732 chunks[num_chunks].id = GRAPH_CHUNKID_BASE;
1733 chunks[num_chunks].size = hashsz * (ctx->num_commit_graphs_after - 1);
1734 chunks[num_chunks].write_fn = write_graph_chunk_base;
1735 num_chunks++;
1736 }
1737
1738 chunks[num_chunks].id = 0;
1739 chunks[num_chunks].size = 0;
1740
1741 hashwrite_be32(f, GRAPH_SIGNATURE);
1742
1743 hashwrite_u8(f, GRAPH_VERSION);
1744 hashwrite_u8(f, oid_version());
1745 hashwrite_u8(f, num_chunks);
1746 hashwrite_u8(f, ctx->num_commit_graphs_after - 1);
1747
1748 chunk_offset = 8 + (num_chunks + 1) * GRAPH_CHUNKLOOKUP_WIDTH;
1749 for (i = 0; i <= num_chunks; i++) {
1750 uint32_t chunk_write[3];
1751
1752 chunk_write[0] = htonl(chunks[i].id);
1753 chunk_write[1] = htonl(chunk_offset >> 32);
1754 chunk_write[2] = htonl(chunk_offset & 0xffffffff);
1755 hashwrite(f, chunk_write, 12);
1756
1757 chunk_offset += chunks[i].size;
1758 }
1759
1760 if (ctx->report_progress) {
1761 strbuf_addf(&progress_title,
1762 Q_("Writing out commit graph in %d pass",
1763 "Writing out commit graph in %d passes",
1764 num_chunks),
1765 num_chunks);
1766 ctx->progress = start_delayed_progress(
1767 progress_title.buf,
1768 num_chunks * ctx->commits.nr);
1769 }
1770
1771 for (i = 0; i < num_chunks; i++) {
1772 uint64_t start_offset = f->total + f->offset;
1773
1774 if (chunks[i].write_fn(f, ctx))
1775 return -1;
1776
1777 if (f->total + f->offset != start_offset + chunks[i].size)
1778 BUG("expected to write %"PRId64" bytes to chunk %"PRIx32", but wrote %"PRId64" instead",
1779 chunks[i].size, chunks[i].id,
1780 f->total + f->offset - start_offset);
1781 }
1782
1783 stop_progress(&ctx->progress);
1784 strbuf_release(&progress_title);
1785
1786 if (ctx->split && ctx->base_graph_name && ctx->num_commit_graphs_after > 1) {
1787 char *new_base_hash = xstrdup(oid_to_hex(&ctx->new_base_graph->oid));
1788 char *new_base_name = get_split_graph_filename(ctx->new_base_graph->odb, new_base_hash);
1789
1790 free(ctx->commit_graph_filenames_after[ctx->num_commit_graphs_after - 2]);
1791 free(ctx->commit_graph_hash_after[ctx->num_commit_graphs_after - 2]);
1792 ctx->commit_graph_filenames_after[ctx->num_commit_graphs_after - 2] = new_base_name;
1793 ctx->commit_graph_hash_after[ctx->num_commit_graphs_after - 2] = new_base_hash;
1794 }
1795
1796 close_commit_graph(ctx->r->objects);
1797 finalize_hashfile(f, file_hash.hash, CSUM_HASH_IN_STREAM | CSUM_FSYNC);
1798
1799 if (ctx->split) {
1800 FILE *chainf = fdopen_lock_file(&lk, "w");
1801 char *final_graph_name;
1802 int result;
1803
1804 close(fd);
1805
1806 if (!chainf) {
1807 error(_("unable to open commit-graph chain file"));
1808 return -1;
1809 }
1810
1811 if (ctx->base_graph_name) {
1812 const char *dest;
1813 int idx = ctx->num_commit_graphs_after - 1;
1814 if (ctx->num_commit_graphs_after > 1)
1815 idx--;
1816
1817 dest = ctx->commit_graph_filenames_after[idx];
1818
1819 if (strcmp(ctx->base_graph_name, dest)) {
1820 result = rename(ctx->base_graph_name, dest);
1821
1822 if (result) {
1823 error(_("failed to rename base commit-graph file"));
1824 return -1;
1825 }
1826 }
1827 } else {
1828 char *graph_name = get_commit_graph_filename(ctx->odb);
1829 unlink(graph_name);
1830 }
1831
1832 ctx->commit_graph_hash_after[ctx->num_commit_graphs_after - 1] = xstrdup(oid_to_hex(&file_hash));
1833 final_graph_name = get_split_graph_filename(ctx->odb,
1834 ctx->commit_graph_hash_after[ctx->num_commit_graphs_after - 1]);
1835 ctx->commit_graph_filenames_after[ctx->num_commit_graphs_after - 1] = final_graph_name;
1836
1837 result = rename(ctx->graph_name, final_graph_name);
1838
1839 for (i = 0; i < ctx->num_commit_graphs_after; i++)
1840 fprintf(lk.tempfile->fp, "%s\n", ctx->commit_graph_hash_after[i]);
1841
1842 if (result) {
1843 error(_("failed to rename temporary commit-graph file"));
1844 return -1;
1845 }
1846 }
1847
1848 commit_lock_file(&lk);
1849
1850 return 0;
1851 }
1852
1853 static void split_graph_merge_strategy(struct write_commit_graph_context *ctx)
1854 {
1855 struct commit_graph *g;
1856 uint32_t num_commits;
1857 enum commit_graph_split_flags flags = COMMIT_GRAPH_SPLIT_UNSPECIFIED;
1858 uint32_t i;
1859
1860 int max_commits = 0;
1861 int size_mult = 2;
1862
1863 if (ctx->opts) {
1864 max_commits = ctx->opts->max_commits;
1865
1866 if (ctx->opts->size_multiple)
1867 size_mult = ctx->opts->size_multiple;
1868
1869 flags = ctx->opts->split_flags;
1870 }
1871
1872 g = ctx->r->objects->commit_graph;
1873 num_commits = ctx->commits.nr;
1874 if (flags == COMMIT_GRAPH_SPLIT_REPLACE)
1875 ctx->num_commit_graphs_after = 1;
1876 else
1877 ctx->num_commit_graphs_after = ctx->num_commit_graphs_before + 1;
1878
1879 if (flags != COMMIT_GRAPH_SPLIT_MERGE_PROHIBITED &&
1880 flags != COMMIT_GRAPH_SPLIT_REPLACE) {
1881 while (g && (g->num_commits <= size_mult * num_commits ||
1882 (max_commits && num_commits > max_commits))) {
1883 if (g->odb != ctx->odb)
1884 break;
1885
1886 num_commits += g->num_commits;
1887 g = g->base_graph;
1888
1889 ctx->num_commit_graphs_after--;
1890 }
1891 }
1892
1893 if (flags != COMMIT_GRAPH_SPLIT_REPLACE)
1894 ctx->new_base_graph = g;
1895 else if (ctx->num_commit_graphs_after != 1)
1896 BUG("split_graph_merge_strategy: num_commit_graphs_after "
1897 "should be 1 with --split=replace");
1898
1899 if (ctx->num_commit_graphs_after == 2) {
1900 char *old_graph_name = get_commit_graph_filename(g->odb);
1901
1902 if (!strcmp(g->filename, old_graph_name) &&
1903 g->odb != ctx->odb) {
1904 ctx->num_commit_graphs_after = 1;
1905 ctx->new_base_graph = NULL;
1906 }
1907
1908 free(old_graph_name);
1909 }
1910
1911 CALLOC_ARRAY(ctx->commit_graph_filenames_after, ctx->num_commit_graphs_after);
1912 CALLOC_ARRAY(ctx->commit_graph_hash_after, ctx->num_commit_graphs_after);
1913
1914 for (i = 0; i < ctx->num_commit_graphs_after &&
1915 i < ctx->num_commit_graphs_before; i++)
1916 ctx->commit_graph_filenames_after[i] = xstrdup(ctx->commit_graph_filenames_before[i]);
1917
1918 i = ctx->num_commit_graphs_before - 1;
1919 g = ctx->r->objects->commit_graph;
1920
1921 while (g) {
1922 if (i < ctx->num_commit_graphs_after)
1923 ctx->commit_graph_hash_after[i] = xstrdup(oid_to_hex(&g->oid));
1924
1925 i--;
1926 g = g->base_graph;
1927 }
1928 }
1929
1930 static void merge_commit_graph(struct write_commit_graph_context *ctx,
1931 struct commit_graph *g)
1932 {
1933 uint32_t i;
1934 uint32_t offset = g->num_commits_in_base;
1935
1936 ALLOC_GROW(ctx->commits.list, ctx->commits.nr + g->num_commits, ctx->commits.alloc);
1937
1938 for (i = 0; i < g->num_commits; i++) {
1939 struct object_id oid;
1940 struct commit *result;
1941
1942 display_progress(ctx->progress, i + 1);
1943
1944 load_oid_from_graph(g, i + offset, &oid);
1945
1946 /* only add commits if they still exist in the repo */
1947 result = lookup_commit_reference_gently(ctx->r, &oid, 1);
1948
1949 if (result) {
1950 ctx->commits.list[ctx->commits.nr] = result;
1951 ctx->commits.nr++;
1952 }
1953 }
1954 }
1955
1956 static int commit_compare(const void *_a, const void *_b)
1957 {
1958 const struct commit *a = *(const struct commit **)_a;
1959 const struct commit *b = *(const struct commit **)_b;
1960 return oidcmp(&a->object.oid, &b->object.oid);
1961 }
1962
1963 static void sort_and_scan_merged_commits(struct write_commit_graph_context *ctx)
1964 {
1965 uint32_t i, dedup_i = 0;
1966
1967 if (ctx->report_progress)
1968 ctx->progress = start_delayed_progress(
1969 _("Scanning merged commits"),
1970 ctx->commits.nr);
1971
1972 QSORT(ctx->commits.list, ctx->commits.nr, commit_compare);
1973
1974 ctx->num_extra_edges = 0;
1975 for (i = 0; i < ctx->commits.nr; i++) {
1976 display_progress(ctx->progress, i);
1977
1978 if (i && oideq(&ctx->commits.list[i - 1]->object.oid,
1979 &ctx->commits.list[i]->object.oid)) {
1980 /*
1981 * Silently ignore duplicates. These were likely
1982 * created due to a commit appearing in multiple
1983 * layers of the chain, which is unexpected but
1984 * not invalid. We should make sure there is a
1985 * unique copy in the new layer.
1986 */
1987 } else {
1988 unsigned int num_parents;
1989
1990 ctx->commits.list[dedup_i] = ctx->commits.list[i];
1991 dedup_i++;
1992
1993 num_parents = commit_list_count(ctx->commits.list[i]->parents);
1994 if (num_parents > 2)
1995 ctx->num_extra_edges += num_parents - 1;
1996 }
1997 }
1998
1999 ctx->commits.nr = dedup_i;
2000
2001 stop_progress(&ctx->progress);
2002 }
2003
2004 static void merge_commit_graphs(struct write_commit_graph_context *ctx)
2005 {
2006 struct commit_graph *g = ctx->r->objects->commit_graph;
2007 uint32_t current_graph_number = ctx->num_commit_graphs_before;
2008
2009 while (g && current_graph_number >= ctx->num_commit_graphs_after) {
2010 current_graph_number--;
2011
2012 if (ctx->report_progress)
2013 ctx->progress = start_delayed_progress(_("Merging commit-graph"), 0);
2014
2015 merge_commit_graph(ctx, g);
2016 stop_progress(&ctx->progress);
2017
2018 g = g->base_graph;
2019 }
2020
2021 if (g) {
2022 ctx->new_base_graph = g;
2023 ctx->new_num_commits_in_base = g->num_commits + g->num_commits_in_base;
2024 }
2025
2026 if (ctx->new_base_graph)
2027 ctx->base_graph_name = xstrdup(ctx->new_base_graph->filename);
2028
2029 sort_and_scan_merged_commits(ctx);
2030 }
2031
2032 static void mark_commit_graphs(struct write_commit_graph_context *ctx)
2033 {
2034 uint32_t i;
2035 time_t now = time(NULL);
2036
2037 for (i = ctx->num_commit_graphs_after - 1; i < ctx->num_commit_graphs_before; i++) {
2038 struct stat st;
2039 struct utimbuf updated_time;
2040
2041 stat(ctx->commit_graph_filenames_before[i], &st);
2042
2043 updated_time.actime = st.st_atime;
2044 updated_time.modtime = now;
2045 utime(ctx->commit_graph_filenames_before[i], &updated_time);
2046 }
2047 }
2048
2049 static void expire_commit_graphs(struct write_commit_graph_context *ctx)
2050 {
2051 struct strbuf path = STRBUF_INIT;
2052 DIR *dir;
2053 struct dirent *de;
2054 size_t dirnamelen;
2055 timestamp_t expire_time = time(NULL);
2056
2057 if (ctx->opts && ctx->opts->expire_time)
2058 expire_time = ctx->opts->expire_time;
2059 if (!ctx->split) {
2060 char *chain_file_name = get_commit_graph_chain_filename(ctx->odb);
2061 unlink(chain_file_name);
2062 free(chain_file_name);
2063 ctx->num_commit_graphs_after = 0;
2064 }
2065
2066 strbuf_addstr(&path, ctx->odb->path);
2067 strbuf_addstr(&path, "/info/commit-graphs");
2068 dir = opendir(path.buf);
2069
2070 if (!dir)
2071 goto out;
2072
2073 strbuf_addch(&path, '/');
2074 dirnamelen = path.len;
2075 while ((de = readdir(dir)) != NULL) {
2076 struct stat st;
2077 uint32_t i, found = 0;
2078
2079 strbuf_setlen(&path, dirnamelen);
2080 strbuf_addstr(&path, de->d_name);
2081
2082 stat(path.buf, &st);
2083
2084 if (st.st_mtime > expire_time)
2085 continue;
2086 if (path.len < 6 || strcmp(path.buf + path.len - 6, ".graph"))
2087 continue;
2088
2089 for (i = 0; i < ctx->num_commit_graphs_after; i++) {
2090 if (!strcmp(ctx->commit_graph_filenames_after[i],
2091 path.buf)) {
2092 found = 1;
2093 break;
2094 }
2095 }
2096
2097 if (!found)
2098 unlink(path.buf);
2099 }
2100
2101 out:
2102 strbuf_release(&path);
2103 }
2104
2105 int write_commit_graph(struct object_directory *odb,
2106 struct string_list *pack_indexes,
2107 struct oidset *commits,
2108 enum commit_graph_write_flags flags,
2109 const struct commit_graph_opts *opts)
2110 {
2111 struct write_commit_graph_context *ctx;
2112 uint32_t i;
2113 int res = 0;
2114 int replace = 0;
2115 struct bloom_filter_settings bloom_settings = DEFAULT_BLOOM_FILTER_SETTINGS;
2116
2117 prepare_repo_settings(the_repository);
2118 if (!the_repository->settings.core_commit_graph) {
2119 warning(_("attempting to write a commit-graph, but 'core.commitGraph' is disabled"));
2120 return 0;
2121 }
2122 if (!commit_graph_compatible(the_repository))
2123 return 0;
2124
2125 ctx = xcalloc(1, sizeof(struct write_commit_graph_context));
2126 ctx->r = the_repository;
2127 ctx->odb = odb;
2128 ctx->append = flags & COMMIT_GRAPH_WRITE_APPEND ? 1 : 0;
2129 ctx->report_progress = flags & COMMIT_GRAPH_WRITE_PROGRESS ? 1 : 0;
2130 ctx->split = flags & COMMIT_GRAPH_WRITE_SPLIT ? 1 : 0;
2131 ctx->opts = opts;
2132 ctx->total_bloom_filter_data_size = 0;
2133
2134 bloom_settings.bits_per_entry = git_env_ulong("GIT_TEST_BLOOM_SETTINGS_BITS_PER_ENTRY",
2135 bloom_settings.bits_per_entry);
2136 bloom_settings.num_hashes = git_env_ulong("GIT_TEST_BLOOM_SETTINGS_NUM_HASHES",
2137 bloom_settings.num_hashes);
2138 bloom_settings.max_changed_paths = git_env_ulong("GIT_TEST_BLOOM_SETTINGS_MAX_CHANGED_PATHS",
2139 bloom_settings.max_changed_paths);
2140 ctx->bloom_settings = &bloom_settings;
2141
2142 if (flags & COMMIT_GRAPH_WRITE_BLOOM_FILTERS)
2143 ctx->changed_paths = 1;
2144 if (!(flags & COMMIT_GRAPH_NO_WRITE_BLOOM_FILTERS)) {
2145 struct commit_graph *g;
2146 prepare_commit_graph_one(ctx->r, ctx->odb);
2147
2148 g = ctx->r->objects->commit_graph;
2149
2150 /* We have changed-paths already. Keep them in the next graph */
2151 if (g && g->chunk_bloom_data) {
2152 ctx->changed_paths = 1;
2153 ctx->bloom_settings = g->bloom_filter_settings;
2154 }
2155 }
2156
2157 if (ctx->split) {
2158 struct commit_graph *g;
2159 prepare_commit_graph(ctx->r);
2160
2161 g = ctx->r->objects->commit_graph;
2162
2163 while (g) {
2164 ctx->num_commit_graphs_before++;
2165 g = g->base_graph;
2166 }
2167
2168 if (ctx->num_commit_graphs_before) {
2169 ALLOC_ARRAY(ctx->commit_graph_filenames_before, ctx->num_commit_graphs_before);
2170 i = ctx->num_commit_graphs_before;
2171 g = ctx->r->objects->commit_graph;
2172
2173 while (g) {
2174 ctx->commit_graph_filenames_before[--i] = xstrdup(g->filename);
2175 g = g->base_graph;
2176 }
2177 }
2178
2179 if (ctx->opts)
2180 replace = ctx->opts->split_flags & COMMIT_GRAPH_SPLIT_REPLACE;
2181 }
2182
2183 ctx->approx_nr_objects = approximate_object_count();
2184
2185 if (ctx->append)
2186 prepare_commit_graph_one(ctx->r, ctx->odb);
2187
2188 if (ctx->append && ctx->r->objects->commit_graph) {
2189 struct commit_graph *g = ctx->r->objects->commit_graph;
2190 for (i = 0; i < g->num_commits; i++) {
2191 struct object_id oid;
2192 hashcpy(oid.hash, g->chunk_oid_lookup + g->hash_len * i);
2193 oid_array_append(&ctx->oids, &oid);
2194 }
2195 }
2196
2197 if (pack_indexes) {
2198 ctx->order_by_pack = 1;
2199 if ((res = fill_oids_from_packs(ctx, pack_indexes)))
2200 goto cleanup;
2201 }
2202
2203 if (commits) {
2204 if ((res = fill_oids_from_commits(ctx, commits)))
2205 goto cleanup;
2206 }
2207
2208 if (!pack_indexes && !commits) {
2209 ctx->order_by_pack = 1;
2210 fill_oids_from_all_packs(ctx);
2211 }
2212
2213 close_reachable(ctx);
2214
2215 copy_oids_to_commits(ctx);
2216
2217 if (ctx->commits.nr >= GRAPH_EDGE_LAST_MASK) {
2218 error(_("too many commits to write graph"));
2219 res = -1;
2220 goto cleanup;
2221 }
2222
2223 if (!ctx->commits.nr && !replace)
2224 goto cleanup;
2225
2226 if (ctx->split) {
2227 split_graph_merge_strategy(ctx);
2228
2229 if (!replace)
2230 merge_commit_graphs(ctx);
2231 } else
2232 ctx->num_commit_graphs_after = 1;
2233
2234 compute_generation_numbers(ctx);
2235
2236 if (ctx->changed_paths)
2237 compute_bloom_filters(ctx);
2238
2239 res = write_commit_graph_file(ctx);
2240
2241 if (ctx->split)
2242 mark_commit_graphs(ctx);
2243
2244 expire_commit_graphs(ctx);
2245
2246 cleanup:
2247 free(ctx->graph_name);
2248 free(ctx->commits.list);
2249 oid_array_clear(&ctx->oids);
2250
2251 if (ctx->commit_graph_filenames_after) {
2252 for (i = 0; i < ctx->num_commit_graphs_after; i++) {
2253 free(ctx->commit_graph_filenames_after[i]);
2254 free(ctx->commit_graph_hash_after[i]);
2255 }
2256
2257 for (i = 0; i < ctx->num_commit_graphs_before; i++)
2258 free(ctx->commit_graph_filenames_before[i]);
2259
2260 free(ctx->commit_graph_filenames_after);
2261 free(ctx->commit_graph_filenames_before);
2262 free(ctx->commit_graph_hash_after);
2263 }
2264
2265 free(ctx);
2266
2267 return res;
2268 }
2269
2270 #define VERIFY_COMMIT_GRAPH_ERROR_HASH 2
2271 static int verify_commit_graph_error;
2272
2273 static void graph_report(const char *fmt, ...)
2274 {
2275 va_list ap;
2276
2277 verify_commit_graph_error = 1;
2278 va_start(ap, fmt);
2279 vfprintf(stderr, fmt, ap);
2280 fprintf(stderr, "\n");
2281 va_end(ap);
2282 }
2283
2284 #define GENERATION_ZERO_EXISTS 1
2285 #define GENERATION_NUMBER_EXISTS 2
2286
2287 int verify_commit_graph(struct repository *r, struct commit_graph *g, int flags)
2288 {
2289 uint32_t i, cur_fanout_pos = 0;
2290 struct object_id prev_oid, cur_oid, checksum;
2291 int generation_zero = 0;
2292 struct hashfile *f;
2293 int devnull;
2294 struct progress *progress = NULL;
2295 int local_error = 0;
2296
2297 if (!g) {
2298 graph_report("no commit-graph file loaded");
2299 return 1;
2300 }
2301
2302 verify_commit_graph_error = verify_commit_graph_lite(g);
2303 if (verify_commit_graph_error)
2304 return verify_commit_graph_error;
2305
2306 devnull = open("/dev/null", O_WRONLY);
2307 f = hashfd(devnull, NULL);
2308 hashwrite(f, g->data, g->data_len - g->hash_len);
2309 finalize_hashfile(f, checksum.hash, CSUM_CLOSE);
2310 if (!hasheq(checksum.hash, g->data + g->data_len - g->hash_len)) {
2311 graph_report(_("the commit-graph file has incorrect checksum and is likely corrupt"));
2312 verify_commit_graph_error = VERIFY_COMMIT_GRAPH_ERROR_HASH;
2313 }
2314
2315 for (i = 0; i < g->num_commits; i++) {
2316 struct commit *graph_commit;
2317
2318 hashcpy(cur_oid.hash, g->chunk_oid_lookup + g->hash_len * i);
2319
2320 if (i && oidcmp(&prev_oid, &cur_oid) >= 0)
2321 graph_report(_("commit-graph has incorrect OID order: %s then %s"),
2322 oid_to_hex(&prev_oid),
2323 oid_to_hex(&cur_oid));
2324
2325 oidcpy(&prev_oid, &cur_oid);
2326
2327 while (cur_oid.hash[0] > cur_fanout_pos) {
2328 uint32_t fanout_value = get_be32(g->chunk_oid_fanout + cur_fanout_pos);
2329
2330 if (i != fanout_value)
2331 graph_report(_("commit-graph has incorrect fanout value: fanout[%d] = %u != %u"),
2332 cur_fanout_pos, fanout_value, i);
2333 cur_fanout_pos++;
2334 }
2335
2336 graph_commit = lookup_commit(r, &cur_oid);
2337 if (!parse_commit_in_graph_one(r, g, graph_commit))
2338 graph_report(_("failed to parse commit %s from commit-graph"),
2339 oid_to_hex(&cur_oid));
2340 }
2341
2342 while (cur_fanout_pos < 256) {
2343 uint32_t fanout_value = get_be32(g->chunk_oid_fanout + cur_fanout_pos);
2344
2345 if (g->num_commits != fanout_value)
2346 graph_report(_("commit-graph has incorrect fanout value: fanout[%d] = %u != %u"),
2347 cur_fanout_pos, fanout_value, i);
2348
2349 cur_fanout_pos++;
2350 }
2351
2352 if (verify_commit_graph_error & ~VERIFY_COMMIT_GRAPH_ERROR_HASH)
2353 return verify_commit_graph_error;
2354
2355 if (flags & COMMIT_GRAPH_WRITE_PROGRESS)
2356 progress = start_progress(_("Verifying commits in commit graph"),
2357 g->num_commits);
2358
2359 for (i = 0; i < g->num_commits; i++) {
2360 struct commit *graph_commit, *odb_commit;
2361 struct commit_list *graph_parents, *odb_parents;
2362 uint32_t max_generation = 0;
2363 uint32_t generation;
2364
2365 display_progress(progress, i + 1);
2366 hashcpy(cur_oid.hash, g->chunk_oid_lookup + g->hash_len * i);
2367
2368 graph_commit = lookup_commit(r, &cur_oid);
2369 odb_commit = (struct commit *)create_object(r, &cur_oid, alloc_commit_node(r));
2370 if (parse_commit_internal(odb_commit, 0, 0)) {
2371 graph_report(_("failed to parse commit %s from object database for commit-graph"),
2372 oid_to_hex(&cur_oid));
2373 continue;
2374 }
2375
2376 if (!oideq(&get_commit_tree_in_graph_one(r, g, graph_commit)->object.oid,
2377 get_commit_tree_oid(odb_commit)))
2378 graph_report(_("root tree OID for commit %s in commit-graph is %s != %s"),
2379 oid_to_hex(&cur_oid),
2380 oid_to_hex(get_commit_tree_oid(graph_commit)),
2381 oid_to_hex(get_commit_tree_oid(odb_commit)));
2382
2383 graph_parents = graph_commit->parents;
2384 odb_parents = odb_commit->parents;
2385
2386 while (graph_parents) {
2387 if (odb_parents == NULL) {
2388 graph_report(_("commit-graph parent list for commit %s is too long"),
2389 oid_to_hex(&cur_oid));
2390 break;
2391 }
2392
2393 /* parse parent in case it is in a base graph */
2394 parse_commit_in_graph_one(r, g, graph_parents->item);
2395
2396 if (!oideq(&graph_parents->item->object.oid, &odb_parents->item->object.oid))
2397 graph_report(_("commit-graph parent for %s is %s != %s"),
2398 oid_to_hex(&cur_oid),
2399 oid_to_hex(&graph_parents->item->object.oid),
2400 oid_to_hex(&odb_parents->item->object.oid));
2401
2402 generation = commit_graph_generation(graph_parents->item);
2403 if (generation > max_generation)
2404 max_generation = generation;
2405
2406 graph_parents = graph_parents->next;
2407 odb_parents = odb_parents->next;
2408 }
2409
2410 if (odb_parents != NULL)
2411 graph_report(_("commit-graph parent list for commit %s terminates early"),
2412 oid_to_hex(&cur_oid));
2413
2414 if (!commit_graph_generation(graph_commit)) {
2415 if (generation_zero == GENERATION_NUMBER_EXISTS)
2416 graph_report(_("commit-graph has generation number zero for commit %s, but non-zero elsewhere"),
2417 oid_to_hex(&cur_oid));
2418 generation_zero = GENERATION_ZERO_EXISTS;
2419 } else if (generation_zero == GENERATION_ZERO_EXISTS)
2420 graph_report(_("commit-graph has non-zero generation number for commit %s, but zero elsewhere"),
2421 oid_to_hex(&cur_oid));
2422
2423 if (generation_zero == GENERATION_ZERO_EXISTS)
2424 continue;
2425
2426 /*
2427 * If one of our parents has generation GENERATION_NUMBER_MAX, then
2428 * our generation is also GENERATION_NUMBER_MAX. Decrement to avoid
2429 * extra logic in the following condition.
2430 */
2431 if (max_generation == GENERATION_NUMBER_MAX)
2432 max_generation--;
2433
2434 generation = commit_graph_generation(graph_commit);
2435 if (generation != max_generation + 1)
2436 graph_report(_("commit-graph generation for commit %s is %u != %u"),
2437 oid_to_hex(&cur_oid),
2438 generation,
2439 max_generation + 1);
2440
2441 if (graph_commit->date != odb_commit->date)
2442 graph_report(_("commit date for commit %s in commit-graph is %"PRItime" != %"PRItime),
2443 oid_to_hex(&cur_oid),
2444 graph_commit->date,
2445 odb_commit->date);
2446 }
2447 stop_progress(&progress);
2448
2449 local_error = verify_commit_graph_error;
2450
2451 if (!(flags & COMMIT_GRAPH_VERIFY_SHALLOW) && g->base_graph)
2452 local_error |= verify_commit_graph(r, g->base_graph, flags);
2453
2454 return local_error;
2455 }
2456
2457 void free_commit_graph(struct commit_graph *g)
2458 {
2459 if (!g)
2460 return;
2461 if (g->data) {
2462 munmap((void *)g->data, g->data_len);
2463 g->data = NULL;
2464 }
2465 free(g->filename);
2466 free(g->bloom_filter_settings);
2467 free(g);
2468 }
2469
2470 void disable_commit_graph(struct repository *r)
2471 {
2472 r->commit_graph_disabled = 1;
2473 }