From: Junio C Hamano Date: Fri, 1 May 2020 20:39:53 +0000 (-0700) Subject: Merge branch 'gs/commit-graph-path-filter' X-Git-Tag: v2.27.0-rc0~58 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=9b6606f43d55bbf33b9924d16e02e60e1c09660a;p=thirdparty%2Fgit.git Merge branch 'gs/commit-graph-path-filter' Introduce an extension to the commit-graph to make it efficient to check for the paths that were modified at each commit using Bloom filters. * gs/commit-graph-path-filter: bloom: ignore renames when computing changed paths commit-graph: add GIT_TEST_COMMIT_GRAPH_CHANGED_PATHS test flag t4216: add end to end tests for git log with Bloom filters revision.c: add trace2 stats around Bloom filter usage revision.c: use Bloom filters to speed up path based revision walks commit-graph: add --changed-paths option to write subcommand commit-graph: reuse existing Bloom filters during write commit-graph: write Bloom filters to commit graph file commit-graph: examine commits by generation number commit-graph: examine changed-path objects in pack order commit-graph: compute Bloom filters for changed paths diff: halt tree-diff early after max_changes bloom.c: core Bloom filter implementation for changed paths. bloom.c: introduce core Bloom filter constructs bloom.c: add the murmur3 hash implementation commit-graph: define and use MAX_NUM_CHUNKS --- 9b6606f43d55bbf33b9924d16e02e60e1c09660a diff --cc Documentation/git-commit-graph.txt index 46f7f7c573,f4b13c005b..53a650225a --- a/Documentation/git-commit-graph.txt +++ b/Documentation/git-commit-graph.txt @@@ -57,18 -57,16 +57,23 @@@ or `--stdin-packs`. With the `--append` option, include all commits that are present in the existing commit-graph file. + + With the `--changed-paths` option, compute and write information about the + paths changed between a commit and it's first parent. This operation can + take a while on large repositories. 
It provides significant performance gains + for getting history of a directory or a file with `git log -- <path>`. + + -With the `--split` option, write the commit-graph as a chain of multiple -commit-graph files stored in `<dir>/info/commit-graphs`. The new commits -not already in the commit-graph are added in a new "tip" file. This file -is merged with the existing file if the following merge conditions are -met: +With the `--split[=<strategy>]` option, write the commit-graph as a +chain of multiple commit-graph files stored in +`<dir>/info/commit-graphs`. Commit-graph layers are merged based on the +strategy and other splitting options. The new commits not already in the +commit-graph are added in a new "tip" file. This file is merged with the +existing file if the following merge conditions are met: +* If `--split=no-merge` is specified, a merge is never performed, and +the remaining options are ignored. `--split=replace` overwrites the +existing chain with a new one. A bare `--split` defers to the remaining +options. (Note that merging a chain of commit graphs replaces the +existing chain with a length-1 chain where the first and only +incremental holds the entire graph). + * If `--size-multiple=<X>` is not specified, let `X` equal 2. 
If the new tip file would have `N` commits and the previous tip has `M` commits and diff --cc builtin/commit-graph.c index 9ceedbba07,59009837dc..15fe60317c --- a/builtin/commit-graph.c +++ b/builtin/commit-graph.c @@@ -9,9 -9,7 +9,9 @@@ static char const * const builtin_commit_graph_usage[] = { N_("git commit-graph verify [--object-dir ] [--shallow] [--[no-]progress]"), - N_("git commit-graph write [--object-dir ] [--append|--split] [--reachable|--stdin-packs|--stdin-commits] [--changed-paths] [--[no-]progress] "), + N_("git commit-graph write [--object-dir ] [--append] " + "[--split[=]] [--reachable|--stdin-packs|--stdin-commits] " - "[--[no-]progress] "), ++ "[--changed-paths] [--[no-]progress] "), NULL }; @@@ -21,9 -19,7 +21,9 @@@ static const char * const builtin_commi }; static const char * const builtin_commit_graph_write_usage[] = { - N_("git commit-graph write [--object-dir ] [--append|--split] [--reachable|--stdin-packs|--stdin-commits] [--changed-paths] [--[no-]progress] "), + N_("git commit-graph write [--object-dir ] [--append] " + "[--split[=]] [--reachable|--stdin-packs|--stdin-commits] " - "[--[no-]progress] "), ++ "[--changed-paths] [--[no-]progress] "), NULL }; @@@ -158,11 -136,11 +159,13 @@@ static int graph_write(int argc, const N_("start walk at commits listed by stdin")), OPT_BOOL(0, "append", &opts.append, N_("include all commits already in the commit-graph file")), + OPT_BOOL(0, "changed-paths", &opts.enable_changed_paths, + N_("enable computation for changed paths")), OPT_BOOL(0, "progress", &opts.progress, N_("force progress reporting")), - OPT_BOOL(0, "split", &opts.split, - N_("allow writing an incremental commit-graph file")), + OPT_CALLBACK_F(0, "split", &split_opts.flags, NULL, + N_("allow writing an incremental commit-graph file"), + PARSE_OPT_OPTARG | PARSE_OPT_NONEG, + write_option_parse_split), OPT_INTEGER(0, "max-commits", &split_opts.max_commits, N_("maximum number of commits in a non-base split commit-graph")), OPT_INTEGER(0, 
"size-multiple", &split_opts.size_multiple, diff --cc commit-graph.c index e9b3c5d561,77668629e2..7eb4f22f00 --- a/commit-graph.c +++ b/commit-graph.c @@@ -1132,13 -1273,45 +1272,45 @@@ static void compute_generation_numbers( stop_progress(&ctx->progress); } + static void compute_bloom_filters(struct write_commit_graph_context *ctx) + { + int i; + struct progress *progress = NULL; + struct commit **sorted_commits; + + init_bloom_filters(); + + if (ctx->report_progress) + progress = start_delayed_progress( + _("Computing commit changed paths Bloom filters"), + ctx->commits.nr); + + ALLOC_ARRAY(sorted_commits, ctx->commits.nr); + COPY_ARRAY(sorted_commits, ctx->commits.list, ctx->commits.nr); + + if (ctx->order_by_pack) + QSORT(sorted_commits, ctx->commits.nr, commit_pos_cmp); + else + QSORT(sorted_commits, ctx->commits.nr, commit_gen_cmp); + + for (i = 0; i < ctx->commits.nr; i++) { + struct commit *c = sorted_commits[i]; + struct bloom_filter *filter = get_bloom_filter(ctx->r, c, 1); + ctx->total_bloom_filter_data_size += sizeof(unsigned char) * filter->len; + display_progress(progress, i + 1); + } + + free(sorted_commits); + stop_progress(&progress); + } + -static int add_ref_to_list(const char *refname, - const struct object_id *oid, - int flags, void *cb_data) +static int add_ref_to_set(const char *refname, + const struct object_id *oid, + int flags, void *cb_data) { - struct string_list *list = (struct string_list *)cb_data; + struct oidset *commits = (struct oidset *)cb_data; - string_list_append(list, oid_to_hex(oid)); + oidset_insert(commits, oid); return 0; } @@@ -1865,8 -2028,10 +2060,10 @@@ int write_commit_graph(struct object_di goto cleanup; } - if (!pack_indexes && !commits) - if (!pack_indexes && !commit_hex) { ++ if (!pack_indexes && !commits) { + ctx->order_by_pack = 1; fill_oids_from_all_packs(ctx); + } close_reachable(ctx); @@@ -2121,11 -2288,13 +2321,12 @@@ void free_commit_graph(struct commit_gr { if (!g) return; - if (g->graph_fd >= 0) { + if 
(g->data) { munmap((void *)g->data, g->data_len); g->data = NULL; - close(g->graph_fd); } free(g->filename); + free(g->bloom_filter_settings); free(g); } diff --cc commit-graph.h index 1254eae948,8655d064c1..183a15ed62 --- a/commit-graph.h +++ b/commit-graph.h @@@ -77,15 -85,10 +83,16 @@@ enum commit_graph_write_flags COMMIT_GRAPH_WRITE_PROGRESS = (1 << 1), COMMIT_GRAPH_WRITE_SPLIT = (1 << 2), /* Make sure that each OID in the input is a valid commit OID. */ - COMMIT_GRAPH_WRITE_CHECK_OIDS = (1 << 3) + COMMIT_GRAPH_WRITE_CHECK_OIDS = (1 << 3), + COMMIT_GRAPH_WRITE_BLOOM_FILTERS = (1 << 4), }; +enum commit_graph_split_flags { + COMMIT_GRAPH_SPLIT_UNSPECIFIED = 0, + COMMIT_GRAPH_SPLIT_MERGE_PROHIBITED = 1, + COMMIT_GRAPH_SPLIT_REPLACE = 2 +}; + struct split_commit_graph_opts { int size_multiple; int max_commits;