From: Junio C Hamano Date: Thu, 30 Jul 2020 20:20:31 +0000 (-0700) Subject: Merge branch 'ds/commit-graph-bloom-updates' into master X-Git-Tag: v2.29.0-rc0~219 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=70cdbbe3a7028b9c1eb7542a0aa038b971d3fb27;p=thirdparty%2Fgit.git Merge branch 'ds/commit-graph-bloom-updates' into master Updates to the changed-paths bloom filter. * ds/commit-graph-bloom-updates: commit-graph: check all leading directories in changed path Bloom filters revision: empty pathspecs should not use Bloom filters revision.c: fix whitespace commit-graph: check chunk sizes after writing commit-graph: simplify chunk writes into loop commit-graph: unify the signatures of all write_graph_chunk_*() functions commit-graph: persist existence of changed-paths bloom: fix logic in get_bloom_filter() commit-graph: change test to die on parse, not load commit-graph: place bloom_settings in context --- 70cdbbe3a7028b9c1eb7542a0aa038b971d3fb27 diff --cc Documentation/git-commit-graph.txt index 8ca1764d3d,369b222b08..17405c73a9 --- a/Documentation/git-commit-graph.txt +++ b/Documentation/git-commit-graph.txt @@@ -60,23 -58,18 +60,26 @@@ With the `--append` option, include al existing commit-graph file. + With the `--changed-paths` option, compute and write information about the -paths changed between a commit and it's first parent. This operation can +paths changed between a commit and its first parent. This operation can take a while on large repositories. It provides significant performance gains - for getting history of a directory or a file with `git log -- `. + for getting history of a directory or a file with `git log -- `. If + this option is given, future commit-graph writes will automatically assume + that this option was intended. Use `--no-changed-paths` to stop storing this + data. + -With the `--split` option, write the commit-graph as a chain of multiple -commit-graph files stored in `/info/commit-graphs`. The new commits -not already in the commit-graph are added in a new "tip" file. This file -is merged with the existing file if the following merge conditions are -met: +With the `--split[=]` option, write the commit-graph as a +chain of multiple commit-graph files stored in +`/info/commit-graphs`. Commit-graph layers are merged based on the +strategy and other splitting options. The new commits not already in the +commit-graph are added in a new "tip" file. This file is merged with the +existing file if the following merge conditions are met: ++ +* If `--split=no-merge` is specified, a merge is never performed, and +the remaining options are ignored. `--split=replace` overwrites the +existing chain with a new one. A bare `--split` defers to the remaining +options. (Note that merging a chain of commit graphs replaces the +existing chain with a length-1 chain where the first and only +incremental holds the entire graph). + * If `--size-multiple=` is not specified, let `X` equal 2. If the new tip file would have `N` commits and the previous tip has `M` commits and diff --cc bloom.c index 6a7f2f2bdc,2af5389795..1a573226e7 --- a/bloom.c +++ b/bloom.c @@@ -194,17 -193,15 +194,15 @@@ struct bloom_filter *get_bloom_filter(s if (!filter->data) { load_commit_graph_info(r, c); - if (c->graph_pos != COMMIT_NOT_FROM_GRAPH && - r->objects->commit_graph->chunk_bloom_indexes) + if (commit_graph_position(c) != COMMIT_NOT_FROM_GRAPH && - r->objects->commit_graph->chunk_bloom_indexes) { - if (load_bloom_filter_from_graph(r->objects->commit_graph, filter, c)) - return filter; - else - return NULL; - } ++ r->objects->commit_graph->chunk_bloom_indexes) + load_bloom_filter_from_graph(r->objects->commit_graph, filter, c); } - if (filter->data || !compute_if_not_present) + if (filter->data) return filter; + if (!compute_if_not_present) + return NULL; repo_diff_setup(r, &diffopt); diffopt.flags.recursive = 1; diff --cc commit-graph.c index 1f37097dff,6752916c1a..e51c91dd5b --- a/commit-graph.c +++ b/commit-graph.c @@@ -16,7 -16,8 +16,9 @@@ #include "progress.h" #include "bloom.h" #include "commit-slab.h" +#include "shallow.h" + #include "json-writer.h" + #include "trace2.h" void git_test_write_commit_graph_or_die(void) { @@@ -1145,30 -1101,70 +1160,54 @@@ static int write_graph_chunk_bloom_inde while (list < last) { struct bloom_filter *filter = get_bloom_filter(ctx->r, *list, 0); - cur_pos += filter->len; + size_t len = filter ? filter->len : 0; + cur_pos += len; - display_progress(progress, ++i); + display_progress(ctx->progress, ++ctx->progress_cnt); hashwrite_be32(f, cur_pos); list++; } + - stop_progress(&progress); + return 0; + } + + static void trace2_bloom_filter_settings(struct write_commit_graph_context *ctx) + { + struct json_writer jw = JSON_WRITER_INIT; + + jw_object_begin(&jw, 0); + jw_object_intmax(&jw, "hash_version", ctx->bloom_settings->hash_version); + jw_object_intmax(&jw, "num_hashes", ctx->bloom_settings->num_hashes); + jw_object_intmax(&jw, "bits_per_entry", ctx->bloom_settings->bits_per_entry); + jw_end(&jw); + + trace2_data_json("bloom", ctx->r, "settings", &jw); + + jw_release(&jw); } - static void write_graph_chunk_bloom_data(struct hashfile *f, - struct write_commit_graph_context *ctx, - const struct bloom_filter_settings *settings) + static int write_graph_chunk_bloom_data(struct hashfile *f, + struct write_commit_graph_context *ctx) { struct commit **list = ctx->commits.list; struct commit **last = ctx->commits.list + ctx->commits.nr; - struct progress *progress = NULL; - int i = 0; - hashwrite_be32(f, settings->hash_version); - hashwrite_be32(f, settings->num_hashes); - hashwrite_be32(f, settings->bits_per_entry); + trace2_bloom_filter_settings(ctx); + - if (ctx->report_progress) - progress = start_delayed_progress( - _("Writing changed paths Bloom filters data"), - ctx->commits.nr); - + hashwrite_be32(f, ctx->bloom_settings->hash_version); + hashwrite_be32(f, ctx->bloom_settings->num_hashes); + hashwrite_be32(f, ctx->bloom_settings->bits_per_entry); while (list < last) { struct bloom_filter *filter = get_bloom_filter(ctx->r, *list, 0); + size_t len = filter ? filter->len : 0; - display_progress(progress, ++i); + + display_progress(ctx->progress, ++ctx->progress_cnt); - hashwrite(f, filter->data, filter->len * sizeof(unsigned char)); + if (len) + hashwrite(f, filter->data, len * sizeof(unsigned char)); list++; } + - stop_progress(&progress); + return 0; } static int oid_compare(const void *_a, const void *_b) @@@ -2045,10 -2018,25 +2103,24 @@@ int write_commit_graph(struct object_di ctx->append = flags & COMMIT_GRAPH_WRITE_APPEND ? 1 : 0; ctx->report_progress = flags & COMMIT_GRAPH_WRITE_PROGRESS ? 1 : 0; ctx->split = flags & COMMIT_GRAPH_WRITE_SPLIT ? 1 : 0; - ctx->check_oids = flags & COMMIT_GRAPH_WRITE_CHECK_OIDS ? 1 : 0; ctx->split_opts = split_opts; - ctx->changed_paths = flags & COMMIT_GRAPH_WRITE_BLOOM_FILTERS ? 1 : 0; ctx->total_bloom_filter_data_size = 0; + if (flags & COMMIT_GRAPH_WRITE_BLOOM_FILTERS) + ctx->changed_paths = 1; + if (!(flags & COMMIT_GRAPH_NO_WRITE_BLOOM_FILTERS)) { + struct commit_graph *g; + prepare_commit_graph_one(ctx->r, ctx->odb); + + g = ctx->r->objects->commit_graph; + + /* We have changed-paths already. Keep them in the next graph */ + if (g && g->chunk_bloom_data) { + ctx->changed_paths = 1; + ctx->bloom_settings = g->bloom_filter_settings; + } + } + if (ctx->split) { struct commit_graph *g; prepare_commit_graph(ctx->r); diff --cc commit-graph.h index cf8d3c9647,45b1e5bca3..09a97030dc --- a/commit-graph.h +++ b/commit-graph.h @@@ -3,10 -3,9 +3,10 @@@ #include "git-compat-util.h" #include "object-store.h" +#include "oidset.h" #define GIT_TEST_COMMIT_GRAPH "GIT_TEST_COMMIT_GRAPH" - #define GIT_TEST_COMMIT_GRAPH_DIE_ON_LOAD "GIT_TEST_COMMIT_GRAPH_DIE_ON_LOAD" + #define GIT_TEST_COMMIT_GRAPH_DIE_ON_PARSE "GIT_TEST_COMMIT_GRAPH_DIE_ON_PARSE" #define GIT_TEST_COMMIT_GRAPH_CHANGED_PATHS "GIT_TEST_COMMIT_GRAPH_CHANGED_PATHS" /* @@@ -91,13 -93,10 +91,14 @@@ enum commit_graph_write_flags COMMIT_GRAPH_WRITE_APPEND = (1 << 0), COMMIT_GRAPH_WRITE_PROGRESS = (1 << 1), COMMIT_GRAPH_WRITE_SPLIT = (1 << 2), - /* Make sure that each OID in the input is a valid commit OID. */ - COMMIT_GRAPH_WRITE_CHECK_OIDS = (1 << 3), - COMMIT_GRAPH_WRITE_BLOOM_FILTERS = (1 << 4), - COMMIT_GRAPH_NO_WRITE_BLOOM_FILTERS = (1 << 5), + COMMIT_GRAPH_WRITE_BLOOM_FILTERS = (1 << 3), ++ COMMIT_GRAPH_NO_WRITE_BLOOM_FILTERS = (1 << 4), +}; + +enum commit_graph_split_flags { + COMMIT_GRAPH_SPLIT_UNSPECIFIED = 0, + COMMIT_GRAPH_SPLIT_MERGE_PROHIBITED = 1, + COMMIT_GRAPH_SPLIT_REPLACE = 2 }; struct split_commit_graph_opts { diff --cc t/t4216-log-bloom.sh index c855bcd3e7,4892364e74..c21cc160f3 --- a/t/t4216-log-bloom.sh +++ b/t/t4216-log-bloom.sh @@@ -152,4 -156,39 +156,39 @@@ test_expect_success 'Use Bloom filters test_bloom_filters_used_when_some_filters_are_missing "-- A/B" ' + test_expect_success 'persist filter settings' ' + test_when_finished rm -rf .git/objects/info/commit-graph* && + rm -rf .git/objects/info/commit-graph* && + GIT_TRACE2_EVENT="$(pwd)/trace2.txt" \ + GIT_TRACE2_EVENT_NESTING=5 \ + GIT_TEST_BLOOM_SETTINGS_NUM_HASHES=9 \ + GIT_TEST_BLOOM_SETTINGS_BITS_PER_ENTRY=15 \ + git commit-graph write --reachable --changed-paths && + grep "{\"hash_version\":1,\"num_hashes\":9,\"bits_per_entry\":15}" trace2.txt && + GIT_TRACE2_EVENT="$(pwd)/trace2-auto.txt" \ + GIT_TRACE2_EVENT_NESTING=5 \ + git commit-graph write --reachable --changed-paths && + grep "{\"hash_version\":1,\"num_hashes\":9,\"bits_per_entry\":15}" trace2-auto.txt + ' + + test_expect_success 'correctly report changes over limit' ' + git init 513changes && + ( + cd 513changes && + for i in $(test_seq 1 513) + do + echo $i >file$i.txt || return 1 + done && + git add . && + git commit -m "files" && + git commit-graph write --reachable --changed-paths && + for i in $(test_seq 1 513) + do + git -c core.commitGraph=false log -- file$i.txt >expect && + git log -- file$i.txt >actual && + test_cmp expect actual || return 1 + done + ) + ' + -test_done +test_done diff --cc t/t5318-commit-graph.sh index 50d5f0849f,5ec01abdaa..2804b0dd45 --- a/t/t5318-commit-graph.sh +++ b/t/t5318-commit-graph.sh @@@ -476,8 -436,7 +476,8 @@@ corrupt_graph_verify() cp $objdir/info/commit-graph commit-graph-pre-write-test fi && git status --short && - GIT_TEST_COMMIT_GRAPH_DIE_ON_LOAD=true git commit-graph write && + GIT_TEST_COMMIT_GRAPH_DIE_ON_PARSE=true git commit-graph write && + chmod u+w $objdir/info/commit-graph && git commit-graph verify }