Updates to the changed-paths bloom filter.
* ds/commit-graph-bloom-updates:
commit-graph: check all leading directories in changed path Bloom filters
revision: empty pathspecs should not use Bloom filters
revision.c: fix whitespace
commit-graph: check chunk sizes after writing
commit-graph: simplify chunk writes into loop
commit-graph: unify the signatures of all write_graph_chunk_*() functions
commit-graph: persist existence of changed-paths
bloom: fix logic in get_bloom_filter()
commit-graph: change test to die on parse, not load
commit-graph: place bloom_settings in context
existing commit-graph file.
+
With the `--changed-paths` option, compute and write information about the
-paths changed between a commit and it's first parent. This operation can
+paths changed between a commit and its first parent. This operation can
take a while on large repositories. It provides significant performance gains
- for getting history of a directory or a file with `git log -- <path>`.
+ for getting history of a directory or a file with `git log -- <path>`. If
+ this option is given, future commit-graph writes will automatically assume
+ that this option was intended. Use `--no-changed-paths` to stop storing this
+ data.
+
-With the `--split` option, write the commit-graph as a chain of multiple
-commit-graph files stored in `<dir>/info/commit-graphs`. The new commits
-not already in the commit-graph are added in a new "tip" file. This file
-is merged with the existing file if the following merge conditions are
-met:
+With the `--split[=<strategy>]` option, write the commit-graph as a
+chain of multiple commit-graph files stored in
+`<dir>/info/commit-graphs`. Commit-graph layers are merged based on the
+strategy and other splitting options. The new commits not already in the
+commit-graph are added in a new "tip" file. This file is merged with the
+existing file if the following merge conditions are met:
++
+* If `--split=no-merge` is specified, a merge is never performed, and
+the remaining options are ignored. `--split=replace` overwrites the
+existing chain with a new one. A bare `--split` defers to the remaining
+options. (Note that merging a chain of commit graphs replaces the
+existing chain with a length-1 chain where the first and only
+incremental holds the entire graph).
+
* If `--size-multiple=<X>` is not specified, let `X` equal 2. If the new
tip file would have `N` commits and the previous tip has `M` commits and
if (!filter->data) {
load_commit_graph_info(r, c);
- if (c->graph_pos != COMMIT_NOT_FROM_GRAPH &&
- r->objects->commit_graph->chunk_bloom_indexes)
+ if (commit_graph_position(c) != COMMIT_NOT_FROM_GRAPH &&
- r->objects->commit_graph->chunk_bloom_indexes) {
- if (load_bloom_filter_from_graph(r->objects->commit_graph, filter, c))
- return filter;
- else
- return NULL;
- }
++ r->objects->commit_graph->chunk_bloom_indexes)
+ load_bloom_filter_from_graph(r->objects->commit_graph, filter, c);
}
- if (filter->data || !compute_if_not_present)
+ if (filter->data)
return filter;
+ if (!compute_if_not_present)
+ return NULL;
repo_diff_setup(r, &diffopt);
diffopt.flags.recursive = 1;
#include "progress.h"
#include "bloom.h"
#include "commit-slab.h"
+#include "shallow.h"
+ #include "json-writer.h"
+ #include "trace2.h"
void git_test_write_commit_graph_or_die(void)
{
while (list < last) {
struct bloom_filter *filter = get_bloom_filter(ctx->r, *list, 0);
- cur_pos += filter->len;
+ size_t len = filter ? filter->len : 0;
+ cur_pos += len;
- display_progress(progress, ++i);
+ display_progress(ctx->progress, ++ctx->progress_cnt);
hashwrite_be32(f, cur_pos);
list++;
}
- stop_progress(&progress);
+
+ return 0;
+ }
+
+ static void trace2_bloom_filter_settings(struct write_commit_graph_context *ctx)
+ {
+ struct json_writer jw = JSON_WRITER_INIT;
+
+ jw_object_begin(&jw, 0);
+ jw_object_intmax(&jw, "hash_version", ctx->bloom_settings->hash_version);
+ jw_object_intmax(&jw, "num_hashes", ctx->bloom_settings->num_hashes);
+ jw_object_intmax(&jw, "bits_per_entry", ctx->bloom_settings->bits_per_entry);
+ jw_end(&jw);
+
+ trace2_data_json("bloom", ctx->r, "settings", &jw);
+
+ jw_release(&jw);
}
- static void write_graph_chunk_bloom_data(struct hashfile *f,
- struct write_commit_graph_context *ctx,
- const struct bloom_filter_settings *settings)
+ static int write_graph_chunk_bloom_data(struct hashfile *f,
+ struct write_commit_graph_context *ctx)
{
struct commit **list = ctx->commits.list;
struct commit **last = ctx->commits.list + ctx->commits.nr;
- struct progress *progress = NULL;
- int i = 0;
- hashwrite_be32(f, settings->hash_version);
- hashwrite_be32(f, settings->num_hashes);
- hashwrite_be32(f, settings->bits_per_entry);
+ trace2_bloom_filter_settings(ctx);
+
- if (ctx->report_progress)
- progress = start_delayed_progress(
- _("Writing changed paths Bloom filters data"),
- ctx->commits.nr);
-
+ hashwrite_be32(f, ctx->bloom_settings->hash_version);
+ hashwrite_be32(f, ctx->bloom_settings->num_hashes);
+ hashwrite_be32(f, ctx->bloom_settings->bits_per_entry);
while (list < last) {
struct bloom_filter *filter = get_bloom_filter(ctx->r, *list, 0);
- display_progress(progress, ++i);
+ size_t len = filter ? filter->len : 0;
- hashwrite(f, filter->data, filter->len * sizeof(unsigned char));
+
+ display_progress(ctx->progress, ++ctx->progress_cnt);
+ if (len)
+ hashwrite(f, filter->data, len * sizeof(unsigned char));
list++;
}
- stop_progress(&progress);
+
+ return 0;
}
static int oid_compare(const void *_a, const void *_b)
ctx->append = flags & COMMIT_GRAPH_WRITE_APPEND ? 1 : 0;
ctx->report_progress = flags & COMMIT_GRAPH_WRITE_PROGRESS ? 1 : 0;
ctx->split = flags & COMMIT_GRAPH_WRITE_SPLIT ? 1 : 0;
- ctx->check_oids = flags & COMMIT_GRAPH_WRITE_CHECK_OIDS ? 1 : 0;
ctx->split_opts = split_opts;
- ctx->changed_paths = flags & COMMIT_GRAPH_WRITE_BLOOM_FILTERS ? 1 : 0;
ctx->total_bloom_filter_data_size = 0;
+ if (flags & COMMIT_GRAPH_WRITE_BLOOM_FILTERS)
+ ctx->changed_paths = 1;
+ if (!(flags & COMMIT_GRAPH_NO_WRITE_BLOOM_FILTERS)) {
+ struct commit_graph *g;
+ prepare_commit_graph_one(ctx->r, ctx->odb);
+
+ g = ctx->r->objects->commit_graph;
+
+ /* We have changed-paths already. Keep them in the next graph */
+ if (g && g->chunk_bloom_data) {
+ ctx->changed_paths = 1;
+ ctx->bloom_settings = g->bloom_filter_settings;
+ }
+ }
+
if (ctx->split) {
struct commit_graph *g;
prepare_commit_graph(ctx->r);
#include "git-compat-util.h"
#include "object-store.h"
+#include "oidset.h"
#define GIT_TEST_COMMIT_GRAPH "GIT_TEST_COMMIT_GRAPH"
- #define GIT_TEST_COMMIT_GRAPH_DIE_ON_LOAD "GIT_TEST_COMMIT_GRAPH_DIE_ON_LOAD"
+ #define GIT_TEST_COMMIT_GRAPH_DIE_ON_PARSE "GIT_TEST_COMMIT_GRAPH_DIE_ON_PARSE"
#define GIT_TEST_COMMIT_GRAPH_CHANGED_PATHS "GIT_TEST_COMMIT_GRAPH_CHANGED_PATHS"
/*
COMMIT_GRAPH_WRITE_APPEND = (1 << 0),
COMMIT_GRAPH_WRITE_PROGRESS = (1 << 1),
COMMIT_GRAPH_WRITE_SPLIT = (1 << 2),
- /* Make sure that each OID in the input is a valid commit OID. */
- COMMIT_GRAPH_WRITE_CHECK_OIDS = (1 << 3),
- COMMIT_GRAPH_WRITE_BLOOM_FILTERS = (1 << 4),
- COMMIT_GRAPH_NO_WRITE_BLOOM_FILTERS = (1 << 5),
+ COMMIT_GRAPH_WRITE_BLOOM_FILTERS = (1 << 3),
++ COMMIT_GRAPH_NO_WRITE_BLOOM_FILTERS = (1 << 4),
+};
+
+enum commit_graph_split_flags {
+ COMMIT_GRAPH_SPLIT_UNSPECIFIED = 0,
+ COMMIT_GRAPH_SPLIT_MERGE_PROHIBITED = 1,
+ COMMIT_GRAPH_SPLIT_REPLACE = 2
};
struct split_commit_graph_opts {
test_bloom_filters_used_when_some_filters_are_missing "-- A/B"
'
-test_done
+ test_expect_success 'persist filter settings' '
+ test_when_finished rm -rf .git/objects/info/commit-graph* &&
+ rm -rf .git/objects/info/commit-graph* &&
+ GIT_TRACE2_EVENT="$(pwd)/trace2.txt" \
+ GIT_TRACE2_EVENT_NESTING=5 \
+ GIT_TEST_BLOOM_SETTINGS_NUM_HASHES=9 \
+ GIT_TEST_BLOOM_SETTINGS_BITS_PER_ENTRY=15 \
+ git commit-graph write --reachable --changed-paths &&
+ grep "{\"hash_version\":1,\"num_hashes\":9,\"bits_per_entry\":15}" trace2.txt &&
+ GIT_TRACE2_EVENT="$(pwd)/trace2-auto.txt" \
+ GIT_TRACE2_EVENT_NESTING=5 \
+ git commit-graph write --reachable --changed-paths &&
+ grep "{\"hash_version\":1,\"num_hashes\":9,\"bits_per_entry\":15}" trace2-auto.txt
+ '
+
+ test_expect_success 'correctly report changes over limit' '
+ git init 513changes &&
+ (
+ cd 513changes &&
+ for i in $(test_seq 1 513)
+ do
+ echo $i >file$i.txt || return 1
+ done &&
+ git add . &&
+ git commit -m "files" &&
+ git commit-graph write --reachable --changed-paths &&
+ for i in $(test_seq 1 513)
+ do
+ git -c core.commitGraph=false log -- file$i.txt >expect &&
+ git log -- file$i.txt >actual &&
+ test_cmp expect actual || return 1
+ done
+ )
+ '
+
+test_done
cp $objdir/info/commit-graph commit-graph-pre-write-test
fi &&
git status --short &&
- GIT_TEST_COMMIT_GRAPH_DIE_ON_LOAD=true git commit-graph write &&
+ GIT_TEST_COMMIT_GRAPH_DIE_ON_PARSE=true git commit-graph write &&
+ chmod u+w $objdir/info/commit-graph &&
git commit-graph verify
}