]> git.ipfire.org Git - thirdparty/git.git/commitdiff
Merge branch 'ds/commit-graph-bloom-updates' into master
authorJunio C Hamano <gitster@pobox.com>
Thu, 30 Jul 2020 20:20:31 +0000 (13:20 -0700)
committerJunio C Hamano <gitster@pobox.com>
Thu, 30 Jul 2020 20:20:31 +0000 (13:20 -0700)
Updates to the changed-paths bloom filter.

* ds/commit-graph-bloom-updates:
  commit-graph: check all leading directories in changed path Bloom filters
  revision: empty pathspecs should not use Bloom filters
  revision.c: fix whitespace
  commit-graph: check chunk sizes after writing
  commit-graph: simplify chunk writes into loop
  commit-graph: unify the signatures of all write_graph_chunk_*() functions
  commit-graph: persist existence of changed-paths
  bloom: fix logic in get_bloom_filter()
  commit-graph: change test to die on parse, not load
  commit-graph: place bloom_settings in context

1  2 
Documentation/git-commit-graph.txt
bloom.c
builtin/commit-graph.c
commit-graph.c
commit-graph.h
revision.c
revision.h
t/t4216-log-bloom.sh
t/t5318-commit-graph.sh

index 8ca1764d3dcab4f37abe9f5e92dddb0d822b47f4,369b222b08bc732b9326734c537ec5e50743d9fe..17405c73a98e8a80bf0b45b2a730aee2693f8614
@@@ -60,23 -58,18 +60,26 @@@ With the `--append` option, include al
  existing commit-graph file.
  +
  With the `--changed-paths` option, compute and write information about the
 -paths changed between a commit and it's first parent. This operation can
 +paths changed between a commit and its first parent. This operation can
  take a while on large repositories. It provides significant performance gains
- for getting history of a directory or a file with `git log -- <path>`.
+ for getting history of a directory or a file with `git log -- <path>`. If
+ this option is given, future commit-graph writes will automatically assume
+ that this option was intended. Use `--no-changed-paths` to stop storing this
+ data.
  +
 -With the `--split` option, write the commit-graph as a chain of multiple
 -commit-graph files stored in `<dir>/info/commit-graphs`. The new commits
 -not already in the commit-graph are added in a new "tip" file. This file
 -is merged with the existing file if the following merge conditions are
 -met:
 +With the `--split[=<strategy>]` option, write the commit-graph as a
 +chain of multiple commit-graph files stored in
 +`<dir>/info/commit-graphs`. Commit-graph layers are merged based on the
 +strategy and other splitting options. The new commits not already in the
 +commit-graph are added in a new "tip" file. This file is merged with the
 +existing file if the following merge conditions are met:
 ++
 +* If `--split=no-merge` is specified, a merge is never performed, and
 +the remaining options are ignored. `--split=replace` overwrites the
 +existing chain with a new one. A bare `--split` defers to the remaining
 +options. (Note that merging a chain of commit graphs replaces the
 +existing chain with a length-1 chain where the first and only
 +incremental holds the entire graph).
  +
  * If `--size-multiple=<X>` is not specified, let `X` equal 2. If the new
  tip file would have `N` commits and the previous tip has `M` commits and
diff --cc bloom.c
index 6a7f2f2bdc13031f2861a5cd8c8417bafaed503f,2af5389795cc6e3d68b50538259611b53bd7fd3a..1a573226e70e571b49a7af29d49740768b1465af
+++ b/bloom.c
@@@ -194,17 -193,15 +194,15 @@@ struct bloom_filter *get_bloom_filter(s
  
        if (!filter->data) {
                load_commit_graph_info(r, c);
 -              if (c->graph_pos != COMMIT_NOT_FROM_GRAPH &&
 -                  r->objects->commit_graph->chunk_bloom_indexes)
 +              if (commit_graph_position(c) != COMMIT_NOT_FROM_GRAPH &&
-                       r->objects->commit_graph->chunk_bloom_indexes) {
-                       if (load_bloom_filter_from_graph(r->objects->commit_graph, filter, c))
-                               return filter;
-                       else
-                               return NULL;
-               }
++                      r->objects->commit_graph->chunk_bloom_indexes)
+                       load_bloom_filter_from_graph(r->objects->commit_graph, filter, c);
        }
  
-       if (filter->data || !compute_if_not_present)
+       if (filter->data)
                return filter;
+       if (!compute_if_not_present)
+               return NULL;
  
        repo_diff_setup(r, &diffopt);
        diffopt.flags.recursive = 1;
Simple merge
diff --cc commit-graph.c
index 1f37097dfff0033b50a68b0eaafc1dee9cfc1816,6752916c1acda44dcd8113b3f5e3f1078d960e9d..e51c91dd5b0af4e4946091e5ca7f06707b3a0c5b
@@@ -16,7 -16,8 +16,9 @@@
  #include "progress.h"
  #include "bloom.h"
  #include "commit-slab.h"
 +#include "shallow.h"
+ #include "json-writer.h"
+ #include "trace2.h"
  
  void git_test_write_commit_graph_or_die(void)
  {
@@@ -1145,30 -1101,70 +1160,54 @@@ static int write_graph_chunk_bloom_inde
  
        while (list < last) {
                struct bloom_filter *filter = get_bloom_filter(ctx->r, *list, 0);
-               cur_pos += filter->len;
+               size_t len = filter ? filter->len : 0;
+               cur_pos += len;
 -              display_progress(progress, ++i);
 +              display_progress(ctx->progress, ++ctx->progress_cnt);
                hashwrite_be32(f, cur_pos);
                list++;
        }
 -      stop_progress(&progress);
+       return 0;
+ }
+ static void trace2_bloom_filter_settings(struct write_commit_graph_context *ctx)
+ {
+       struct json_writer jw = JSON_WRITER_INIT;
+       jw_object_begin(&jw, 0);
+       jw_object_intmax(&jw, "hash_version", ctx->bloom_settings->hash_version);
+       jw_object_intmax(&jw, "num_hashes", ctx->bloom_settings->num_hashes);
+       jw_object_intmax(&jw, "bits_per_entry", ctx->bloom_settings->bits_per_entry);
+       jw_end(&jw);
+       trace2_data_json("bloom", ctx->r, "settings", &jw);
+       jw_release(&jw);
  }
  
- static void write_graph_chunk_bloom_data(struct hashfile *f,
-                                        struct write_commit_graph_context *ctx,
-                                        const struct bloom_filter_settings *settings)
+ static int write_graph_chunk_bloom_data(struct hashfile *f,
+                                       struct write_commit_graph_context *ctx)
  {
        struct commit **list = ctx->commits.list;
        struct commit **last = ctx->commits.list + ctx->commits.nr;
 -      struct progress *progress = NULL;
 -      int i = 0;
  
-       hashwrite_be32(f, settings->hash_version);
-       hashwrite_be32(f, settings->num_hashes);
-       hashwrite_be32(f, settings->bits_per_entry);
+       trace2_bloom_filter_settings(ctx);
 -      if (ctx->report_progress)
 -              progress = start_delayed_progress(
 -                      _("Writing changed paths Bloom filters data"),
 -                      ctx->commits.nr);
 -
+       hashwrite_be32(f, ctx->bloom_settings->hash_version);
+       hashwrite_be32(f, ctx->bloom_settings->num_hashes);
+       hashwrite_be32(f, ctx->bloom_settings->bits_per_entry);
  
        while (list < last) {
                struct bloom_filter *filter = get_bloom_filter(ctx->r, *list, 0);
 -              display_progress(progress, ++i);
+               size_t len = filter ? filter->len : 0;
-               hashwrite(f, filter->data, filter->len * sizeof(unsigned char));
 +              display_progress(ctx->progress, ++ctx->progress_cnt);
+               if (len)
+                       hashwrite(f, filter->data, len * sizeof(unsigned char));
                list++;
        }
 -      stop_progress(&progress);
+       return 0;
  }
  
  static int oid_compare(const void *_a, const void *_b)
@@@ -2045,10 -2018,25 +2103,24 @@@ int write_commit_graph(struct object_di
        ctx->append = flags & COMMIT_GRAPH_WRITE_APPEND ? 1 : 0;
        ctx->report_progress = flags & COMMIT_GRAPH_WRITE_PROGRESS ? 1 : 0;
        ctx->split = flags & COMMIT_GRAPH_WRITE_SPLIT ? 1 : 0;
 -      ctx->check_oids = flags & COMMIT_GRAPH_WRITE_CHECK_OIDS ? 1 : 0;
        ctx->split_opts = split_opts;
-       ctx->changed_paths = flags & COMMIT_GRAPH_WRITE_BLOOM_FILTERS ? 1 : 0;
        ctx->total_bloom_filter_data_size = 0;
  
+       if (flags & COMMIT_GRAPH_WRITE_BLOOM_FILTERS)
+               ctx->changed_paths = 1;
+       if (!(flags & COMMIT_GRAPH_NO_WRITE_BLOOM_FILTERS)) {
+               struct commit_graph *g;
+               prepare_commit_graph_one(ctx->r, ctx->odb);
+               g = ctx->r->objects->commit_graph;
+               /* We have changed-paths already. Keep them in the next graph */
+               if (g && g->chunk_bloom_data) {
+                       ctx->changed_paths = 1;
+                       ctx->bloom_settings = g->bloom_filter_settings;
+               }
+       }
        if (ctx->split) {
                struct commit_graph *g;
                prepare_commit_graph(ctx->r);
diff --cc commit-graph.h
index cf8d3c9647be341c5ab33f177cc5bcade878ffdd,45b1e5bca39091243b5defefd0fa90b8e7e61b85..09a97030dcd8a2ea570da097d6f27a5d0cbaf934
@@@ -3,10 -3,9 +3,10 @@@
  
  #include "git-compat-util.h"
  #include "object-store.h"
 +#include "oidset.h"
  
  #define GIT_TEST_COMMIT_GRAPH "GIT_TEST_COMMIT_GRAPH"
- #define GIT_TEST_COMMIT_GRAPH_DIE_ON_LOAD "GIT_TEST_COMMIT_GRAPH_DIE_ON_LOAD"
+ #define GIT_TEST_COMMIT_GRAPH_DIE_ON_PARSE "GIT_TEST_COMMIT_GRAPH_DIE_ON_PARSE"
  #define GIT_TEST_COMMIT_GRAPH_CHANGED_PATHS "GIT_TEST_COMMIT_GRAPH_CHANGED_PATHS"
  
  /*
@@@ -91,13 -93,10 +91,14 @@@ enum commit_graph_write_flags 
        COMMIT_GRAPH_WRITE_APPEND     = (1 << 0),
        COMMIT_GRAPH_WRITE_PROGRESS   = (1 << 1),
        COMMIT_GRAPH_WRITE_SPLIT      = (1 << 2),
 -      /* Make sure that each OID in the input is a valid commit OID. */
 -      COMMIT_GRAPH_WRITE_CHECK_OIDS = (1 << 3),
 -      COMMIT_GRAPH_WRITE_BLOOM_FILTERS = (1 << 4),
 -      COMMIT_GRAPH_NO_WRITE_BLOOM_FILTERS = (1 << 5),
 +      COMMIT_GRAPH_WRITE_BLOOM_FILTERS = (1 << 3),
++      COMMIT_GRAPH_NO_WRITE_BLOOM_FILTERS = (1 << 4),
 +};
 +
 +enum commit_graph_split_flags {
 +      COMMIT_GRAPH_SPLIT_UNSPECIFIED      = 0,
 +      COMMIT_GRAPH_SPLIT_MERGE_PROHIBITED = 1,
 +      COMMIT_GRAPH_SPLIT_REPLACE          = 2
  };
  
  struct split_commit_graph_opts {
diff --cc revision.c
Simple merge
diff --cc revision.h
Simple merge
index c855bcd3e7134cc23bb5b597b7d90608b1f4c504,4892364e74307509302b18d555bc287626e6ef0d..c21cc160f3bb9dac91ce222e4fda5216927c1859
@@@ -152,4 -156,39 +156,39 @@@ test_expect_success 'Use Bloom filters 
        test_bloom_filters_used_when_some_filters_are_missing "-- A/B"
  '
  
 -test_done
+ test_expect_success 'persist filter settings' '
+       test_when_finished rm -rf .git/objects/info/commit-graph* &&
+       rm -rf .git/objects/info/commit-graph* &&
+       GIT_TRACE2_EVENT="$(pwd)/trace2.txt" \
+               GIT_TRACE2_EVENT_NESTING=5 \
+               GIT_TEST_BLOOM_SETTINGS_NUM_HASHES=9 \
+               GIT_TEST_BLOOM_SETTINGS_BITS_PER_ENTRY=15 \
+               git commit-graph write --reachable --changed-paths &&
+       grep "{\"hash_version\":1,\"num_hashes\":9,\"bits_per_entry\":15}" trace2.txt &&
+       GIT_TRACE2_EVENT="$(pwd)/trace2-auto.txt" \
+               GIT_TRACE2_EVENT_NESTING=5 \
+               git commit-graph write --reachable --changed-paths &&
+       grep "{\"hash_version\":1,\"num_hashes\":9,\"bits_per_entry\":15}" trace2-auto.txt
+ '
+ test_expect_success 'correctly report changes over limit' '
+       git init 513changes &&
+       (
+               cd 513changes &&
+               for i in $(test_seq 1 513)
+               do
+                       echo $i >file$i.txt || return 1
+               done &&
+               git add . &&
+               git commit -m "files" &&
+               git commit-graph write --reachable --changed-paths &&
+               for i in $(test_seq 1 513)
+               do
+                       git -c core.commitGraph=false log -- file$i.txt >expect &&
+                       git log -- file$i.txt >actual &&
+                       test_cmp expect actual || return 1
+               done
+       )
+ '
 +test_done
index 50d5f0849f031cb346271bc4fa8f01f18f8cfdf7,5ec01abdaa91f691ce454e4646890db729d2ae8d..2804b0dd4576778de0ea99358ad87bc6377321cd
@@@ -476,8 -436,7 +476,8 @@@ corrupt_graph_verify() 
                cp $objdir/info/commit-graph commit-graph-pre-write-test
        fi &&
        git status --short &&
-       GIT_TEST_COMMIT_GRAPH_DIE_ON_LOAD=true git commit-graph write &&
+       GIT_TEST_COMMIT_GRAPH_DIE_ON_PARSE=true git commit-graph write &&
 +      chmod u+w $objdir/info/commit-graph &&
        git commit-graph verify
  }