git.ipfire.org Git - thirdparty/git.git/commitdiff
Merge branch 'gs/commit-graph-path-filter'
author Junio C Hamano <gitster@pobox.com>
Fri, 1 May 2020 20:39:53 +0000 (13:39 -0700)
committer Junio C Hamano <gitster@pobox.com>
Fri, 1 May 2020 20:39:53 +0000 (13:39 -0700)
Introduce an extension to the commit-graph to make it efficient to
check for the paths that were modified at each commit using Bloom
filters.

* gs/commit-graph-path-filter:
  bloom: ignore renames when computing changed paths
  commit-graph: add GIT_TEST_COMMIT_GRAPH_CHANGED_PATHS test flag
  t4216: add end to end tests for git log with Bloom filters
  revision.c: add trace2 stats around Bloom filter usage
  revision.c: use Bloom filters to speed up path based revision walks
  commit-graph: add --changed-paths option to write subcommand
  commit-graph: reuse existing Bloom filters during write
  commit-graph: write Bloom filters to commit graph file
  commit-graph: examine commits by generation number
  commit-graph: examine changed-path objects in pack order
  commit-graph: compute Bloom filters for changed paths
  diff: halt tree-diff early after max_changes
  bloom.c: core Bloom filter implementation for changed paths.
  bloom.c: introduce core Bloom filter constructs
  bloom.c: add the murmur3 hash implementation
  commit-graph: define and use MAX_NUM_CHUNKS

13 files changed:
1  2 
Documentation/git-commit-graph.txt
Makefile
builtin/commit-graph.c
commit-graph.c
commit-graph.h
revision.c
revision.h
t/README
t/helper/test-read-graph.c
t/helper/test-tool.c
t/helper/test-tool.h
t/t5318-commit-graph.sh
t/t5324-split-commit-graph.sh

index 46f7f7c573d50a118f987e9609525849f8ce741a,f4b13c005b8be7e704ef1fac6164df7f2c17b1ac..53a650225a8b4d523cb0278b40d274990805e234
@@@ -57,18 -57,16 +57,23 @@@ or `--stdin-packs`.
  With the `--append` option, include all commits that are present in the
  existing commit-graph file.
  +
 -With the `--split` option, write the commit-graph as a chain of multiple
 -commit-graph files stored in `<dir>/info/commit-graphs`. The new commits
 -not already in the commit-graph are added in a new "tip" file. This file
 -is merged with the existing file if the following merge conditions are
 -met:
+ With the `--changed-paths` option, compute and write information about the
+ paths changed between a commit and its first parent. This operation can
+ take a while on large repositories. It provides significant performance gains
+ for getting history of a directory or a file with `git log -- <path>`.
+ +
 +With the `--split[=<strategy>]` option, write the commit-graph as a
 +chain of multiple commit-graph files stored in
 +`<dir>/info/commit-graphs`. Commit-graph layers are merged based on the
 +strategy and other splitting options. The new commits not already in the
 +commit-graph are added in a new "tip" file. This file is merged with the
 +existing file if the following merge conditions are met:
 +* If `--split=no-merge` is specified, a merge is never performed, and
 +the remaining options are ignored. `--split=replace` overwrites the
 +existing chain with a new one. A bare `--split` defers to the remaining
 +options. (Note that merging a chain of commit graphs replaces the
 +existing chain with a length-1 chain where the first and only
 +incremental holds the entire graph).
  +
  * If `--size-multiple=<X>` is not specified, let `X` equal 2. If the new
  tip file would have `N` commits and the previous tip has `M` commits and
diff --cc Makefile
Simple merge
index 9ceedbba0749285d2b933a3ce56ed457779574ef,59009837dc9f792a825bc5217c66e93881a85b46..15fe60317c7846df93d9975dc991ba3a67bd0758
@@@ -9,9 -9,7 +9,9 @@@
  
  static char const * const builtin_commit_graph_usage[] = {
        N_("git commit-graph verify [--object-dir <objdir>] [--shallow] [--[no-]progress]"),
 -      N_("git commit-graph write [--object-dir <objdir>] [--append|--split] [--reachable|--stdin-packs|--stdin-commits] [--changed-paths] [--[no-]progress] <split options>"),
 +      N_("git commit-graph write [--object-dir <objdir>] [--append] "
 +         "[--split[=<strategy>]] [--reachable|--stdin-packs|--stdin-commits] "
-          "[--[no-]progress] <split options>"),
++         "[--changed-paths] [--[no-]progress] <split options>"),
        NULL
  };
  
@@@ -21,9 -19,7 +21,9 @@@ static const char * const builtin_commi
  };
  
  static const char * const builtin_commit_graph_write_usage[] = {
 -      N_("git commit-graph write [--object-dir <objdir>] [--append|--split] [--reachable|--stdin-packs|--stdin-commits] [--changed-paths] [--[no-]progress] <split options>"),
 +      N_("git commit-graph write [--object-dir <objdir>] [--append] "
 +         "[--split[=<strategy>]] [--reachable|--stdin-packs|--stdin-commits] "
-          "[--[no-]progress] <split options>"),
++         "[--changed-paths] [--[no-]progress] <split options>"),
        NULL
  };
  
@@@ -158,11 -136,11 +159,13 @@@ static int graph_write(int argc, const 
                        N_("start walk at commits listed by stdin")),
                OPT_BOOL(0, "append", &opts.append,
                        N_("include all commits already in the commit-graph file")),
+               OPT_BOOL(0, "changed-paths", &opts.enable_changed_paths,
+                       N_("enable computation for changed paths")),
                OPT_BOOL(0, "progress", &opts.progress, N_("force progress reporting")),
 -              OPT_BOOL(0, "split", &opts.split,
 -                      N_("allow writing an incremental commit-graph file")),
 +              OPT_CALLBACK_F(0, "split", &split_opts.flags, NULL,
 +                      N_("allow writing an incremental commit-graph file"),
 +                      PARSE_OPT_OPTARG | PARSE_OPT_NONEG,
 +                      write_option_parse_split),
                OPT_INTEGER(0, "max-commits", &split_opts.max_commits,
                        N_("maximum number of commits in a non-base split commit-graph")),
                OPT_INTEGER(0, "size-multiple", &split_opts.size_multiple,
diff --cc commit-graph.c
index e9b3c5d561292bde799fe273f07d148f3e178893,77668629e27dbbae783cfd2364fee78d41bf446d..7eb4f22f008f9d8e87abaaf5cb85a705a2e6b7c6
@@@ -1132,13 -1273,45 +1272,45 @@@ static void compute_generation_numbers(
        stop_progress(&ctx->progress);
  }
  
 -static int add_ref_to_list(const char *refname,
 -                         const struct object_id *oid,
 -                         int flags, void *cb_data)
+ static void compute_bloom_filters(struct write_commit_graph_context *ctx)
+ {
+       int i;
+       struct progress *progress = NULL;
+       struct commit **sorted_commits;
+       init_bloom_filters();
+       if (ctx->report_progress)
+               progress = start_delayed_progress(
+                       _("Computing commit changed paths Bloom filters"),
+                       ctx->commits.nr);
+       ALLOC_ARRAY(sorted_commits, ctx->commits.nr);
+       COPY_ARRAY(sorted_commits, ctx->commits.list, ctx->commits.nr);
+       if (ctx->order_by_pack)
+               QSORT(sorted_commits, ctx->commits.nr, commit_pos_cmp);
+       else
+               QSORT(sorted_commits, ctx->commits.nr, commit_gen_cmp);
+       for (i = 0; i < ctx->commits.nr; i++) {
+               struct commit *c = sorted_commits[i];
+               struct bloom_filter *filter = get_bloom_filter(ctx->r, c, 1);
+               ctx->total_bloom_filter_data_size += sizeof(unsigned char) * filter->len;
+               display_progress(progress, i + 1);
+       }
+       free(sorted_commits);
+       stop_progress(&progress);
+ }
 +static int add_ref_to_set(const char *refname,
 +                        const struct object_id *oid,
 +                        int flags, void *cb_data)
  {
 -      struct string_list *list = (struct string_list *)cb_data;
 +      struct oidset *commits = (struct oidset *)cb_data;
  
 -      string_list_append(list, oid_to_hex(oid));
 +      oidset_insert(commits, oid);
        return 0;
  }
  
@@@ -1865,8 -2028,10 +2060,10 @@@ int write_commit_graph(struct object_di
                        goto cleanup;
        }
  
-       if (!pack_indexes && !commits)
 -      if (!pack_indexes && !commit_hex) {
++      if (!pack_indexes && !commits) {
+               ctx->order_by_pack = 1;
                fill_oids_from_all_packs(ctx);
+       }
  
        close_reachable(ctx);
  
@@@ -2121,11 -2288,13 +2321,12 @@@ void free_commit_graph(struct commit_gr
  {
        if (!g)
                return;
 -      if (g->graph_fd >= 0) {
 +      if (g->data) {
                munmap((void *)g->data, g->data_len);
                g->data = NULL;
 -              close(g->graph_fd);
        }
        free(g->filename);
+       free(g->bloom_filter_settings);
        free(g);
  }
  
diff --cc commit-graph.h
index 1254eae94870207f18df5289ca243d18a0fc7649,8655d064c14496a9bfa210fa55b9ca5ae59ae3d4..183a15ed620bf24e7a5725c9d666107bc6e8aafb
@@@ -77,15 -85,10 +83,16 @@@ enum commit_graph_write_flags 
        COMMIT_GRAPH_WRITE_PROGRESS   = (1 << 1),
        COMMIT_GRAPH_WRITE_SPLIT      = (1 << 2),
        /* Make sure that each OID in the input is a valid commit OID. */
-       COMMIT_GRAPH_WRITE_CHECK_OIDS = (1 << 3)
+       COMMIT_GRAPH_WRITE_CHECK_OIDS = (1 << 3),
+       COMMIT_GRAPH_WRITE_BLOOM_FILTERS = (1 << 4),
  };
  
 +enum commit_graph_split_flags {
 +      COMMIT_GRAPH_SPLIT_UNSPECIFIED      = 0,
 +      COMMIT_GRAPH_SPLIT_MERGE_PROHIBITED = 1,
 +      COMMIT_GRAPH_SPLIT_REPLACE          = 2
 +};
 +
  struct split_commit_graph_opts {
        int size_multiple;
        int max_commits;
diff --cc revision.c
Simple merge
diff --cc revision.h
Simple merge
diff --cc t/README
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge