git.ipfire.org Git - thirdparty/git.git/commitdiff
Merge branch 'ps/do-not-trust-commit-graph-blindly-for-existence'
author Junio C Hamano <gitster@pobox.com>
Wed, 8 Nov 2023 02:03:59 +0000 (11:03 +0900)
committer Junio C Hamano <gitster@pobox.com>
Wed, 8 Nov 2023 02:03:59 +0000 (11:03 +0900)
The codepath to traverse the commit-graph learned to notice that a
commit is missing (e.g., corrupt repository lost an object), even
though it knows something about the commit (like its parents) from
what is in commit-graph.

* ps/do-not-trust-commit-graph-blindly-for-existence:
  commit: detect commits that exist in commit-graph but not in the ODB
  commit-graph: introduce envvar to disable commit existence checks

1  2 
Documentation/git.txt
commit-graph.c
commit-graph.h
t/t5318-commit-graph.sh

diff --combined Documentation/git.txt
index 9aeabde26200e1585d8d5283b6cdbc85ef8a37dc,3bac24cf8a2f8e9a4e0459105a0d662e3e8f73fa..2535a30194f978af900c0f84228680b3b8edf93a
@@@ -96,9 -96,9 +96,9 @@@ foo.bar= ...`) sets `foo.bar` to the em
        to avoid ambiguity with `<name>` containing one.
  +
  This is useful for cases where you want to pass transitory
 -configuration options to git, but are doing so on OS's where
 -other processes might be able to read your cmdline
 -(e.g. `/proc/self/cmdline`), but not your environ
 +configuration options to git, but are doing so on operating systems
 +where other processes might be able to read your command line
 +(e.g. `/proc/self/cmdline`), but not your environment
  (e.g. `/proc/self/environ`). That behavior is the default on
  Linux, but may not be on your system.
  +
@@@ -911,6 -911,16 +911,16 @@@ for full details
        should not normally need to set this to `0`, but it may be
        useful when trying to salvage data from a corrupted repository.
  
+ `GIT_COMMIT_GRAPH_PARANOIA`::
+       When loading a commit object from the commit-graph, Git performs an
+       existence check on the object in the object database. This is done to
+       avoid issues with stale commit-graphs that contain references to
+       already-deleted commits, but comes with a performance penalty.
+ +
+ The default is "true", which enables the aforementioned behavior.
+ Setting this to "false" disables the existence check. This can lead to
+ a performance improvement at the cost of consistency.
  `GIT_ALLOW_PROTOCOL`::
        If set to a colon-separated list of protocols, behave as if
        `protocol.allow` is set to `never`, and each of the listed
diff --combined commit-graph.c
index c2b782af3b649fbf3deea7d90c959e10f951a003,b37fdcb214b5df331b82c185401a3f8928b25371..ee66098e077d89f293b9527c7689865904f4a12f
@@@ -128,16 -128,6 +128,16 @@@ timestamp_t commit_graph_generation(con
        return GENERATION_NUMBER_INFINITY;
  }
  
 +static timestamp_t commit_graph_generation_from_graph(const struct commit *c)
 +{
 +      struct commit_graph_data *data =
 +              commit_graph_data_slab_peek(&commit_graph_data_slab, c);
 +
 +      if (!data || data->graph_pos == COMMIT_NOT_FROM_GRAPH)
 +              return GENERATION_NUMBER_INFINITY;
 +      return data->generation;
 +}
 +
  static struct commit_graph_data *commit_graph_data_at(const struct commit *c)
  {
        unsigned int i, nth_slab;
@@@ -277,8 -267,6 +277,8 @@@ struct commit_graph *load_commit_graph_
  
  static int verify_commit_graph_lite(struct commit_graph *g)
  {
 +      int i;
 +
        /*
         * Basic validation shared between parse_commit_graph()
         * which'll be called every time the graph is used, and the
                return 1;
        }
  
 +      for (i = 0; i < 255; i++) {
 +              uint32_t oid_fanout1 = ntohl(g->chunk_oid_fanout[i]);
 +              uint32_t oid_fanout2 = ntohl(g->chunk_oid_fanout[i + 1]);
 +
 +              if (oid_fanout1 > oid_fanout2) {
 +                      error("commit-graph fanout values out of order");
 +                      return 1;
 +              }
 +      }
 +      if (ntohl(g->chunk_oid_fanout[255]) != g->num_commits) {
 +              error("commit-graph oid table and fanout disagree on size");
 +              return 1;
 +      }
 +
 +      return 0;
 +}
 +
 +static int graph_read_oid_fanout(const unsigned char *chunk_start,
 +                               size_t chunk_size, void *data)
 +{
 +      struct commit_graph *g = data;
 +      if (chunk_size != 256 * sizeof(uint32_t))
 +              return error("commit-graph oid fanout chunk is wrong size");
 +      g->chunk_oid_fanout = (const uint32_t *)chunk_start;
        return 0;
  }
  
@@@ -340,54 -304,12 +340,54 @@@ static int graph_read_oid_lookup(const 
        return 0;
  }
  
 +static int graph_read_commit_data(const unsigned char *chunk_start,
 +                                size_t chunk_size, void *data)
 +{
 +      struct commit_graph *g = data;
 +      if (chunk_size != g->num_commits * GRAPH_DATA_WIDTH)
 +              return error("commit-graph commit data chunk is wrong size");
 +      g->chunk_commit_data = chunk_start;
 +      return 0;
 +}
 +
 +static int graph_read_generation_data(const unsigned char *chunk_start,
 +                                    size_t chunk_size, void *data)
 +{
 +      struct commit_graph *g = data;
 +      if (chunk_size != g->num_commits * sizeof(uint32_t))
 +              return error("commit-graph generations chunk is wrong size");
 +      g->chunk_generation_data = chunk_start;
 +      return 0;
 +}
 +
 +static int graph_read_bloom_index(const unsigned char *chunk_start,
 +                                size_t chunk_size, void *data)
 +{
 +      struct commit_graph *g = data;
 +      if (chunk_size != g->num_commits * 4) {
 +              warning("commit-graph changed-path index chunk is too small");
 +              return -1;
 +      }
 +      g->chunk_bloom_indexes = chunk_start;
 +      return 0;
 +}
 +
  static int graph_read_bloom_data(const unsigned char *chunk_start,
                                  size_t chunk_size, void *data)
  {
        struct commit_graph *g = data;
        uint32_t hash_version;
 +
 +      if (chunk_size < BLOOMDATA_CHUNK_HEADER_SIZE) {
 +              warning("ignoring too-small changed-path chunk"
 +                      " (%"PRIuMAX" < %"PRIuMAX") in commit-graph file",
 +                      (uintmax_t)chunk_size,
 +                      (uintmax_t)BLOOMDATA_CHUNK_HEADER_SIZE);
 +              return -1;
 +      }
 +
        g->chunk_bloom_data = chunk_start;
 +      g->chunk_bloom_data_size = chunk_size;
        hash_version = get_be32(chunk_start);
  
        if (hash_version != 1)
@@@ -459,31 -381,29 +459,31 @@@ struct commit_graph *parse_commit_graph
        cf = init_chunkfile(NULL);
  
        if (read_table_of_contents(cf, graph->data, graph_size,
 -                                 GRAPH_HEADER_SIZE, graph->num_chunks))
 +                                 GRAPH_HEADER_SIZE, graph->num_chunks, 1))
                goto free_and_return;
  
 -      pair_chunk(cf, GRAPH_CHUNKID_OIDFANOUT,
 -                 (const unsigned char **)&graph->chunk_oid_fanout);
 +      read_chunk(cf, GRAPH_CHUNKID_OIDFANOUT, graph_read_oid_fanout, graph);
        read_chunk(cf, GRAPH_CHUNKID_OIDLOOKUP, graph_read_oid_lookup, graph);
 -      pair_chunk(cf, GRAPH_CHUNKID_DATA, &graph->chunk_commit_data);
 -      pair_chunk(cf, GRAPH_CHUNKID_EXTRAEDGES, &graph->chunk_extra_edges);
 -      pair_chunk(cf, GRAPH_CHUNKID_BASE, &graph->chunk_base_graphs);
 +      read_chunk(cf, GRAPH_CHUNKID_DATA, graph_read_commit_data, graph);
 +      pair_chunk(cf, GRAPH_CHUNKID_EXTRAEDGES, &graph->chunk_extra_edges,
 +                 &graph->chunk_extra_edges_size);
 +      pair_chunk(cf, GRAPH_CHUNKID_BASE, &graph->chunk_base_graphs,
 +                 &graph->chunk_base_graphs_size);
  
        if (s->commit_graph_generation_version >= 2) {
 -              pair_chunk(cf, GRAPH_CHUNKID_GENERATION_DATA,
 -                      &graph->chunk_generation_data);
 +              read_chunk(cf, GRAPH_CHUNKID_GENERATION_DATA,
 +                         graph_read_generation_data, graph);
                pair_chunk(cf, GRAPH_CHUNKID_GENERATION_DATA_OVERFLOW,
 -                      &graph->chunk_generation_data_overflow);
 +                         &graph->chunk_generation_data_overflow,
 +                         &graph->chunk_generation_data_overflow_size);
  
                if (graph->chunk_generation_data)
                        graph->read_generation_data = 1;
        }
  
        if (s->commit_graph_read_changed_paths) {
 -              pair_chunk(cf, GRAPH_CHUNKID_BLOOMINDEXES,
 -                         &graph->chunk_bloom_indexes);
 +              read_chunk(cf, GRAPH_CHUNKID_BLOOMINDEXES,
 +                         graph_read_bloom_index, graph);
                read_chunk(cf, GRAPH_CHUNKID_BLOOMDATA,
                           graph_read_bloom_data, graph);
        }
@@@ -543,31 -463,6 +543,31 @@@ static struct commit_graph *load_commit
        return g;
  }
  
 +/*
 + * returns 1 if and only if all graphs in the chain have
 + * corrected commit dates stored in the generation_data chunk.
 + */
 +static int validate_mixed_generation_chain(struct commit_graph *g)
 +{
 +      int read_generation_data = 1;
 +      struct commit_graph *p = g;
 +
 +      while (read_generation_data && p) {
 +              read_generation_data = p->read_generation_data;
 +              p = p->base_graph;
 +      }
 +
 +      if (read_generation_data)
 +              return 1;
 +
 +      while (g) {
 +              g->read_generation_data = 0;
 +              g = g->base_graph;
 +      }
 +
 +      return 0;
 +}
 +
  static int add_graph_to_chain(struct commit_graph *g,
                              struct commit_graph *chain,
                              struct object_id *oids,
                return 0;
        }
  
 +      if (g->chunk_base_graphs_size / g->hash_len < n) {
 +              warning(_("commit-graph base graphs chunk is too small"));
 +              return 0;
 +      }
 +
        while (n) {
                n--;
  
                cur_g = cur_g->base_graph;
        }
  
 -      g->base_graph = chain;
 -
        if (chain) {
                if (unsigned_add_overflows(chain->num_commits,
                                           chain->num_commits_in_base)) {
                g->num_commits_in_base = chain->num_commits + chain->num_commits_in_base;
        }
  
 +      g->base_graph = chain;
 +
        return 1;
  }
  
 -static struct commit_graph *load_commit_graph_chain(struct repository *r,
 -                                                  struct object_directory *odb)
 +int open_commit_graph_chain(const char *chain_file,
 +                          int *fd, struct stat *st)
 +{
 +      *fd = git_open(chain_file);
 +      if (*fd < 0)
 +              return 0;
 +      if (fstat(*fd, st)) {
 +              close(*fd);
 +              return 0;
 +      }
 +      if (st->st_size < the_hash_algo->hexsz) {
 +              close(*fd);
 +              if (!st->st_size) {
 +                      /* treat empty files the same as missing */
 +                      errno = ENOENT;
 +              } else {
 +                      warning("commit-graph chain file too small");
 +                      errno = EINVAL;
 +              }
 +              return 0;
 +      }
 +      return 1;
 +}
 +
 +struct commit_graph *load_commit_graph_chain_fd_st(struct repository *r,
 +                                                 int fd, struct stat *st,
 +                                                 int *incomplete_chain)
  {
        struct commit_graph *graph_chain = NULL;
        struct strbuf line = STRBUF_INIT;
 -      struct stat st;
        struct object_id *oids;
        int i = 0, valid = 1, count;
 -      char *chain_name = get_commit_graph_chain_filename(odb);
 -      FILE *fp;
 -      int stat_res;
 -
 -      fp = fopen(chain_name, "r");
 -      stat_res = stat(chain_name, &st);
 -      free(chain_name);
 +      FILE *fp = xfdopen(fd, "r");
  
 -      if (!fp)
 -              return NULL;
 -      if (stat_res ||
 -          st.st_size <= the_hash_algo->hexsz) {
 -              fclose(fp);
 -              return NULL;
 -      }
 -
 -      count = st.st_size / (the_hash_algo->hexsz + 1);
 +      count = st->st_size / (the_hash_algo->hexsz + 1);
        CALLOC_ARRAY(oids, count);
  
        prepare_alt_odb(r);
                                if (add_graph_to_chain(g, graph_chain, oids, i)) {
                                        graph_chain = g;
                                        valid = 1;
 +                              } else {
 +                                      free_commit_graph(g);
                                }
  
                                break;
                }
        }
  
 +      validate_mixed_generation_chain(graph_chain);
 +
        free(oids);
        fclose(fp);
        strbuf_release(&line);
  
 +      *incomplete_chain = !valid;
        return graph_chain;
  }
  
 -/*
 - * returns 1 if and only if all graphs in the chain have
 - * corrected commit dates stored in the generation_data chunk.
 - */
 -static int validate_mixed_generation_chain(struct commit_graph *g)
 +static struct commit_graph *load_commit_graph_chain(struct repository *r,
 +                                                  struct object_directory *odb)
  {
 -      int read_generation_data = 1;
 -      struct commit_graph *p = g;
 -
 -      while (read_generation_data && p) {
 -              read_generation_data = p->read_generation_data;
 -              p = p->base_graph;
 -      }
 -
 -      if (read_generation_data)
 -              return 1;
 +      char *chain_file = get_commit_graph_chain_filename(odb);
 +      struct stat st;
 +      int fd;
 +      struct commit_graph *g = NULL;
  
 -      while (g) {
 -              g->read_generation_data = 0;
 -              g = g->base_graph;
 +      if (open_commit_graph_chain(chain_file, &fd, &st)) {
 +              int incomplete;
 +              /* ownership of fd is taken over by load function */
 +              g = load_commit_graph_chain_fd_st(r, fd, &st, &incomplete);
        }
  
 -      return 0;
 +      free(chain_file);
 +      return g;
  }
  
  struct commit_graph *read_commit_graph_one(struct repository *r,
        if (!g)
                g = load_commit_graph_chain(r, odb);
  
 -      validate_mixed_generation_chain(g);
 -
        return g;
  }
  
@@@ -829,10 -713,19 +829,10 @@@ struct bloom_filter_settings *get_bloom
        return NULL;
  }
  
 -static void close_commit_graph_one(struct commit_graph *g)
 -{
 -      if (!g)
 -              return;
 -
 -      clear_commit_graph_data_slab(&commit_graph_data_slab);
 -      close_commit_graph_one(g->base_graph);
 -      free_commit_graph(g);
 -}
 -
  void close_commit_graph(struct raw_object_store *o)
  {
 -      close_commit_graph_one(o->commit_graph);
 +      clear_commit_graph_data_slab(&commit_graph_data_slab);
 +      free_commit_graph(o->commit_graph);
        o->commit_graph = NULL;
  }
  
@@@ -912,10 -805,7 +912,10 @@@ static void fill_commit_graph_info(stru
                                die(_("commit-graph requires overflow generation data but has none"));
  
                        offset_pos = offset ^ CORRECTED_COMMIT_DATE_OFFSET_OVERFLOW;
 -                      graph_data->generation = item->date + get_be64(g->chunk_generation_data_overflow + st_mult(8, offset_pos));
 +                      if (g->chunk_generation_data_overflow_size / sizeof(uint64_t) <= offset_pos)
 +                              die(_("commit-graph overflow generation data is too small"));
 +                      graph_data->generation = item->date +
 +                              get_be64(g->chunk_generation_data_overflow + sizeof(uint64_t) * offset_pos);
                } else
                        graph_data->generation = item->date + offset;
        } else
@@@ -935,7 -825,7 +935,7 @@@ static int fill_commit_in_graph(struct 
                                struct commit_graph *g, uint32_t pos)
  {
        uint32_t edge_value;
 -      uint32_t *parent_data_ptr;
 +      uint32_t parent_data_pos;
        struct commit_list **pptr;
        const unsigned char *commit_data;
        uint32_t lex_index;
                return 1;
        }
  
 -      parent_data_ptr = (uint32_t*)(g->chunk_extra_edges +
 -                        st_mult(4, edge_value & GRAPH_EDGE_LAST_MASK));
 +      parent_data_pos = edge_value & GRAPH_EDGE_LAST_MASK;
        do {
 -              edge_value = get_be32(parent_data_ptr);
 +              if (g->chunk_extra_edges_size / sizeof(uint32_t) <= parent_data_pos) {
 +                      error("commit-graph extra-edges pointer out of bounds");
 +                      free_commit_list(item->parents);
 +                      item->parents = NULL;
 +                      item->object.parsed = 0;
 +                      return 0;
 +              }
 +              edge_value = get_be32(g->chunk_extra_edges +
 +                                    sizeof(uint32_t) * parent_data_pos);
                pptr = insert_parent_or_die(r, g,
                                            edge_value & GRAPH_EDGE_LAST_MASK,
                                            pptr);
 -              parent_data_ptr++;
 +              parent_data_pos++;
        } while (!(edge_value & GRAPH_LAST_EDGE));
  
        return 1;
@@@ -1024,14 -907,18 +1024,18 @@@ int repo_find_commit_pos_in_graph(struc
  
  struct commit *lookup_commit_in_graph(struct repository *repo, const struct object_id *id)
  {
+       static int commit_graph_paranoia = -1;
        struct commit *commit;
        uint32_t pos;
  
+       if (commit_graph_paranoia == -1)
+               commit_graph_paranoia = git_env_bool(GIT_COMMIT_GRAPH_PARANOIA, 1);
        if (!prepare_commit_graph(repo))
                return NULL;
        if (!search_commit_pos_in_graph(id, repo->objects->commit_graph, &pos))
                return NULL;
-       if (!has_object(repo, id, 0))
+       if (commit_graph_paranoia && !has_object(repo, id, 0))
                return NULL;
  
        commit = lookup_commit(repo, id);
@@@ -1685,14 -1572,12 +1689,14 @@@ static void compute_topological_levels(
        stop_progress(&ctx->progress);
  }
  
 -static timestamp_t get_generation_from_graph_data(struct commit *c, void *data)
 +static timestamp_t get_generation_from_graph_data(struct commit *c,
 +                                                void *data UNUSED)
  {
        return commit_graph_data_at(c)->generation;
  }
  
 -static void set_generation_v2(struct commit *c, timestamp_t t, void *data)
 +static void set_generation_v2(struct commit *c, timestamp_t t,
 +                            void *data UNUSED)
  {
        struct commit_graph_data *g = commit_graph_data_at(c);
        g->generation = t;
@@@ -1706,6 -1591,7 +1710,6 @@@ static void compute_generation_numbers(
                .commits = &ctx->commits,
                .get_generation = get_generation_from_graph_data,
                .set_generation = set_generation_v2,
 -              .data = ctx,
        };
  
        if (ctx->report_progress)
  }
  
  static void set_generation_in_graph_data(struct commit *c, timestamp_t t,
 -                                       void *data)
 +                                       void *data UNUSED)
  {
        commit_graph_data_at(c)->generation = t;
  }
@@@ -2179,11 -2065,9 +2183,11 @@@ static int write_commit_graph_file(stru
                        free(graph_name);
                }
  
 +              free(ctx->commit_graph_hash_after[ctx->num_commit_graphs_after - 1]);
                ctx->commit_graph_hash_after[ctx->num_commit_graphs_after - 1] = xstrdup(hash_to_hex(file_hash));
                final_graph_name = get_split_graph_filename(ctx->odb,
                                        ctx->commit_graph_hash_after[ctx->num_commit_graphs_after - 1]);
 +              free(ctx->commit_graph_filenames_after[ctx->num_commit_graphs_after - 1]);
                ctx->commit_graph_filenames_after[ctx->num_commit_graphs_after - 1] = final_graph_name;
  
                result = rename(ctx->graph_name, final_graph_name);
@@@ -2632,7 -2516,6 +2636,7 @@@ int write_commit_graph(struct object_di
  
  cleanup:
        free(ctx->graph_name);
 +      free(ctx->base_graph_name);
        free(ctx->commits.list);
        oid_array_clear(&ctx->oids);
        clear_topo_level_slab(&topo_levels);
@@@ -2671,6 -2554,9 +2675,6 @@@ static void graph_report(const char *fm
        va_end(ap);
  }
  
 -#define GENERATION_ZERO_EXISTS 1
 -#define GENERATION_NUMBER_EXISTS 2
 -
  static int commit_graph_checksum_valid(struct commit_graph *g)
  {
        return hashfile_checksum_valid(g->data, g->data_len);
@@@ -2683,8 -2569,7 +2687,8 @@@ static int verify_one_commit_graph(stru
  {
        uint32_t i, cur_fanout_pos = 0;
        struct object_id prev_oid, cur_oid;
 -      int generation_zero = 0;
 +      struct commit *seen_gen_zero = NULL;
 +      struct commit *seen_gen_non_zero = NULL;
  
        verify_commit_graph_error = verify_commit_graph_lite(g);
        if (verify_commit_graph_error)
                                             oid_to_hex(&graph_parents->item->object.oid),
                                             oid_to_hex(&odb_parents->item->object.oid));
  
 -                      generation = commit_graph_generation(graph_parents->item);
 +                      generation = commit_graph_generation_from_graph(graph_parents->item);
                        if (generation > max_generation)
                                max_generation = generation;
  
                        graph_report(_("commit-graph parent list for commit %s terminates early"),
                                     oid_to_hex(&cur_oid));
  
 -              if (!commit_graph_generation(graph_commit)) {
 -                      if (generation_zero == GENERATION_NUMBER_EXISTS)
 -                              graph_report(_("commit-graph has generation number zero for commit %s, but non-zero elsewhere"),
 -                                           oid_to_hex(&cur_oid));
 -                      generation_zero = GENERATION_ZERO_EXISTS;
 -              } else if (generation_zero == GENERATION_ZERO_EXISTS)
 -                      graph_report(_("commit-graph has non-zero generation number for commit %s, but zero elsewhere"),
 -                                   oid_to_hex(&cur_oid));
 +              if (commit_graph_generation_from_graph(graph_commit))
 +                      seen_gen_non_zero = graph_commit;
 +              else
 +                      seen_gen_zero = graph_commit;
  
 -              if (generation_zero == GENERATION_ZERO_EXISTS)
 +              if (seen_gen_zero)
                        continue;
  
                /*
                                     odb_commit->date);
        }
  
 +      if (seen_gen_zero && seen_gen_non_zero)
 +              graph_report(_("commit-graph has both zero and non-zero "
 +                             "generations (e.g., commits '%s' and '%s')"),
 +                           oid_to_hex(&seen_gen_zero->object.oid),
 +                           oid_to_hex(&seen_gen_non_zero->object.oid));
 +
        return verify_commit_graph_error;
  }
  
@@@ -2863,17 -2746,15 +2867,17 @@@ int verify_commit_graph(struct reposito
  
  void free_commit_graph(struct commit_graph *g)
  {
 -      if (!g)
 -              return;
 -      if (g->data) {
 -              munmap((void *)g->data, g->data_len);
 -              g->data = NULL;
 +      while (g) {
 +              struct commit_graph *next = g->base_graph;
 +
 +              if (g->data)
 +                      munmap((void *)g->data, g->data_len);
 +              free(g->filename);
 +              free(g->bloom_filter_settings);
 +              free(g);
 +
 +              g = next;
        }
 -      free(g->filename);
 -      free(g->bloom_filter_settings);
 -      free(g);
  }
  
  void disable_commit_graph(struct repository *r)
diff --combined commit-graph.h
index c6870274c5ab7e8821e65a1eb35b514c889ffa95,3c86e8b05f49785f4865d148e241d09fa43b130d..e519cb81cb649dd016fdb5f26dbfd3623ead13e9
@@@ -8,6 -8,12 +8,12 @@@
  #define GIT_TEST_COMMIT_GRAPH_DIE_ON_PARSE "GIT_TEST_COMMIT_GRAPH_DIE_ON_PARSE"
  #define GIT_TEST_COMMIT_GRAPH_CHANGED_PATHS "GIT_TEST_COMMIT_GRAPH_CHANGED_PATHS"
  
+ /*
+  * This environment variable controls whether commits looked up via the
+  * commit graph will be double checked to exist in the object database.
+  */
+ #define GIT_COMMIT_GRAPH_PARANOIA "GIT_COMMIT_GRAPH_PARANOIA"
  /*
   * This method is only used to enhance coverage of the commit-graph
   * feature in the test suite with the GIT_TEST_COMMIT_GRAPH and
@@@ -26,7 -32,6 +32,7 @@@ struct string_list
  char *get_commit_graph_filename(struct object_directory *odb);
  char *get_commit_graph_chain_filename(struct object_directory *odb);
  int open_commit_graph(const char *graph_file, int *fd, struct stat *st);
 +int open_commit_graph_chain(const char *chain_file, int *fd, struct stat *st);
  
  /*
   * Given a commit struct, try to fill the commit struct info, including:
@@@ -94,14 -99,10 +100,14 @@@ struct commit_graph 
        const unsigned char *chunk_commit_data;
        const unsigned char *chunk_generation_data;
        const unsigned char *chunk_generation_data_overflow;
 +      size_t chunk_generation_data_overflow_size;
        const unsigned char *chunk_extra_edges;
 +      size_t chunk_extra_edges_size;
        const unsigned char *chunk_base_graphs;
 +      size_t chunk_base_graphs_size;
        const unsigned char *chunk_bloom_indexes;
        const unsigned char *chunk_bloom_data;
 +      size_t chunk_bloom_data_size;
  
        struct topo_level_slab *topo_levels;
        struct bloom_filter_settings *bloom_filter_settings;
  struct commit_graph *load_commit_graph_one_fd_st(struct repository *r,
                                                 int fd, struct stat *st,
                                                 struct object_directory *odb);
 +struct commit_graph *load_commit_graph_chain_fd_st(struct repository *r,
 +                                                 int fd, struct stat *st,
 +                                                 int *incomplete_chain);
  struct commit_graph *read_commit_graph_one(struct repository *r,
                                           struct object_directory *odb);
  
diff --combined t/t5318-commit-graph.sh
index 6505ff595a389afa7d79fe31305d214046c376f2,2c62b91ef96ba620e74d446d2c1f054da28ee866..134239d40f05fb15ef14712ee426b2fb581c827d
@@@ -2,7 -2,6 +2,7 @@@
  
  test_description='commit graph'
  . ./test-lib.sh
 +. "$TEST_DIRECTORY"/lib-chunk.sh
  
  GIT_TEST_COMMIT_GRAPH_CHANGED_PATHS=0
  
@@@ -451,15 -450,14 +451,15 @@@ GRAPH_BYTE_FANOUT2=$(($GRAPH_FANOUT_OFF
  GRAPH_OID_LOOKUP_OFFSET=$(($GRAPH_FANOUT_OFFSET + 4 * 256))
  GRAPH_BYTE_OID_LOOKUP_ORDER=$(($GRAPH_OID_LOOKUP_OFFSET + $HASH_LEN * 8))
  GRAPH_BYTE_OID_LOOKUP_MISSING=$(($GRAPH_OID_LOOKUP_OFFSET + $HASH_LEN * 4 + 10))
 +GRAPH_COMMIT_DATA_WIDTH=$(($HASH_LEN + 16))
  GRAPH_COMMIT_DATA_OFFSET=$(($GRAPH_OID_LOOKUP_OFFSET + $HASH_LEN * $NUM_COMMITS))
  GRAPH_BYTE_COMMIT_TREE=$GRAPH_COMMIT_DATA_OFFSET
  GRAPH_BYTE_COMMIT_PARENT=$(($GRAPH_COMMIT_DATA_OFFSET + $HASH_LEN))
  GRAPH_BYTE_COMMIT_EXTRA_PARENT=$(($GRAPH_COMMIT_DATA_OFFSET + $HASH_LEN + 4))
  GRAPH_BYTE_COMMIT_WRONG_PARENT=$(($GRAPH_COMMIT_DATA_OFFSET + $HASH_LEN + 3))
  GRAPH_BYTE_COMMIT_GENERATION=$(($GRAPH_COMMIT_DATA_OFFSET + $HASH_LEN + 11))
 +GRAPH_BYTE_COMMIT_GENERATION_LAST=$(($GRAPH_BYTE_COMMIT_GENERATION + $(($NUM_COMMITS - 1)) * $GRAPH_COMMIT_DATA_WIDTH))
  GRAPH_BYTE_COMMIT_DATE=$(($GRAPH_COMMIT_DATA_OFFSET + $HASH_LEN + 12))
 -GRAPH_COMMIT_DATA_WIDTH=$(($HASH_LEN + 16))
  GRAPH_OCTOPUS_DATA_OFFSET=$(($GRAPH_COMMIT_DATA_OFFSET + \
                             $GRAPH_COMMIT_DATA_WIDTH * $NUM_COMMITS))
  GRAPH_BYTE_OCTOPUS=$(($GRAPH_OCTOPUS_DATA_OFFSET + 4))
@@@ -560,7 -558,7 +560,7 @@@ test_expect_success 'detect incorrect f
  
  test_expect_success 'detect incorrect fanout final value' '
        corrupt_graph_and_verify $GRAPH_BYTE_FANOUT2 "\01" \
 -              "fanout value"
 +              "oid table and fanout disagree on size"
  '
  
  test_expect_success 'detect incorrect OID order' '
@@@ -598,6 -596,11 +598,6 @@@ test_expect_success 'detect incorrect g
                "generation for commit"
  '
  
 -test_expect_success 'detect incorrect generation number' '
 -      corrupt_graph_and_verify $GRAPH_BYTE_COMMIT_GENERATION "\01" \
 -              "commit-graph generation for commit"
 -'
 -
  test_expect_success 'detect incorrect commit date' '
        corrupt_graph_and_verify $GRAPH_BYTE_COMMIT_DATE "\01" \
                "commit date"
@@@ -619,16 -622,6 +619,16 @@@ test_expect_success 'detect incorrect c
                $GRAPH_CHUNK_LOOKUP_OFFSET
  '
  
 +test_expect_success 'detect mixed generation numbers (non-zero to zero)' '
 +      corrupt_graph_and_verify $GRAPH_BYTE_COMMIT_GENERATION_LAST "\0\0\0\0" \
 +              "both zero and non-zero generations"
 +'
 +
 +test_expect_success 'detect mixed generation numbers (zero to non-zero)' '
 +      corrupt_graph_and_verify $GRAPH_BYTE_COMMIT_GENERATION "\0\0\0\0" \
 +              "both zero and non-zero generations"
 +'
 +
  test_expect_success 'git fsck (checks commit-graph when config set to true)' '
        git -C full fsck &&
        corrupt_graph_and_verify $GRAPH_BYTE_FOOTER "\00" \
@@@ -822,77 -815,52 +822,125 @@@ test_expect_success 'overflow during ge
        )
  '
  
 +corrupt_chunk () {
 +      graph=full/.git/objects/info/commit-graph &&
 +      test_when_finished "rm -rf $graph" &&
 +      git -C full commit-graph write --reachable &&
 +      corrupt_chunk_file $graph "$@"
 +}
 +
 +check_corrupt_chunk () {
 +      corrupt_chunk "$@" &&
 +      git -C full -c core.commitGraph=false log >expect.out &&
 +      git -C full -c core.commitGraph=true log >out 2>err &&
 +      test_cmp expect.out out
 +}
 +
 +test_expect_success 'reader notices too-small oid fanout chunk' '
 +      # make it big enough that the graph file is plausible,
 +      # otherwise we hit an earlier check
 +      check_corrupt_chunk OIDF clear $(printf "000000%02x" $(test_seq 250)) &&
 +      cat >expect.err <<-\EOF &&
 +      error: commit-graph oid fanout chunk is wrong size
 +      error: commit-graph is missing the OID Fanout chunk
 +      EOF
 +      test_cmp expect.err err
 +'
 +
 +test_expect_success 'reader notices fanout/lookup table mismatch' '
 +      check_corrupt_chunk OIDF 1020 "FFFFFFFF" &&
 +      cat >expect.err <<-\EOF &&
 +      error: commit-graph oid table and fanout disagree on size
 +      EOF
 +      test_cmp expect.err err
 +'
 +
 +test_expect_success 'reader notices out-of-bounds fanout' '
 +      # Rather than try to corrupt a specific hash, we will just
 +      # wreck them all. But we cannot just set them all to 0xFFFFFFFF or
 +      # similar, as they are used for hi/lo starts in a binary search (so if
 +      # they are identical, that indicates that the search should abort
 +      # immediately). Instead, we will give them high values that differ by
 +      # 2^24, ensuring that any that are used would cause an out-of-bounds
 +      # read.
 +      check_corrupt_chunk OIDF 0 $(printf "%02x000000" $(test_seq 0 254)) &&
 +      cat >expect.err <<-\EOF &&
 +      error: commit-graph fanout values out of order
 +      EOF
 +      test_cmp expect.err err
 +'
 +
 +test_expect_success 'reader notices too-small commit data chunk' '
 +      check_corrupt_chunk CDAT clear 00000000 &&
 +      cat >expect.err <<-\EOF &&
 +      error: commit-graph commit data chunk is wrong size
 +      error: commit-graph is missing the Commit Data chunk
 +      EOF
 +      test_cmp expect.err err
 +'
 +
 +test_expect_success 'reader notices out-of-bounds extra edge' '
 +      check_corrupt_chunk EDGE clear &&
 +      cat >expect.err <<-\EOF &&
 +      error: commit-graph extra-edges pointer out of bounds
 +      EOF
 +      test_cmp expect.err err
 +'
 +
 +test_expect_success 'reader notices too-small generations chunk' '
 +      check_corrupt_chunk GDA2 clear 00000000 &&
 +      cat >expect.err <<-\EOF &&
 +      error: commit-graph generations chunk is wrong size
 +      EOF
 +      test_cmp expect.err err
 +'
 +
+ test_expect_success 'stale commit cannot be parsed when given directly' '
+       test_when_finished "rm -rf repo" &&
+       git init repo &&
+       (
+               cd repo &&
+               test_commit A &&
+               test_commit B &&
+               git commit-graph write --reachable &&
+               oid=$(git rev-parse B) &&
+               rm .git/objects/"$(test_oid_to_path "$oid")" &&
+               # Verify that it is possible to read the commit from the
+               # commit graph when not being paranoid, ...
+               GIT_COMMIT_GRAPH_PARANOIA=false git rev-list B &&
+               # ... but parsing the commit when double checking that
+               # it actually exists in the object database should fail.
+               test_must_fail git rev-list -1 B
+       )
+ '
+ test_expect_success 'stale commit cannot be parsed when traversing graph' '
+       test_when_finished "rm -rf repo" &&
+       git init repo &&
+       (
+               cd repo &&
+               test_commit A &&
+               test_commit B &&
+               test_commit C &&
+               git commit-graph write --reachable &&
+               # Corrupt the repository by deleting the intermediate commit
+               # object. Commands should notice that this object is absent and
+               # thus that the repository is corrupt even if the commit graph
+               # exists.
+               oid=$(git rev-parse B) &&
+               rm .git/objects/"$(test_oid_to_path "$oid")" &&
+               # Again, we should be able to parse the commit when not
+               # being paranoid about commit graph staleness...
+               GIT_COMMIT_GRAPH_PARANOIA=false git rev-parse HEAD~2 &&
+               # ... but fail when we are paranoid.
+               test_must_fail git rev-parse HEAD~2 2>error &&
+               grep "error: commit $oid exists in commit-graph but not in the object database" error
+       )
+ '
  test_done