Merge branch 'jk/chunk-bounds'

author Junio C Hamano <gitster@pobox.com>

Mon, 23 Oct 2023 20:56:36 +0000 (13:56 -0700)

committer Junio C Hamano <gitster@pobox.com>

Mon, 23 Oct 2023 20:56:36 +0000 (13:56 -0700)
author Junio C Hamano <gitster@pobox.com>
Mon, 23 Oct 2023 20:56:36 +0000 (13:56 -0700)
committer Junio C Hamano <gitster@pobox.com>
Mon, 23 Oct 2023 20:56:36 +0000 (13:56 -0700)
diff --git a/bloom.c b/bloom.c

index aef6b5fea2d18f521b4812bd017fe685f635be47..1474aa19fa524ff1695c2b518259f61b1c2023b7 100644 (file)
--- a/bloom.c
+++ b/bloom.c
@@ -29,6 +29,26 @@ static inline unsigned char get_bitmask(uint32_t pos)
         return ((unsigned char)1) << (pos & (BITS_PER_WORD - 1));
  }
  
+static int check_bloom_offset(struct commit_graph *g, uint32_t pos,
+                             uint32_t offset)
+{
+       /*
+        * Note that we allow offsets equal to the data size, which would set
+        * our pointers at one past the end of the chunk memory. This is
+        * necessary because the on-disk index points to the end of the
+        * entries (so we can compute size by comparing adjacent ones). And
+        * naturally the final entry's end is one-past-the-end of the chunk.
+        */
+       if (offset <= g->chunk_bloom_data_size - BLOOMDATA_CHUNK_HEADER_SIZE)
+               return 0;
+
+       warning("ignoring out-of-range offset (%"PRIuMAX") for changed-path"
+               " filter at pos %"PRIuMAX" of %s (chunk size: %"PRIuMAX")",
+               (uintmax_t)offset, (uintmax_t)pos,
+               g->filename, (uintmax_t)g->chunk_bloom_data_size);
+       return -1;
+}
+
  static int load_bloom_filter_from_graph(struct commit_graph *g,
                                         struct bloom_filter *filter,
                                         uint32_t graph_pos)
@@ -51,6 +71,20 @@ static int load_bloom_filter_from_graph(struct commit_graph *g,
         else
                 start_index = 0;
  
+       if (check_bloom_offset(g, lex_pos, end_index) < 0 ||
+           check_bloom_offset(g, lex_pos - 1, start_index) < 0)
+               return 0;
+
+       if (end_index < start_index) {
+               warning("ignoring decreasing changed-path index offsets"
+                       " (%"PRIuMAX" > %"PRIuMAX") for positions"
+                       " %"PRIuMAX" and %"PRIuMAX" of %s",
+                       (uintmax_t)start_index, (uintmax_t)end_index,
+                       (uintmax_t)(lex_pos-1), (uintmax_t)lex_pos,
+                       g->filename);
+               return 0;
+       }
+
         filter->len = end_index - start_index;
         filter->data = (unsigned char *)(g->chunk_bloom_data +
                                         sizeof(unsigned char) * start_index +
diff --git a/chunk-format.c b/chunk-format.c

index 140dfa0dcc4629fec59f723d025ec36019ff29eb..cdc7f39b7039b94a14a42ec74b0e27917c43fe4f 100644 (file)
--- a/chunk-format.c
+++ b/chunk-format.c
@@ -102,7 +102,8 @@ int read_table_of_contents(struct chunkfile *cf,
                            const unsigned char *mfile,
                            size_t mfile_size,
                            uint64_t toc_offset,
-                          int toc_length)
+                          int toc_length,
+                          unsigned expected_alignment)
  {
         int i;
         uint32_t chunk_id;
@@ -120,6 +121,11 @@ int read_table_of_contents(struct chunkfile *cf,
                         error(_("terminating chunk id appears earlier than expected"));
                         return 1;
                 }
+               if (chunk_offset % expected_alignment != 0) {
+                       error(_("chunk id %"PRIx32" not %d-byte aligned"),
+                             chunk_id, expected_alignment);
+                       return 1;
+               }
  
                 table_of_contents += CHUNK_TOC_ENTRY_SIZE;
                 next_chunk_offset = get_be64(table_of_contents + 4);
@@ -154,20 +160,28 @@ int read_table_of_contents(struct chunkfile *cf,
         return 0;
  }
  
+struct pair_chunk_data {
+       const unsigned char **p;
+       size_t *size;
+};
+
  static int pair_chunk_fn(const unsigned char *chunk_start,
                          size_t chunk_size,
                          void *data)
  {
-       const unsigned char **p = data;
-       *p = chunk_start;
+       struct pair_chunk_data *pcd = data;
+       *pcd->p = chunk_start;
+       *pcd->size = chunk_size;
         return 0;
  }
  
  int pair_chunk(struct chunkfile *cf,
                uint32_t chunk_id,
-              const unsigned char **p)
+              const unsigned char **p,
+              size_t *size)
  {
-       return read_chunk(cf, chunk_id, pair_chunk_fn, p);
+       struct pair_chunk_data pcd = { .p = p, .size = size };
+       return read_chunk(cf, chunk_id, pair_chunk_fn, &pcd);
  }
  
  int read_chunk(struct chunkfile *cf,
diff --git a/chunk-format.h b/chunk-format.h

index c7794e84adda364bc0ad14a3d1d61467d73eaadd..14b76180ef768f6b5c38aa1cda128ab87d2327ae 100644 (file)
--- a/chunk-format.h
+++ b/chunk-format.h
@@ -36,20 +36,23 @@ int read_table_of_contents(struct chunkfile *cf,
                            const unsigned char *mfile,
                            size_t mfile_size,
                            uint64_t toc_offset,
-                          int toc_length);
+                          int toc_length,
+                          unsigned expected_alignment);
  
  #define CHUNK_NOT_FOUND (-2)
  
  /*
   * Find 'chunk_id' in the given chunkfile and assign the
   * given pointer to the position in the mmap'd file where
- * that chunk begins.
+ * that chunk begins. Likewise the "size" parameter is filled
+ * with the size of the chunk.
   *
   * Returns CHUNK_NOT_FOUND if the chunk does not exist.
   */
  int pair_chunk(struct chunkfile *cf,
                uint32_t chunk_id,
-              const unsigned char **p);
+              const unsigned char **p,
+              size_t *size);
  
  typedef int (*chunk_read_fn)(const unsigned char *chunk_start,
                              size_t chunk_size, void *data);
diff --git a/commit-graph.c b/commit-graph.c

index fd2f700b2e8d94ef1b6ced49490342c2a5b3cfeb..c2b782af3b649fbf3deea7d90c959e10f951a003 100644 (file)
--- a/commit-graph.c
+++ b/commit-graph.c
@@ -277,6 +277,8 @@ struct commit_graph *load_commit_graph_one_fd_st(struct repository *r,
  
  static int verify_commit_graph_lite(struct commit_graph *g)
  {
+       int i;
+
         /*
          * Basic validation shared between parse_commit_graph()
          * which'll be called every time the graph is used, and the
@@ -302,6 +304,30 @@ static int verify_commit_graph_lite(struct commit_graph *g)
                 return 1;
         }
  
+       for (i = 0; i < 255; i++) {
+               uint32_t oid_fanout1 = ntohl(g->chunk_oid_fanout[i]);
+               uint32_t oid_fanout2 = ntohl(g->chunk_oid_fanout[i + 1]);
+
+               if (oid_fanout1 > oid_fanout2) {
+                       error("commit-graph fanout values out of order");
+                       return 1;
+               }
+       }
+       if (ntohl(g->chunk_oid_fanout[255]) != g->num_commits) {
+               error("commit-graph oid table and fanout disagree on size");
+               return 1;
+       }
+
+       return 0;
+}
+
+static int graph_read_oid_fanout(const unsigned char *chunk_start,
+                                size_t chunk_size, void *data)
+{
+       struct commit_graph *g = data;
+       if (chunk_size != 256 * sizeof(uint32_t))
+               return error("commit-graph oid fanout chunk is wrong size");
+       g->chunk_oid_fanout = (const uint32_t *)chunk_start;
         return 0;
  }
  
@@ -314,12 +340,54 @@ static int graph_read_oid_lookup(const unsigned char *chunk_start,
         return 0;
  }
  
+static int graph_read_commit_data(const unsigned char *chunk_start,
+                                 size_t chunk_size, void *data)
+{
+       struct commit_graph *g = data;
+       if (chunk_size != g->num_commits * GRAPH_DATA_WIDTH)
+               return error("commit-graph commit data chunk is wrong size");
+       g->chunk_commit_data = chunk_start;
+       return 0;
+}
+
+static int graph_read_generation_data(const unsigned char *chunk_start,
+                                     size_t chunk_size, void *data)
+{
+       struct commit_graph *g = data;
+       if (chunk_size != g->num_commits * sizeof(uint32_t))
+               return error("commit-graph generations chunk is wrong size");
+       g->chunk_generation_data = chunk_start;
+       return 0;
+}
+
+static int graph_read_bloom_index(const unsigned char *chunk_start,
+                                 size_t chunk_size, void *data)
+{
+       struct commit_graph *g = data;
+       if (chunk_size != g->num_commits * 4) {
+               warning("commit-graph changed-path index chunk is too small");
+               return -1;
+       }
+       g->chunk_bloom_indexes = chunk_start;
+       return 0;
+}
+
  static int graph_read_bloom_data(const unsigned char *chunk_start,
                                   size_t chunk_size, void *data)
  {
         struct commit_graph *g = data;
         uint32_t hash_version;
+
+       if (chunk_size < BLOOMDATA_CHUNK_HEADER_SIZE) {
+               warning("ignoring too-small changed-path chunk"
+                       " (%"PRIuMAX" < %"PRIuMAX") in commit-graph file",
+                       (uintmax_t)chunk_size,
+                       (uintmax_t)BLOOMDATA_CHUNK_HEADER_SIZE);
+               return -1;
+       }
+
         g->chunk_bloom_data = chunk_start;
+       g->chunk_bloom_data_size = chunk_size;
         hash_version = get_be32(chunk_start);
  
         if (hash_version != 1)
@@ -391,29 +459,31 @@ struct commit_graph *parse_commit_graph(struct repo_settings *s,
         cf = init_chunkfile(NULL);
  
         if (read_table_of_contents(cf, graph->data, graph_size,
-                                  GRAPH_HEADER_SIZE, graph->num_chunks))
+                                  GRAPH_HEADER_SIZE, graph->num_chunks, 1))
                 goto free_and_return;
  
-       pair_chunk(cf, GRAPH_CHUNKID_OIDFANOUT,
-                  (const unsigned char **)&graph->chunk_oid_fanout);
+       read_chunk(cf, GRAPH_CHUNKID_OIDFANOUT, graph_read_oid_fanout, graph);
         read_chunk(cf, GRAPH_CHUNKID_OIDLOOKUP, graph_read_oid_lookup, graph);
-       pair_chunk(cf, GRAPH_CHUNKID_DATA, &graph->chunk_commit_data);
-       pair_chunk(cf, GRAPH_CHUNKID_EXTRAEDGES, &graph->chunk_extra_edges);
-       pair_chunk(cf, GRAPH_CHUNKID_BASE, &graph->chunk_base_graphs);
+       read_chunk(cf, GRAPH_CHUNKID_DATA, graph_read_commit_data, graph);
+       pair_chunk(cf, GRAPH_CHUNKID_EXTRAEDGES, &graph->chunk_extra_edges,
+                  &graph->chunk_extra_edges_size);
+       pair_chunk(cf, GRAPH_CHUNKID_BASE, &graph->chunk_base_graphs,
+                  &graph->chunk_base_graphs_size);
  
         if (s->commit_graph_generation_version >= 2) {
-               pair_chunk(cf, GRAPH_CHUNKID_GENERATION_DATA,
-                       &graph->chunk_generation_data);
+               read_chunk(cf, GRAPH_CHUNKID_GENERATION_DATA,
+                          graph_read_generation_data, graph);
                 pair_chunk(cf, GRAPH_CHUNKID_GENERATION_DATA_OVERFLOW,
-                       &graph->chunk_generation_data_overflow);
+                          &graph->chunk_generation_data_overflow,
+                          &graph->chunk_generation_data_overflow_size);
  
                 if (graph->chunk_generation_data)
                         graph->read_generation_data = 1;
         }
  
         if (s->commit_graph_read_changed_paths) {
-               pair_chunk(cf, GRAPH_CHUNKID_BLOOMINDEXES,
-                          &graph->chunk_bloom_indexes);
+               read_chunk(cf, GRAPH_CHUNKID_BLOOMINDEXES,
+                          graph_read_bloom_index, graph);
                 read_chunk(cf, GRAPH_CHUNKID_BLOOMDATA,
                            graph_read_bloom_data, graph);
         }
@@ -510,6 +580,11 @@ static int add_graph_to_chain(struct commit_graph *g,
                 return 0;
         }
  
+       if (g->chunk_base_graphs_size / g->hash_len < n) {
+               warning(_("commit-graph base graphs chunk is too small"));
+               return 0;
+       }
+
         while (n) {
                 n--;
  
@@ -837,7 +912,10 @@ static void fill_commit_graph_info(struct commit *item, struct commit_graph *g,
                                 die(_("commit-graph requires overflow generation data but has none"));
  
                         offset_pos = offset ^ CORRECTED_COMMIT_DATE_OFFSET_OVERFLOW;
-                       graph_data->generation = item->date + get_be64(g->chunk_generation_data_overflow + st_mult(8, offset_pos));
+                       if (g->chunk_generation_data_overflow_size / sizeof(uint64_t) <= offset_pos)
+                               die(_("commit-graph overflow generation data is too small"));
+                       graph_data->generation = item->date +
+                               get_be64(g->chunk_generation_data_overflow + sizeof(uint64_t) * offset_pos);
                 } else
                         graph_data->generation = item->date + offset;
         } else
@@ -857,7 +935,7 @@ static int fill_commit_in_graph(struct repository *r,
                                 struct commit_graph *g, uint32_t pos)
  {
         uint32_t edge_value;
-       uint32_t *parent_data_ptr;
+       uint32_t parent_data_pos;
         struct commit_list **pptr;
         const unsigned char *commit_data;
         uint32_t lex_index;
@@ -889,14 +967,21 @@ static int fill_commit_in_graph(struct repository *r,
                 return 1;
         }
  
-       parent_data_ptr = (uint32_t*)(g->chunk_extra_edges +
-                         st_mult(4, edge_value & GRAPH_EDGE_LAST_MASK));
+       parent_data_pos = edge_value & GRAPH_EDGE_LAST_MASK;
         do {
-               edge_value = get_be32(parent_data_ptr);
+               if (g->chunk_extra_edges_size / sizeof(uint32_t) <= parent_data_pos) {
+                       error("commit-graph extra-edges pointer out of bounds");
+                       free_commit_list(item->parents);
+                       item->parents = NULL;
+                       item->object.parsed = 0;
+                       return 0;
+               }
+               edge_value = get_be32(g->chunk_extra_edges +
+                                     sizeof(uint32_t) * parent_data_pos);
                 pptr = insert_parent_or_die(r, g,
                                             edge_value & GRAPH_EDGE_LAST_MASK,
                                             pptr);
-               parent_data_ptr++;
+               parent_data_pos++;
         } while (!(edge_value & GRAPH_LAST_EDGE));
  
         return 1;
diff --git a/commit-graph.h b/commit-graph.h

index 20ada7e891f1be3e92b347faf57d0ae6a1391daa..c6870274c5ab7e8821e65a1eb35b514c889ffa95 100644 (file)
--- a/commit-graph.h
+++ b/commit-graph.h
@@ -94,10 +94,14 @@ struct commit_graph {
         const unsigned char *chunk_commit_data;
         const unsigned char *chunk_generation_data;
         const unsigned char *chunk_generation_data_overflow;
+       size_t chunk_generation_data_overflow_size;
         const unsigned char *chunk_extra_edges;
+       size_t chunk_extra_edges_size;
         const unsigned char *chunk_base_graphs;
+       size_t chunk_base_graphs_size;
         const unsigned char *chunk_bloom_indexes;
         const unsigned char *chunk_bloom_data;
+       size_t chunk_bloom_data_size;
  
         struct topo_level_slab *topo_levels;
         struct bloom_filter_settings *bloom_filter_settings;
diff --git a/midx.c b/midx.c

index 931f55735037fcc4de53005ef0970e9ed3781885..2f3863c936a4c1c9035f90ae28b73887cf8f24b2 100644 (file)
--- a/midx.c
+++ b/midx.c
@@ -71,6 +71,33 @@ static int midx_read_oid_fanout(const unsigned char *chunk_start,
                 error(_("multi-pack-index OID fanout is of the wrong size"));
                 return 1;
         }
+       m->num_objects = ntohl(m->chunk_oid_fanout[255]);
+       return 0;
+}
+
+static int midx_read_oid_lookup(const unsigned char *chunk_start,
+                               size_t chunk_size, void *data)
+{
+       struct multi_pack_index *m = data;
+       m->chunk_oid_lookup = chunk_start;
+
+       if (chunk_size != st_mult(m->hash_len, m->num_objects)) {
+               error(_("multi-pack-index OID lookup chunk is the wrong size"));
+               return 1;
+       }
+       return 0;
+}
+
+static int midx_read_object_offsets(const unsigned char *chunk_start,
+                                   size_t chunk_size, void *data)
+{
+       struct multi_pack_index *m = data;
+       m->chunk_object_offsets = chunk_start;
+
+       if (chunk_size != st_mult(m->num_objects, MIDX_CHUNK_OFFSET_WIDTH)) {
+               error(_("multi-pack-index object offset chunk is the wrong size"));
+               return 1;
+       }
         return 0;
  }
  
@@ -140,33 +167,41 @@ struct multi_pack_index *load_multi_pack_index(const char *object_dir, int local
         cf = init_chunkfile(NULL);
  
         if (read_table_of_contents(cf, m->data, midx_size,
-                                  MIDX_HEADER_SIZE, m->num_chunks))
+                                  MIDX_HEADER_SIZE, m->num_chunks,
+                                  MIDX_CHUNK_ALIGNMENT))
                 goto cleanup_fail;
  
-       if (pair_chunk(cf, MIDX_CHUNKID_PACKNAMES, &m->chunk_pack_names) == CHUNK_NOT_FOUND)
-               die(_("multi-pack-index missing required pack-name chunk"));
-       if (read_chunk(cf, MIDX_CHUNKID_OIDFANOUT, midx_read_oid_fanout, m) == CHUNK_NOT_FOUND)
-               die(_("multi-pack-index missing required OID fanout chunk"));
-       if (pair_chunk(cf, MIDX_CHUNKID_OIDLOOKUP, &m->chunk_oid_lookup) == CHUNK_NOT_FOUND)
-               die(_("multi-pack-index missing required OID lookup chunk"));
-       if (pair_chunk(cf, MIDX_CHUNKID_OBJECTOFFSETS, &m->chunk_object_offsets) == CHUNK_NOT_FOUND)
-               die(_("multi-pack-index missing required object offsets chunk"));
+       if (pair_chunk(cf, MIDX_CHUNKID_PACKNAMES, &m->chunk_pack_names, &m->chunk_pack_names_len))
+               die(_("multi-pack-index required pack-name chunk missing or corrupted"));
+       if (read_chunk(cf, MIDX_CHUNKID_OIDFANOUT, midx_read_oid_fanout, m))
+               die(_("multi-pack-index required OID fanout chunk missing or corrupted"));
+       if (read_chunk(cf, MIDX_CHUNKID_OIDLOOKUP, midx_read_oid_lookup, m))
+               die(_("multi-pack-index required OID lookup chunk missing or corrupted"));
+       if (read_chunk(cf, MIDX_CHUNKID_OBJECTOFFSETS, midx_read_object_offsets, m))
+               die(_("multi-pack-index required object offsets chunk missing or corrupted"));
  
-       pair_chunk(cf, MIDX_CHUNKID_LARGEOFFSETS, &m->chunk_large_offsets);
+       pair_chunk(cf, MIDX_CHUNKID_LARGEOFFSETS, &m->chunk_large_offsets,
+                  &m->chunk_large_offsets_len);
  
         if (git_env_bool("GIT_TEST_MIDX_READ_RIDX", 1))
-               pair_chunk(cf, MIDX_CHUNKID_REVINDEX, &m->chunk_revindex);
-
-       m->num_objects = ntohl(m->chunk_oid_fanout[255]);
+               pair_chunk(cf, MIDX_CHUNKID_REVINDEX, &m->chunk_revindex,
+                          &m->chunk_revindex_len);
  
         CALLOC_ARRAY(m->pack_names, m->num_packs);
         CALLOC_ARRAY(m->packs, m->num_packs);
  
         cur_pack_name = (const char *)m->chunk_pack_names;
         for (i = 0; i < m->num_packs; i++) {
+               const char *end;
+               size_t avail = m->chunk_pack_names_len -
+                               (cur_pack_name - (const char *)m->chunk_pack_names);
+
                 m->pack_names[i] = cur_pack_name;
  
-               cur_pack_name += strlen(cur_pack_name) + 1;
+               end = memchr(cur_pack_name, '\0', avail);
+               if (!end)
+                       die(_("multi-pack-index pack-name chunk is too short"));
+               cur_pack_name = end + 1;
  
                 if (i && strcmp(m->pack_names[i], m->pack_names[i - 1]) <= 0)
                         die(_("multi-pack-index pack names out of order: '%s' before '%s'"),
@@ -270,8 +305,9 @@ off_t nth_midxed_offset(struct multi_pack_index *m, uint32_t pos)
                         die(_("multi-pack-index stores a 64-bit offset, but off_t is too small"));
  
                 offset32 ^= MIDX_LARGE_OFFSET_NEEDED;
-               return get_be64(m->chunk_large_offsets +
-                               st_mult(sizeof(uint64_t), offset32));
+               if (offset32 >= m->chunk_large_offsets_len / sizeof(uint64_t))
+                       die(_("multi-pack-index large offset out of bounds"));
+               return get_be64(m->chunk_large_offsets + sizeof(uint64_t) * offset32);
         }
  
         return offset32;
diff --git a/midx.h b/midx.h

index 5578cd7b835e2b396e502e8abaf4560ab765c850..a5d98919c857b812789f44d86fb6ad13fb7e0560 100644 (file)
--- a/midx.h
+++ b/midx.h
@@ -32,11 +32,14 @@ struct multi_pack_index {
         int local;
  
         const unsigned char *chunk_pack_names;
+       size_t chunk_pack_names_len;
         const uint32_t *chunk_oid_fanout;
         const unsigned char *chunk_oid_lookup;
         const unsigned char *chunk_object_offsets;
         const unsigned char *chunk_large_offsets;
+       size_t chunk_large_offsets_len;
         const unsigned char *chunk_revindex;
+       size_t chunk_revindex_len;
  
         const char **pack_names;
         struct packed_git **packs;
diff --git a/pack-revindex.c b/pack-revindex.c

index a01a2a4640d421eb3418a1627317613ee8bd4dcf..acf1dd9786cd3c2a0cb410804721488b6d1d92b4 100644 (file)
--- a/pack-revindex.c
+++ b/pack-revindex.c
@@ -343,6 +343,17 @@ int verify_pack_revindex(struct packed_git *p)
         return res;
  }
  
+static int can_use_midx_ridx_chunk(struct multi_pack_index *m)
+{
+       if (!m->chunk_revindex)
+               return 0;
+       if (m->chunk_revindex_len != st_mult(sizeof(uint32_t), m->num_objects)) {
+               error(_("multi-pack-index reverse-index chunk is the wrong size"));
+               return 0;
+       }
+       return 1;
+}
+
  int load_midx_revindex(struct multi_pack_index *m)
  {
         struct strbuf revindex_name = STRBUF_INIT;
@@ -351,7 +362,7 @@ int load_midx_revindex(struct multi_pack_index *m)
         if (m->revindex_data)
                 return 0;
  
-       if (m->chunk_revindex) {
+       if (can_use_midx_ridx_chunk(m)) {
                 /*
                  * If the MIDX `m` has a `RIDX` chunk, then use its contents for
                  * the reverse index instead of trying to load a separate `.rev`
diff --git a/t/lib-chunk.sh b/t/lib-chunk.sh

new file mode 100644 (file)

index 0000000..a7cd9c3
--- /dev/null
+++ b/t/lib-chunk.sh
@@ -0,0 +1,17 @@
+# Shell library for working with "chunk" files (commit-graph, midx, etc).
+
+# corrupt_chunk_file <fn> <chunk> <offset> <bytes>
+#
+# Corrupt a chunk-based file (like a commit-graph) by overwriting the bytes
+# found in the chunk specified by the 4-byte <chunk> identifier. If <offset> is
+# "clear", replace the chunk entirely. Otherwise, overwrite data <offset> bytes
+# into the chunk.
+#
+# The <bytes> are interpreted as pairs of hex digits (so "000000FE" would be
+# big-endian 254).
+corrupt_chunk_file () {
+       fn=$1; shift
+       perl "$TEST_DIRECTORY"/lib-chunk/corrupt-chunk-file.pl \
+               "$@" <"$fn" >"$fn.tmp" &&
+       mv "$fn.tmp" "$fn"
+}
diff --git a/t/lib-chunk/corrupt-chunk-file.pl b/t/lib-chunk/corrupt-chunk-file.pl

new file mode 100644 (file)

index 0000000..cd6d386
--- /dev/null
+++ b/t/lib-chunk/corrupt-chunk-file.pl
@@ -0,0 +1,66 @@
+#!/usr/bin/perl
+
+my ($chunk, $seek, $bytes) = @ARGV;
+$bytes =~ s/../chr(hex($&))/ge;
+
+binmode STDIN;
+binmode STDOUT;
+
+# A few helpers to read bytes, or read and copy them to the
+# output.
+sub get {
+       my $n = shift;
+       return unless $n;
+       read(STDIN, my $buf, $n)
+               or die "read error or eof: $!\n";
+       return $buf;
+}
+sub copy {
+       my $buf = get(@_);
+       print $buf;
+       return $buf;
+}
+
+# read until we find table-of-contents entry for chunk;
+# note that we cheat a bit by assuming 4-byte alignment and
+# that no ToC entry will accidentally look like a header.
+#
+# If we don't find the entry, copy() will hit EOF and exit
+# (which should cause the caller to fail the test).
+while (copy(4) ne $chunk) { }
+my $offset = unpack("Q>", copy(8));
+
+# In clear mode, our length will change. So figure out
+# the length by comparing to the offset of the next chunk, and
+# then adjust that offset (and all subsequent) ones.
+my $len;
+if ($seek eq "clear") {
+       my $id;
+       do {
+               $id = copy(4);
+               my $next = unpack("Q>", get(8));
+               if (!defined $len) {
+                       $len = $next - $offset;
+               }
+               print pack("Q>", $next - $len + length($bytes));
+       } while (unpack("N", $id));
+}
+
+# and now copy up to our existing chunk data
+copy($offset - tell(STDIN));
+if ($seek eq "clear") {
+       # if clearing, skip past existing data
+       get($len);
+} else {
+       # otherwise, copy up to the requested offset,
+       # and skip past the overwritten bytes
+       copy($seek);
+       get(length($bytes));
+}
+
+# now write out the requested bytes, along
+# with any other remaining data
+print $bytes;
+while (read(STDIN, my $buf, 4096)) {
+       print $buf;
+}
diff --git a/t/t4216-log-bloom.sh b/t/t4216-log-bloom.sh

index fa9d32facfb0ddca6c001d78c1011cf0b3568f19..2ba0324a693731412242cf9603826736a3c9279a 100755 (executable)
--- a/t/t4216-log-bloom.sh
+++ b/t/t4216-log-bloom.sh
@@ -5,6 +5,7 @@ GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME=main
  export GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME
  
  . ./test-lib.sh
+. "$TEST_DIRECTORY"/lib-chunk.sh
  
  GIT_TEST_COMMIT_GRAPH=0
  GIT_TEST_COMMIT_GRAPH_CHANGED_PATHS=0
@@ -404,4 +405,53 @@ test_expect_success 'Bloom generation backfills empty commits' '
         )
  '
  
+corrupt_graph () {
+       graph=.git/objects/info/commit-graph &&
+       test_when_finished "rm -rf $graph" &&
+       git commit-graph write --reachable --changed-paths &&
+       corrupt_chunk_file $graph "$@"
+}
+
+check_corrupt_graph () {
+       corrupt_graph "$@" &&
+       git -c core.commitGraph=false log -- A/B/file2 >expect.out &&
+       git -c core.commitGraph=true log -- A/B/file2 >out 2>err &&
+       test_cmp expect.out out
+}
+
+test_expect_success 'Bloom reader notices too-small data chunk' '
+       check_corrupt_graph BDAT clear 00000000 &&
+       echo "warning: ignoring too-small changed-path chunk" \
+               "(4 < 12) in commit-graph file" >expect.err &&
+       test_cmp expect.err err
+'
+
+test_expect_success 'Bloom reader notices out-of-bounds filter offsets' '
+       check_corrupt_graph BIDX 12 FFFFFFFF &&
+       # use grep to avoid depending on exact chunk size
+       grep "warning: ignoring out-of-range offset (4294967295) for changed-path filter at pos 3 of .git/objects/info/commit-graph" err
+'
+
+test_expect_success 'Bloom reader notices too-small index chunk' '
+       # replace the index with a single entry, making most
+       # lookups out-of-bounds
+       check_corrupt_graph BIDX clear 00000000 &&
+       echo "warning: commit-graph changed-path index chunk" \
+               "is too small" >expect.err &&
+       test_cmp expect.err err
+'
+
+test_expect_success 'Bloom reader notices out-of-order index offsets' '
+       # we do not know any real offsets, but we can pick
+       # something plausible; we should not get to the point of
+       # actually reading from the bogus offsets anyway.
+       corrupt_graph BIDX 4 0000000c00000005 &&
+       echo "warning: ignoring decreasing changed-path index offsets" \
+               "(12 > 5) for positions 1 and 2 of .git/objects/info/commit-graph" >expect.err &&
+       git -c core.commitGraph=false log -- A/B/file2 >expect.out &&
+       git -c core.commitGraph=true log -- A/B/file2 >out 2>err &&
+       test_cmp expect.out out &&
+       test_cmp expect.err err
+'
+
  test_done
diff --git a/t/t5318-commit-graph.sh b/t/t5318-commit-graph.sh

index ba65f17dd9cc863a9a92ae90393af3ffdd2309de..6505ff595a389afa7d79fe31305d214046c376f2 100755 (executable)
--- a/t/t5318-commit-graph.sh
+++ b/t/t5318-commit-graph.sh
@@ -2,6 +2,7 @@
  
  test_description='commit graph'
  . ./test-lib.sh
+. "$TEST_DIRECTORY"/lib-chunk.sh
  
  GIT_TEST_COMMIT_GRAPH_CHANGED_PATHS=0
  
@@ -559,7 +560,7 @@ test_expect_success 'detect incorrect fanout' '
  
  test_expect_success 'detect incorrect fanout final value' '
         corrupt_graph_and_verify $GRAPH_BYTE_FANOUT2 "\01" \
-               "fanout value"
+               "oid table and fanout disagree on size"
  '
  
  test_expect_success 'detect incorrect OID order' '
@@ -821,4 +822,77 @@ test_expect_success 'overflow during generation version upgrade' '
         )
  '
  
+corrupt_chunk () {
+       graph=full/.git/objects/info/commit-graph &&
+       test_when_finished "rm -rf $graph" &&
+       git -C full commit-graph write --reachable &&
+       corrupt_chunk_file $graph "$@"
+}
+
+check_corrupt_chunk () {
+       corrupt_chunk "$@" &&
+       git -C full -c core.commitGraph=false log >expect.out &&
+       git -C full -c core.commitGraph=true log >out 2>err &&
+       test_cmp expect.out out
+}
+
+test_expect_success 'reader notices too-small oid fanout chunk' '
+       # make it big enough that the graph file is plausible,
+       # otherwise we hit an earlier check
+       check_corrupt_chunk OIDF clear $(printf "000000%02x" $(test_seq 250)) &&
+       cat >expect.err <<-\EOF &&
+       error: commit-graph oid fanout chunk is wrong size
+       error: commit-graph is missing the OID Fanout chunk
+       EOF
+       test_cmp expect.err err
+'
+
+test_expect_success 'reader notices fanout/lookup table mismatch' '
+       check_corrupt_chunk OIDF 1020 "FFFFFFFF" &&
+       cat >expect.err <<-\EOF &&
+       error: commit-graph oid table and fanout disagree on size
+       EOF
+       test_cmp expect.err err
+'
+
+test_expect_success 'reader notices out-of-bounds fanout' '
+       # Rather than try to corrupt a specific hash, we will just
+       # wreck them all. But we cannot just set them all to 0xFFFFFFFF or
+       # similar, as they are used for hi/lo starts in a binary search (so if
+       # they are identical, that indicates that the search should abort
+       # immediately). Instead, we will give them high values that differ by
+       # 2^24, ensuring that any that are used would cause an out-of-bounds
+       # read.
+       check_corrupt_chunk OIDF 0 $(printf "%02x000000" $(test_seq 0 254)) &&
+       cat >expect.err <<-\EOF &&
+       error: commit-graph fanout values out of order
+       EOF
+       test_cmp expect.err err
+'
+
+test_expect_success 'reader notices too-small commit data chunk' '
+       check_corrupt_chunk CDAT clear 00000000 &&
+       cat >expect.err <<-\EOF &&
+       error: commit-graph commit data chunk is wrong size
+       error: commit-graph is missing the Commit Data chunk
+       EOF
+       test_cmp expect.err err
+'
+
+test_expect_success 'reader notices out-of-bounds extra edge' '
+       check_corrupt_chunk EDGE clear &&
+       cat >expect.err <<-\EOF &&
+       error: commit-graph extra-edges pointer out of bounds
+       EOF
+       test_cmp expect.err err
+'
+
+test_expect_success 'reader notices too-small generations chunk' '
+       check_corrupt_chunk GDA2 clear 00000000 &&
+       cat >expect.err <<-\EOF &&
+       error: commit-graph generations chunk is wrong size
+       EOF
+       test_cmp expect.err err
+'
+
  test_done
diff --git a/t/t5319-multi-pack-index.sh b/t/t5319-multi-pack-index.sh

index 1bcc02004d7d1acbce8306bdeb5439f4da689ccb..d3c9e97feb12b7d80a1c74189a0da04c438152c5 100755 (executable)
--- a/t/t5319-multi-pack-index.sh
+++ b/t/t5319-multi-pack-index.sh
@@ -2,6 +2,7 @@
  
  test_description='multi-pack-indexes'
  . ./test-lib.sh
+. "$TEST_DIRECTORY"/lib-chunk.sh
  
  GIT_TEST_MULTI_PACK_INDEX=0
  objdir=.git/objects
@@ -438,7 +439,7 @@ test_expect_success 'verify extended chunk count' '
  
  test_expect_success 'verify missing required chunk' '
         corrupt_midx_and_verify $MIDX_BYTE_CHUNK_ID "\01" $objdir \
-               "missing required"
+               "required pack-name chunk missing"
  '
  
  test_expect_success 'verify invalid chunk offset' '
@@ -1055,4 +1056,105 @@ test_expect_success 'repack with delta islands' '
         )
  '
  
+corrupt_chunk () {
+       midx=.git/objects/pack/multi-pack-index &&
+       test_when_finished "rm -rf $midx" &&
+       git repack -ad --write-midx &&
+       corrupt_chunk_file $midx "$@"
+}
+
+test_expect_success 'reader notices too-small oid fanout chunk' '
+       corrupt_chunk OIDF clear 00000000 &&
+       test_must_fail git log 2>err &&
+       cat >expect <<-\EOF &&
+       error: multi-pack-index OID fanout is of the wrong size
+       fatal: multi-pack-index required OID fanout chunk missing or corrupted
+       EOF
+       test_cmp expect err
+'
+
+test_expect_success 'reader notices too-small oid lookup chunk' '
+       corrupt_chunk OIDL clear 00000000 &&
+       test_must_fail git log 2>err &&
+       cat >expect <<-\EOF &&
+       error: multi-pack-index OID lookup chunk is the wrong size
+       fatal: multi-pack-index required OID lookup chunk missing or corrupted
+       EOF
+       test_cmp expect err
+'
+
+test_expect_success 'reader notices too-small pack names chunk' '
+       # There is no NUL to terminate the name here, so the
+       # chunk is too short.
+       corrupt_chunk PNAM clear 70656666 &&
+       test_must_fail git log 2>err &&
+       cat >expect <<-\EOF &&
+       fatal: multi-pack-index pack-name chunk is too short
+       EOF
+       test_cmp expect err
+'
+
+test_expect_success 'reader handles unaligned chunks' '
+       # A 9-byte PNAM means all of the subsequent chunks
+       # will no longer be 4-byte aligned, but it is still
+       # a valid one-pack chunk on its own (it is "foo.pack\0").
+       corrupt_chunk PNAM clear 666f6f2e7061636b00 &&
+       git -c core.multipackindex=false log >expect.out &&
+       git -c core.multipackindex=true log >out 2>err &&
+       test_cmp expect.out out &&
+       cat >expect.err <<-\EOF &&
+       error: chunk id 4f494446 not 4-byte aligned
+       EOF
+       test_cmp expect.err err
+'
+
+test_expect_success 'reader notices too-small object offset chunk' '
+       corrupt_chunk OOFF clear 00000000 &&
+       test_must_fail git log 2>err &&
+       cat >expect <<-\EOF &&
+       error: multi-pack-index object offset chunk is the wrong size
+       fatal: multi-pack-index required object offsets chunk missing or corrupted
+       EOF
+       test_cmp expect err
+'
+
+test_expect_success 'reader bounds-checks large offset table' '
+       # re-use the objects64 dir here to cheaply get access to a midx
+       # with large offsets.
+       git init repo &&
+       test_when_finished "rm -rf repo" &&
+       (
+               cd repo &&
+               (cd ../objects64 && pwd) >.git/objects/info/alternates &&
+               git multi-pack-index --object-dir=../objects64 write &&
+               midx=../objects64/pack/multi-pack-index &&
+               corrupt_chunk_file $midx LOFF clear &&
+               # using only %(objectsize) is important here; see the commit
+               # message for more details
+               test_must_fail git cat-file --batch-all-objects \
+                       --batch-check="%(objectsize)" 2>err &&
+               cat >expect <<-\EOF &&
+               fatal: multi-pack-index large offset out of bounds
+               EOF
+               test_cmp expect err
+       )
+'
+
+test_expect_success 'reader notices too-small revindex chunk' '
+       # We only get a revindex with bitmaps (and likewise only
+       # load it when they are asked for).
+       test_config repack.writeBitmaps true &&
+       corrupt_chunk RIDX clear 00000000 &&
+       git -c core.multipackIndex=false rev-list \
+               --all --use-bitmap-index >expect.out &&
+       git -c core.multipackIndex=true rev-list \
+               --all --use-bitmap-index >out 2>err &&
+       test_cmp expect.out out &&
+       cat >expect.err <<-\EOF &&
+       error: multi-pack-index reverse-index chunk is the wrong size
+       warning: multi-pack bitmap is missing required reverse index
+       EOF
+       test_cmp expect.err err
+'
+
  test_done
diff --git a/t/t5324-split-commit-graph.sh b/t/t5324-split-commit-graph.sh

index 8a9720dcb05a54e53a9c0fdb83faf61da72c1972..97eb6d2e72bac5c3936965c269ff9bfa56d87568 100755 (executable)
--- a/t/t5324-split-commit-graph.sh
+++ b/t/t5324-split-commit-graph.sh
@@ -4,6 +4,7 @@ test_description='split commit graph'
  
  TEST_PASSES_SANITIZE_LEAK=true
  . ./test-lib.sh
+. "$TEST_DIRECTORY"/lib-chunk.sh
  
  GIT_TEST_COMMIT_GRAPH=0
  GIT_TEST_COMMIT_GRAPH_CHANGED_PATHS=0
@@ -319,7 +320,7 @@ test_expect_success 'verify --shallow does not check base contents' '
                 cd verify-shallow &&
                 git commit-graph verify &&
                 base_file=$graphdir/graph-$(head -n 1 $graphdir/commit-graph-chain).graph &&
-               corrupt_file "$base_file" 1000 "\01" &&
+               corrupt_file "$base_file" 1500 "\01" &&
                 git commit-graph verify --shallow &&
                 test_must_fail git commit-graph verify 2>test_err &&
                 grep -v "^+" test_err >err &&
@@ -393,10 +394,23 @@ test_expect_success 'verify across alternates' '
                 test_commit extra &&
                 git commit-graph write --reachable --split &&
                 tip_file=$graphdir/graph-$(tail -n 1 $graphdir/commit-graph-chain).graph &&
-               corrupt_file "$tip_file" 100 "\01" &&
+               corrupt_file "$tip_file" 1500 "\01" &&
                 test_must_fail git commit-graph verify --shallow 2>test_err &&
                 grep -v "^+" test_err >err &&
-               test_i18ngrep "commit-graph has incorrect fanout value" err
+               test_i18ngrep "incorrect checksum" err
+       )
+'
+
+test_expect_success 'reader bounds-checks base-graph chunk' '
+       git clone --no-hardlinks . corrupt-base-chunk &&
+       (
+               cd corrupt-base-chunk &&
+               tip_file=$graphdir/graph-$(tail -n 1 $graphdir/commit-graph-chain).graph &&
+               corrupt_chunk_file "$tip_file" BASE clear 01020304 &&
+               git -c core.commitGraph=false log >expect.out &&
+               git -c core.commitGraph=true log >out 2>err &&
+               test_cmp expect.out out &&
+               grep "commit-graph base graphs chunk is too small" err
         )
  '
  
diff --git a/t/t5328-commit-graph-64bit-time.sh b/t/t5328-commit-graph-64bit-time.sh

index ca476e80a0d8dcc663d0cf1b678656d0e5b2868d..fc6a242b56d88686036ff019003352ee806ef8ce 100755 (executable)
--- a/t/t5328-commit-graph-64bit-time.sh
+++ b/t/t5328-commit-graph-64bit-time.sh
@@ -12,6 +12,7 @@ then
  fi
  
  . "$TEST_DIRECTORY"/lib-commit-graph.sh
+. "$TEST_DIRECTORY/lib-chunk.sh"
  
  UNIX_EPOCH_ZERO="@0 +0000"
  FUTURE_DATE="@4147483646 +0000"
@@ -74,4 +75,13 @@ test_expect_success 'single commit with generation data exceeding UINT32_MAX' '
         git -C repo-uint32-max commit-graph verify
  '
  
+test_expect_success 'reader notices out-of-bounds generation overflow' '
+       graph=.git/objects/info/commit-graph &&
+       test_when_finished "rm -rf $graph" &&
+       git commit-graph write --reachable &&
+       corrupt_chunk_file $graph GDO2 clear &&
+       test_must_fail git log 2>err &&
+       grep "commit-graph overflow generation data is too small" err
+'
+
  test_done
author	Junio C Hamano <gitster@pobox.com>
	Mon, 23 Oct 2023 20:56:36 +0000 (13:56 -0700)
committer	Junio C Hamano <gitster@pobox.com>
	Mon, 23 Oct 2023 20:56:36 +0000 (13:56 -0700)
bloom.c		patch \| blob \| blame \| history
chunk-format.c		patch \| blob \| blame \| history
chunk-format.h		patch \| blob \| blame \| history
commit-graph.c		patch \| blob \| blame \| history
commit-graph.h		patch \| blob \| blame \| history
midx.c		patch \| blob \| blame \| history
midx.h		patch \| blob \| blame \| history
pack-revindex.c		patch \| blob \| blame \| history
t/lib-chunk.sh	[new file with mode: 0644]	patch \| blob
t/lib-chunk/corrupt-chunk-file.pl	[new file with mode: 0644]	patch \| blob
t/t4216-log-bloom.sh		patch \| blob \| blame \| history
t/t5318-commit-graph.sh		patch \| blob \| blame \| history
t/t5319-multi-pack-index.sh		patch \| blob \| blame \| history
t/t5324-split-commit-graph.sh		patch \| blob \| blame \| history
t/t5328-commit-graph-64bit-time.sh		patch \| blob \| blame \| history