git.ipfire.org Git - thirdparty/git.git/blobdiff - commit-graph.c
commit-reach: use one walk in remove_redundant()
[thirdparty/git.git] / commit-graph.c
index 06f8dc1d8966f1ec59f58460a41df67b63180b24..f3bde2ad95a16ad81c657296d999e7f7c417ff4b 100644 (file)
@@ -38,11 +38,13 @@ void git_test_write_commit_graph_or_die(void)
 #define GRAPH_CHUNKID_OIDFANOUT 0x4f494446 /* "OIDF" */
 #define GRAPH_CHUNKID_OIDLOOKUP 0x4f49444c /* "OIDL" */
 #define GRAPH_CHUNKID_DATA 0x43444154 /* "CDAT" */
+#define GRAPH_CHUNKID_GENERATION_DATA 0x47444154 /* "GDAT" */
+#define GRAPH_CHUNKID_GENERATION_DATA_OVERFLOW 0x47444f56 /* "GDOV" */
 #define GRAPH_CHUNKID_EXTRAEDGES 0x45444745 /* "EDGE" */
 #define GRAPH_CHUNKID_BLOOMINDEXES 0x42494458 /* "BIDX" */
 #define GRAPH_CHUNKID_BLOOMDATA 0x42444154 /* "BDAT" */
 #define GRAPH_CHUNKID_BASE 0x42415345 /* "BASE" */
-#define MAX_NUM_CHUNKS 7
+#define MAX_NUM_CHUNKS 9
 
 #define GRAPH_DATA_WIDTH (the_hash_algo->rawsz + 16)
 
@@ -61,9 +63,13 @@ void git_test_write_commit_graph_or_die(void)
 #define GRAPH_MIN_SIZE (GRAPH_HEADER_SIZE + 4 * GRAPH_CHUNKLOOKUP_WIDTH \
                        + GRAPH_FANOUT_SIZE + the_hash_algo->rawsz)
 
+#define CORRECTED_COMMIT_DATE_OFFSET_OVERFLOW (1ULL << 31)
+
 /* Remember to update object flag allocation in object.h */
 #define REACHABLE       (1u<<15)
 
+define_commit_slab(topo_level_slab, uint32_t);
+
 /* Keep track of the order in which commits are added to our list. */
 define_commit_slab(commit_pos, int);
 static struct commit_pos commit_pos = COMMIT_SLAB_INIT(1, commit_pos);
@@ -99,7 +105,7 @@ uint32_t commit_graph_position(const struct commit *c)
        return data ? data->graph_pos : COMMIT_NOT_FROM_GRAPH;
 }
 
-uint32_t commit_graph_generation(const struct commit *c)
+timestamp_t commit_graph_generation(const struct commit *c)
 {
        struct commit_graph_data *data =
                commit_graph_data_slab_peek(&commit_graph_data_slab, c);
@@ -139,13 +145,17 @@ static struct commit_graph_data *commit_graph_data_at(const struct commit *c)
        return data;
 }
 
+/*
+ * Should be used only while writing commit-graph as it compares
+ * generation value of commits by directly accessing commit-slab.
+ */
 static int commit_gen_cmp(const void *va, const void *vb)
 {
        const struct commit *a = *(const struct commit **)va;
        const struct commit *b = *(const struct commit **)vb;
 
-       uint32_t generation_a = commit_graph_generation(a);
-       uint32_t generation_b = commit_graph_generation(b);
+       const timestamp_t generation_a = commit_graph_data_at(a)->generation;
+       const timestamp_t generation_b = commit_graph_data_at(b)->generation;
        /* lower generation commits first */
        if (generation_a < generation_b)
                return -1;
@@ -388,6 +398,20 @@ struct commit_graph *parse_commit_graph(struct repository *r,
                                graph->chunk_commit_data = data + chunk_offset;
                        break;
 
+               case GRAPH_CHUNKID_GENERATION_DATA:
+                       if (graph->chunk_generation_data)
+                               chunk_repeated = 1;
+                       else
+                               graph->chunk_generation_data = data + chunk_offset;
+                       break;
+
+               case GRAPH_CHUNKID_GENERATION_DATA_OVERFLOW:
+                       if (graph->chunk_generation_data_overflow)
+                               chunk_repeated = 1;
+                       else
+                               graph->chunk_generation_data_overflow = data + chunk_offset;
+                       break;
+
                case GRAPH_CHUNKID_EXTRAEDGES:
                        if (graph->chunk_extra_edges)
                                chunk_repeated = 1;
@@ -590,6 +614,21 @@ static struct commit_graph *load_commit_graph_chain(struct repository *r,
        return graph_chain;
 }
 
+/* Read generation data only if every layer of the chain has a GDAT chunk. */
+static void validate_mixed_generation_chain(struct commit_graph *g)
+{
+       struct commit_graph *p;
+       int read_generation_data = 1;
+
+       /* A layer without the chunk has no offsets to read, so check them all. */
+       for (p = g; p; p = p->base_graph)
+               if (!p->chunk_generation_data)
+                       read_generation_data = 0;
+
+       for (p = g; p; p = p->base_graph)
+               p->read_generation_data = read_generation_data;
+}
+
 struct commit_graph *read_commit_graph_one(struct repository *r,
                                           struct object_directory *odb)
 {
@@ -598,6 +637,8 @@ struct commit_graph *read_commit_graph_one(struct repository *r,
        if (!g)
                g = load_commit_graph_chain(r, odb);
 
+       validate_mixed_generation_chain(g);
+
        return g;
 }
 
@@ -673,6 +714,20 @@ int generation_numbers_enabled(struct repository *r)
        return !!first_generation;
 }
 
+/* Nonzero iff a prepared, non-empty commit-graph reads generation data. */
+int corrected_commit_dates_enabled(struct repository *r)
+{
+       struct commit_graph *graph;
+
+       if (prepare_commit_graph(r)) {
+               graph = r->objects->commit_graph;
+               /* A graph holding no commits reports no corrected dates. */
+               if (graph->num_commits)
+                       return graph->read_generation_data;
+       }
+       return 0;
+}
+
 struct bloom_filter_settings *get_bloom_filter_settings(struct repository *r)
 {
        struct commit_graph *g = r->objects->commit_graph;
@@ -748,17 +803,41 @@ static void fill_commit_graph_info(struct commit *item, struct commit_graph *g,
 {
        const unsigned char *commit_data;
        struct commit_graph_data *graph_data;
-       uint32_t lex_index;
+       uint32_t lex_index, offset_pos;
+       uint64_t date_high, date_low, offset;
 
        while (pos < g->num_commits_in_base)
                g = g->base_graph;
 
+       if (pos >= g->num_commits + g->num_commits_in_base)
+               die(_("invalid commit position. commit-graph is likely corrupt"));
+
        lex_index = pos - g->num_commits_in_base;
        commit_data = g->chunk_commit_data + GRAPH_DATA_WIDTH * lex_index;
 
        graph_data = commit_graph_data_at(item);
        graph_data->graph_pos = pos;
-       graph_data->generation = get_be32(commit_data + g->hash_len + 8) >> 2;
+
+       date_high = get_be32(commit_data + g->hash_len + 8) & 0x3;
+       date_low = get_be32(commit_data + g->hash_len + 12);
+       item->date = (timestamp_t)((date_high << 32) | date_low);
+
+       if (g->read_generation_data) {
+               offset = (timestamp_t)get_be32(g->chunk_generation_data + sizeof(uint32_t) * lex_index);
+
+               if (offset & CORRECTED_COMMIT_DATE_OFFSET_OVERFLOW) {
+                       if (!g->chunk_generation_data_overflow)
+                               die(_("commit-graph requires overflow generation data but has none"));
+
+                       offset_pos = offset ^ CORRECTED_COMMIT_DATE_OFFSET_OVERFLOW;
+                       graph_data->generation = get_be64(g->chunk_generation_data_overflow + 8 * offset_pos);
+               } else
+                       graph_data->generation = item->date + offset;
+       } else
+               graph_data->generation = get_be32(commit_data + g->hash_len + 8) >> 2;
+
+       if (g->topo_levels)
+               *topo_level_slab_at(g->topo_levels, item) = get_be32(commit_data + g->hash_len + 8) >> 2;
 }
 
 static inline void set_commit_tree(struct commit *c, struct tree *t)
@@ -772,38 +851,22 @@ static int fill_commit_in_graph(struct repository *r,
 {
        uint32_t edge_value;
        uint32_t *parent_data_ptr;
-       uint64_t date_low, date_high;
        struct commit_list **pptr;
-       struct commit_graph_data *graph_data;
        const unsigned char *commit_data;
        uint32_t lex_index;
 
        while (pos < g->num_commits_in_base)
                g = g->base_graph;
 
-       if (pos >= g->num_commits + g->num_commits_in_base)
-               die(_("invalid commit position. commit-graph is likely corrupt"));
+       fill_commit_graph_info(item, g, pos);
 
-       /*
-        * Store the "full" position, but then use the
-        * "local" position for the rest of the calculation.
-        */
-       graph_data = commit_graph_data_at(item);
-       graph_data->graph_pos = pos;
        lex_index = pos - g->num_commits_in_base;
-
        commit_data = g->chunk_commit_data + (g->hash_len + 16) * lex_index;
 
        item->object.parsed = 1;
 
        set_commit_tree(item, NULL);
 
-       date_high = get_be32(commit_data + g->hash_len + 8) & 0x3;
-       date_low = get_be32(commit_data + g->hash_len + 12);
-       item->date = (timestamp_t)((date_high << 32) | date_low);
-
-       graph_data->generation = get_be32(commit_data + g->hash_len + 8) >> 2;
-
        pptr = &item->parents;
 
        edge_value = get_be32(commit_data + g->hash_len);
@@ -943,6 +1006,7 @@ struct write_commit_graph_context {
        struct oid_array oids;
        struct packed_commit_list commits;
        int num_extra_edges;
+       int num_generation_data_overflows;
        unsigned long approx_nr_objects;
        struct progress *progress;
        int progress_done;
@@ -961,8 +1025,10 @@ struct write_commit_graph_context {
                 report_progress:1,
                 split:1,
                 changed_paths:1,
-                order_by_pack:1;
+                order_by_pack:1,
+                write_generation_data:1;
 
+       struct topo_level_slab *topo_levels;
        const struct commit_graph_opts *opts;
        size_t total_bloom_filter_data_size;
        const struct bloom_filter_settings *bloom_settings;
@@ -1109,7 +1175,7 @@ static int write_graph_chunk_data(struct hashfile *f,
                else
                        packedDate[0] = 0;
 
-               packedDate[0] |= htonl(commit_graph_data_at(*list)->generation << 2);
+               packedDate[0] |= htonl(*topo_level_slab_at(ctx->topo_levels, *list) << 2);
 
                packedDate[1] = htonl((*list)->date);
                hashwrite(f, packedDate, 8);
@@ -1120,6 +1186,45 @@ static int write_graph_chunk_data(struct hashfile *f,
        return 0;
 }
 
+/* Emit one be32 per commit: its date offset, or a flagged overflow index. */
+static int write_graph_chunk_generation_data(struct hashfile *f,
+                                             struct write_commit_graph_context *ctx)
+{
+       int idx = 0, next_overflow_slot = 0;
+
+       while (idx < ctx->commits.nr) {
+               struct commit *commit = ctx->commits.list[idx++];
+               timestamp_t value = commit_graph_data_at(commit)->generation - commit->date;
+
+               display_progress(ctx->progress, ++ctx->progress_cnt);
+               /* Too-large offsets are replaced by their running overflow index. */
+               if (value > GENERATION_NUMBER_V2_OFFSET_MAX)
+                       value = CORRECTED_COMMIT_DATE_OFFSET_OVERFLOW | next_overflow_slot++;
+
+               hashwrite_be32(f, value);
+       }
+
+       return 0;
+}
+
+/* For each overflowing commit, emit generation - date as two be32 halves, high first. */
+static int write_graph_chunk_generation_data_overflow(struct hashfile *f,
+                                                      struct write_commit_graph_context *ctx)
+{
+       int idx = 0;
+       while (idx < ctx->commits.nr) {
+               struct commit *commit = ctx->commits.list[idx++];
+               timestamp_t delta = commit_graph_data_at(commit)->generation - commit->date;
+
+               display_progress(ctx->progress, ++ctx->progress_cnt);
+               if (delta <= GENERATION_NUMBER_V2_OFFSET_MAX)
+                       continue;
+               hashwrite_be32(f, delta >> 32);
+               hashwrite_be32(f, (uint32_t) delta);
+       }
+       return 0;
+}
+
 static int write_graph_chunk_extra_edges(struct hashfile *f,
                                         struct write_commit_graph_context *ctx)
 {
@@ -1339,11 +1444,12 @@ static void compute_generation_numbers(struct write_commit_graph_context *ctx)
                                        _("Computing commit graph generation numbers"),
                                        ctx->commits.nr);
        for (i = 0; i < ctx->commits.nr; i++) {
-               uint32_t generation = commit_graph_data_at(ctx->commits.list[i])->generation;
+               uint32_t level = *topo_level_slab_at(ctx->topo_levels, ctx->commits.list[i]);
+               timestamp_t corrected_commit_date = commit_graph_data_at(ctx->commits.list[i])->generation;
 
                display_progress(ctx->progress, i + 1);
-               if (generation != GENERATION_NUMBER_INFINITY &&
-                   generation != GENERATION_NUMBER_ZERO)
+               if (level != GENERATION_NUMBER_ZERO &&
+                   corrected_commit_date != GENERATION_NUMBER_ZERO)
                        continue;
 
                commit_list_insert(ctx->commits.list[i], &list);
@@ -1351,29 +1457,40 @@ static void compute_generation_numbers(struct write_commit_graph_context *ctx)
                        struct commit *current = list->item;
                        struct commit_list *parent;
                        int all_parents_computed = 1;
-                       uint32_t max_generation = 0;
+                       uint32_t max_level = 0;
+                       timestamp_t max_corrected_commit_date = 0;
 
                        for (parent = current->parents; parent; parent = parent->next) {
-                               generation = commit_graph_data_at(parent->item)->generation;
+                               level = *topo_level_slab_at(ctx->topo_levels, parent->item);
+                               corrected_commit_date = commit_graph_data_at(parent->item)->generation;
 
-                               if (generation == GENERATION_NUMBER_INFINITY ||
-                                   generation == GENERATION_NUMBER_ZERO) {
+                               if (level == GENERATION_NUMBER_ZERO ||
+                                   corrected_commit_date == GENERATION_NUMBER_ZERO) {
                                        all_parents_computed = 0;
                                        commit_list_insert(parent->item, &list);
                                        break;
-                               } else if (generation > max_generation) {
-                                       max_generation = generation;
                                }
+
+                               if (level > max_level)
+                                       max_level = level;
+
+                               if (corrected_commit_date > max_corrected_commit_date)
+                                       max_corrected_commit_date = corrected_commit_date;
                        }
 
                        if (all_parents_computed) {
-                               struct commit_graph_data *data = commit_graph_data_at(current);
-
-                               data->generation = max_generation + 1;
                                pop_commit(&list);
 
-                               if (data->generation > GENERATION_NUMBER_MAX)
-                                       data->generation = GENERATION_NUMBER_MAX;
+                               if (max_level > GENERATION_NUMBER_V1_MAX - 1)
+                                       max_level = GENERATION_NUMBER_V1_MAX - 1;
+                               *topo_level_slab_at(ctx->topo_levels, current) = max_level + 1;
+
+                               if (current->date && current->date > max_corrected_commit_date)
+                                       max_corrected_commit_date = current->date - 1;
+                               commit_graph_data_at(current)->generation = max_corrected_commit_date + 1;
+
+                               if (commit_graph_data_at(current)->generation - current->date > GENERATION_NUMBER_V2_OFFSET_MAX)
+                                       ctx->num_generation_data_overflows++;
                        }
                }
        }
@@ -1707,6 +1824,21 @@ static int write_commit_graph_file(struct write_commit_graph_context *ctx)
        chunks[2].id = GRAPH_CHUNKID_DATA;
        chunks[2].size = (hashsz + 16) * ctx->commits.nr;
        chunks[2].write_fn = write_graph_chunk_data;
+
+       if (git_env_bool(GIT_TEST_COMMIT_GRAPH_NO_GDAT, 0))
+               ctx->write_generation_data = 0;
+       if (ctx->write_generation_data) {
+               chunks[num_chunks].id = GRAPH_CHUNKID_GENERATION_DATA;
+               chunks[num_chunks].size = sizeof(uint32_t) * ctx->commits.nr;
+               chunks[num_chunks].write_fn = write_graph_chunk_generation_data;
+               num_chunks++;
+       }
+       if (ctx->num_generation_data_overflows) {
+               chunks[num_chunks].id = GRAPH_CHUNKID_GENERATION_DATA_OVERFLOW;
+               chunks[num_chunks].size = sizeof(timestamp_t) * ctx->num_generation_data_overflows;
+               chunks[num_chunks].write_fn = write_graph_chunk_generation_data_overflow;
+               num_chunks++;
+       }
        if (ctx->num_extra_edges) {
                chunks[num_chunks].id = GRAPH_CHUNKID_EXTRAEDGES;
                chunks[num_chunks].size = 4 * ctx->num_extra_edges;
@@ -1918,6 +2050,13 @@ static void split_graph_merge_strategy(struct write_commit_graph_context *ctx)
                if (i < ctx->num_commit_graphs_after)
                        ctx->commit_graph_hash_after[i] = xstrdup(oid_to_hex(&g->oid));
 
+               /*
+                * If the topmost remaining layer has generation data chunk, the
+                * resultant layer also has generation data chunk.
+                */
+               if (i == ctx->num_commit_graphs_after - 2)
+                       ctx->write_generation_data = !!g->chunk_generation_data;
+
                i--;
                g = g->base_graph;
        }
@@ -2109,6 +2248,7 @@ int write_commit_graph(struct object_directory *odb,
        int res = 0;
        int replace = 0;
        struct bloom_filter_settings bloom_settings = DEFAULT_BLOOM_FILTER_SETTINGS;
+       struct topo_level_slab topo_levels;
 
        prepare_repo_settings(the_repository);
        if (!the_repository->settings.core_commit_graph) {
@@ -2126,6 +2266,8 @@ int write_commit_graph(struct object_directory *odb,
        ctx->split = flags & COMMIT_GRAPH_WRITE_SPLIT ? 1 : 0;
        ctx->opts = opts;
        ctx->total_bloom_filter_data_size = 0;
+       ctx->write_generation_data = 1;
+       ctx->num_generation_data_overflows = 0;
 
        bloom_settings.bits_per_entry = git_env_ulong("GIT_TEST_BLOOM_SETTINGS_BITS_PER_ENTRY",
                                                      bloom_settings.bits_per_entry);
@@ -2135,6 +2277,18 @@ int write_commit_graph(struct object_directory *odb,
                                                         bloom_settings.max_changed_paths);
        ctx->bloom_settings = &bloom_settings;
 
+       init_topo_level_slab(&topo_levels);
+       ctx->topo_levels = &topo_levels;
+
+       if (ctx->r->objects->commit_graph) {
+               struct commit_graph *g = ctx->r->objects->commit_graph;
+
+               while (g) {
+                       g->topo_levels = &topo_levels;
+                       g = g->base_graph;
+               }
+       }
+
        if (flags & COMMIT_GRAPH_WRITE_BLOOM_FILTERS)
                ctx->changed_paths = 1;
        if (!(flags & COMMIT_GRAPH_NO_WRITE_BLOOM_FILTERS)) {
@@ -2227,6 +2381,8 @@ int write_commit_graph(struct object_directory *odb,
        } else
                ctx->num_commit_graphs_after = 1;
 
+       validate_mixed_generation_chain(ctx->r->objects->commit_graph);
+
        compute_generation_numbers(ctx);
 
        if (ctx->changed_paths)
@@ -2355,8 +2511,8 @@ int verify_commit_graph(struct repository *r, struct commit_graph *g, int flags)
        for (i = 0; i < g->num_commits; i++) {
                struct commit *graph_commit, *odb_commit;
                struct commit_list *graph_parents, *odb_parents;
-               uint32_t max_generation = 0;
-               uint32_t generation;
+               timestamp_t max_generation = 0;
+               timestamp_t generation;
 
                display_progress(progress, i + 1);
                hashcpy(cur_oid.hash, g->chunk_oid_lookup + g->hash_len * i);
@@ -2420,16 +2576,17 @@ int verify_commit_graph(struct repository *r, struct commit_graph *g, int flags)
                        continue;
 
                /*
-                * If one of our parents has generation GENERATION_NUMBER_MAX, then
-                * our generation is also GENERATION_NUMBER_MAX. Decrement to avoid
-                * extra logic in the following condition.
+                * If we are using topological level and one of our parents has
+                * generation GENERATION_NUMBER_V1_MAX, then our generation is
+                * also GENERATION_NUMBER_V1_MAX. Decrement to avoid extra logic
+                * in the following condition.
                 */
-               if (max_generation == GENERATION_NUMBER_MAX)
+               if (!g->read_generation_data && max_generation == GENERATION_NUMBER_V1_MAX)
                        max_generation--;
 
                generation = commit_graph_generation(graph_commit);
-               if (generation != max_generation + 1)
-                       graph_report(_("commit-graph generation for commit %s is %u != %u"),
+               if (generation < max_generation + 1)
+                       graph_report(_("commit-graph generation for commit %s is %"PRItime" < %"PRItime),
                                     oid_to_hex(&cur_oid),
                                     generation,
                                     max_generation + 1);