]> git.ipfire.org Git - thirdparty/git.git/blobdiff - unpack-trees.c
unpack-trees: optimize walking same trees with cache-tree
[thirdparty/git.git] / unpack-trees.c
index 6d9f692ea651dde9d408ea5196014e37872313b1..8376663b593afc926d40329377fb3208416b9e99 100644 (file)
@@ -635,6 +635,102 @@ static inline int are_same_oid(struct name_entry *name_j, struct name_entry *nam
        return name_j->oid && name_k->oid && !oidcmp(name_j->oid, name_k->oid);
 }
 
+static int all_trees_same_as_cache_tree(int n, unsigned long dirmask,
+                                       struct name_entry *names,
+                                       struct traverse_info *info)
+{
+       struct unpack_trees_options *o = info->data;
+       int i;
+
+       if (!o->merge || dirmask != ((1 << n) - 1))
+               return 0;
+
+       for (i = 1; i < n; i++)
+               if (!are_same_oid(names, names + i))
+                       return 0;
+
+       return cache_tree_matches_traversal(o->src_index->cache_tree, names, info);
+}
+
+static int index_pos_by_traverse_info(struct name_entry *names,
+                                     struct traverse_info *info)
+{
+       struct unpack_trees_options *o = info->data;
+       int len = traverse_path_len(info, names);
+       char *name = xmalloc(len + 1 /* slash */ + 1 /* NUL */);
+       int pos;
+
+       make_traverse_path(name, info, names);
+       name[len++] = '/';
+       name[len] = '\0';
+       pos = index_name_pos(o->src_index, name, len);
+       if (pos >= 0)
+               BUG("This is a directory and should not exist in index");
+       pos = -pos - 1;
+       if (!starts_with(o->src_index->cache[pos]->name, name) ||
+           (pos > 0 && starts_with(o->src_index->cache[pos-1]->name, name)))
+               BUG("pos must point at the first entry in this directory");
+       free(name);
+       return pos;
+}
+
+/*
+ * Fast path if we detect that all trees are the same as cache-tree at this
+ * path. We'll walk these trees recursively using cache-tree/index instead of
+ * ODB since already know what these trees contain.
+ */
+static int traverse_by_cache_tree(int pos, int nr_entries, int nr_names,
+                                 struct name_entry *names,
+                                 struct traverse_info *info)
+{
+       struct cache_entry *src[MAX_UNPACK_TREES + 1] = { NULL, };
+       struct unpack_trees_options *o = info->data;
+       int i, d;
+
+       if (!o->merge)
+               BUG("We need cache-tree to do this optimization");
+
+       /*
+        * Do what unpack_callback() and unpack_nondirectories() normally
+        * do. But we walk all paths in an iterative loop instead.
+        *
+        * D/F conflicts and higher stage entries are not a concern
+        * because cache-tree would be invalidated and we would never
+        * get here in the first place.
+        */
+       for (i = 0; i < nr_entries; i++) {
+               struct cache_entry *tree_ce;
+               int len, rc;
+
+               src[0] = o->src_index->cache[pos + i];
+
+               len = ce_namelen(src[0]);
+               tree_ce = xcalloc(1, cache_entry_size(len));
+
+               tree_ce->ce_mode = src[0]->ce_mode;
+               tree_ce->ce_flags = create_ce_flags(0);
+               tree_ce->ce_namelen = len;
+               oidcpy(&tree_ce->oid, &src[0]->oid);
+               memcpy(tree_ce->name, src[0]->name, len + 1);
+
+               for (d = 1; d <= nr_names; d++)
+                       src[d] = tree_ce;
+
+               rc = call_unpack_fn((const struct cache_entry * const *)src, o);
+               free(tree_ce);
+               if (rc < 0)
+                       return rc;
+
+               mark_ce_used(src[0], o);
+       }
+       if (o->debug_unpack)
+               printf("Unpacked %d entries from %s to %s using cache-tree\n",
+                      nr_entries,
+                      o->src_index->cache[pos]->name,
+                      o->src_index->cache[pos + nr_entries - 1]->name);
+       return 0;
+}
+
 static int traverse_trees_recursive(int n, unsigned long dirmask,
                                    unsigned long df_conflicts,
                                    struct name_entry *names,
@@ -646,6 +742,27 @@ static int traverse_trees_recursive(int n, unsigned long dirmask,
        void *buf[MAX_UNPACK_TREES];
        struct traverse_info newinfo;
        struct name_entry *p;
+       int nr_entries;
+
+       nr_entries = all_trees_same_as_cache_tree(n, dirmask, names, info);
+       if (nr_entries > 0) {
+               struct unpack_trees_options *o = info->data;
+               int pos = index_pos_by_traverse_info(names, info);
+
+               if (!o->merge || df_conflicts)
+                       BUG("Wrong condition to get here buddy");
+
+               /*
+                * All entries up to 'pos' must have been processed
+                * (i.e. marked CE_UNPACKED) at this point. But to be safe,
+                * save and restore cache_bottom anyway to not miss
+                * unprocessed entries before 'pos'.
+                */
+               bottom = o->cache_bottom;
+               ret = traverse_by_cache_tree(pos, nr_entries, n, names, info);
+               o->cache_bottom = bottom;
+               return ret;
+       }
 
        p = names;
        while (!p->mode)
@@ -812,6 +929,11 @@ static struct cache_entry *create_ce_entry(const struct traverse_info *info,
        return ce;
 }
 
+/*
+ * Note that traverse_by_cache_tree() duplicates some logic in this function
+ * without actually calling it. If you change the logic here you may need to
+ * check and change there as well.
+ */
 static int unpack_nondirectories(int n, unsigned long mask,
                                 unsigned long dirmask,
                                 struct cache_entry **src,
@@ -1004,6 +1126,11 @@ static void debug_unpack_callback(int n,
                debug_name_entry(i, names + i);
 }
 
+/*
+ * Note that traverse_by_cache_tree() duplicates some logic in this function
+ * without actually calling it. If you change the logic here you may need to
+ * check and change there as well.
+ */
 static int unpack_callback(int n, unsigned long mask, unsigned long dirmask, struct name_entry *names, struct traverse_info *info)
 {
        struct cache_entry *src[MAX_UNPACK_TREES + 1] = { NULL, };