git.ipfire.org Git - thirdparty/git.git/commitdiff
Merge branch 'cs/store-packfiles-in-hashmap'
author Junio C Hamano <gitster@pobox.com>
Mon, 16 Dec 2019 21:08:31 +0000 (13:08 -0800)
committer Junio C Hamano <gitster@pobox.com>
Mon, 16 Dec 2019 21:08:32 +0000 (13:08 -0800)
In a repository with many packfiles, the cost of the procedure that
avoids registering the same packfile twice was unnecessarily high
because it used an inefficient search algorithm; this has been corrected.

* cs/store-packfiles-in-hashmap:
  packfile.c: speed up loading lots of packfiles

object-store.h
object.c
packfile.c
t/perf/p5303-many-packs.sh

index 7f7b3cdd806b756eefb86c7d116ba9a9a6c62558..55ee63935073e2b2790d82e52399aaa1638f9e8f 100644 (file)
@@ -60,6 +60,7 @@ struct oid_array *odb_loose_cache(struct object_directory *odb,
 void odb_clear_loose_cache(struct object_directory *odb);
 
 struct packed_git {
+       struct hashmap_entry packmap_ent;
        struct packed_git *next;
        struct list_head mru;
        struct pack_window *windows;
@@ -88,6 +89,20 @@ struct packed_git {
 
 struct multi_pack_index;
 
+static inline int pack_map_entry_cmp(const void *unused_cmp_data,
+                                    const struct hashmap_entry *entry,
+                                    const struct hashmap_entry *entry2,
+                                    const void *keydata)
+{
+       const char *key = keydata;
+       const struct packed_git *pg1, *pg2;
+
+       pg1 = container_of(entry, const struct packed_git, packmap_ent);
+       pg2 = container_of(entry2, const struct packed_git, packmap_ent);
+
+       return strcmp(pg1->pack_name, key ? key : pg2->pack_name);
+}
+
 struct raw_object_store {
        /*
         * Set of all object directories; the main directory is first (and
@@ -131,6 +146,12 @@ struct raw_object_store {
        /* A most-recently-used ordered version of the packed_git list. */
        struct list_head packed_git_mru;
 
+       /*
+        * A map of packfiles to packed_git structs for tracking which
+        * packs have been loaded already.
+        */
+       struct hashmap pack_map;
+
        /*
         * A fast, rough count of the number of objects in the repository.
         * These two fields are not meant for direct access. Use
index 3b8b8c55c9a8e7e2961d9911f1ec4fd70730f07f..142ef69399a2fd81c36d81291c573295715b48b8 100644 (file)
--- a/object.c
+++ b/object.c
@@ -479,6 +479,7 @@ struct raw_object_store *raw_object_store_new(void)
 
        memset(o, 0, sizeof(*o));
        INIT_LIST_HEAD(&o->packed_git_mru);
+       hashmap_init(&o->pack_map, pack_map_entry_cmp, NULL, 0);
        return o;
 }
 
@@ -518,6 +519,8 @@ void raw_object_store_clear(struct raw_object_store *o)
        INIT_LIST_HEAD(&o->packed_git_mru);
        close_object_store(o);
        o->packed_git = NULL;
+
+       hashmap_free(&o->pack_map);
 }
 
 void parsed_object_pool_clear(struct parsed_object_pool *o)
index 355066de17514e1a2cf8456e6d55d871bf15f09b..f0dc63e92fb31f4b0154bf29f2f54cf8d8be3190 100644 (file)
@@ -757,6 +757,9 @@ void install_packed_git(struct repository *r, struct packed_git *pack)
 
        pack->next = r->objects->packed_git;
        r->objects->packed_git = pack;
+
+       hashmap_entry_init(&pack->packmap_ent, strhash(pack->pack_name));
+       hashmap_add(&r->objects->pack_map, &pack->packmap_ent);
 }
 
 void (*report_garbage)(unsigned seen_bits, const char *path);
@@ -856,20 +859,18 @@ static void prepare_pack(const char *full_name, size_t full_name_len,
 
        if (strip_suffix_mem(full_name, &base_len, ".idx") &&
            !(data->m && midx_contains_pack(data->m, file_name))) {
-               /* Don't reopen a pack we already have. */
-               for (p = data->r->objects->packed_git; p; p = p->next) {
-                       size_t len;
-                       if (strip_suffix(p->pack_name, ".pack", &len) &&
-                           len == base_len &&
-                           !memcmp(p->pack_name, full_name, len))
-                               break;
-               }
+               struct hashmap_entry hent;
+               char *pack_name = xstrfmt("%.*s.pack", (int)base_len, full_name);
+               unsigned int hash = strhash(pack_name);
+               hashmap_entry_init(&hent, hash);
 
-               if (!p) {
+               /* Don't reopen a pack we already have. */
+               if (!hashmap_get(&data->r->objects->pack_map, &hent, pack_name)) {
                        p = add_packed_git(full_name, full_name_len, data->local);
                        if (p)
                                install_packed_git(data->r, p);
                }
+               free(pack_name);
        }
 
        if (!report_garbage)
index a369152c473232c09e67d962cbcf7ea9b6689d7c..7ee791669a15d748636c5b960f7dbfa3b473b0fb 100755 (executable)
@@ -85,4 +85,22 @@ do
        '
 done
 
+# Measure pack loading with 10,000 packs.
+test_expect_success 'generate lots of packs' '
+       for i in $(test_seq 10000); do
+               echo "blob"
+               echo "data <<EOF"
+               echo "blob $i"
+               echo "EOF"
+               echo "checkpoint"
+       done |
+       git -c fastimport.unpackLimit=0 fast-import
+'
+
+# The purpose of this test is to evaluate load time for a large number
+# of packs while doing as little other work as possible.
+test_perf "load 10,000 packs" '
+       git rev-parse --verify "HEAD^{commit}"
+'
+
 test_done