]> git.ipfire.org Git - thirdparty/git.git/commitdiff
Merge branch 'ps/reftable-iteration-perf'
authorJunio C Hamano <gitster@pobox.com>
Tue, 27 Feb 2024 02:10:24 +0000 (18:10 -0800)
committerJunio C Hamano <gitster@pobox.com>
Tue, 27 Feb 2024 02:10:24 +0000 (18:10 -0800)
The code to iterate over refs with the reftable backend has seen
some optimization.

* ps/reftable-iteration-perf:
  reftable/reader: add comments to `table_iter_next()`
  reftable/record: don't try to reallocate ref record name
  reftable/block: swap buffers instead of copying
  reftable/pq: allocation-less comparison of entry keys
  reftable/merged: skip comparison for records of the same subiter
  reftable/merged: allocation-less dropping of shadowed records
  reftable/record: introduce function to compare records by key

reftable/block.c
reftable/merged.c
reftable/merged.h
reftable/pq.c
reftable/reader.c
reftable/record.c
reftable/record.h

index ce8a7d24f3d33e265e7d5d05bd77a3dfcf9c5b1f..72eb73b3806d623010c0e224f4f8ce77d555ce43 100644 (file)
@@ -339,8 +339,7 @@ int block_iter_next(struct block_iter *it, struct reftable_record *rec)
                return -1;
        string_view_consume(&in, n);
 
-       strbuf_reset(&it->last_key);
-       strbuf_addbuf(&it->last_key, &it->key);
+       strbuf_swap(&it->last_key, &it->key);
        it->next_off += start.len - in.len;
        return 0;
 }
index a0f222e07bdf7519ed1c0d8d43ccbb6151663c3a..1aa6cd31b74954da9b1b5140cd350459c37c1378 100644 (file)
@@ -49,8 +49,6 @@ static void merged_iter_close(void *p)
        for (size_t i = 0; i < mi->stack_len; i++)
                reftable_iterator_destroy(&mi->stack[i]);
        reftable_free(mi->stack);
-       strbuf_release(&mi->key);
-       strbuf_release(&mi->entry_key);
 }
 
 static int merged_iter_advance_nonnull_subiter(struct merged_iter *mi,
@@ -105,14 +103,19 @@ static int merged_iter_next_entry(struct merged_iter *mi,
          such a deployment, the loop below must be changed to collect all
          entries for the same key, and return new the newest one.
        */
-       reftable_record_key(&entry.rec, &mi->entry_key);
        while (!merged_iter_pqueue_is_empty(mi->pq)) {
                struct pq_entry top = merged_iter_pqueue_top(mi->pq);
-               int cmp = 0;
-
-               reftable_record_key(&top.rec, &mi->key);
+               int cmp;
+
+               /*
+                * When the next entry comes from the same queue as the current
+                * entry then it must by definition be larger. This avoids a
+                * comparison in the most common case.
+                */
+               if (top.index == entry.index)
+                       break;
 
-               cmp = strbuf_cmp(&mi->key, &mi->entry_key);
+               cmp = reftable_record_cmp(&top.rec, &entry.rec);
                if (cmp > 0)
                        break;
 
@@ -243,8 +246,6 @@ static int merged_table_seek_record(struct reftable_merged_table *mt,
                .typ = reftable_record_type(rec),
                .hash_id = mt->hash_id,
                .suppress_deletions = mt->suppress_deletions,
-               .key = STRBUF_INIT,
-               .entry_key = STRBUF_INIT,
        };
        struct merged_iter *p;
        int err;
index d5b39dfe7f1e3b54b5dc5e7ae46068155794986f..7d9f95d27ed0a44c4208e743b6b83ba4cdc5536c 100644 (file)
@@ -31,8 +31,6 @@ struct merged_iter {
        uint8_t typ;
        int suppress_deletions;
        struct merged_iter_pqueue pq;
-       struct strbuf key;
-       struct strbuf entry_key;
 };
 
 void merged_table_release(struct reftable_merged_table *mt);
index 2461daf5ff49c9748031a75041d81b1fa30a3421..e0ccce2b9779a8bfd1acac6d42bc7d0e46e3fe0e 100644 (file)
@@ -14,20 +14,9 @@ https://developers.google.com/open-source/licenses/bsd
 
 int pq_less(struct pq_entry *a, struct pq_entry *b)
 {
-       struct strbuf ak = STRBUF_INIT;
-       struct strbuf bk = STRBUF_INIT;
-       int cmp = 0;
-       reftable_record_key(&a->rec, &ak);
-       reftable_record_key(&b->rec, &bk);
-
-       cmp = strbuf_cmp(&ak, &bk);
-
-       strbuf_release(&ak);
-       strbuf_release(&bk);
-
+       int cmp = reftable_record_cmp(&a->rec, &b->rec);
        if (cmp == 0)
                return a->index > b->index;
-
        return cmp < 0;
 }
 
index 2663b039381d1c3290193e8afe5ad3d9a13df9fc..b113daab77336c240d10f64ce9890ff04f5f366a 100644 (file)
@@ -357,24 +357,32 @@ static int table_iter_next(struct table_iter *ti, struct reftable_record *rec)
 
        while (1) {
                struct table_iter next = TABLE_ITER_INIT;
-               int err = 0;
-               if (ti->is_finished) {
+               int err;
+
+               if (ti->is_finished)
                        return 1;
-               }
 
+               /*
+                * Check whether the current block still has more records. If
+                * so, return it. If the iterator returns positive then the
+                * current block has been exhausted.
+                */
                err = table_iter_next_in_block(ti, rec);
-               if (err <= 0) {
+               if (err <= 0)
                        return err;
-               }
 
+               /*
+                * Otherwise, we need to continue to the next block in the
+                * table and retry. If there are no more blocks then the
+                * iterator is drained.
+                */
                err = table_iter_next_block(&next, ti);
-               if (err != 0) {
-                       ti->is_finished = 1;
-               }
                table_iter_block_done(ti);
-               if (err != 0) {
+               if (err) {
+                       ti->is_finished = 1;
                        return err;
                }
+
                table_iter_copy_from(ti, &next);
                block_iter_close(&next.bi);
        }
index 26c5e43f9bfc3ebc5b3b397b03fb06214044e0bb..d6bb42e887423d95f2fcc1b2b146505ab4bef62a 100644 (file)
@@ -377,10 +377,11 @@ static int reftable_ref_record_decode(void *rec, struct strbuf key,
 
        assert(hash_size > 0);
 
-       r->refname = reftable_realloc(r->refname, key.len + 1);
+       r->refname = reftable_malloc(key.len + 1);
        memcpy(r->refname, key.buf, key.len);
-       r->update_index = update_index;
        r->refname[key.len] = 0;
+
+       r->update_index = update_index;
        r->value_type = val_type;
        switch (val_type) {
        case REFTABLE_REF_VAL1:
@@ -430,7 +431,6 @@ static int reftable_ref_record_is_deletion_void(const void *p)
                (const struct reftable_ref_record *)p);
 }
 
-
 static int reftable_ref_record_equal_void(const void *a,
                                          const void *b, int hash_size)
 {
@@ -439,6 +439,13 @@ static int reftable_ref_record_equal_void(const void *a,
        return reftable_ref_record_equal(ra, rb, hash_size);
 }
 
+static int reftable_ref_record_cmp_void(const void *_a, const void *_b)
+{
+       const struct reftable_ref_record *a = _a;
+       const struct reftable_ref_record *b = _b;
+       return strcmp(a->refname, b->refname);
+}
+
 static void reftable_ref_record_print_void(const void *rec,
                                           int hash_size)
 {
@@ -455,6 +462,7 @@ static struct reftable_record_vtable reftable_ref_record_vtable = {
        .release = &reftable_ref_record_release_void,
        .is_deletion = &reftable_ref_record_is_deletion_void,
        .equal = &reftable_ref_record_equal_void,
+       .cmp = &reftable_ref_record_cmp_void,
        .print = &reftable_ref_record_print_void,
 };
 
@@ -627,6 +635,25 @@ static int reftable_obj_record_equal_void(const void *a, const void *b, int hash
        return 1;
 }
 
+static int reftable_obj_record_cmp_void(const void *_a, const void *_b)
+{
+       const struct reftable_obj_record *a = _a;
+       const struct reftable_obj_record *b = _b;
+       int cmp;
+
+       cmp = memcmp(a->hash_prefix, b->hash_prefix,
+                    a->hash_prefix_len > b->hash_prefix_len ?
+                    a->hash_prefix_len : b->hash_prefix_len);
+       if (cmp)
+               return cmp;
+
+       /*
+        * When the prefix is the same then the object record that is longer is
+        * considered to be bigger.
+        */
+       return a->hash_prefix_len - b->hash_prefix_len;
+}
+
 static struct reftable_record_vtable reftable_obj_record_vtable = {
        .key = &reftable_obj_record_key,
        .type = BLOCK_TYPE_OBJ,
@@ -637,6 +664,7 @@ static struct reftable_record_vtable reftable_obj_record_vtable = {
        .release = &reftable_obj_record_release,
        .is_deletion = &not_a_deletion,
        .equal = &reftable_obj_record_equal_void,
+       .cmp = &reftable_obj_record_cmp_void,
        .print = &reftable_obj_record_print,
 };
 
@@ -955,6 +983,22 @@ static int reftable_log_record_equal_void(const void *a,
                                         hash_size);
 }
 
+static int reftable_log_record_cmp_void(const void *_a, const void *_b)
+{
+       const struct reftable_log_record *a = _a;
+       const struct reftable_log_record *b = _b;
+       int cmp = strcmp(a->refname, b->refname);
+       if (cmp)
+               return cmp;
+
+       /*
+        * Note that the comparison here is reversed. This is because the
+        * update index is reversed when comparing keys. For reference, see how
+        * we handle this in reftable_log_record_key()`.
+        */
+       return b->update_index - a->update_index;
+}
+
 int reftable_log_record_equal(const struct reftable_log_record *a,
                              const struct reftable_log_record *b, int hash_size)
 {
@@ -1004,6 +1048,7 @@ static struct reftable_record_vtable reftable_log_record_vtable = {
        .release = &reftable_log_record_release_void,
        .is_deletion = &reftable_log_record_is_deletion_void,
        .equal = &reftable_log_record_equal_void,
+       .cmp = &reftable_log_record_cmp_void,
        .print = &reftable_log_record_print_void,
 };
 
@@ -1079,6 +1124,13 @@ static int reftable_index_record_equal(const void *a, const void *b, int hash_si
        return ia->offset == ib->offset && !strbuf_cmp(&ia->last_key, &ib->last_key);
 }
 
+static int reftable_index_record_cmp(const void *_a, const void *_b)
+{
+       const struct reftable_index_record *a = _a;
+       const struct reftable_index_record *b = _b;
+       return strbuf_cmp(&a->last_key, &b->last_key);
+}
+
 static void reftable_index_record_print(const void *rec, int hash_size)
 {
        const struct reftable_index_record *idx = rec;
@@ -1096,6 +1148,7 @@ static struct reftable_record_vtable reftable_index_record_vtable = {
        .release = &reftable_index_record_release,
        .is_deletion = &not_a_deletion,
        .equal = &reftable_index_record_equal,
+       .cmp = &reftable_index_record_cmp,
        .print = &reftable_index_record_print,
 };
 
@@ -1149,6 +1202,14 @@ int reftable_record_is_deletion(struct reftable_record *rec)
                reftable_record_data(rec));
 }
 
+int reftable_record_cmp(struct reftable_record *a, struct reftable_record *b)
+{
+       if (a->type != b->type)
+               BUG("cannot compare reftable records of different type");
+       return reftable_record_vtable(a)->cmp(
+               reftable_record_data(a), reftable_record_data(b));
+}
+
 int reftable_record_equal(struct reftable_record *a, struct reftable_record *b, int hash_size)
 {
        if (a->type != b->type)
index e64ed30c8058cab0ffed789181a3e1909dc5b684..a05e2be17971d5e26f27871159cda293aa2640ad 100644 (file)
@@ -62,6 +62,12 @@ struct reftable_record_vtable {
        /* Are two records equal? This assumes they have the same type. Returns 0 for non-equal. */
        int (*equal)(const void *a, const void *b, int hash_size);
 
+       /*
+        * Compare keys of two records with each other. The records must have
+        * the same type.
+        */
+       int (*cmp)(const void *a, const void *b);
+
        /* Print on stdout, for debugging. */
        void (*print)(const void *rec, int hash_size);
 };
@@ -114,6 +120,7 @@ struct reftable_record {
 void reftable_record_init(struct reftable_record *rec, uint8_t typ);
 
 /* see struct record_vtable */
+int reftable_record_cmp(struct reftable_record *a, struct reftable_record *b);
 int reftable_record_equal(struct reftable_record *a, struct reftable_record *b, int hash_size);
 void reftable_record_print(struct reftable_record *rec, int hash_size);
 void reftable_record_key(struct reftable_record *rec, struct strbuf *dest);