]> git.ipfire.org Git - thirdparty/git.git/commitdiff
reftable/merged: make subiters own their records
authorPatrick Steinhardt <ps@pks.im>
Mon, 4 Mar 2024 10:48:59 +0000 (11:48 +0100)
committerJunio C Hamano <gitster@pobox.com>
Mon, 4 Mar 2024 18:19:39 +0000 (10:19 -0800)
For each subiterator, the merged table needs to track their current
record. This record is owned by the priority queue though instead of by
the merged iterator. This is not optimal performance-wise.

For one, we need to move around records whenever we add or remove a
record from the priority queue. Thus, the bigger the entries the more
bytes we need to copy around. And compared to pointers, a reftable
record is rather on the bigger side. The other issue is that this makes
it harder to reuse the records.

Refactor the code so that the merged iterator tracks ownership of the
records per-subiter. Instead of having records in the priority queue, we
can now use mere pointers to the per-subiter records. This also allows
us to swap records between the caller and the per-subiter record instead
of doing an actual copy via `reftable_record_copy_from()`, which removes
the need to release the caller-provided record.

This results in a noticeable speedup when iterating through many refs.
The following benchmark iterates through 1 million refs:

  Benchmark 1: show-ref: single matching ref (revision = HEAD~)
    Time (mean ± σ):     145.5 ms ±   4.5 ms    [User: 142.5 ms, System: 2.8 ms]
    Range (min … max):   141.3 ms … 177.0 ms    1000 runs

  Benchmark 2: show-ref: single matching ref (revision = HEAD)
    Time (mean ± σ):     139.0 ms ±   4.7 ms    [User: 136.1 ms, System: 2.8 ms]
    Range (min … max):   134.2 ms … 182.2 ms    1000 runs

  Summary
    show-ref: single matching ref (revision = HEAD) ran
      1.05 ± 0.05 times faster than show-ref: single matching ref (revision = HEAD~)

This refactoring also allows a subsequent refactoring where we start
reusing memory allocated by the reftable records because we do not need
to release the caller-provided record anymore.

Signed-off-by: Patrick Steinhardt <ps@pks.im>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
reftable/merged.c
reftable/pq.c
reftable/pq.h
reftable/pq_test.c

index 9b1ccfff00d5d7b9b1ecc5c536faaca66acb8dd4..ae74234472f3f1485645152f87a04b5b5ccf3b9b 100644 (file)
@@ -17,8 +17,13 @@ https://developers.google.com/open-source/licenses/bsd
 #include "reftable-error.h"
 #include "system.h"
 
+struct merged_subiter {
+       struct reftable_iterator iter;
+       struct reftable_record rec;
+};
+
 struct merged_iter {
-       struct reftable_iterator *stack;
+       struct merged_subiter *subiters;
        struct merged_iter_pqueue pq;
        uint32_t hash_id;
        size_t stack_len;
@@ -32,16 +37,18 @@ static int merged_iter_init(struct merged_iter *mi)
        for (size_t i = 0; i < mi->stack_len; i++) {
                struct pq_entry e = {
                        .index = i,
+                       .rec = &mi->subiters[i].rec,
                };
                int err;
 
-               reftable_record_init(&e.rec, mi->typ);
-               err = iterator_next(&mi->stack[i], &e.rec);
+               reftable_record_init(&mi->subiters[i].rec, mi->typ);
+               err = iterator_next(&mi->subiters[i].iter,
+                                   &mi->subiters[i].rec);
                if (err < 0)
                        return err;
                if (err > 0) {
-                       reftable_iterator_destroy(&mi->stack[i]);
-                       reftable_record_release(&e.rec);
+                       reftable_iterator_destroy(&mi->subiters[i].iter);
+                       reftable_record_release(&mi->subiters[i].rec);
                        continue;
                }
 
@@ -56,9 +63,11 @@ static void merged_iter_close(void *p)
        struct merged_iter *mi = p;
 
        merged_iter_pqueue_release(&mi->pq);
-       for (size_t i = 0; i < mi->stack_len; i++)
-               reftable_iterator_destroy(&mi->stack[i]);
-       reftable_free(mi->stack);
+       for (size_t i = 0; i < mi->stack_len; i++) {
+               reftable_iterator_destroy(&mi->subiters[i].iter);
+               reftable_record_release(&mi->subiters[i].rec);
+       }
+       reftable_free(mi->subiters);
 }
 
 static int merged_iter_advance_nonnull_subiter(struct merged_iter *mi,
@@ -66,17 +75,16 @@ static int merged_iter_advance_nonnull_subiter(struct merged_iter *mi,
 {
        struct pq_entry e = {
                .index = idx,
+               .rec = &mi->subiters[idx].rec,
        };
        int err;
 
-       reftable_record_init(&e.rec, mi->typ);
-       err = iterator_next(&mi->stack[idx], &e.rec);
+       err = iterator_next(&mi->subiters[idx].iter, &mi->subiters[idx].rec);
        if (err < 0)
                return err;
-
        if (err > 0) {
-               reftable_iterator_destroy(&mi->stack[idx]);
-               reftable_record_release(&e.rec);
+               reftable_iterator_destroy(&mi->subiters[idx].iter);
+               reftable_record_release(&mi->subiters[idx].rec);
                return 0;
        }
 
@@ -86,7 +94,7 @@ static int merged_iter_advance_nonnull_subiter(struct merged_iter *mi,
 
 static int merged_iter_advance_subiter(struct merged_iter *mi, size_t idx)
 {
-       if (iterator_is_null(&mi->stack[idx]))
+       if (iterator_is_null(&mi->subiters[idx].iter))
                return 0;
        return merged_iter_advance_nonnull_subiter(mi, idx);
 }
@@ -121,25 +129,19 @@ static int merged_iter_next_entry(struct merged_iter *mi,
                struct pq_entry top = merged_iter_pqueue_top(mi->pq);
                int cmp;
 
-               cmp = reftable_record_cmp(&top.rec, &entry.rec);
+               cmp = reftable_record_cmp(top.rec, entry.rec);
                if (cmp > 0)
                        break;
 
                merged_iter_pqueue_remove(&mi->pq);
                err = merged_iter_advance_subiter(mi, top.index);
                if (err < 0)
-                       goto done;
-               reftable_record_release(&top.rec);
+                       return err;
        }
 
-       reftable_record_release(rec);
-       *rec = entry.rec;
        mi->advance_index = entry.index;
-
-done:
-       if (err)
-               reftable_record_release(&entry.rec);
-       return err;
+       SWAP(*rec, *entry.rec);
+       return 0;
 }
 
 static int merged_iter_next(struct merged_iter *mi, struct reftable_record *rec)
@@ -257,10 +259,10 @@ static int merged_table_seek_record(struct reftable_merged_table *mt,
        struct merged_iter *p;
        int err;
 
-       REFTABLE_CALLOC_ARRAY(merged.stack, mt->stack_len);
+       REFTABLE_CALLOC_ARRAY(merged.subiters, mt->stack_len);
        for (size_t i = 0; i < mt->stack_len; i++) {
                err = reftable_table_seek_record(&mt->stack[i],
-                                                &merged.stack[merged.stack_len], rec);
+                                                &merged.subiters[merged.stack_len].iter, rec);
                if (err < 0)
                        goto out;
                if (!err)
index e0ccce2b9779a8bfd1acac6d42bc7d0e46e3fe0e..0074d6bc43dd2e97065d522a592709cec1e06196 100644 (file)
@@ -14,7 +14,7 @@ https://developers.google.com/open-source/licenses/bsd
 
 int pq_less(struct pq_entry *a, struct pq_entry *b)
 {
-       int cmp = reftable_record_cmp(&a->rec, &b->rec);
+       int cmp = reftable_record_cmp(a->rec, b->rec);
        if (cmp == 0)
                return a->index > b->index;
        return cmp < 0;
@@ -82,10 +82,6 @@ void merged_iter_pqueue_add(struct merged_iter_pqueue *pq, const struct pq_entry
 
 void merged_iter_pqueue_release(struct merged_iter_pqueue *pq)
 {
-       int i = 0;
-       for (i = 0; i < pq->len; i++) {
-               reftable_record_release(&pq->heap[i].rec);
-       }
        FREE_AND_NULL(pq->heap);
-       pq->len = pq->cap = 0;
+       memset(pq, 0, sizeof(*pq));
 }
index 9e25a43a36e50f7492990b4228c16725df516b80..ce23972c16c9fe072e75d0a4f8e6c478dc24bcfa 100644 (file)
@@ -13,7 +13,7 @@ https://developers.google.com/open-source/licenses/bsd
 
 struct pq_entry {
        size_t index;
-       struct reftable_record rec;
+       struct reftable_record *rec;
 };
 
 struct merged_iter_pqueue {
index c202eff84801820b28c56ddb7de28a2d041f3c08..b7d3c80cc7260298749800696e4a8bcc5be3284c 100644 (file)
@@ -27,48 +27,43 @@ void merged_iter_pqueue_check(struct merged_iter_pqueue pq)
 
 static void test_pq(void)
 {
-       char *names[54] = { NULL };
-       int N = ARRAY_SIZE(names) - 1;
-
        struct merged_iter_pqueue pq = { NULL };
+       struct reftable_record recs[54];
+       int N = ARRAY_SIZE(recs) - 1, i;
        char *last = NULL;
 
-       int i = 0;
        for (i = 0; i < N; i++) {
-               char name[100];
-               snprintf(name, sizeof(name), "%02d", i);
-               names[i] = xstrdup(name);
+               struct strbuf refname = STRBUF_INIT;
+               strbuf_addf(&refname, "%02d", i);
+
+               reftable_record_init(&recs[i], BLOCK_TYPE_REF);
+               recs[i].u.ref.refname = strbuf_detach(&refname, NULL);
        }
 
        i = 1;
        do {
-               struct pq_entry e = { .rec = { .type = BLOCK_TYPE_REF,
-                                              .u.ref = {
-                                                      .refname = names[i],
-                                              } } };
+               struct pq_entry e = {
+                       .rec = &recs[i],
+               };
+
                merged_iter_pqueue_add(&pq, &e);
                merged_iter_pqueue_check(pq);
+
                i = (i * 7) % N;
        } while (i != 1);
 
        while (!merged_iter_pqueue_is_empty(pq)) {
                struct pq_entry e = merged_iter_pqueue_remove(&pq);
-               struct reftable_record *rec = &e.rec;
                merged_iter_pqueue_check(pq);
 
-               EXPECT(reftable_record_type(rec) == BLOCK_TYPE_REF);
-               if (last) {
-                       EXPECT(strcmp(last, rec->u.ref.refname) < 0);
-               }
-               /* this is names[i], so don't dealloc. */
-               last = rec->u.ref.refname;
-               rec->u.ref.refname = NULL;
-               reftable_record_release(rec);
-       }
-       for (i = 0; i < N; i++) {
-               reftable_free(names[i]);
+               EXPECT(reftable_record_type(e.rec) == BLOCK_TYPE_REF);
+               if (last)
+                       EXPECT(strcmp(last, e.rec->u.ref.refname) < 0);
+               last = e.rec->u.ref.refname;
        }
 
+       for (i = 0; i < N; i++)
+               reftable_record_release(&recs[i]);
        merged_iter_pqueue_release(&pq);
 }