From: Junio C Hamano Date: Tue, 27 Feb 2024 02:10:24 +0000 (-0800) Subject: Merge branch 'ps/reftable-iteration-perf' X-Git-Tag: v2.45.0-rc0~173 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=9f67cbd0a7725857d219da6720fc9a5acfda2960;hp=274400998b0a2da64d20e9fa95b288bf1403459c;p=thirdparty%2Fgit.git Merge branch 'ps/reftable-iteration-perf' The code to iterate over refs with the reftable backend has seen some optimization. * ps/reftable-iteration-perf: reftable/reader: add comments to `table_iter_next()` reftable/record: don't try to reallocate ref record name reftable/block: swap buffers instead of copying reftable/pq: allocation-less comparison of entry keys reftable/merged: skip comparison for records of the same subiter reftable/merged: allocation-less dropping of shadowed records reftable/record: introduce function to compare records by key --- diff --git a/reftable/block.c b/reftable/block.c index ce8a7d24f3..72eb73b380 100644 --- a/reftable/block.c +++ b/reftable/block.c @@ -339,8 +339,7 @@ int block_iter_next(struct block_iter *it, struct reftable_record *rec) return -1; string_view_consume(&in, n); - strbuf_reset(&it->last_key); - strbuf_addbuf(&it->last_key, &it->key); + strbuf_swap(&it->last_key, &it->key); it->next_off += start.len - in.len; return 0; } diff --git a/reftable/merged.c b/reftable/merged.c index a0f222e07b..1aa6cd31b7 100644 --- a/reftable/merged.c +++ b/reftable/merged.c @@ -49,8 +49,6 @@ static void merged_iter_close(void *p) for (size_t i = 0; i < mi->stack_len; i++) reftable_iterator_destroy(&mi->stack[i]); reftable_free(mi->stack); - strbuf_release(&mi->key); - strbuf_release(&mi->entry_key); } static int merged_iter_advance_nonnull_subiter(struct merged_iter *mi, @@ -105,14 +103,19 @@ static int merged_iter_next_entry(struct merged_iter *mi, such a deployment, the loop below must be changed to collect all entries for the same key, and return new the newest one. */ - reftable_record_key(&entry.rec, &mi->entry_key); while (!merged_iter_pqueue_is_empty(mi->pq)) { struct pq_entry top = merged_iter_pqueue_top(mi->pq); - int cmp = 0; - - reftable_record_key(&top.rec, &mi->key); + int cmp; + + /* + * When the next entry comes from the same queue as the current + * entry then it must by definition be larger. This avoids a + * comparison in the most common case. + */ + if (top.index == entry.index) + break; - cmp = strbuf_cmp(&mi->key, &mi->entry_key); + cmp = reftable_record_cmp(&top.rec, &entry.rec); if (cmp > 0) break; @@ -243,8 +246,6 @@ static int merged_table_seek_record(struct reftable_merged_table *mt, .typ = reftable_record_type(rec), .hash_id = mt->hash_id, .suppress_deletions = mt->suppress_deletions, - .key = STRBUF_INIT, - .entry_key = STRBUF_INIT, }; struct merged_iter *p; int err; diff --git a/reftable/merged.h b/reftable/merged.h index d5b39dfe7f..7d9f95d27e 100644 --- a/reftable/merged.h +++ b/reftable/merged.h @@ -31,8 +31,6 @@ struct merged_iter { uint8_t typ; int suppress_deletions; struct merged_iter_pqueue pq; - struct strbuf key; - struct strbuf entry_key; }; void merged_table_release(struct reftable_merged_table *mt); diff --git a/reftable/pq.c b/reftable/pq.c index 2461daf5ff..e0ccce2b97 100644 --- a/reftable/pq.c +++ b/reftable/pq.c @@ -14,20 +14,9 @@ https://developers.google.com/open-source/licenses/bsd int pq_less(struct pq_entry *a, struct pq_entry *b) { - struct strbuf ak = STRBUF_INIT; - struct strbuf bk = STRBUF_INIT; - int cmp = 0; - reftable_record_key(&a->rec, &ak); - reftable_record_key(&b->rec, &bk); - - cmp = strbuf_cmp(&ak, &bk); - - strbuf_release(&ak); - strbuf_release(&bk); - + int cmp = reftable_record_cmp(&a->rec, &b->rec); if (cmp == 0) return a->index > b->index; - return cmp < 0; } diff --git a/reftable/reader.c b/reftable/reader.c index 2663b03938..b113daab77 100644 --- a/reftable/reader.c +++ b/reftable/reader.c @@ -357,24 +357,32 @@ static int table_iter_next(struct table_iter *ti, struct reftable_record *rec) while (1) { struct table_iter next = TABLE_ITER_INIT; - int err = 0; - if (ti->is_finished) { + int err; + + if (ti->is_finished) return 1; - } + /* + * Check whether the current block still has more records. If + * so, return it. If the iterator returns positive then the + * current block has been exhausted. + */ err = table_iter_next_in_block(ti, rec); - if (err <= 0) { + if (err <= 0) return err; - } + /* + * Otherwise, we need to continue to the next block in the + * table and retry. If there are no more blocks then the + * iterator is drained. + */ err = table_iter_next_block(&next, ti); - if (err != 0) { - ti->is_finished = 1; - } table_iter_block_done(ti); - if (err != 0) { + if (err) { + ti->is_finished = 1; return err; } + table_iter_copy_from(ti, &next); block_iter_close(&next.bi); } diff --git a/reftable/record.c b/reftable/record.c index 26c5e43f9b..d6bb42e887 100644 --- a/reftable/record.c +++ b/reftable/record.c @@ -377,10 +377,11 @@ static int reftable_ref_record_decode(void *rec, struct strbuf key, assert(hash_size > 0); - r->refname = reftable_realloc(r->refname, key.len + 1); + r->refname = reftable_malloc(key.len + 1); memcpy(r->refname, key.buf, key.len); - r->update_index = update_index; r->refname[key.len] = 0; + + r->update_index = update_index; r->value_type = val_type; switch (val_type) { case REFTABLE_REF_VAL1: @@ -430,7 +431,6 @@ static int reftable_ref_record_is_deletion_void(const void *p) (const struct reftable_ref_record *)p); } - static int reftable_ref_record_equal_void(const void *a, const void *b, int hash_size) { @@ -439,6 +439,13 @@ static int reftable_ref_record_equal_void(const void *a, return reftable_ref_record_equal(ra, rb, hash_size); } +static int reftable_ref_record_cmp_void(const void *_a, const void *_b) +{ + const struct reftable_ref_record *a = _a; + const struct reftable_ref_record *b = _b; + return strcmp(a->refname, b->refname); +} + static void reftable_ref_record_print_void(const void *rec, int hash_size) { @@ -455,6 +462,7 @@ static struct reftable_record_vtable reftable_ref_record_vtable = { .release = &reftable_ref_record_release_void, .is_deletion = &reftable_ref_record_is_deletion_void, .equal = &reftable_ref_record_equal_void, + .cmp = &reftable_ref_record_cmp_void, .print = &reftable_ref_record_print_void, }; @@ -627,6 +635,25 @@ static int reftable_obj_record_equal_void(const void *a, const void *b, int hash return 1; } +static int reftable_obj_record_cmp_void(const void *_a, const void *_b) +{ + const struct reftable_obj_record *a = _a; + const struct reftable_obj_record *b = _b; + int cmp; + + cmp = memcmp(a->hash_prefix, b->hash_prefix, + a->hash_prefix_len > b->hash_prefix_len ? + a->hash_prefix_len : b->hash_prefix_len); + if (cmp) + return cmp; + + /* + * When the prefix is the same then the object record that is longer is + * considered to be bigger. + */ + return a->hash_prefix_len - b->hash_prefix_len; +} + static struct reftable_record_vtable reftable_obj_record_vtable = { .key = &reftable_obj_record_key, .type = BLOCK_TYPE_OBJ, @@ -637,6 +664,7 @@ static struct reftable_record_vtable reftable_obj_record_vtable = { .release = &reftable_obj_record_release, .is_deletion = ¬_a_deletion, .equal = &reftable_obj_record_equal_void, + .cmp = &reftable_obj_record_cmp_void, .print = &reftable_obj_record_print, }; @@ -955,6 +983,22 @@ static int reftable_log_record_equal_void(const void *a, hash_size); } +static int reftable_log_record_cmp_void(const void *_a, const void *_b) +{ + const struct reftable_log_record *a = _a; + const struct reftable_log_record *b = _b; + int cmp = strcmp(a->refname, b->refname); + if (cmp) + return cmp; + + /* + * Note that the comparison here is reversed. This is because the + * update index is reversed when comparing keys. For reference, see how + * we handle this in reftable_log_record_key()`. + */ + return b->update_index - a->update_index; +} + int reftable_log_record_equal(const struct reftable_log_record *a, const struct reftable_log_record *b, int hash_size) { @@ -1004,6 +1048,7 @@ static struct reftable_record_vtable reftable_log_record_vtable = { .release = &reftable_log_record_release_void, .is_deletion = &reftable_log_record_is_deletion_void, .equal = &reftable_log_record_equal_void, + .cmp = &reftable_log_record_cmp_void, .print = &reftable_log_record_print_void, }; @@ -1079,6 +1124,13 @@ static int reftable_index_record_equal(const void *a, const void *b, int hash_si return ia->offset == ib->offset && !strbuf_cmp(&ia->last_key, &ib->last_key); } +static int reftable_index_record_cmp(const void *_a, const void *_b) +{ + const struct reftable_index_record *a = _a; + const struct reftable_index_record *b = _b; + return strbuf_cmp(&a->last_key, &b->last_key); +} + static void reftable_index_record_print(const void *rec, int hash_size) { const struct reftable_index_record *idx = rec; @@ -1096,6 +1148,7 @@ static struct reftable_record_vtable reftable_index_record_vtable = { .release = &reftable_index_record_release, .is_deletion = ¬_a_deletion, .equal = &reftable_index_record_equal, + .cmp = &reftable_index_record_cmp, .print = &reftable_index_record_print, }; @@ -1149,6 +1202,14 @@ int reftable_record_is_deletion(struct reftable_record *rec) reftable_record_data(rec)); } +int reftable_record_cmp(struct reftable_record *a, struct reftable_record *b) +{ + if (a->type != b->type) + BUG("cannot compare reftable records of different type"); + return reftable_record_vtable(a)->cmp( + reftable_record_data(a), reftable_record_data(b)); +} + int reftable_record_equal(struct reftable_record *a, struct reftable_record *b, int hash_size) { if (a->type != b->type) diff --git a/reftable/record.h b/reftable/record.h index e64ed30c80..a05e2be179 100644 --- a/reftable/record.h +++ b/reftable/record.h @@ -62,6 +62,12 @@ struct reftable_record_vtable { /* Are two records equal? This assumes they have the same type. Returns 0 for non-equal. */ int (*equal)(const void *a, const void *b, int hash_size); + /* + * Compare keys of two records with each other. The records must have + * the same type. + */ + int (*cmp)(const void *a, const void *b); + /* Print on stdout, for debugging. */ void (*print)(const void *rec, int hash_size); }; @@ -114,6 +120,7 @@ struct reftable_record { void reftable_record_init(struct reftable_record *rec, uint8_t typ); /* see struct record_vtable */ +int reftable_record_cmp(struct reftable_record *a, struct reftable_record *b); int reftable_record_equal(struct reftable_record *a, struct reftable_record *b, int hash_size); void reftable_record_print(struct reftable_record *rec, int hash_size); void reftable_record_key(struct reftable_record *rec, struct strbuf *dest);