]> git.ipfire.org Git - thirdparty/kernel/linux.git/commitdiff
bcachefs: journal keys: sort keys for interior nodes first
author Kent Overstreet <kent.overstreet@linux.dev>
Sun, 17 Nov 2024 19:20:35 +0000 (14:20 -0500)
committer Kent Overstreet <kent.overstreet@linux.dev>
Sat, 21 Dec 2024 06:36:18 +0000 (01:36 -0500)
There's an unavoidable issue with btree lookups when we're overlaying
journal keys and the journal has many deletions for keys present in the
btree - peek operations will have to iterate over all those deletions to
find the next live key to return.

This is mainly a problem for lookups in interior nodes, if we have to
traverse to a leaf. Looking up an insert position in a leaf (for journal
replay) doesn't have to find the next live key, but walking down the
btree does.

So to ameliorate this, change journal key sort ordering so that we
replay keys from roots and interior nodes first.

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
fs/bcachefs/btree_journal_iter.c
fs/bcachefs/btree_journal_iter.h

index 39898baa8854ddd103302134924c105230d56b66..dbc9bc233ccaea6565d370013b7b7a1a289050df 100644 (file)
@@ -172,9 +172,8 @@ static void journal_iter_verify(struct journal_iter *iter)
        if (iter->idx < keys->size) {
                struct journal_key *k = keys->data + iter->idx;
 
-               int cmp = cmp_int(k->btree_id,  iter->btree_id) ?:
-                         cmp_int(k->level,     iter->level);
-               BUG_ON(cmp < 0);
+               int cmp = __journal_key_btree_cmp(iter->btree_id, iter->level, k);
+               BUG_ON(cmp > 0);
        }
 }
 
@@ -365,9 +364,8 @@ static struct bkey_s_c bch2_journal_iter_peek(struct journal_iter *iter)
        while (iter->idx < iter->keys->size) {
                struct journal_key *k = iter->keys->data + iter->idx;
 
-               int cmp = cmp_int(k->btree_id,  iter->btree_id) ?:
-                         cmp_int(k->level,     iter->level);
-               if (cmp > 0)
+               int cmp = __journal_key_btree_cmp(iter->btree_id, iter->level, k);
+               if (cmp < 0)
                        break;
                BUG_ON(cmp);
 
index 5ddbb7571770e359e8e0cd5a5e4ab25bb3290606..118ada4cdd1b1b55913b206372fcdc9f57f6835e 100644 (file)
@@ -28,14 +28,21 @@ struct btree_and_journal_iter {
        bool                    prefetch;
 };
 
+static inline int __journal_key_btree_cmp(enum btree_id        l_btree_id,
+                                         unsigned      l_level,
+                                         const struct journal_key *r)
+{
+       return -cmp_int(l_level,        r->level) ?:
+               cmp_int(l_btree_id,     r->btree_id);
+}
+
 static inline int __journal_key_cmp(enum btree_id      l_btree_id,
                                    unsigned            l_level,
                                    struct bpos l_pos,
                                    const struct journal_key *r)
 {
-       return (cmp_int(l_btree_id,     r->btree_id) ?:
-               cmp_int(l_level,        r->level) ?:
-               bpos_cmp(l_pos, r->k->k.p));
+       return __journal_key_btree_cmp(l_btree_id, l_level, r) ?:
+               bpos_cmp(l_pos, r->k->k.p);
 }
 
 static inline int journal_key_cmp(const struct journal_key *l, const struct journal_key *r)