git.ipfire.org Git - thirdparty/kernel/linux.git/commitdiff
bcachefs: Fix race between btree updates & journal replay
author Kent Overstreet <kent.overstreet@gmail.com>
Fri, 31 Dec 2021 22:54:13 +0000 (17:54 -0500)
committer Kent Overstreet <kent.overstreet@linux.dev>
Sun, 22 Oct 2023 21:09:21 +0000 (17:09 -0400)
Add a flag to indicate whether a journal replay key has been
overwritten, and set/test it with appropriate btree locks held.

This fixes a race between the allocator (which invalidates buckets and
does btree updates) and journal replay, which before this patch could
clobber the allocator thread's update with an older version of the key
from the journal.

Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com>
fs/bcachefs/bcachefs.h
fs/bcachefs/btree_update_leaf.c
fs/bcachefs/recovery.c
fs/bcachefs/recovery.h

index c282086079fbcc1d2fd3420fd60326bd2426dbb7..9452b6cf04a5c1d6388b1205515576f1390d4882 100644 (file)
@@ -561,6 +561,7 @@ struct journal_keys {
                enum btree_id   btree_id:8;
                unsigned        level:8;
                bool            allocated;
+               bool            overwritten;
                struct bkey_i   *k;
                u32             journal_seq;
                u32             journal_offset;
index 40deafced921360cc3274c68944f1273cd48950f..8af9ba464b253ab1bc56e7bba65aa3406f2178ea 100644 (file)
@@ -15,6 +15,7 @@
 #include "journal.h"
 #include "journal_reclaim.h"
 #include "keylist.h"
+#include "recovery.h"
 #include "subvolume.h"
 #include "replicas.h"
 #include "trace.h"
@@ -625,6 +626,14 @@ fail:
        return btree_trans_restart(trans);
 }
 
+static noinline void bch2_drop_overwrites_from_journal(struct btree_trans *trans)
+{
+       struct btree_insert_entry *i;
+
+       trans_for_each_update(trans, i)
+               bch2_journal_key_overwritten(trans->c, i->btree_id, i->level, i->k->k.p);
+}
+
 /*
  * Get journal reservation, take write locks, and attempt to do btree update(s):
  */
@@ -702,6 +711,9 @@ static inline int do_bch2_trans_commit(struct btree_trans *trans,
 
        ret = bch2_trans_commit_write_locked(trans, stopped_at, trace_ip);
 
+       if (!ret && unlikely(!test_bit(JOURNAL_REPLAY_DONE, &c->journal.flags)))
+               bch2_drop_overwrites_from_journal(trans);
+
        trans_for_each_update(trans, i)
                if (!same_leaf_as_prev(trans, i))
                        bch2_btree_node_unlock_write_inlined(trans, i->path,
index d332fd16517b983766950e67a7ba4070d85204bd..fcacf166f9001621ab7d29da08d4dd19130c1671 100644 (file)
@@ -185,6 +185,19 @@ int bch2_journal_key_delete(struct bch_fs *c, enum btree_id id,
        return bch2_journal_key_insert(c, id, level, &whiteout);
 }
 
+void bch2_journal_key_overwritten(struct bch_fs *c, enum btree_id btree,
+                                 unsigned level, struct bpos pos)
+{
+       struct journal_keys *keys = &c->journal_keys;
+       size_t idx = journal_key_search(keys, btree, level, pos);
+
+       if (idx < keys->nr &&
+           keys->d[idx].btree_id       == btree &&
+           keys->d[idx].level          == level &&
+           !bpos_cmp(keys->d[idx].k->k.p, pos))
+               keys->d[idx].overwritten = true;
+}
+
 static struct bkey_i *bch2_journal_iter_peek(struct journal_iter *iter)
 {
        struct journal_key *k = iter->idx - iter->keys->nr
@@ -539,8 +552,16 @@ static int __bch2_journal_replay_key(struct btree_trans *trans,
        bch2_trans_node_iter_init(trans, &iter, k->btree_id, k->k->k.p,
                                  BTREE_MAX_DEPTH, k->level,
                                  iter_flags);
-       ret   = bch2_btree_iter_traverse(&iter) ?:
-               bch2_trans_update(trans, &iter, k->k, BTREE_TRIGGER_NORUN);
+       ret = bch2_btree_iter_traverse(&iter);
+       if (ret)
+               goto out;
+
+       /* Must be checked with btree locked: */
+       if (k->overwritten)
+               goto out;
+
+       ret = bch2_trans_update(trans, &iter, k->k, BTREE_TRIGGER_NORUN);
+out:
        bch2_trans_iter_exit(trans, &iter);
        return ret;
 }
index 1504e0bdb9405cf0a8e2b95cfc07de6ca3cdc3ea..a7a9496afb95c63183c727d59345f743cdd8b48c 100644 (file)
@@ -37,6 +37,8 @@ int bch2_journal_key_insert(struct bch_fs *, enum btree_id,
                            unsigned, struct bkey_i *);
 int bch2_journal_key_delete(struct bch_fs *, enum btree_id,
                            unsigned, struct bpos);
+void bch2_journal_key_overwritten(struct bch_fs *, enum btree_id,
+                                 unsigned, struct bpos);
 
 void bch2_btree_and_journal_iter_advance(struct btree_and_journal_iter *);
 struct bkey_s_c bch2_btree_and_journal_iter_peek(struct btree_and_journal_iter *);