git.ipfire.org Git - thirdparty/kernel/linux.git/commitdiff
bcachefs: Simplify journal replay
Author: Kent Overstreet <kent.overstreet@gmail.com>
Tue, 28 Dec 2021 04:10:06 +0000 (23:10 -0500)
Committer: Kent Overstreet <kent.overstreet@linux.dev>
Sun, 22 Oct 2023 21:09:21 +0000 (17:09 -0400)
With BTREE_ITER_WITH_JOURNAL, there are no longer any restrictions on the
order we have to replay keys from the journal in, and we can also start
up journal reclaim right away - and delete a bunch of code.

Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com>
fs/bcachefs/alloc_background.c
fs/bcachefs/bcachefs.h
fs/bcachefs/btree_key_cache.h
fs/bcachefs/btree_update_interior.c
fs/bcachefs/btree_update_leaf.c
fs/bcachefs/journal_reclaim.c
fs/bcachefs/journal_types.h
fs/bcachefs/recovery.c

index cb4b059e796ce3b5ee2aa017cd5dfaae552ad47a..ab7d972aac3a3a5bd2b492ddfffe3152c81ad250 100644 (file)
@@ -902,8 +902,7 @@ static void discard_one_bucket(struct bch_fs *c, struct bch_dev *ca, u64 b)
 static bool allocator_thread_running(struct bch_dev *ca)
 {
        unsigned state = ca->mi.state == BCH_MEMBER_STATE_rw &&
-               test_bit(BCH_FS_ALLOCATOR_RUNNING, &ca->fs->flags) &&
-               test_bit(BCH_FS_ALLOC_REPLAY_DONE, &ca->fs->flags)
+               test_bit(BCH_FS_ALLOCATOR_RUNNING, &ca->fs->flags)
                ? ALLOCATOR_running
                : ALLOCATOR_stopped;
        alloc_thread_set_state(ca, state);
index 431cf25b38dbf48a9ec41a11fb6bd5a20f9e96e4..7771b4a4bb87b49e183eedcd14c8eeb6188c7aa6 100644 (file)
@@ -510,8 +510,6 @@ enum {
        BCH_FS_INITIAL_GC_DONE,
        BCH_FS_INITIAL_GC_UNFIXED,
        BCH_FS_TOPOLOGY_REPAIR_DONE,
-       BCH_FS_ALLOC_REPLAY_DONE,
-       BCH_FS_BTREE_INTERIOR_REPLAY_DONE,
        BCH_FS_FSCK_DONE,
        BCH_FS_STARTED,
        BCH_FS_RW,
index 0768ef3ca77600d96b7b49e6bd101dcd09e4ff87..b3d241b134539e545a44557afd7fb16ebe87f4cf 100644 (file)
@@ -16,8 +16,7 @@ static inline bool bch2_btree_key_cache_must_wait(struct bch_fs *c)
        size_t nr_keys = atomic_long_read(&c->btree_key_cache.nr_keys);
        size_t max_dirty = 4096 + (nr_keys * 3) / 4;
 
-       return nr_dirty > max_dirty &&
-               test_bit(JOURNAL_RECLAIM_STARTED, &c->journal.flags);
+       return nr_dirty > max_dirty;
 }
 
 int bch2_btree_key_cache_journal_flush(struct journal *,
index 17111c4228bd2a999bf5d6542e76d31e63d65f71..51a2ea2c5cd65af420cefaac503070bddb0851d6 100644 (file)
@@ -45,7 +45,7 @@ static void btree_node_interior_verify(struct bch_fs *c, struct btree *b)
 
        BUG_ON(!b->c.level);
 
-       if (!test_bit(BCH_FS_BTREE_INTERIOR_REPLAY_DONE, &c->flags))
+       if (!test_bit(JOURNAL_REPLAY_DONE, &c->journal.flags))
                return;
 
        bch2_btree_node_iter_init_from_start(&iter, b);
@@ -1851,9 +1851,6 @@ void bch2_btree_node_rewrite_async(struct bch_fs *c, struct btree *b)
 {
        struct async_btree_rewrite *a;
 
-       if (!test_bit(BCH_FS_BTREE_INTERIOR_REPLAY_DONE, &c->flags))
-               return;
-
        if (!percpu_ref_tryget(&c->writes))
                return;
 
index e95940ffad6bc9db07d635d336fc2129d20590d0..1072acb0c9afcf2ad6592bdbcae10fb02b16df12 100644 (file)
@@ -206,9 +206,6 @@ static bool btree_insert_key_leaf(struct btree_trans *trans,
        int old_live_u64s = b->nr.live_u64s;
        int live_u64s_added, u64s_added;
 
-       EBUG_ON(!insert->level &&
-               !test_bit(BCH_FS_BTREE_INTERIOR_REPLAY_DONE, &c->flags));
-
        if (unlikely(!bch2_btree_bset_insert_key(trans, insert->path, b,
                                        &insert_l(insert)->iter, insert->k)))
                return false;
index 4462beb524619893ff77bb5dffa745c03e6bbd8e..d72b17dc935ac805ae338c342f4c03d3e86228c5 100644 (file)
@@ -489,9 +489,6 @@ static size_t journal_flush_pins(struct journal *j, u64 seq_to_flush,
        u64 seq;
        int err;
 
-       if (!test_bit(JOURNAL_RECLAIM_STARTED, &j->flags))
-               return 0;
-
        lockdep_assert_held(&j->reclaim_lock);
 
        while (1) {
@@ -689,8 +686,6 @@ static int bch2_journal_reclaim_thread(void *arg)
 
        set_freezable();
 
-       kthread_wait_freezable(test_bit(JOURNAL_RECLAIM_STARTED, &j->flags));
-
        j->last_flushed = jiffies;
 
        while (!ret && !kthread_should_stop()) {
index 0c4df603280d73bfa2e7ba3224a5f615a1d071cf..73e7fbc4f109221c368f5956bd4a833f54a7d0bf 100644 (file)
@@ -148,7 +148,6 @@ enum journal_space_from {
 enum {
        JOURNAL_REPLAY_DONE,
        JOURNAL_STARTED,
-       JOURNAL_RECLAIM_STARTED,
        JOURNAL_NEED_WRITE,
        JOURNAL_MAY_GET_UNRESERVED,
        JOURNAL_MAY_SKIP_FLUSH,
index 57311ad283c75b987c8a39bfabbd41043cddfcd0..cb0ba84711aa13d1065af18518f431a1195f2ada 100644 (file)
@@ -474,8 +474,8 @@ static void replay_now_at(struct journal *j, u64 seq)
                bch2_journal_pin_put(j, j->replay_journal_seq++);
 }
 
-static int __bch2_journal_replay_key(struct btree_trans *trans,
-                                    struct journal_key *k)
+static int bch2_journal_replay_key(struct btree_trans *trans,
+                                  struct journal_key *k)
 {
        struct btree_iter iter;
        unsigned iter_flags =
@@ -484,7 +484,7 @@ static int __bch2_journal_replay_key(struct btree_trans *trans,
        int ret;
 
        if (!k->level && k->btree_id == BTREE_ID_alloc)
-               iter_flags |= BTREE_ITER_CACHED|BTREE_ITER_CACHED_NOFILL;
+               iter_flags |= BTREE_ITER_CACHED;
 
        bch2_trans_node_iter_init(trans, &iter, k->btree_id, k->k->k.p,
                                  BTREE_MAX_DEPTH, k->level,
@@ -503,29 +503,12 @@ out:
        return ret;
 }
 
-static int bch2_journal_replay_key(struct bch_fs *c, struct journal_key *k)
-{
-       unsigned commit_flags =
-               BTREE_INSERT_LAZY_RW|
-               BTREE_INSERT_NOFAIL|
-               BTREE_INSERT_JOURNAL_RESERVED;
-
-       if (!k->allocated)
-               commit_flags |= BTREE_INSERT_JOURNAL_REPLAY;
-
-       return bch2_trans_do(c, NULL, NULL, commit_flags,
-                            __bch2_journal_replay_key(&trans, k));
-}
-
 static int journal_sort_seq_cmp(const void *_l, const void *_r)
 {
        const struct journal_key *l = *((const struct journal_key **)_l);
        const struct journal_key *r = *((const struct journal_key **)_r);
 
-       return  cmp_int(r->level,       l->level) ?:
-               cmp_int(l->journal_seq, r->journal_seq) ?:
-               cmp_int(l->btree_id,    r->btree_id) ?:
-               bpos_cmp(l->k->k.p,     r->k->k.p);
+       return cmp_int(l->journal_seq, r->journal_seq);
 }
 
 static int bch2_journal_replay(struct bch_fs *c)
@@ -533,10 +516,7 @@ static int bch2_journal_replay(struct bch_fs *c)
        struct journal_keys *keys = &c->journal_keys;
        struct journal_key **keys_sorted, *k;
        struct journal *j = &c->journal;
-       struct bch_dev *ca;
-       unsigned idx;
        size_t i;
-       u64 seq;
        int ret;
 
        keys_sorted = kmalloc_array(sizeof(*keys_sorted), keys->nr, GFP_KERNEL);
@@ -555,73 +535,25 @@ static int bch2_journal_replay(struct bch_fs *c)
                replay_now_at(j, keys->journal_seq_base);
        }
 
-       seq = j->replay_journal_seq;
-
-       /*
-        * First replay updates to the alloc btree - these will only update the
-        * btree key cache:
-        */
-       for (i = 0; i < keys->nr; i++) {
-               k = keys_sorted[i];
-
-               cond_resched();
-
-               if (!k->level && k->btree_id == BTREE_ID_alloc) {
-                       j->replay_journal_seq = keys->journal_seq_base + k->journal_seq;
-                       ret = bch2_journal_replay_key(c, k);
-                       if (ret)
-                               goto err;
-               }
-       }
-
-       /* Now we can start the allocator threads: */
-       set_bit(BCH_FS_ALLOC_REPLAY_DONE, &c->flags);
-       for_each_member_device(ca, c, idx)
-               bch2_wake_allocator(ca);
-
-       /*
-        * Next replay updates to interior btree nodes:
-        */
-       for (i = 0; i < keys->nr; i++) {
-               k = keys_sorted[i];
-
-               cond_resched();
-
-               if (k->level) {
-                       j->replay_journal_seq = keys->journal_seq_base + k->journal_seq;
-                       ret = bch2_journal_replay_key(c, k);
-                       if (ret)
-                               goto err;
-               }
-       }
-
-       /*
-        * Now that the btree is in a consistent state, we can start journal
-        * reclaim (which will be flushing entries from the btree key cache back
-        * to the btree:
-        */
-       set_bit(BCH_FS_BTREE_INTERIOR_REPLAY_DONE, &c->flags);
-       set_bit(JOURNAL_RECLAIM_STARTED, &j->flags);
-       journal_reclaim_kick(j);
-
-       j->replay_journal_seq = seq;
-
-       /*
-        * Now replay leaf node updates:
-        */
        for (i = 0; i < keys->nr; i++) {
                k = keys_sorted[i];
 
                cond_resched();
 
-               if (k->level || k->btree_id == BTREE_ID_alloc)
-                       continue;
-
-               replay_now_at(j, keys->journal_seq_base + k->journal_seq);
+               if (!k->allocated)
+                       replay_now_at(j, keys->journal_seq_base + k->journal_seq);
 
-               ret = bch2_journal_replay_key(c, k);
-               if (ret)
+               ret = bch2_trans_do(c, NULL, NULL,
+                                   BTREE_INSERT_LAZY_RW|
+                                   BTREE_INSERT_NOFAIL|
+                                   BTREE_INSERT_JOURNAL_RESERVED|
+                                   (!k->allocated ? BTREE_INSERT_JOURNAL_REPLAY : 0),
+                            bch2_journal_replay_key(&trans, k));
+               if (ret) {
+                       bch_err(c, "journal replay: error %d while replaying key at btree %s level %u",
+                               ret, bch2_btree_ids[k->btree_id], k->level);
                        goto err;
+               }
        }
 
        replay_now_at(j, j->replay_journal_seq_end);
@@ -629,14 +561,9 @@ static int bch2_journal_replay(struct bch_fs *c)
 
        bch2_journal_set_replay_done(j);
        bch2_journal_flush_all_pins(j);
-       kfree(keys_sorted);
-
-       return bch2_journal_error(j);
+       ret = bch2_journal_error(j);
 err:
-       bch_err(c, "journal replay: error %d while replaying key at btree %s level %u",
-               ret, bch2_btree_ids[k->btree_id], k->level);
        kfree(keys_sorted);
-
        return ret;
 }
 
@@ -1215,7 +1142,8 @@ use_clean:
        ret = bch2_journal_replay(c);
        if (ret)
                goto err;
-       bch_verbose(c, "journal replay done");
+       if (c->opts.verbose || !c->sb.clean)
+               bch_info(c, "journal replay done");
 
        if (test_bit(BCH_FS_NEED_ALLOC_WRITE, &c->flags) &&
            !c->opts.nochanges) {
@@ -1385,10 +1313,6 @@ int bch2_fs_initialize(struct bch_fs *c)
        for (i = 0; i < BTREE_ID_NR; i++)
                bch2_btree_root_alloc(c, i);
 
-       set_bit(BCH_FS_ALLOC_REPLAY_DONE, &c->flags);
-       set_bit(BCH_FS_BTREE_INTERIOR_REPLAY_DONE, &c->flags);
-       set_bit(JOURNAL_RECLAIM_STARTED, &c->journal.flags);
-
        err = "unable to allocate journal buckets";
        for_each_online_member(ca, c, i) {
                ret = bch2_dev_journal_alloc(ca);