git.ipfire.org Git - thirdparty/kernel/linux.git/commitdiff
bcachefs: Introduce a separate journal watermark for copygc
author: Kent Overstreet <kent.overstreet@gmail.com>
Tue, 15 Mar 2022 01:48:42 +0000 (21:48 -0400)
committer: Kent Overstreet <kent.overstreet@linux.dev>
Sun, 22 Oct 2023 21:09:29 +0000 (17:09 -0400)
Journal reclaim flushes the btree key cache, and that flushing may
require the allocation of new btree nodes. Journal reclaim therefore has
an implicit dependency on copygc in order to make forward progress - so
we should avoid blocking copygc unless the journal is really close to full.

This introduces watermarks to replace our single MAY_GET_UNRESERVED bit
in the journal, and adds a watermark for copygc and plumbs it through.

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
fs/bcachefs/alloc_background.c
fs/bcachefs/btree_key_cache.c
fs/bcachefs/btree_update.h
fs/bcachefs/btree_update_interior.c
fs/bcachefs/btree_update_leaf.c
fs/bcachefs/journal.c
fs/bcachefs/journal.h
fs/bcachefs/journal_reclaim.c
fs/bcachefs/journal_types.h
fs/bcachefs/movinggc.c
fs/bcachefs/recovery.c

index a53aeb4ee6484c9f75e925a625fe0cc77c6dd086..33b2e4d7da3b32d7c1bfbd9c2c1547474888b6f4 100644 (file)
@@ -670,7 +670,6 @@ static int bch2_invalidate_one_bucket(struct bch_fs *c, struct bch_dev *ca,
        ret = bch2_trans_do(c, NULL, &commit_seq,
                            BTREE_INSERT_NOCHECK_RW|
                            BTREE_INSERT_NOFAIL|
-                           BTREE_INSERT_JOURNAL_RESERVED|
                            flags,
                            bucket_invalidate_btree(&trans, ca, b, &u));
 
index 7e41552a57dfb6ab22ae3d77c2186502ea3c4386..f856dee0c3aaa7bcc401eece0e3cef95bc9cacb1 100644 (file)
@@ -421,7 +421,7 @@ static int btree_key_cache_flush_pos(struct btree_trans *trans,
                                  BTREE_INSERT_NOFAIL|
                                  BTREE_INSERT_USE_RESERVE|
                                  (ck->journal.seq == journal_last_seq(j)
-                                  ? BTREE_INSERT_JOURNAL_RESERVED
+                                  ? JOURNAL_WATERMARK_reserved
                                   : 0)|
                                  commit_flags);
        if (ret) {
index 3cf4cc4f235018038fe17a98b67b7810e88c5f59..ad13b0739a688d4678844e6e276e02098764f173 100644 (file)
@@ -16,12 +16,12 @@ bool bch2_btree_bset_insert_key(struct btree_trans *, struct btree_path *,
 void bch2_btree_add_journal_pin(struct bch_fs *, struct btree *, u64);
 
 enum btree_insert_flags {
-       __BTREE_INSERT_NOFAIL,
+       /* First two bits for journal watermark: */
+       __BTREE_INSERT_NOFAIL = 2,
        __BTREE_INSERT_NOCHECK_RW,
        __BTREE_INSERT_LAZY_RW,
        __BTREE_INSERT_USE_RESERVE,
        __BTREE_INSERT_JOURNAL_REPLAY,
-       __BTREE_INSERT_JOURNAL_RESERVED,
        __BTREE_INSERT_JOURNAL_RECLAIM,
        __BTREE_INSERT_NOWAIT,
        __BTREE_INSERT_GC_LOCK_HELD,
@@ -41,9 +41,6 @@ enum btree_insert_flags {
 /* Insert is for journal replay - don't get journal reservations: */
 #define BTREE_INSERT_JOURNAL_REPLAY    (1 << __BTREE_INSERT_JOURNAL_REPLAY)
 
-/* Indicates that we have pre-reserved space in the journal: */
-#define BTREE_INSERT_JOURNAL_RESERVED  (1 << __BTREE_INSERT_JOURNAL_RESERVED)
-
 /* Insert is being called from journal reclaim path: */
 #define BTREE_INSERT_JOURNAL_RECLAIM (1 << __BTREE_INSERT_JOURNAL_RECLAIM)
 
index 1c53f965539de2437ba0ee3ca4afe69a4dd45c54..cd4332f891dc89edac50e05f32332d1e17d737bc 100644 (file)
@@ -599,7 +599,7 @@ static void btree_update_nodes_written(struct btree_update *as)
                              BTREE_INSERT_NOFAIL|
                              BTREE_INSERT_NOCHECK_RW|
                              BTREE_INSERT_JOURNAL_RECLAIM|
-                             BTREE_INSERT_JOURNAL_RESERVED,
+                             JOURNAL_WATERMARK_reserved,
                              btree_update_nodes_written_trans(&trans, as));
        bch2_trans_exit(&trans);
 
@@ -964,14 +964,11 @@ bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path,
                ? BCH_DISK_RESERVATION_NOFAIL : 0;
        unsigned nr_nodes[2];
        unsigned update_level = level;
-       int journal_flags = 0;
+       int journal_flags = flags & JOURNAL_WATERMARK_MASK;
        int ret = 0;
 
        BUG_ON(!path->should_be_locked);
 
-       if (flags & BTREE_INSERT_JOURNAL_RESERVED)
-               journal_flags |= JOURNAL_RES_GET_RESERVED;
-
        closure_init_stack(&cl);
 retry:
        nr_nodes[0] = nr_nodes[1] = 0;
@@ -1972,7 +1969,7 @@ static int __bch2_btree_node_update_key(struct btree_trans *trans,
                                BTREE_INSERT_NOCHECK_RW|
                                BTREE_INSERT_USE_RESERVE|
                                BTREE_INSERT_JOURNAL_RECLAIM|
-                               BTREE_INSERT_JOURNAL_RESERVED);
+                               JOURNAL_WATERMARK_reserved);
        if (ret)
                goto err;
 
index f534d7e649fd0b25b1107ff97a97d000e10c668d..90e6e51306728abc9873d9f7149711edb0b23d53 100644 (file)
@@ -296,11 +296,10 @@ static inline int bch2_trans_journal_res_get(struct btree_trans *trans,
        struct bch_fs *c = trans->c;
        int ret;
 
-       if (trans->flags & BTREE_INSERT_JOURNAL_RESERVED)
-               flags |= JOURNAL_RES_GET_RESERVED;
-
        ret = bch2_journal_res_get(&c->journal, &trans->journal_res,
-                                  trans->journal_u64s, flags);
+                                  trans->journal_u64s,
+                                  flags|
+                                  (trans->flags & JOURNAL_WATERMARK_MASK));
 
        return ret == -EAGAIN ? BTREE_INSERT_NEED_JOURNAL_RES : ret;
 }
@@ -902,8 +901,7 @@ static inline int do_bch2_trans_commit(struct btree_trans *trans,
        ret = bch2_journal_preres_get(&c->journal,
                        &trans->journal_preres, trans->journal_preres_u64s,
                        JOURNAL_RES_GET_NONBLOCK|
-                       ((trans->flags & BTREE_INSERT_JOURNAL_RESERVED)
-                        ? JOURNAL_RES_GET_RESERVED : 0));
+                       (trans->flags & JOURNAL_WATERMARK_MASK));
        if (unlikely(ret == -EAGAIN))
                ret = bch2_trans_journal_preres_get_cold(trans,
                                                trans->journal_preres_u64s, trace_ip);
@@ -988,7 +986,7 @@ int bch2_trans_commit_error(struct btree_trans *trans,
                bch2_trans_unlock(trans);
 
                if ((trans->flags & BTREE_INSERT_JOURNAL_RECLAIM) &&
-                   !(trans->flags & BTREE_INSERT_JOURNAL_RESERVED)) {
+                   !(trans->flags & JOURNAL_WATERMARK_reserved)) {
                        trans->restarted = true;
                        ret = -EAGAIN;
                        break;
index 750509661d797e89843fb3ca40409449d116df34..c7f1674ed596b65f7ebf1b42455e5f6ea36930d2 100644 (file)
 #include "super-io.h"
 #include "trace.h"
 
+#define x(n)   #n,
+static const char * const bch2_journal_watermarks[] = {
+       JOURNAL_WATERMARKS()
+       NULL
+};
+
+static const char * const bch2_journal_errors[] = {
+       JOURNAL_ERRORS()
+       NULL
+};
+#undef x
+
 static inline bool journal_seq_unwritten(struct journal *j, u64 seq)
 {
        return seq > j->seq_ondisk;
@@ -207,19 +219,19 @@ static int journal_entry_open(struct journal *j)
        BUG_ON(BCH_SB_CLEAN(c->disk_sb.sb));
 
        if (j->blocked)
-               return cur_entry_blocked;
+               return JOURNAL_ERR_blocked;
 
        if (j->cur_entry_error)
                return j->cur_entry_error;
 
        if (bch2_journal_error(j))
-               return cur_entry_insufficient_devices; /* -EROFS */
+               return JOURNAL_ERR_insufficient_devices; /* -EROFS */
 
        if (!fifo_free(&j->pin))
-               return cur_entry_journal_pin_full;
+               return JOURNAL_ERR_journal_pin_full;
 
        if (nr_unwritten_journal_entries(j) == ARRAY_SIZE(j->buf))
-               return cur_entry_max_in_flight;
+               return JOURNAL_ERR_max_in_flight;
 
        BUG_ON(!j->cur_entry_sectors);
 
@@ -238,7 +250,7 @@ static int journal_entry_open(struct journal *j)
        u64s = clamp_t(int, u64s, 0, JOURNAL_ENTRY_CLOSED_VAL - 1);
 
        if (u64s <= 0)
-               return cur_entry_journal_full;
+               return JOURNAL_ERR_journal_full;
 
        if (fifo_empty(&j->pin) && j->reclaim_thread)
                wake_up_process(j->reclaim_thread);
@@ -354,13 +366,12 @@ retry:
                return 0;
        }
 
-       if (!(flags & JOURNAL_RES_GET_RESERVED) &&
-           !test_bit(JOURNAL_MAY_GET_UNRESERVED, &j->flags)) {
+       if ((flags & JOURNAL_WATERMARK_MASK) < j->watermark) {
                /*
                 * Don't want to close current journal entry, just need to
                 * invoke reclaim:
                 */
-               ret = cur_entry_journal_full;
+               ret = JOURNAL_ERR_journal_full;
                goto unlock;
        }
 
@@ -378,10 +389,10 @@ retry:
        __journal_entry_close(j, JOURNAL_ENTRY_CLOSED_VAL);
        ret = journal_entry_open(j);
 
-       if (ret == cur_entry_max_in_flight)
+       if (ret == JOURNAL_ERR_max_in_flight)
                trace_journal_entry_full(c);
 unlock:
-       if ((ret && ret != cur_entry_insufficient_devices) &&
+       if ((ret && ret != JOURNAL_ERR_insufficient_devices) &&
            !j->res_get_blocked_start) {
                j->res_get_blocked_start = local_clock() ?: 1;
                trace_journal_full(c);
@@ -393,14 +404,15 @@ unlock:
        if (!ret)
                goto retry;
 
-       if ((ret == cur_entry_journal_full ||
-            ret == cur_entry_journal_pin_full) &&
+       if ((ret == JOURNAL_ERR_journal_full ||
+            ret == JOURNAL_ERR_journal_pin_full) &&
            !can_discard &&
            !nr_unwritten_journal_entries(j) &&
-           (flags & JOURNAL_RES_GET_RESERVED)) {
+           (flags & JOURNAL_WATERMARK_MASK) == JOURNAL_WATERMARK_reserved) {
                struct printbuf buf = PRINTBUF;
 
-               bch_err(c, "Journal stuck! Hava a pre-reservation but journal full");
+               bch_err(c, "Journal stuck! Hava a pre-reservation but journal full (ret %s)",
+                       bch2_journal_errors[ret]);
 
                bch2_journal_debug_to_text(&buf, j);
                bch_err(c, "%s", buf.buf);
@@ -418,8 +430,8 @@ unlock:
         * Journal is full - can't rely on reclaim from work item due to
         * freezing:
         */
-       if ((ret == cur_entry_journal_full ||
-            ret == cur_entry_journal_pin_full) &&
+       if ((ret == JOURNAL_ERR_journal_full ||
+            ret == JOURNAL_ERR_journal_pin_full) &&
            !(flags & JOURNAL_RES_GET_NONBLOCK)) {
                if (can_discard) {
                        bch2_journal_do_discards(j);
@@ -432,7 +444,7 @@ unlock:
                }
        }
 
-       return ret == cur_entry_insufficient_devices ? -EROFS : -EAGAIN;
+       return ret == JOURNAL_ERR_insufficient_devices ? -EROFS : -EAGAIN;
 }
 
 /*
@@ -1187,13 +1199,14 @@ void __bch2_journal_debug_to_text(struct printbuf *out, struct journal *j)
        rcu_read_lock();
        s = READ_ONCE(j->reservations);
 
-       pr_buf(out, "dirty journal entries:\t%llu\n",   fifo_used(&j->pin));
+       pr_buf(out, "dirty journal entries:\t%llu/%llu\n",fifo_used(&j->pin), j->pin.size);
        pr_buf(out, "seq:\t\t\t%llu\n",                 journal_cur_seq(j));
        pr_buf(out, "seq_ondisk:\t\t%llu\n",            j->seq_ondisk);
        pr_buf(out, "last_seq:\t\t%llu\n",              journal_last_seq(j));
        pr_buf(out, "last_seq_ondisk:\t%llu\n",         j->last_seq_ondisk);
        pr_buf(out, "flushed_seq_ondisk:\t%llu\n",      j->flushed_seq_ondisk);
        pr_buf(out, "prereserved:\t\t%u/%u\n",          j->prereserved.reserved, j->prereserved.remaining);
+       pr_buf(out, "watermark:\t\t%s\n",               bch2_journal_watermarks[j->watermark]);
        pr_buf(out, "each entry reserved:\t%u\n",       j->entry_u64s_reserved);
        pr_buf(out, "nr flush writes:\t%llu\n",         j->nr_flush_writes);
        pr_buf(out, "nr noflush writes:\t%llu\n",       j->nr_noflush_writes);
@@ -1203,7 +1216,7 @@ void __bch2_journal_debug_to_text(struct printbuf *out, struct journal *j)
        pr_buf(out, "reclaim runs in:\t%u ms\n",        time_after(j->next_reclaim, now)
               ? jiffies_to_msecs(j->next_reclaim - jiffies) : 0);
        pr_buf(out, "current entry sectors:\t%u\n",     j->cur_entry_sectors);
-       pr_buf(out, "current entry error:\t%u\n",       j->cur_entry_error);
+       pr_buf(out, "current entry error:\t%s\n",       bch2_journal_errors[j->cur_entry_error]);
        pr_buf(out, "current entry:\t\t");
 
        switch (s.cur_entry_offset) {
index 243349f4ac1cc28e4bc09b81f133519d668c3b2d..c287ecf643aa14d69e54a43ba24b25b945754a9d 100644 (file)
@@ -293,9 +293,9 @@ static inline void bch2_journal_res_put(struct journal *j,
 int bch2_journal_res_get_slowpath(struct journal *, struct journal_res *,
                                  unsigned);
 
-#define JOURNAL_RES_GET_NONBLOCK       (1 << 0)
-#define JOURNAL_RES_GET_CHECK          (1 << 1)
-#define JOURNAL_RES_GET_RESERVED       (1 << 2)
+/* First two bits for JOURNAL_WATERMARK: */
+#define JOURNAL_RES_GET_NONBLOCK       (1 << 2)
+#define JOURNAL_RES_GET_CHECK          (1 << 3)
 
 static inline int journal_res_get_fast(struct journal *j,
                                       struct journal_res *res,
@@ -316,8 +316,7 @@ static inline int journal_res_get_fast(struct journal *j,
 
                EBUG_ON(!journal_state_count(new, new.idx));
 
-               if (!(flags & JOURNAL_RES_GET_RESERVED) &&
-                   !test_bit(JOURNAL_MAY_GET_UNRESERVED, &j->flags))
+               if ((flags & JOURNAL_WATERMARK_MASK) < j->watermark)
                        return 0;
 
                new.cur_entry_offset += res->u64s;
@@ -370,23 +369,27 @@ out:
 
 /* journal_preres: */
 
-static inline bool journal_check_may_get_unreserved(struct journal *j)
+static inline void journal_set_watermark(struct journal *j)
 {
        union journal_preres_state s = READ_ONCE(j->prereserved);
-       bool ret = s.reserved < s.remaining &&
-               fifo_free(&j->pin) > j->pin.size / 4;
-
-       lockdep_assert_held(&j->lock);
-
-       if (ret != test_bit(JOURNAL_MAY_GET_UNRESERVED, &j->flags)) {
-               if (ret) {
-                       set_bit(JOURNAL_MAY_GET_UNRESERVED, &j->flags);
-                       journal_wake(j);
-               } else {
-                       clear_bit(JOURNAL_MAY_GET_UNRESERVED, &j->flags);
-               }
-       }
-       return ret;
+       unsigned watermark = JOURNAL_WATERMARK_any;
+
+       if (fifo_free(&j->pin) < j->pin.size / 4)
+               watermark = max_t(unsigned, watermark, JOURNAL_WATERMARK_copygc);
+       if (fifo_free(&j->pin) < j->pin.size / 8)
+               watermark = max_t(unsigned, watermark, JOURNAL_WATERMARK_reserved);
+
+       if (s.reserved > s.remaining)
+               watermark = max_t(unsigned, watermark, JOURNAL_WATERMARK_copygc);
+       if (!s.remaining)
+               watermark = max_t(unsigned, watermark, JOURNAL_WATERMARK_reserved);
+
+       if (watermark == j->watermark)
+               return;
+
+       swap(watermark, j->watermark);
+       if (watermark > j->watermark)
+               journal_wake(j);
 }
 
 static inline void bch2_journal_preres_put(struct journal *j,
@@ -406,12 +409,8 @@ static inline void bch2_journal_preres_put(struct journal *j,
                closure_wake_up(&j->preres_wait);
        }
 
-       if (s.reserved <= s.remaining &&
-           !test_bit(JOURNAL_MAY_GET_UNRESERVED, &j->flags)) {
-               spin_lock(&j->lock);
-               journal_check_may_get_unreserved(j);
-               spin_unlock(&j->lock);
-       }
+       if (s.reserved <= s.remaining && j->watermark)
+               journal_set_watermark(j);
 }
 
 int __bch2_journal_preres_get(struct journal *,
@@ -432,7 +431,7 @@ static inline int bch2_journal_preres_get_fast(struct journal *j,
                old.v = new.v = v;
                ret = 0;
 
-               if ((flags & JOURNAL_RES_GET_RESERVED) ||
+               if ((flags & JOURNAL_WATERMARK_reserved) ||
                    new.reserved + d < new.remaining) {
                        new.reserved += d;
                        ret = 1;
index f55fc0b11977fff12399cc1a754c376b19c4764e..e99a01e3b5fb38acb1f2a49c9f25b85904aa54ff 100644 (file)
@@ -195,7 +195,7 @@ void bch2_journal_space_available(struct journal *j)
        j->can_discard = can_discard;
 
        if (nr_online < c->opts.metadata_replicas_required) {
-               ret = cur_entry_insufficient_devices;
+               ret = JOURNAL_ERR_insufficient_devices;
                goto out;
        }
 
@@ -224,9 +224,9 @@ void bch2_journal_space_available(struct journal *j)
                bch2_fatal_error(c);
                spin_lock(&j->lock);
 
-               ret = cur_entry_journal_stuck;
+               ret = JOURNAL_ERR_journal_stuck;
        } else if (!j->space[journal_space_discarded].next_entry)
-               ret = cur_entry_journal_full;
+               ret = JOURNAL_ERR_journal_full;
 
        if ((j->space[journal_space_clean_ondisk].next_entry <
             j->space[journal_space_clean_ondisk].total) &&
@@ -245,7 +245,7 @@ out:
        j->cur_entry_sectors    = !ret ? j->space[journal_space_discarded].next_entry : 0;
        j->cur_entry_error      = ret;
        journal_set_remaining(j, u64s_remaining);
-       journal_check_may_get_unreserved(j);
+       journal_set_watermark(j);
 
        if (!ret)
                journal_wake(j);
index 91f829adf8627dfb774ad3e97c262dc023b07e7c..a41b915b3ac6db6a0fff76c9f3962420d531a9d6 100644 (file)
@@ -144,16 +144,45 @@ enum journal_space_from {
 enum {
        JOURNAL_REPLAY_DONE,
        JOURNAL_STARTED,
-       JOURNAL_MAY_GET_UNRESERVED,
        JOURNAL_MAY_SKIP_FLUSH,
 };
 
+#define JOURNAL_WATERMARKS()           \
+       x(any)                          \
+       x(copygc)                       \
+       x(reserved)
+
+enum journal_watermark {
+#define x(n)   JOURNAL_WATERMARK_##n,
+       JOURNAL_WATERMARKS()
+#undef x
+};
+
+#define JOURNAL_WATERMARK_MASK 3
+
+/* Reasons we may fail to get a journal reservation: */
+#define JOURNAL_ERRORS()               \
+       x(ok)                           \
+       x(blocked)                      \
+       x(max_in_flight)                \
+       x(journal_full)                 \
+       x(journal_pin_full)             \
+       x(journal_stuck)                \
+       x(insufficient_devices)
+
+enum journal_errors {
+#define x(n)   JOURNAL_ERR_##n,
+       JOURNAL_ERRORS()
+#undef x
+};
+
 /* Embedded in struct bch_fs */
 struct journal {
        /* Fastpath stuff up front: */
        struct {
 
        union journal_res_state reservations;
+       enum journal_watermark  watermark;
 
        union journal_preres_state prereserved;
 
@@ -173,15 +202,7 @@ struct journal {
         * 0, or -ENOSPC if waiting on journal reclaim, or -EROFS if
         * insufficient devices:
         */
-       enum {
-               cur_entry_ok,
-               cur_entry_blocked,
-               cur_entry_max_in_flight,
-               cur_entry_journal_full,
-               cur_entry_journal_pin_full,
-               cur_entry_journal_stuck,
-               cur_entry_insufficient_devices,
-       }                       cur_entry_error;
+       enum journal_errors     cur_entry_error;
 
        unsigned                buf_size_want;
        /*
index b43e54133b15d8acafc14b09cc9a285ad38f38dd..a54a83d3247b3300a454368b528b1ddb90f62748 100644 (file)
@@ -91,7 +91,7 @@ static enum data_cmd copygc_pred(struct bch_fs *c, void *arg,
                        data_opts->target               = io_opts->background_target;
                        data_opts->nr_replicas          = 1;
                        data_opts->btree_insert_flags   = BTREE_INSERT_USE_RESERVE|
-                               BTREE_INSERT_JOURNAL_RESERVED;
+                               JOURNAL_WATERMARK_copygc;
                        data_opts->rewrite_dev          = p.ptr.dev;
 
                        if (p.has_ec)
index 887971559214eb681e345328260ec12b67356823..93882e6a2ae4a0ff3249cfba30db9a866cb18616 100644 (file)
@@ -562,8 +562,9 @@ static int bch2_journal_replay(struct bch_fs *c)
                ret = bch2_trans_do(c, NULL, NULL,
                                    BTREE_INSERT_LAZY_RW|
                                    BTREE_INSERT_NOFAIL|
-                                   BTREE_INSERT_JOURNAL_RESERVED|
-                                   (!k->allocated ? BTREE_INSERT_JOURNAL_REPLAY : 0),
+                                   (!k->allocated
+                                    ? BTREE_INSERT_JOURNAL_REPLAY|JOURNAL_WATERMARK_reserved
+                                    : 0),
                             bch2_journal_replay_key(&trans, k));
                if (ret) {
                        bch_err(c, "journal replay: error %d while replaying key at btree %s level %u",