git.ipfire.org Git - thirdparty/kernel/linux.git/commitdiff
bcachefs: New discard implementation
author: Kent Overstreet <kent.overstreet@gmail.com>
Thu, 10 Feb 2022 09:32:19 +0000 (04:32 -0500)
committer: Kent Overstreet <kent.overstreet@linux.dev>
Sun, 22 Oct 2023 21:09:29 +0000 (17:09 -0400)
In the old allocator code, buckets would be discarded just prior to
being used - this made sense in bcache where we were discarding buckets
just after invalidating the cached data they contain, but in a
filesystem where we typically have more free space we want to be
discarding buckets when they become empty.

This patch implements the new behaviour - it checks the need_discard
btree for buckets awaiting discards, issues the discard, and then clears
the need_discard bit in the alloc btree, which moves the bucket to the
freespace btree.

Additionally, discards are now enabled by default.

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
fs/bcachefs/alloc_background.c
fs/bcachefs/alloc_background.h
fs/bcachefs/bcachefs.h
fs/bcachefs/buckets.c
fs/bcachefs/journal_io.c
fs/bcachefs/opts.h
fs/bcachefs/super.c
fs/bcachefs/trace.h

index 3ba2b35fad53899643ded6d55703cb054a8ed2c2..9514c2e5f01e3ed1138dde807d07a462781971eb 100644 (file)
@@ -545,6 +545,7 @@ int bch2_trans_mark_alloc(struct btree_trans *trans,
                new_a->io_time[READ] = max_t(u64, 1, atomic64_read(&c->io_clock[READ].now));
                new_a->io_time[WRITE]= max_t(u64, 1, atomic64_read(&c->io_clock[WRITE].now));
                SET_BCH_ALLOC_V4_NEED_INC_GEN(new_a, true);
+               SET_BCH_ALLOC_V4_NEED_DISCARD(new_a, true);
        }
 
        if (old_a.data_type && !new_a->data_type &&
@@ -579,6 +580,144 @@ int bch2_trans_mark_alloc(struct btree_trans *trans,
        return 0;
 }
 
+/*
+ * Clear the NEED_DISCARD flag on the alloc key at @pos, issuing a discard
+ * to the device first when discards are enabled.
+ *
+ * Runs inside a transaction commit loop (__bch2_trans_do() in
+ * bch2_do_discards_work()), so it may run more than once for the same
+ * bucket on transaction restart; @discard_done guards against issuing the
+ * discard to the device twice.
+ *
+ * Returns 0 on success, -EIO on filesystem inconsistency, -EINTR if the
+ * transaction must restart, or a btree error.
+ */
+static int bch2_clear_need_discard(struct btree_trans *trans, struct bpos pos,
+                                  struct bch_dev *ca, bool *discard_done)
+{
+       struct bch_fs *c = trans->c;
+       struct btree_iter iter;
+       struct bkey_s_c k;
+       struct bkey_i_alloc_v4 *a;
+       struct printbuf buf = PRINTBUF;
+       int ret;
+
+       bch2_trans_iter_init(trans, &iter, BTREE_ID_alloc, pos,
+                            BTREE_ITER_CACHED);
+       k = bch2_btree_iter_peek_slot(&iter);
+       ret = bkey_err(k);
+       if (ret)
+               goto out;
+
+       a = bch2_alloc_to_v4_mut(trans, k);
+       ret = PTR_ERR_OR_ZERO(a);
+       if (ret)
+               goto out;
+
+       /*
+        * If the bucket still needs its generation bumped, do that and write
+        * the key out without clearing NEED_DISCARD — presumably the bucket
+        * is revisited later; TODO(review) confirm against the alloc triggers.
+        */
+       if (BCH_ALLOC_V4_NEED_INC_GEN(&a->v)) {
+               a->v.gen++;
+               SET_BCH_ALLOC_V4_NEED_INC_GEN(&a->v, false);
+               goto write;
+       }
+
+       /*
+        * The caller already skipped buckets whose freeing hasn't hit the
+        * journal (bch2_bucket_needs_journal_commit()), so an unflushed
+        * journal_seq here is a bug.
+        */
+       BUG_ON(a->v.journal_seq > c->journal.flushed_seq_ondisk);
+
+       if (bch2_fs_inconsistent_on(!BCH_ALLOC_V4_NEED_DISCARD(&a->v), c,
+                       "%s\n  incorrectly set in need_discard btree",
+                       (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) {
+               ret = -EIO;
+               goto out;
+       }
+
+       if (!*discard_done && ca->mi.discard && !c->opts.nochanges) {
+               /*
+                * This works without any other locks because this is the only
+                * thread that removes items from the need_discard tree
+                */
+               bch2_trans_unlock(trans);
+               /* offset/length assumed to be in 512-byte sectors — confirm */
+               blkdev_issue_discard(ca->disk_sb.bdev,
+                                    k.k->p.offset * ca->mi.bucket_size,
+                                    ca->mi.bucket_size,
+                                    GFP_KERNEL);
+               *discard_done = true;
+
+               /* We dropped btree locks for the discard; relock or restart */
+               ret = bch2_trans_relock(trans) ? 0 : -EINTR;
+               if (ret)
+                       goto out;
+       }
+
+       SET_BCH_ALLOC_V4_NEED_DISCARD(&a->v, false);
+write:
+       ret = bch2_trans_update(trans, &iter, &a->k_i, 0);
+out:
+       bch2_trans_iter_exit(trans, &iter);
+       printbuf_exit(&buf);
+       return ret;
+}
+
+/*
+ * Workqueue function: walk the need_discard btree and discard every bucket
+ * that is safe to discard, clearing its NEED_DISCARD flag.
+ *
+ * A bucket is skipped if it is currently open (in use by the allocator) or
+ * if the journal entry that freed it hasn't been flushed yet. If more than
+ * half the buckets seen were waiting on the journal, an async journal flush
+ * is kicked off so they can be discarded on a later pass.
+ *
+ * Drops the c->writes ref taken by bch2_do_discards().
+ */
+static void bch2_do_discards_work(struct work_struct *work)
+{
+       struct bch_fs *c = container_of(work, struct bch_fs, discard_work);
+       struct bch_dev *ca = NULL;
+       struct btree_trans trans;
+       struct btree_iter iter;
+       struct bkey_s_c k;
+       u64 seen = 0, open = 0, need_journal_commit = 0, discarded = 0;
+       int ret;
+
+       bch2_trans_init(&trans, c, 0, 0);
+
+       for_each_btree_key(&trans, iter, BTREE_ID_need_discard,
+                          POS_MIN, 0, k, ret) {
+               bool discard_done = false;
+
+               /* Keys are grouped by device (p.inode); drop the old ref */
+               if (ca && k.k->p.inode != ca->dev_idx) {
+                       percpu_ref_put(&ca->io_ref);
+                       ca = NULL;
+               }
+
+               if (!ca) {
+                       ca = bch_dev_bkey_exists(c, k.k->p.inode);
+                       if (!percpu_ref_tryget(&ca->io_ref)) {
+                               /* Device not accepting IO: skip all its keys */
+                               ca = NULL;
+                               bch2_btree_iter_set_pos(&iter, POS(k.k->p.inode + 1, 0));
+                               continue;
+                       }
+               }
+
+               seen++;
+
+               /* Bucket currently held open by the allocator */
+               if (bch2_bucket_is_open_safe(c, k.k->p.inode, k.k->p.offset)) {
+                       open++;
+                       continue;
+               }
+
+               /* Can't discard until the journal entry freeing it is flushed */
+               if (bch2_bucket_needs_journal_commit(&c->buckets_waiting_for_journal,
+                               c->journal.flushed_seq_ondisk,
+                               k.k->p.inode, k.k->p.offset)) {
+                       need_journal_commit++;
+                       continue;
+               }
+
+               ret = __bch2_trans_do(&trans, NULL, NULL,
+                                     BTREE_INSERT_USE_RESERVE|
+                                     BTREE_INSERT_NOFAIL,
+                               bch2_clear_need_discard(&trans, k.k->p, ca, &discard_done));
+               if (ret)
+                       break;
+
+               discarded++;
+       }
+       bch2_trans_iter_exit(&trans, &iter);
+
+       if (ca)
+               percpu_ref_put(&ca->io_ref);
+
+       bch2_trans_exit(&trans);
+
+       /* Mostly blocked on the journal? Flush it so we can make progress */
+       if (need_journal_commit * 2 > seen)
+               bch2_journal_flush_async(&c->journal, NULL);
+
+       percpu_ref_put(&c->writes);
+
+       trace_do_discards(c, seen, open, need_journal_commit, discarded, ret);
+}
+
+/*
+ * Kick off background discards on system_long_wq.
+ *
+ * Takes a ref on c->writes, dropped by the work function — or immediately
+ * here if the work was already queued (queue_work() returned false).
+ */
+void bch2_do_discards(struct bch_fs *c)
+{
+       if (percpu_ref_tryget(&c->writes) &&
+           !queue_work(system_long_wq, &c->discard_work))
+               percpu_ref_put(&c->writes);
+}
+
 static int bch2_dev_freespace_init(struct bch_fs *c, struct bch_dev *ca)
 {
        struct btree_trans trans;
@@ -862,4 +1001,5 @@ void bch2_dev_allocator_add(struct bch_fs *c, struct bch_dev *ca)
 void bch2_fs_allocator_background_init(struct bch_fs *c)
 {
        spin_lock_init(&c->freelist_lock);
+       INIT_WORK(&c->discard_work, bch2_do_discards_work);
 }
index 74b23f9b1bd3c383aaf2717f394405b918ab18e7..8ba9bf853c2fe787614d40f9eaa49451ea564e04 100644 (file)
@@ -113,6 +113,8 @@ int bch2_alloc_read(struct bch_fs *, bool, bool);
 
 int bch2_trans_mark_alloc(struct btree_trans *, struct bkey_s_c,
                          struct bkey_i *, unsigned);
+void bch2_do_discards(struct bch_fs *);
+
 int bch2_fs_freespace_init(struct bch_fs *);
 
 void bch2_recalc_capacity(struct bch_fs *);
index 879b2adc8b42c608157359f2d1c3871bb8d2fa8b..ca48b3f863042436dbba7a8de75de2463da1e45b 100644 (file)
@@ -758,6 +758,7 @@ struct bch_fs {
        unsigned                write_points_nr;
 
        struct buckets_waiting_for_journal buckets_waiting_for_journal;
+       struct work_struct      discard_work;
 
        /* GARBAGE COLLECTION */
        struct task_struct      *gc_thread;
index 2c6fdf385ba38dcddeabb757839036e8774f28dc..0e86b45b6c55d367615d51ce0d33e207e23de723 100644 (file)
@@ -543,6 +543,11 @@ int bch2_mark_alloc(struct btree_trans *trans,
            (!new_a.journal_seq || new_a.journal_seq < c->journal.flushed_seq_ondisk))
                closure_wake_up(&c->freelist_wait);
 
+       if ((flags & BTREE_TRIGGER_INSERT) &&
+           BCH_ALLOC_V4_NEED_DISCARD(&new_a) &&
+           !new_a.journal_seq)
+               bch2_do_discards(c);
+
        if (bucket_state(new_a) == BUCKET_need_gc_gens) {
                atomic_inc(&c->kick_gc);
                wake_up_process(c->gc_thread);
index 3e418342ee6791db818161794dfe73014c3efe03..3974d043fd8aa5171a1a3cbb54e7c814bb19b684 100644 (file)
@@ -1,5 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0
 #include "bcachefs.h"
+#include "alloc_background.h"
 #include "alloc_foreground.h"
 #include "btree_io.h"
 #include "btree_update_interior.h"
@@ -1399,6 +1400,7 @@ static void journal_write_done(struct closure *cl)
                        j->flushed_seq_ondisk = seq;
                        j->last_seq_ondisk = w->last_seq;
 
+                       bch2_do_discards(c);
                        closure_wake_up(&c->freelist_wait);
 
                        bch2_reset_alloc_cursors(c);
index b45740ec3c67392192bf8cabeba0a6e1cdd58b7a..ce79e1a12bd045304c669e5d21e74eb86e99c82f 100644 (file)
@@ -266,7 +266,7 @@ enum opt_type {
        x(discard,                      u8,                             \
          OPT_FS|OPT_MOUNT|OPT_DEVICE,                                  \
          OPT_BOOL(),                                                   \
-         BCH2_NO_SB_OPT,               false,                          \
+         BCH2_NO_SB_OPT,               true,                           \
          NULL,         "Enable discard/TRIM support")                  \
        x(verbose,                      u8,                             \
          OPT_FS|OPT_MOUNT,                                             \
index 3a8740fde9de294af452e4bdae9e3c309f6bcd8c..037923bca7420a890c2456f82ba6f90598f76d9c 100644 (file)
@@ -401,6 +401,8 @@ static int __bch2_fs_read_write(struct bch_fs *c, bool early)
                bch2_dev_allocator_add(c, ca);
        bch2_recalc_capacity(c);
 
+       bch2_do_discards(c);
+
        if (!early) {
                ret = bch2_fs_read_write_late(c);
                if (ret)
index caf59b977e2fa734c051a1e5448fdb781dd70217..ef2096fd147d97429bbb341eefd5d2fd459e32d4 100644 (file)
@@ -182,6 +182,40 @@ TRACE_EVENT(journal_reclaim_finish,
                  __entry->nr_flushed)
 );
 
+/* allocator: */
+
+/*
+ * Emitted once per bch2_do_discards_work() pass with counters of buckets
+ * seen/skipped/discarded and the final return code.
+ */
+TRACE_EVENT(do_discards,
+       TP_PROTO(struct bch_fs *c, u64 seen, u64 open,
+                u64 need_journal_commit, u64 discarded, int ret),
+       TP_ARGS(c, seen, open, need_journal_commit, discarded, ret),
+
+       TP_STRUCT__entry(
+               __field(dev_t,          dev                     )
+               __field(u64,            seen                    )
+               __field(u64,            open                    )
+               __field(u64,            need_journal_commit     )
+               __field(u64,            discarded               )
+               __field(int,            ret                     )
+       ),
+
+       TP_fast_assign(
+               __entry->dev                    = c->dev;
+               __entry->seen                   = seen;
+               __entry->open                   = open;
+               __entry->need_journal_commit    = need_journal_commit;
+               __entry->discarded              = discarded;
+               __entry->ret                    = ret;
+       ),
+
+       /* "%d,%d": separator was missing, printing e.g. "80" for dev 8,0 */
+       TP_printk("%d,%d seen %llu open %llu need_journal_commit %llu discarded %llu ret %i",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 __entry->seen,
+                 __entry->open,
+                 __entry->need_journal_commit,
+                 __entry->discarded,
+                 __entry->ret)
+);
+
 /* bset.c: */
 
 DEFINE_EVENT(bpos, bkey_pack_pos_fail,