git.ipfire.org Git - thirdparty/linux.git/commitdiff
bcachefs: Start copygc, rebalance threads earlier
author: Kent Overstreet <kent.overstreet@linux.dev>
Sun, 13 Apr 2025 10:44:23 +0000 (06:44 -0400)
committer: Kent Overstreet <kent.overstreet@linux.dev>
Mon, 21 Apr 2025 15:57:24 +0000 (11:57 -0400)
Previously, copygc and rebalance weren't started until the very end of
mounting, after all recovery passes have finished.

But copygc really should be started earlier, since it may be needed for
allocations to make forward progress. Additionally, we've been seeing
occasional bug reports where starting the kthread fails due to a pending
signal - i.e. we're getting timed out by systemd (during a version
upgrade), but we're not seeing the signal until mount is about to
complete.

Additionally, we now have copygc/rebalance explicitly wait for
check_snapshots to complete (if being run); they require that for
snapshot_is_ancestor() in the data move path.

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
fs/bcachefs/movinggc.c
fs/bcachefs/rebalance.c
fs/bcachefs/recovery.c
fs/bcachefs/recovery_passes.c
fs/bcachefs/super.c

index 159410c50861b7283a6577b618c81925c18f4d85..96873372b51600f367d5a1710b17abeef828eabb 100644 (file)
@@ -356,6 +356,13 @@ static int bch2_copygc_thread(void *arg)
 
        set_freezable();
 
+       /*
+        * Data move operations can't run until after check_snapshots has
+        * completed, and bch2_snapshot_is_ancestor() is available.
+        */
+       kthread_wait_freezable(c->recovery_pass_done > BCH_RECOVERY_PASS_check_snapshots ||
+                              kthread_should_stop());
+
        bch2_move_stats_init(&move_stats, "copygc");
        bch2_moving_ctxt_init(&ctxt, c, NULL, &move_stats,
                              writepoint_ptr(&c->copygc_write_point),
index 39006e6affe3db50e975fc35c191cae7a621432d..4ccdfc1f34aa3559fb65c93d709110a80a3e4030 100644 (file)
@@ -581,6 +581,13 @@ static int bch2_rebalance_thread(void *arg)
 
        set_freezable();
 
+       /*
+        * Data move operations can't run until after check_snapshots has
+        * completed, and bch2_snapshot_is_ancestor() is available.
+        */
+       kthread_wait_freezable(c->recovery_pass_done > BCH_RECOVERY_PASS_check_snapshots ||
+                              kthread_should_stop());
+
        bch2_moving_ctxt_init(&ctxt, c, NULL, &r->work_stats,
                              writepoint_ptr(&c->rebalance_write_point),
                              true);
index bea578e339886c898c625e27e721dcd17673e155..d6c4ef819d40ac9fc73db339d62933f3dcc28b46 100644 (file)
@@ -18,6 +18,7 @@
 #include "journal_seq_blacklist.h"
 #include "logged_ops.h"
 #include "move.h"
+#include "movinggc.h"
 #include "namei.h"
 #include "quota.h"
 #include "rebalance.h"
@@ -1194,6 +1195,9 @@ int bch2_fs_initialize(struct bch_fs *c)
 
        c->recovery_pass_done = BCH_RECOVERY_PASS_NR - 1;
 
+       bch2_copygc_wakeup(c);
+       bch2_rebalance_wakeup(c);
+
        if (enabled_qtypes(c)) {
                ret = bch2_fs_quota_read(c);
                if (ret)
index 8f769804cb5906eea79a2a31de43d72e8d96233b..22f72bb5b8536bf173ac7c6f59db70bcc1224fbf 100644 (file)
@@ -266,6 +266,7 @@ int bch2_run_recovery_passes(struct bch_fs *c)
        spin_lock_irq(&c->recovery_pass_lock);
 
        while (c->curr_recovery_pass < ARRAY_SIZE(recovery_pass_fns) && !ret) {
+               unsigned prev_done = c->recovery_pass_done;
                unsigned pass = c->curr_recovery_pass;
 
                c->next_recovery_pass = pass + 1;
@@ -299,6 +300,12 @@ int bch2_run_recovery_passes(struct bch_fs *c)
                }
 
                c->curr_recovery_pass = c->next_recovery_pass;
+
+               if (prev_done <= BCH_RECOVERY_PASS_check_snapshots &&
+                   c->recovery_pass_done > BCH_RECOVERY_PASS_check_snapshots) {
+                       bch2_copygc_wakeup(c);
+                       bch2_rebalance_wakeup(c);
+               }
        }
 
        spin_unlock_irq(&c->recovery_pass_lock);
index ee27734f350f8e1d999316da0e044321b7a26dda..4060af4692f92866f6a313ba139b7e7ca974779f 100644 (file)
@@ -418,32 +418,6 @@ bool bch2_fs_emergency_read_only_locked(struct bch_fs *c)
        return ret;
 }
 
-static int bch2_fs_read_write_late(struct bch_fs *c)
-{
-       int ret;
-
-       /*
-        * Data move operations can't run until after check_snapshots has
-        * completed, and bch2_snapshot_is_ancestor() is available.
-        *
-        * Ideally we'd start copygc/rebalance earlier instead of waiting for
-        * all of recovery/fsck to complete:
-        */
-       ret = bch2_copygc_start(c);
-       if (ret) {
-               bch_err(c, "error starting copygc thread");
-               return ret;
-       }
-
-       ret = bch2_rebalance_start(c);
-       if (ret) {
-               bch_err(c, "error starting rebalance thread");
-               return ret;
-       }
-
-       return 0;
-}
-
 static int __bch2_fs_read_write(struct bch_fs *c, bool early)
 {
        int ret;
@@ -503,10 +477,17 @@ static int __bch2_fs_read_write(struct bch_fs *c, bool early)
                atomic_long_inc(&c->writes[i]);
        }
 #endif
-       if (!early) {
-               ret = bch2_fs_read_write_late(c);
-               if (ret)
-                       goto err;
+
+       ret = bch2_copygc_start(c);
+       if (ret) {
+               bch_err_msg(c, ret, "error starting copygc thread");
+               goto err;
+       }
+
+       ret = bch2_rebalance_start(c);
+       if (ret) {
+               bch_err_msg(c, ret, "error starting rebalance thread");
+               goto err;
        }
 
        bch2_do_discards(c);
@@ -1082,13 +1063,10 @@ int bch2_fs_start(struct bch_fs *c)
        wake_up(&c->ro_ref_wait);
 
        down_write(&c->state_lock);
-       if (c->opts.read_only) {
+       if (c->opts.read_only)
                bch2_fs_read_only(c);
-       } else {
-               ret = !test_bit(BCH_FS_rw, &c->flags)
-                       ? bch2_fs_read_write(c)
-                       : bch2_fs_read_write_late(c);
-       }
+       else if (!test_bit(BCH_FS_rw, &c->flags))
+               ret = bch2_fs_read_write(c);
        up_write(&c->state_lock);
 
 err: