git.ipfire.org Git - thirdparty/linux.git/commitdiff
bcachefs: __bch2_run_recovery_passes()
authorKent Overstreet <kent.overstreet@linux.dev>
Sat, 10 May 2025 21:45:45 +0000 (17:45 -0400)
committerKent Overstreet <kent.overstreet@linux.dev>
Thu, 22 May 2025 00:15:03 +0000 (20:15 -0400)
Consolidate bch2_run_recovery_passes() and
bch2_run_online_recovery_passes(), prep work for automatically
scheduling and running recovery passes in the background.

- Now takes a mask of which passes to run, automatic background repair
  will pass in sb.recovery_passes_required.

- Skips passes that are failing: a pass that failed may be reattempted
  after another pass succeeds (some passes depend on repair done by
  other passes for successful completion).

- bch2_recovery_passes_match() helper to skip alloc passes on a
  filesystem without alloc info.

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
fs/bcachefs/fsck.c
fs/bcachefs/recovery.c
fs/bcachefs/recovery_passes.c
fs/bcachefs/recovery_passes.h
fs/bcachefs/recovery_passes_types.h

index 94a64816cb50f845a2632a5afea67a40befacfe5..0e223d4ae2ec977bd8c19216a2fae1436d4ca3d9 100644 (file)
@@ -3178,7 +3178,7 @@ static int bch2_fsck_online_thread_fn(struct thread_with_stdio *stdio)
        set_bit(BCH_FS_in_fsck, &c->flags);
 
        c->recovery.curr_pass = BCH_RECOVERY_PASS_check_alloc_info;
-       int ret = bch2_run_online_recovery_passes(c);
+       int ret = bch2_run_online_recovery_passes(c, ~0ULL);
 
        clear_bit(BCH_FS_in_fsck, &c->flags);
        bch_err_fn(c, ret);
index cd2372221a5468976ad1083088696a9b79247e86..a7e6b5a6505a12b0c4c035d10d05665787cccea1 100644 (file)
@@ -966,7 +966,7 @@ use_clean:
        if (ret)
                goto err;
 
-       ret = bch2_run_recovery_passes(c);
+       ret = bch2_run_recovery_passes(c, 0);
        if (ret)
                goto err;
 
@@ -1001,9 +1001,8 @@ use_clean:
                bch_info(c, "Fixed errors, running fsck a second time to verify fs is clean");
                clear_bit(BCH_FS_errors_fixed, &c->flags);
 
-               c->recovery.curr_pass = BCH_RECOVERY_PASS_check_alloc_info;
-
-               ret = bch2_run_recovery_passes(c);
+               ret = bch2_run_recovery_passes(c,
+                       BCH_RECOVERY_PASS_check_alloc_info);
                if (ret)
                        goto err;
 
index c1eca55a1dde1dd00fff356db6d83160158f3e23..e0e261aa752e156a80c2858f7032f4478732861d 100644 (file)
@@ -203,6 +203,21 @@ static struct recovery_pass_fn recovery_pass_fns[] = {
 #undef x
 };
 
+static u64 bch2_recovery_passes_match(unsigned flags)
+{
+       u64 ret = 0;
+
+       for (unsigned i = 0; i < ARRAY_SIZE(recovery_pass_fns); i++)
+               if (recovery_pass_fns[i].when & flags)
+                       ret |= BIT_ULL(i);
+       return ret;
+}
+
+u64 bch2_fsck_recovery_passes(void)
+{
+       return bch2_recovery_passes_match(PASS_FSCK);
+}
+
 /*
  * For when we need to rewind recovery passes and run a pass we skipped:
  */
@@ -235,10 +250,12 @@ static int __bch2_run_explicit_recovery_pass(struct printbuf *out,
 
        c->opts.recovery_passes |= BIT_ULL(pass);
 
+       if (test_bit(BCH_FS_in_recovery, &c->flags))
+               r->passes_to_run |= BIT_ULL(pass);
+
        if (test_bit(BCH_FS_in_recovery, &c->flags) &&
            r->curr_pass > pass) {
                r->next_pass = pass;
-               r->passes_complete &= (1ULL << pass) >> 1;
                return -BCH_ERR_restart_recovery;
        } else {
                return 0;
@@ -302,37 +319,9 @@ int bch2_run_explicit_recovery_pass_persistent(struct bch_fs *c,
        return ret;
 }
 
-u64 bch2_fsck_recovery_passes(void)
-{
-       u64 ret = 0;
-
-       for (unsigned i = 0; i < ARRAY_SIZE(recovery_pass_fns); i++)
-               if (recovery_pass_fns[i].when & PASS_FSCK)
-                       ret |= BIT_ULL(i);
-       return ret;
-}
-
-static bool should_run_recovery_pass(struct bch_fs *c, enum bch_recovery_pass pass)
-{
-       struct recovery_pass_fn *p = recovery_pass_fns + pass;
-
-       if ((p->when & PASS_ALLOC) && (c->sb.features & BIT_ULL(BCH_FEATURE_no_alloc_info)))
-               return false;
-       if (c->opts.recovery_passes_exclude & BIT_ULL(pass))
-               return false;
-       if (c->opts.recovery_passes & BIT_ULL(pass))
-               return true;
-       if ((p->when & PASS_FSCK) && c->opts.fsck)
-               return true;
-       if ((p->when & PASS_UNCLEAN) && !c->sb.clean)
-               return true;
-       if (p->when & PASS_ALWAYS)
-               return true;
-       return false;
-}
-
 static int bch2_run_recovery_pass(struct bch_fs *c, enum bch_recovery_pass pass)
 {
+       struct bch_fs_recovery *r = &c->recovery;
        struct recovery_pass_fn *p = recovery_pass_fns + pass;
 
        if (!(p->when & PASS_SILENT))
@@ -341,8 +330,15 @@ static int bch2_run_recovery_pass(struct bch_fs *c, enum bch_recovery_pass pass)
 
        s64 start_time = ktime_get_real_seconds();
        int ret = p->fn(c);
-       if (ret)
+
+       r->passes_to_run &= ~BIT_ULL(pass);
+
+       if (ret) {
+               r->passes_failing |= BIT_ULL(pass);
                return ret;
+       }
+
+       r->passes_failing = 0;
 
        if (!test_bit(BCH_FS_error, &c->flags))
                bch2_sb_recovery_pass_complete(c, pass, start_time);
@@ -353,80 +349,96 @@ static int bch2_run_recovery_pass(struct bch_fs *c, enum bch_recovery_pass pass)
        return 0;
 }
 
-int bch2_run_online_recovery_passes(struct bch_fs *c)
+static int __bch2_run_recovery_passes(struct bch_fs *c, u64 orig_passes_to_run,
+                                     bool online)
 {
-       for (unsigned i = 0; i < ARRAY_SIZE(recovery_pass_fns); i++) {
-               struct recovery_pass_fn *p = recovery_pass_fns + i;
-
-               if (!(p->when & PASS_ONLINE))
-                       continue;
+       struct bch_fs_recovery *r = &c->recovery;
+       int ret = 0;
 
-               int ret = bch2_run_recovery_pass(c, i);
-               if (bch2_err_matches(ret, BCH_ERR_restart_recovery)) {
-                       i = c->recovery.curr_pass;
-                       continue;
-               }
-               if (ret)
-                       return ret;
-       }
+       spin_lock_irq(&r->lock);
 
-       return 0;
-}
+       if (online)
+               orig_passes_to_run &= bch2_recovery_passes_match(PASS_ONLINE);
 
-int bch2_run_recovery_passes(struct bch_fs *c)
-{
-       int ret = 0;
+       if (c->sb.features & BIT_ULL(BCH_FEATURE_no_alloc_info))
+               orig_passes_to_run &= ~bch2_recovery_passes_match(PASS_ALLOC);
 
        /*
-        * We can't allow set_may_go_rw to be excluded; that would cause us to
-        * use the journal replay keys for updates where it's not expected.
+        * A failed recovery pass will be retried after another pass succeeds -
+        * but not this iteration.
+        *
+        * This is because some passes depend on repair done by other passes: we
+        * may want to retry, but we don't want to loop on failing passes.
         */
-       c->opts.recovery_passes_exclude &= ~BCH_RECOVERY_PASS_set_may_go_rw;
 
-       down(&c->recovery.run_lock);
-       spin_lock_irq(&c->recovery.lock);
+       orig_passes_to_run &= ~r->passes_failing;
 
-       while (c->recovery.curr_pass < ARRAY_SIZE(recovery_pass_fns) && !ret) {
-               unsigned prev_done = c->recovery.pass_done;
-               unsigned pass = c->recovery.curr_pass;
+       r->passes_to_run = orig_passes_to_run;
 
-               c->recovery.next_pass = pass + 1;
+       while (r->passes_to_run) {
+               unsigned prev_done = r->pass_done;
+               unsigned pass = __ffs64(r->passes_to_run);
+               r->curr_pass = pass;
+               r->next_pass = r->curr_pass + 1;
+               r->passes_to_run &= ~BIT_ULL(pass);
 
-               if (c->opts.recovery_pass_last &&
-                   c->recovery.curr_pass > c->opts.recovery_pass_last)
-                       break;
+               spin_unlock_irq(&r->lock);
+
+               int ret2 = bch2_run_recovery_pass(c, pass) ?:
+                       bch2_journal_flush(&c->journal);
 
-               if (should_run_recovery_pass(c, pass)) {
-                       spin_unlock_irq(&c->recovery.lock);
-                       ret =   bch2_run_recovery_pass(c, pass) ?:
-                               bch2_journal_flush(&c->journal);
-                       spin_lock_irq(&c->recovery.lock);
-
-                       if (c->recovery.next_pass < c->recovery.curr_pass) {
-                               /*
-                                * bch2_run_explicit_recovery_pass() was called: we
-                                * can't always catch -BCH_ERR_restart_recovery because
-                                * it may have been called from another thread (btree
-                                * node read completion)
-                                */
-                               ret = 0;
-                               c->recovery.passes_complete &= ~(~0ULL << c->recovery.curr_pass);
-                       } else {
-                               c->recovery.passes_complete |= BIT_ULL(pass);
-                               c->recovery.pass_done = max(c->recovery.pass_done, pass);
-                       }
+               spin_lock_irq(&r->lock);
+
+               if (r->next_pass < r->curr_pass) {
+                       /* Rewind: */
+                       r->passes_to_run |= orig_passes_to_run & (~0ULL << r->next_pass);
+               } else if (!ret2) {
+                       r->pass_done = max(r->pass_done, pass);
+                       r->passes_complete |= BIT_ULL(pass);
+               } else {
+                       ret = ret2;
                }
 
-               c->recovery.curr_pass = c->recovery.next_pass;
+               if (ret && !online)
+                       break;
 
                if (prev_done <= BCH_RECOVERY_PASS_check_snapshots &&
-                   c->recovery.pass_done > BCH_RECOVERY_PASS_check_snapshots) {
+                   r->pass_done > BCH_RECOVERY_PASS_check_snapshots) {
                        bch2_copygc_wakeup(c);
                        bch2_rebalance_wakeup(c);
                }
        }
 
-       spin_unlock_irq(&c->recovery.lock);
+       spin_unlock_irq(&r->lock);
+
+       return ret;
+}
+
+int bch2_run_online_recovery_passes(struct bch_fs *c, u64 passes)
+{
+       return __bch2_run_recovery_passes(c, c->sb.recovery_passes_required|passes, true);
+}
+
+int bch2_run_recovery_passes(struct bch_fs *c, enum bch_recovery_pass from)
+{
+       u64 passes =
+               bch2_recovery_passes_match(PASS_ALWAYS) |
+               (!c->sb.clean ? bch2_recovery_passes_match(PASS_UNCLEAN) : 0) |
+               (c->opts.fsck ? bch2_recovery_passes_match(PASS_FSCK) : 0) |
+               c->opts.recovery_passes |
+               c->sb.recovery_passes_required;
+
+       /*
+        * We can't allow set_may_go_rw to be excluded; that would cause us to
+        * use the journal replay keys for updates where it's not expected.
+        */
+       c->opts.recovery_passes_exclude &= ~BCH_RECOVERY_PASS_set_may_go_rw;
+       passes &= ~c->opts.recovery_passes_exclude;
+
+       passes &= ~(BIT_ULL(from) - 1);
+
+       down(&c->recovery.run_lock);
+       int ret = __bch2_run_recovery_passes(c, passes, false);
        up(&c->recovery.run_lock);
 
        return ret;
index 4c03472be5b903351d63f99fed4b7d8ae1290199..0e79cc33fd8f138459a85df556f033fd2a34373b 100644 (file)
@@ -17,8 +17,8 @@ int __bch2_run_explicit_recovery_pass_persistent(struct bch_fs *, struct printbu
 int bch2_run_explicit_recovery_pass_persistent(struct bch_fs *, struct printbuf *,
                                               enum bch_recovery_pass);
 
-int bch2_run_online_recovery_passes(struct bch_fs *);
-int bch2_run_recovery_passes(struct bch_fs *);
+int bch2_run_online_recovery_passes(struct bch_fs *, u64);
+int bch2_run_recovery_passes(struct bch_fs *, enum bch_recovery_pass);
 
 void bch2_fs_recovery_passes_init(struct bch_fs *);
 
index 69e8e29d58d0d0468c967852104ff465f74faed0..deb6e0565cb9c9623e64f203c21d60a34b579035 100644 (file)
@@ -14,8 +14,10 @@ struct bch_fs_recovery {
        enum bch_recovery_pass  next_pass;
        /* never rewinds version of curr_pass */
        enum bch_recovery_pass  pass_done;
+       u64                     passes_to_run;
        /* bitmask of recovery passes that we actually ran */
        u64                     passes_complete;
+       u64                     passes_failing;
        spinlock_t              lock;
        struct semaphore        run_lock;
 };