git.ipfire.org Git - thirdparty/linux.git/commitdiff
bcachefs: bch2_check_bucket_backpointer_mismatch()
author Kent Overstreet <kent.overstreet@linux.dev>
Fri, 9 May 2025 21:01:05 +0000 (17:01 -0400)
committer Kent Overstreet <kent.overstreet@linux.dev>
Thu, 22 May 2025 00:15:04 +0000 (20:15 -0400)
Detect buckets with missing backpointers, and run repair on demand.

__bch2_move_data_phys() now calls
bch2_check_bucket_backpointer_mismatch() as it walks buckets, which
checks for missing backpointers by comparing backpointers against bucket
sector counts.

When missing backpointers are detected, we kick off
bch2_check_extents_to_backpointers() asynchronously - right away if
we're trying to evacuate, or with a threshold if we're just running
copygc.

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
fs/bcachefs/alloc_background.c
fs/bcachefs/backpointers.c
fs/bcachefs/backpointers.h
fs/bcachefs/move.c
fs/bcachefs/movinggc.c

index 88e710ba2685198b25512506279555bd35fdc4b9..a38b9c6c891e7240562f4f5e1da95ea659ed4fbf 100644 (file)
@@ -2175,8 +2175,11 @@ static int invalidate_one_bucket(struct btree_trans *trans,
        BUG_ON(a->data_type != BCH_DATA_cached);
        BUG_ON(a->dirty_sectors);
 
-       if (!a->cached_sectors)
-               bch_err(c, "invalidating empty bucket, confused");
+       if (!a->cached_sectors) {
+               bch2_check_bucket_backpointer_mismatch(trans, ca, bucket.offset,
+                                                      true, last_flushed);
+               goto out;
+       }
 
        unsigned cached_sectors = a->cached_sectors;
        u8 gen = a->gen;
index 6b98ce1ed6c929b78d6642605f68a4c02ccb0c7a..c08bc66850786c4df021cb05b838c7b5b10a3db9 100644 (file)
@@ -12,6 +12,7 @@
 #include "disk_accounting.h"
 #include "error.h"
 #include "progress.h"
+#include "recovery_passes.h"
 
 #include <linux/mm.h>
 
@@ -804,6 +805,13 @@ static int bch2_get_btree_in_memory_pos(struct btree_trans *trans,
        return ret;
 }
 
+static inline int bch2_fs_going_ro(struct bch_fs *c) /* maps the BCH_FS_going_ro bit in c->flags to an errno-style result */
+{
+       return test_bit(BCH_FS_going_ro, &c->flags)
+               ? -EROFS /* bit set: nonzero result, so the ?: chains in the key-walk loops stop and return this error */
+               : 0; /* bit clear: zero result, so those ?: chains continue to their next operand */
+}
+
 static int bch2_check_extents_to_backpointers_pass(struct btree_trans *trans,
                                                   struct extents_to_bp_state *s)
 {
@@ -831,6 +839,7 @@ static int bch2_check_extents_to_backpointers_pass(struct btree_trans *trans,
 
                        ret = for_each_btree_key_continue(trans, iter, 0, k, ({
                                bch2_progress_update_iter(trans, &progress, &iter, "extents_to_backpointers");
+                               bch2_fs_going_ro(c) ?:
                                check_extent_to_backpointers(trans, s, btree_id, level, k) ?:
                                bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc);
                        }));
@@ -870,6 +879,7 @@ static int data_type_to_alloc_counter(enum bch_data_type t)
 static int check_bucket_backpointers_to_extents(struct btree_trans *, struct bch_dev *, struct bpos);
 
 static int check_bucket_backpointer_mismatch(struct btree_trans *trans, struct bkey_s_c alloc_k,
+                                            bool *had_mismatch,
                                             struct bkey_buf *last_flushed)
 {
        struct bch_fs *c = trans->c;
@@ -877,6 +887,8 @@ static int check_bucket_backpointer_mismatch(struct btree_trans *trans, struct b
        const struct bch_alloc_v4 *a = bch2_alloc_to_v4(alloc_k, &a_convert);
        bool need_commit = false;
 
+       *had_mismatch = false;
+
        if (a->data_type == BCH_DATA_sb ||
            a->data_type == BCH_DATA_journal ||
            a->data_type == BCH_DATA_parity)
@@ -957,6 +969,8 @@ static int check_bucket_backpointer_mismatch(struct btree_trans *trans, struct b
                         ? bch2_bucket_bitmap_set(ca, &ca->bucket_backpointer_empty,
                                                  alloc_k.k->p.offset)
                         : 0);
+
+               *had_mismatch = true;
        }
 err:
        bch2_dev_put(ca);
@@ -1104,7 +1118,9 @@ int bch2_check_extents_to_backpointers(struct bch_fs *c)
 
        ret = for_each_btree_key(trans, iter, BTREE_ID_alloc,
                                 POS_MIN, BTREE_ITER_prefetch, k, ({
-               check_bucket_backpointer_mismatch(trans, k, &s.last_flushed);
+               bool had_mismatch;
+               bch2_fs_going_ro(c) ?:
+               check_bucket_backpointer_mismatch(trans, k, &had_mismatch, &s.last_flushed);
        }));
        if (ret)
                goto err;
@@ -1150,20 +1166,69 @@ int bch2_check_extents_to_backpointers(struct bch_fs *c)
 
                s.bp_start = bpos_successor(s.bp_end);
        }
-err:
-       bch2_trans_put(trans);
-       bch2_bkey_buf_exit(&s.last_flushed, c);
-       bch2_btree_cache_unpin(c);
 
        for_each_member_device(c, ca) {
                bch2_bucket_bitmap_free(&ca->bucket_backpointer_mismatch);
                bch2_bucket_bitmap_free(&ca->bucket_backpointer_empty);
        }
+err:
+       bch2_trans_put(trans);
+       bch2_bkey_buf_exit(&s.last_flushed, c);
+       bch2_btree_cache_unpin(c);
 
        bch_err_fn(c, ret);
        return ret;
 }
 
+static int check_bucket_backpointer_pos_mismatch(struct btree_trans *trans, /* looks up the alloc key at @bucket and runs check_bucket_backpointer_mismatch() on it */
+                                                struct bpos bucket, /* position in BTREE_ID_alloc to look up */
+                                                bool *had_mismatch, /* out: written by check_bucket_backpointer_mismatch(); left untouched if the lookup fails */
+                                                struct bkey_buf *last_flushed) /* forwarded unchanged to check_bucket_backpointer_mismatch() */
+{
+       struct btree_iter alloc_iter;
+       struct bkey_s_c k = bch2_bkey_get_iter(trans, &alloc_iter,
+                                              BTREE_ID_alloc, bucket,
+                                              BTREE_ITER_cached);
+       int ret = bkey_err(k);
+       if (ret)
+               return ret; /* NOTE(review): no bch2_trans_iter_exit() on this path - presumably bch2_bkey_get_iter() releases the iterator on error; confirm */
+
+       ret = check_bucket_backpointer_mismatch(trans, k, had_mismatch, last_flushed);
+       bch2_trans_iter_exit(trans, &alloc_iter); /* iterator is released before returning, whatever the check returned */
+       return ret;
+}
+
+int bch2_check_bucket_backpointer_mismatch(struct btree_trans *trans, /* checks one bucket; on a mismatch, logs it and requests the check_extents_to_backpointers recovery pass */
+                                          struct bch_dev *ca, u64 bucket, /* device, and bucket index on that device */
+                                          bool copygc, /* true: tolerate a threshold of mismatched buckets before dropping the ratelimit flag */
+                                          struct bkey_buf *last_flushed) /* forwarded to the per-bucket check */
+{
+       struct bch_fs *c = trans->c;
+       bool had_mismatch; /* deliberately uninitialised: only read when ret == 0, and the per-bucket check clears it on entry */
+       int ret = lockrestart_do(trans,
+               check_bucket_backpointer_pos_mismatch(trans, POS(ca->dev_idx, bucket),
+                                                     &had_mismatch, last_flushed));
+       if (ret || !had_mismatch) /* the ret test must stay first: on error had_mismatch may never have been written */
+               return ret;
+
+       u64 nr = ca->bucket_backpointer_mismatch.nr; /* presumably the number of buckets currently flagged in the mismatch bitmap - confirm */
+       u64 allowed = copygc ? ca->mi.nbuckets >> 7 : 0; /* copygc threshold is nbuckets/128; otherwise zero, so nr < allowed is never true */
+
+       struct printbuf buf = PRINTBUF;
+       __bch2_log_msg_start(ca->name, &buf);
+
+       prt_printf(&buf, "Detected missing backpointers in bucket %llu, now have %llu/%llu with missing\n",
+                  bucket, nr, ca->mi.nbuckets);
+
+       bch2_run_explicit_recovery_pass(c, &buf, /* NOTE(review): return value is discarded - confirm this is intended best-effort */
+                       BCH_RECOVERY_PASS_check_extents_to_backpointers,
+                       nr < allowed ? RUN_RECOVERY_PASS_ratelimit : 0); /* below the threshold the request carries the ratelimit flag; at or above it, no flags */
+
+       bch2_print_str(c, KERN_ERR, buf.buf); /* emitted after the recovery-pass call, which also received &buf */
+       printbuf_exit(&buf);
+       return 0; /* a detected mismatch is reported as success to the caller */
+}
+
 /* backpointers -> extents */
 
 static int check_one_backpointer(struct btree_trans *trans,
index fe7149a2fbf54227aaeb7c84d3bafd5af8c18f74..6840561084cef021547943944363ecf8e694d18a 100644 (file)
@@ -182,7 +182,8 @@ struct bkey_s_c bch2_backpointer_get_key(struct btree_trans *, struct bkey_s_c_b
 struct btree *bch2_backpointer_get_node(struct btree_trans *, struct bkey_s_c_backpointer,
                                        struct btree_iter *, struct bkey_buf *);
 
-int bch2_check_bucket_backpointer_mismatch(struct btree_trans *, struct bpos, struct bkey_buf *);
+int bch2_check_bucket_backpointer_mismatch(struct btree_trans *, struct bch_dev *, u64,
+                                          bool, struct bkey_buf *);
 
 int bch2_check_btree_backpointers(struct bch_fs *);
 int bch2_check_extents_to_backpointers(struct bch_fs *);
index 49898d5743d4b8ff68467864692d4f0b0a304d1a..0dd3bec3acff9f2acfcafe4fffd14acacf89723b 100644 (file)
@@ -815,6 +815,7 @@ static int __bch2_move_data_phys(struct moving_context *ctxt,
                        u64 bucket_start,
                        u64 bucket_end,
                        unsigned data_types,
+                       bool copygc,
                        move_pred_fn pred, void *arg)
 {
        struct btree_trans *trans = ctxt->trans;
@@ -825,6 +826,7 @@ static int __bch2_move_data_phys(struct moving_context *ctxt,
        struct bkey_buf sk;
        struct bkey_s_c k;
        struct bkey_buf last_flushed;
+       u64 check_mismatch_done = bucket_start;
        int ret = 0;
 
        struct bch_dev *ca = bch2_dev_tryget(c, dev);
@@ -835,8 +837,6 @@ static int __bch2_move_data_phys(struct moving_context *ctxt,
 
        struct bpos bp_start    = bucket_pos_to_bp_start(ca, POS(dev, bucket_start));
        struct bpos bp_end      = bucket_pos_to_bp_end(ca, POS(dev, bucket_end));
-       bch2_dev_put(ca);
-       ca = NULL;
 
        bch2_bkey_buf_init(&last_flushed);
        bkey_init(&last_flushed.k->k);
@@ -871,6 +871,14 @@ static int __bch2_move_data_phys(struct moving_context *ctxt,
                if (!k.k || bkey_gt(k.k->p, bp_end))
                        break;
 
+               if (check_mismatch_done < bp_pos_to_bucket(ca, k.k->p).offset) {
+                       while (check_mismatch_done < bp_pos_to_bucket(ca, k.k->p).offset) {
+                               bch2_check_bucket_backpointer_mismatch(trans, ca, check_mismatch_done++,
+                                                                      copygc, &last_flushed);
+                       }
+                       continue;
+               }
+
                if (k.k->type != KEY_TYPE_backpointer)
                        goto next;
 
@@ -946,10 +954,15 @@ static int __bch2_move_data_phys(struct moving_context *ctxt,
 next:
                bch2_btree_iter_advance(trans, &bp_iter);
        }
+
+       while (check_mismatch_done < bucket_end)
+               bch2_check_bucket_backpointer_mismatch(trans, ca, check_mismatch_done++,
+                                                      copygc, &last_flushed);
 err:
        bch2_trans_iter_exit(trans, &bp_iter);
        bch2_bkey_buf_exit(&sk, c);
        bch2_bkey_buf_exit(&last_flushed, c);
+       bch2_dev_put(ca);
        return ret;
 }
 
@@ -974,7 +987,8 @@ int bch2_move_data_phys(struct bch_fs *c,
                ctxt.stats->data_type = (int) DATA_PROGRESS_DATA_TYPE_phys;
        }
 
-       int ret = __bch2_move_data_phys(&ctxt, NULL, dev, start, end, data_types, pred, arg);
+       int ret = __bch2_move_data_phys(&ctxt, NULL, dev, start, end,
+                                       data_types, false, pred, arg);
        bch2_moving_ctxt_exit(&ctxt);
 
        return ret;
@@ -1019,6 +1033,7 @@ int bch2_evacuate_bucket(struct moving_context *ctxt,
                                   bucket.offset,
                                   bucket.offset + 1,
                                   ~0,
+                                  true,
                                   evacuate_bucket_pred, &arg);
 }
 
index 0a751a65386f523e6d559a3214eb9c999c073d92..7cb0b3d347b473cf5f56ca4e7b3dba4d9134cc1f 100644 (file)
@@ -75,6 +75,9 @@ static int bch2_bucket_is_movable(struct btree_trans *trans,
        if (!ca)
                goto out;
 
+       if (bch2_bucket_bitmap_test(&ca->bucket_backpointer_mismatch, b->k.bucket.offset))
+               goto out;
+
        if (ca->mi.state != BCH_MEMBER_STATE_rw ||
            !bch2_dev_is_online(ca))
                goto out;