From: Kent Overstreet Date: Fri, 9 May 2025 21:01:05 +0000 (-0400) Subject: bcachefs: bch2_check_bucket_backpointer_mismatch() X-Git-Tag: v6.16-rc1~211^2~41 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=39cea302f13a0a9dc4cf39248529a42e79d06842;p=thirdparty%2Fkernel%2Flinux.git bcachefs: bch2_check_bucket_backpointer_mismatch() Detect buckets with missing backpointers, and run repair on demand. __bch2_move_data_phys() now calls bch2_check_bucket_backpointer_mismatch() as it walks buckets, which checks for missing backpointers by comparing backpointers against bucket sector counts. When missing backpointers are detected, we kick off bch2_check_extents_to_backpointers() asynchronously - right away if we're trying to evacuate, or with a threshold if we're just running copygc. Signed-off-by: Kent Overstreet --- diff --git a/fs/bcachefs/alloc_background.c b/fs/bcachefs/alloc_background.c index 88e710ba26851..a38b9c6c891e7 100644 --- a/fs/bcachefs/alloc_background.c +++ b/fs/bcachefs/alloc_background.c @@ -2175,8 +2175,11 @@ static int invalidate_one_bucket(struct btree_trans *trans, BUG_ON(a->data_type != BCH_DATA_cached); BUG_ON(a->dirty_sectors); - if (!a->cached_sectors) - bch_err(c, "invalidating empty bucket, confused"); + if (!a->cached_sectors) { + bch2_check_bucket_backpointer_mismatch(trans, ca, bucket.offset, + true, last_flushed); + goto out; + } unsigned cached_sectors = a->cached_sectors; u8 gen = a->gen; diff --git a/fs/bcachefs/backpointers.c b/fs/bcachefs/backpointers.c index 6b98ce1ed6c92..c08bc66850786 100644 --- a/fs/bcachefs/backpointers.c +++ b/fs/bcachefs/backpointers.c @@ -12,6 +12,7 @@ #include "disk_accounting.h" #include "error.h" #include "progress.h" +#include "recovery_passes.h" #include @@ -804,6 +805,13 @@ static int bch2_get_btree_in_memory_pos(struct btree_trans *trans, return ret; } +static inline int bch2_fs_going_ro(struct bch_fs *c) +{ + return test_bit(BCH_FS_going_ro, &c->flags) + ? -EROFS + : 0; +} + static int bch2_check_extents_to_backpointers_pass(struct btree_trans *trans, struct extents_to_bp_state *s) { @@ -831,6 +839,7 @@ static int bch2_check_extents_to_backpointers_pass(struct btree_trans *trans, ret = for_each_btree_key_continue(trans, iter, 0, k, ({ bch2_progress_update_iter(trans, &progress, &iter, "extents_to_backpointers"); + bch2_fs_going_ro(c) ?: check_extent_to_backpointers(trans, s, btree_id, level, k) ?: bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc); })); @@ -870,6 +879,7 @@ static int data_type_to_alloc_counter(enum bch_data_type t) static int check_bucket_backpointers_to_extents(struct btree_trans *, struct bch_dev *, struct bpos); static int check_bucket_backpointer_mismatch(struct btree_trans *trans, struct bkey_s_c alloc_k, + bool *had_mismatch, struct bkey_buf *last_flushed) { struct bch_fs *c = trans->c; @@ -877,6 +887,8 @@ static int check_bucket_backpointer_mismatch(struct btree_trans *trans, struct b const struct bch_alloc_v4 *a = bch2_alloc_to_v4(alloc_k, &a_convert); bool need_commit = false; + *had_mismatch = false; + if (a->data_type == BCH_DATA_sb || a->data_type == BCH_DATA_journal || a->data_type == BCH_DATA_parity) @@ -957,6 +969,8 @@ static int check_bucket_backpointer_mismatch(struct btree_trans *trans, struct b ? bch2_bucket_bitmap_set(ca, &ca->bucket_backpointer_empty, alloc_k.k->p.offset) : 0); + + *had_mismatch = true; } err: bch2_dev_put(ca); @@ -1104,7 +1118,9 @@ int bch2_check_extents_to_backpointers(struct bch_fs *c) ret = for_each_btree_key(trans, iter, BTREE_ID_alloc, POS_MIN, BTREE_ITER_prefetch, k, ({ - check_bucket_backpointer_mismatch(trans, k, &s.last_flushed); + bool had_mismatch; + bch2_fs_going_ro(c) ?: + check_bucket_backpointer_mismatch(trans, k, &had_mismatch, &s.last_flushed); })); if (ret) goto err; @@ -1150,20 +1166,69 @@ int bch2_check_extents_to_backpointers(struct bch_fs *c) s.bp_start = bpos_successor(s.bp_end); } -err: - bch2_trans_put(trans); - bch2_bkey_buf_exit(&s.last_flushed, c); - bch2_btree_cache_unpin(c); for_each_member_device(c, ca) { bch2_bucket_bitmap_free(&ca->bucket_backpointer_mismatch); bch2_bucket_bitmap_free(&ca->bucket_backpointer_empty); } +err: + bch2_trans_put(trans); + bch2_bkey_buf_exit(&s.last_flushed, c); + bch2_btree_cache_unpin(c); bch_err_fn(c, ret); return ret; } +static int check_bucket_backpointer_pos_mismatch(struct btree_trans *trans, + struct bpos bucket, + bool *had_mismatch, + struct bkey_buf *last_flushed) +{ + struct btree_iter alloc_iter; + struct bkey_s_c k = bch2_bkey_get_iter(trans, &alloc_iter, + BTREE_ID_alloc, bucket, + BTREE_ITER_cached); + int ret = bkey_err(k); + if (ret) + return ret; + + ret = check_bucket_backpointer_mismatch(trans, k, had_mismatch, last_flushed); + bch2_trans_iter_exit(trans, &alloc_iter); + return ret; +} + +int bch2_check_bucket_backpointer_mismatch(struct btree_trans *trans, + struct bch_dev *ca, u64 bucket, + bool copygc, + struct bkey_buf *last_flushed) +{ + struct bch_fs *c = trans->c; + bool had_mismatch; + int ret = lockrestart_do(trans, + check_bucket_backpointer_pos_mismatch(trans, POS(ca->dev_idx, bucket), + &had_mismatch, last_flushed)); + if (ret || !had_mismatch) + return ret; + + u64 nr = ca->bucket_backpointer_mismatch.nr; + u64 allowed = copygc ? ca->mi.nbuckets >> 7 : 0; + + struct printbuf buf = PRINTBUF; + __bch2_log_msg_start(ca->name, &buf); + + prt_printf(&buf, "Detected missing backpointers in bucket %llu, now have %llu/%llu with missing\n", + bucket, nr, ca->mi.nbuckets); + + bch2_run_explicit_recovery_pass(c, &buf, + BCH_RECOVERY_PASS_check_extents_to_backpointers, + nr < allowed ? RUN_RECOVERY_PASS_ratelimit : 0); + + bch2_print_str(c, KERN_ERR, buf.buf); + printbuf_exit(&buf); + return 0; +} + /* backpointers -> extents */ static int check_one_backpointer(struct btree_trans *trans, diff --git a/fs/bcachefs/backpointers.h b/fs/bcachefs/backpointers.h index fe7149a2fbf54..6840561084cef 100644 --- a/fs/bcachefs/backpointers.h +++ b/fs/bcachefs/backpointers.h @@ -182,7 +182,8 @@ struct bkey_s_c bch2_backpointer_get_key(struct btree_trans *, struct bkey_s_c_b struct btree *bch2_backpointer_get_node(struct btree_trans *, struct bkey_s_c_backpointer, struct btree_iter *, struct bkey_buf *); -int bch2_check_bucket_backpointer_mismatch(struct btree_trans *, struct bpos, struct bkey_buf *); +int bch2_check_bucket_backpointer_mismatch(struct btree_trans *, struct bch_dev *, u64, + bool, struct bkey_buf *); int bch2_check_btree_backpointers(struct bch_fs *); int bch2_check_extents_to_backpointers(struct bch_fs *); diff --git a/fs/bcachefs/move.c b/fs/bcachefs/move.c index 49898d5743d4b..0dd3bec3acff9 100644 --- a/fs/bcachefs/move.c +++ b/fs/bcachefs/move.c @@ -815,6 +815,7 @@ static int __bch2_move_data_phys(struct moving_context *ctxt, u64 bucket_start, u64 bucket_end, unsigned data_types, + bool copygc, move_pred_fn pred, void *arg) { struct btree_trans *trans = ctxt->trans; @@ -825,6 +826,7 @@ static int __bch2_move_data_phys(struct moving_context *ctxt, struct bkey_buf sk; struct bkey_s_c k; struct bkey_buf last_flushed; + u64 check_mismatch_done = bucket_start; int ret = 0; struct bch_dev *ca = bch2_dev_tryget(c, dev); @@ -835,8 +837,6 @@ static int __bch2_move_data_phys(struct moving_context *ctxt, struct bpos bp_start = bucket_pos_to_bp_start(ca, POS(dev, bucket_start)); struct bpos bp_end = bucket_pos_to_bp_end(ca, POS(dev, bucket_end)); - bch2_dev_put(ca); - ca = NULL; bch2_bkey_buf_init(&last_flushed); bkey_init(&last_flushed.k->k); @@ -871,6 +871,14 @@ static int __bch2_move_data_phys(struct moving_context *ctxt, if (!k.k || bkey_gt(k.k->p, bp_end)) break; + if (check_mismatch_done < bp_pos_to_bucket(ca, k.k->p).offset) { + while (check_mismatch_done < bp_pos_to_bucket(ca, k.k->p).offset) { + bch2_check_bucket_backpointer_mismatch(trans, ca, check_mismatch_done++, + copygc, &last_flushed); + } + continue; + } + if (k.k->type != KEY_TYPE_backpointer) goto next; @@ -946,10 +954,15 @@ static int __bch2_move_data_phys(struct moving_context *ctxt, next: bch2_btree_iter_advance(trans, &bp_iter); } + + while (check_mismatch_done < bucket_end) + bch2_check_bucket_backpointer_mismatch(trans, ca, check_mismatch_done++, + copygc, &last_flushed); err: bch2_trans_iter_exit(trans, &bp_iter); bch2_bkey_buf_exit(&sk, c); bch2_bkey_buf_exit(&last_flushed, c); + bch2_dev_put(ca); return ret; } @@ -974,7 +987,8 @@ int bch2_move_data_phys(struct bch_fs *c, ctxt.stats->data_type = (int) DATA_PROGRESS_DATA_TYPE_phys; } - int ret = __bch2_move_data_phys(&ctxt, NULL, dev, start, end, data_types, pred, arg); + int ret = __bch2_move_data_phys(&ctxt, NULL, dev, start, end, + data_types, false, pred, arg); bch2_moving_ctxt_exit(&ctxt); return ret; @@ -1019,6 +1033,7 @@ int bch2_evacuate_bucket(struct moving_context *ctxt, bucket.offset, bucket.offset + 1, ~0, + true, evacuate_bucket_pred, &arg); } diff --git a/fs/bcachefs/movinggc.c b/fs/bcachefs/movinggc.c index 0a751a65386f5..7cb0b3d347b47 100644 --- a/fs/bcachefs/movinggc.c +++ b/fs/bcachefs/movinggc.c @@ -75,6 +75,9 @@ static int bch2_bucket_is_movable(struct btree_trans *trans, if (!ca) goto out; + if (bch2_bucket_bitmap_test(&ca->bucket_backpointer_mismatch, b->k.bucket.offset)) + goto out; + if (ca->mi.state != BCH_MEMBER_STATE_rw || !bch2_dev_is_online(ca)) goto out;