git.ipfire.org Git - thirdparty/kernel/linux.git/commitdiff
bcachefs: bch2_btree_node_scrub()
author Kent Overstreet <kent.overstreet@linux.dev>
Mon, 30 Dec 2024 21:24:23 +0000 (16:24 -0500)
committer Kent Overstreet <kent.overstreet@linux.dev>
Sat, 15 Mar 2025 01:02:12 +0000 (21:02 -0400)
Add a function for scrubbing btree nodes - reading them in, and kicking
off a rewrite if there's an error.

The btree_node_read_done() checks have to be duplicated because we're
not using a pointer to a struct btree - the btree node might already be
in cache, and we need to check a specific replica, which might not be
the one we previously read from.

This will be used in the next patch implementing high-level scrub.

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
fs/bcachefs/bcachefs.h
fs/bcachefs/btree_io.c
fs/bcachefs/btree_io.h
fs/bcachefs/errcode.h

index 161cf2f05d2ad97444b0f297cd4702a4e0495216..13acfbf3852aef2ed6600625aecdfb1a844e032a 100644 (file)
@@ -687,7 +687,8 @@ struct btree_trans_buf {
        x(gc_gens)                                                      \
        x(snapshot_delete_pagecache)                                    \
        x(sysfs)                                                        \
-       x(btree_write_buffer)
+       x(btree_write_buffer)                                           \
+       x(btree_node_scrub)
 
 enum bch_write_ref {
 #define x(n) BCH_WRITE_REF_##n,
index fc1c01fd2d8dc5802ed7930e203ee34ade42e8e9..91c624db2958ed5b3400da9cd1c83c22652257e0 100644 (file)
@@ -1,6 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0
 
 #include "bcachefs.h"
+#include "bkey_buf.h"
 #include "bkey_methods.h"
 #include "bkey_sort.h"
 #include "btree_cache.h"
@@ -1811,6 +1812,190 @@ int bch2_btree_root_read(struct bch_fs *c, enum btree_id id,
        return bch2_trans_run(c, __bch2_btree_root_read(trans, id, k, level));
 }
 
+struct btree_node_scrub {              /* state for one in-flight scrub read of a single btree node replica */
+       struct bch_fs           *c;             /* filesystem; taken from trans->c by the submitter */
+       struct bch_dev          *ca;            /* device being read; its io_ref is dropped by the work item */
+       void                    *buf;           /* bounce buffer the node is read into */
+       bool                    used_mempool;   /* handed back to btree_bounce_free() together with buf */
+       unsigned                written;        /* btree_ptr_sectors_written() of the key at submit time */
+
+       enum btree_id           btree;          /* btree id given by the caller */
+       unsigned                level;          /* level given by the caller; the work item looks the node up at level - 1 */
+       struct bkey_buf         key;            /* private copy of the key that was scrubbed */
+       __le64                  seq;            /* btree_ptr_v2 seq at submit time; the rewrite only happens if it still matches */
+
+       struct work_struct      work;           /* queued from the bio completion handler */
+       struct bio              bio;            /* read bio; the allocation reserves bio_vecs right after the struct for bi_inline_vecs - presumably must stay last */
+};
+
+/*
+ * Validate the raw contents of a btree node that was read for scrub.
+ *
+ * @c:           filesystem the node belongs to
+ * @data:        buffer holding the node as it was read from disk
+ * @ptr_written: sectors written according to the key; 0 means fall back to
+ *               the full btree node size
+ * @err:         printbuf that receives a description of the first problem
+ *
+ * Walks every bset in the node: the first one lives in the btree_node header,
+ * later ones are btree_node_entries located at the running sector offset.
+ * For each bset whose checksum type is recognised, the checksum is recomputed
+ * and compared with the stored one.  When @ptr_written is 0 the walk stops at
+ * the first entry whose seq differs from the first bset's seq.
+ *
+ * Returns true if nothing wrong was found; false (with @err filled in) on bad
+ * magic, on a bset that claims to extend past the end of the node, or on a
+ * checksum mismatch.
+ */
+static bool btree_node_scrub_check(struct bch_fs *c, struct btree_node *data, unsigned ptr_written,
+                                  struct printbuf *err)
+{
+       size_t node_sectors = ptr_written ?: btree_sectors(c);
+       unsigned written = 0;
+
+       if (le64_to_cpu(data->magic) != bset_magic(c)) {
+               prt_printf(err, "bad magic: want %llx, got %llx",
+                          bset_magic(c), le64_to_cpu(data->magic));
+               return false;
+       }
+
+       while (written < node_sectors) {
+               struct btree_node_entry *bne;
+               struct bset *i;
+               bool first = !written;
+               size_t sectors;
+
+               if (first) {
+                       bne = NULL;
+                       i = &data->keys;
+                       sectors = vstruct_sectors(data, c->block_bits);
+               } else {
+                       bne = (void *) data + (written << 9);
+                       i = &bne->keys;
+
+                       if (!ptr_written && i->seq != data->keys.seq)
+                               break;
+
+                       sectors = vstruct_sectors(bne, c->block_bits);
+               }
+
+               /*
+                * The bset length comes straight from disk and may be
+                * corrupt: refuse to checksum anything that would run past
+                * the end of the node (and so, presumably, past the end of
+                * the buffer it was read into).
+                */
+               if (written + sectors > node_sectors) {
+                       prt_printf(err, "bset past end of btree node (offset %u len %zu but written %zu)",
+                                  written, sectors, node_sectors);
+                       return false;
+               }
+
+               struct nonce nonce = btree_nonce(i, written << 9);
+               bool good_csum_type = bch2_checksum_type_valid(c, BSET_CSUM_TYPE(i));
+
+               /* Bsets with an unrecognised checksum type are skipped, not failed. */
+               if (first) {
+                       if (good_csum_type) {
+                               struct bch_csum csum = csum_vstruct(c, BSET_CSUM_TYPE(i), nonce, data);
+                               if (bch2_crc_cmp(data->csum, csum)) {
+                                       bch2_csum_err_msg(err, BSET_CSUM_TYPE(i), data->csum, csum);
+                                       return false;
+                               }
+                       }
+               } else {
+                       if (good_csum_type) {
+                               struct bch_csum csum = csum_vstruct(c, BSET_CSUM_TYPE(i), nonce, bne);
+                               if (bch2_crc_cmp(bne->csum, csum)) {
+                                       bch2_csum_err_msg(err, BSET_CSUM_TYPE(i), bne->csum, csum);
+                                       return false;
+                               }
+                       }
+               }
+
+               written += sectors;
+       }
+
+       return true;
+}
+
+/*
+ * Workqueue half of a scrub read: check what was read and, if it is bad,
+ * kick off a rewrite of the node.
+ *
+ * @work: the work_struct embedded in a struct btree_node_scrub
+ *
+ * The error text always starts with the btree position of the scrubbed key.
+ * If btree_node_scrub_check() rejects the buffer, the node is looked up again
+ * and rewritten, but only when the seq in its current key still equals the
+ * seq recorded at submit time.
+ *
+ * Whatever the outcome, this releases everything the submitter handed over:
+ * the key copy, the bounce buffer, the device io_ref, the scrub object itself
+ * and finally the btree_node_scrub write ref.
+ */
+static void btree_node_scrub_work(struct work_struct *work)
+{
+       struct btree_node_scrub *scrub = container_of(work, struct btree_node_scrub, work);
+       struct bch_fs *c = scrub->c;
+       struct printbuf err = PRINTBUF;
+
+       __bch2_btree_pos_to_text(&err, c, scrub->btree, scrub->level,
+                                bkey_i_to_s_c(scrub->key.k));
+       prt_newline(&err);
+
+       if (!btree_node_scrub_check(c, scrub->buf, scrub->written, &err)) {
+               struct btree_trans *trans = bch2_trans_get(c);
+
+               /* The node itself is looked up one level below scrub->level. */
+               struct btree_iter iter;
+               bch2_trans_node_iter_init(trans, &iter, scrub->btree,
+                                         scrub->key.k->k.p, 0, scrub->level - 1, 0);
+
+               struct btree *b;
+               int ret = lockrestart_do(trans, PTR_ERR_OR_ZERO(b = bch2_btree_iter_peek_node(&iter)));
+               if (ret)
+                       goto err;
+
+               /* Skip the rewrite if the node's seq changed since the read was issued. */
+               if (bkey_i_to_btree_ptr_v2(&b->key)->v.seq == scrub->seq) {
+                       bch_err(c, "error validating btree node during scrub on %s at btree %s",
+                               scrub->ca->name, err.buf);
+
+                       /* The result of the rewrite is not acted upon here. */
+                       ret = bch2_btree_node_rewrite(trans, &iter, b, 0);
+               }
+err:
+               bch2_trans_iter_exit(trans, &iter);
+               /* NOTE(review): purpose of restarting the transaction right before the put is not visible here - confirm. */
+               bch2_trans_begin(trans);
+               bch2_trans_put(trans);
+       }
+
+       printbuf_exit(&err);
+       bch2_bkey_buf_exit(&scrub->key, c);
+       btree_bounce_free(c, c->opts.btree_node_size, scrub->used_mempool, scrub->buf);
+       percpu_ref_put(&scrub->ca->io_ref);
+       kfree(scrub);
+       bch2_write_ref_put(c, BCH_WRITE_REF_btree_node_scrub);
+}
+
+/*
+ * Completion handler for the scrub read bio.
+ *
+ * Recovers the enclosing btree_node_scrub from @bio and defers the rest of
+ * the processing to the filesystem's btree_read_complete_wq workqueue.
+ *
+ * NOTE(review): bio->bi_status is not looked at here - confirm that a failed
+ * read is meant to be caught only through the content checks.
+ */
+static void btree_node_scrub_endio(struct bio *bio)
+{
+       struct btree_node_scrub *scrub = container_of(bio, struct btree_node_scrub, bio);
+       struct bch_fs *c = scrub->c;
+
+       queue_work(c->btree_read_complete_wq, &scrub->work);
+}
+
+/*
+ * Start an asynchronous scrub read of one replica of a btree node.
+ *
+ * @trans: btree transaction; only used to reach the filesystem
+ * @btree: btree id, recorded for the completion path
+ * @level: btree level, recorded for the completion path
+ * @k:     key pointing at the node; anything but a btree_ptr_v2 is skipped
+ * @dev:   device argument forwarded to bch2_bkey_pick_read_device()
+ *
+ * On successful submission the btree_node_scrub write ref, the device io_ref,
+ * the bounce buffer and the scrub object are all released later by the work
+ * item that runs after the read completes.
+ *
+ * Returns 0 when the read was submitted, when @k is not a btree_ptr_v2, or
+ * when bch2_bkey_pick_read_device() returns 0; otherwise a negative error:
+ * -BCH_ERR_erofs_no_writes if the write ref cannot be taken, the error from
+ * bch2_bkey_pick_read_device(), -BCH_ERR_device_offline if no io ref could be
+ * obtained on the picked device, or -ENOMEM.
+ */
+int bch2_btree_node_scrub(struct btree_trans *trans,
+                         enum btree_id btree, unsigned level,
+                         struct bkey_s_c k, unsigned dev)
+{
+       struct bch_fs *c = trans->c;
+       struct extent_ptr_decoded pick;
+       struct btree_node_scrub *scrub;
+       struct bch_dev *ca;
+       bool used_mempool = false;
+       unsigned nr_vecs;
+       void *buf;
+       int ret;
+
+       if (k.k->type != KEY_TYPE_btree_ptr_v2)
+               return 0;
+
+       if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_btree_node_scrub))
+               return -BCH_ERR_erofs_no_writes;
+
+       ret = bch2_bkey_pick_read_device(c, k, NULL, &pick, dev);
+       if (ret <= 0)
+               goto err;
+
+       ca = bch2_dev_get_ioref(c, pick.ptr.dev, READ);
+       if (!ca) {
+               ret = -BCH_ERR_device_offline;
+               goto err;
+       }
+
+       buf = btree_bounce_alloc(c, c->opts.btree_node_size, &used_mempool);
+       nr_vecs = buf_pages(buf, c->opts.btree_node_size);
+
+       /* Room for the bio's inline bio_vecs is reserved right behind the struct. */
+       scrub = kzalloc(sizeof(*scrub) + nr_vecs * sizeof(struct bio_vec), GFP_KERNEL);
+       if (!scrub) {
+               ret = -ENOMEM;
+               goto err_free;
+       }
+
+       scrub->c                = c;
+       scrub->ca               = ca;
+       scrub->buf              = buf;
+       scrub->used_mempool     = used_mempool;
+       scrub->written          = btree_ptr_sectors_written(k);
+       scrub->btree            = btree;
+       scrub->level            = level;
+       scrub->seq              = bkey_s_c_to_btree_ptr_v2(k).v->seq;
+
+       /* Keep a private copy of the key for the completion path. */
+       bch2_bkey_buf_init(&scrub->key);
+       bch2_bkey_buf_reassemble(&scrub->key, c, k);
+
+       INIT_WORK(&scrub->work, btree_node_scrub_work);
+
+       bio_init(&scrub->bio, ca->disk_sb.bdev, scrub->bio.bi_inline_vecs, nr_vecs, REQ_OP_READ);
+       bch2_bio_map(&scrub->bio, scrub->buf, c->opts.btree_node_size);
+       scrub->bio.bi_end_io            = btree_node_scrub_endio;
+       scrub->bio.bi_iter.bi_sector    = pick.ptr.offset;
+       submit_bio(&scrub->bio);
+       return 0;
+
+err_free:
+       btree_bounce_free(c, c->opts.btree_node_size, used_mempool, buf);
+       percpu_ref_put(&ca->io_ref);
+err:
+       bch2_write_ref_put(c, BCH_WRITE_REF_btree_node_scrub);
+       return ret;
+}
+
 static void bch2_btree_complete_write(struct bch_fs *c, struct btree *b,
                                      struct btree_write *w)
 {
index 6f9e4a6dacf784721422f582367ebd61252e0fd7..75ead3815d672dd0f2f81c9c04551a79c34aa9cd 100644 (file)
@@ -132,6 +132,9 @@ void bch2_btree_node_read(struct btree_trans *, struct btree *, bool);
 int bch2_btree_root_read(struct bch_fs *, enum btree_id,
                         const struct bkey_i *, unsigned);
 
+int bch2_btree_node_scrub(struct btree_trans *, enum btree_id, unsigned,
+                         struct bkey_s_c, unsigned);
+
 bool bch2_btree_post_write_cleanup(struct bch_fs *, struct btree *);
 
 enum btree_write_flags {
index 1e8f65f95d60659ed6fddeec5179ea9fa2074138..20bfdee42309d92ae5cee080bf3c42f85ed141e9 100644 (file)
        x(EIO,                          invalidate_stripe_to_dev)               \
        x(EIO,                          no_encryption_key)                      \
        x(EIO,                          insufficient_journal_devices)           \
+       x(EIO,                          device_offline)                         \
        x(BCH_ERR_btree_node_read_err,  btree_node_read_err_fixable)            \
        x(BCH_ERR_btree_node_read_err,  btree_node_read_err_want_retry)         \
        x(BCH_ERR_btree_node_read_err,  btree_node_read_err_must_retry)         \