git.ipfire.org Git - thirdparty/kernel/linux.git/commitdiff
bcachefs: Be precise about bch_io_failures
author: Kent Overstreet <kent.overstreet@linux.dev>
Mon, 10 Mar 2025 17:33:41 +0000 (13:33 -0400)
committer: Kent Overstreet <kent.overstreet@linux.dev>
Thu, 22 May 2025 00:13:17 +0000 (20:13 -0400)
If the extent we're reading from changes, due to being overwritten or
moved (possibly partially) - we need to reset bch_io_failures so that we
don't accidentally mark a new extent as poisoned prematurely.

This means we have to separately track (in the retry path) the extent we
previously read from.

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
fs/bcachefs/bkey.h
fs/bcachefs/io_read.c
fs/bcachefs/io_read.h

index 054e2d5e8448a36ce1bd21f40258e2b355f2e117..082632905649e638d4f7623c51b1af1b21708a08 100644 (file)
@@ -191,6 +191,7 @@ static inline struct bpos bkey_max(struct bpos l, struct bpos r)
 static inline bool bkey_and_val_eq(struct bkey_s_c l, struct bkey_s_c r)
 {
        return bpos_eq(l.k->p, r.k->p) &&
+               l.k->size == r.k->size &&
                bkey_bytes(l.k) == bkey_bytes(r.k) &&
                !memcmp(l.v, r.v, bkey_val_bytes(l.k));
 }
index def4a26a3b4590c593bf415303a0e6090c0e01ca..3705b606f675a4bca4ad3ba7e41980d0025bdc70 100644 (file)
@@ -296,6 +296,13 @@ static struct bch_read_bio *promote_alloc(struct btree_trans *trans,
                                        bool *read_full,
                                        struct bch_io_failures *failed)
 {
+       /*
+        * We're in the retry path, but we don't know what to repair yet, and we
+        * don't want to do a promote here:
+        */
+       if (failed && !failed->nr)
+               return NULL;
+
        struct bch_fs *c = trans->c;
        /*
         * if failed != NULL we're not actually doing a promote, we're
@@ -430,6 +437,28 @@ static void bch2_rbio_done(struct bch_read_bio *rbio)
        bio_endio(&rbio->bio);
 }
 
+static void get_rbio_extent(struct btree_trans *trans,
+                           struct bch_read_bio *rbio,
+                           struct bkey_buf *sk)
+{
+       struct btree_iter iter;
+       struct bkey_s_c k;
+       int ret = lockrestart_do(trans,
+                       bkey_err(k = bch2_bkey_get_iter(trans, &iter,
+                                               rbio->data_btree, rbio->data_pos, 0)));
+       if (ret)
+               return;
+
+       struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
+       bkey_for_each_ptr(ptrs, ptr)
+               if (bch2_extent_ptr_eq(*ptr, rbio->pick.ptr)) {
+                       bch2_bkey_buf_reassemble(sk, trans->c, k);
+                       break;
+               }
+
+       bch2_trans_iter_exit(trans, &iter);
+}
+
 static noinline int bch2_read_retry_nodecode(struct btree_trans *trans,
                                        struct bch_read_bio *rbio,
                                        struct bvec_iter bvec_iter,
@@ -491,11 +520,18 @@ static void bch2_rbio_retry(struct work_struct *work)
 
        struct btree_trans *trans = bch2_trans_get(c);
 
+       struct bkey_buf sk;
+       bch2_bkey_buf_init(&sk);
+       bkey_init(&sk.k->k);
+
        trace_io_read_retry(&rbio->bio);
        this_cpu_add(c->counters[BCH_COUNTER_io_read_retry],
                     bvec_iter_sectors(rbio->bvec_iter));
 
-       if (bch2_err_matches(rbio->ret, BCH_ERR_data_read_retry_avoid))
+       get_rbio_extent(trans, rbio, &sk);
+
+       if (!bkey_deleted(&sk.k->k) &&
+           bch2_err_matches(rbio->ret, BCH_ERR_data_read_retry_avoid))
                bch2_mark_io_failure(&failed, &rbio->pick,
                                     rbio->ret == -BCH_ERR_data_read_retry_csum_err);
 
@@ -516,7 +552,7 @@ static void bch2_rbio_retry(struct work_struct *work)
 
        int ret = rbio->data_update
                ? bch2_read_retry_nodecode(trans, rbio, iter, &failed, flags)
-               : __bch2_read(trans, rbio, iter, inum, &failed, flags);
+               : __bch2_read(trans, rbio, iter, inum, &failed, &sk, flags);
 
        if (ret) {
                rbio->ret = ret;
@@ -539,6 +575,7 @@ static void bch2_rbio_retry(struct work_struct *work)
        }
 
        bch2_rbio_done(rbio);
+       bch2_bkey_buf_exit(&sk, c);
        bch2_trans_put(trans);
 }
 
@@ -1265,7 +1302,9 @@ out_read_done:
 
 int __bch2_read(struct btree_trans *trans, struct bch_read_bio *rbio,
                struct bvec_iter bvec_iter, subvol_inum inum,
-               struct bch_io_failures *failed, unsigned flags)
+               struct bch_io_failures *failed,
+               struct bkey_buf *prev_read,
+               unsigned flags)
 {
        struct bch_fs *c = trans->c;
        struct btree_iter iter;
@@ -1313,6 +1352,12 @@ int __bch2_read(struct btree_trans *trans, struct bch_read_bio *rbio,
 
                k = bkey_i_to_s_c(sk.k);
 
+               if (unlikely(flags & BCH_READ_in_retry)) {
+                       if (!bkey_and_val_eq(k, bkey_i_to_s_c(prev_read->k)))
+                               failed->nr = 0;
+                       bch2_bkey_buf_copy(prev_read, c, sk.k);
+               }
+
                /*
                 * With indirect extents, the amount of data to read is the min
                 * of the original extent and the indirect extent:
index c78025d863e0ff37a9a49a2ac66f3e7c5546794f..1a85b092fd1d3291b3fb8083aa9bc6bc6b41679c 100644 (file)
@@ -144,7 +144,8 @@ static inline void bch2_read_extent(struct btree_trans *trans,
 }
 
 int __bch2_read(struct btree_trans *, struct bch_read_bio *, struct bvec_iter,
-               subvol_inum, struct bch_io_failures *, unsigned flags);
+               subvol_inum,
+               struct bch_io_failures *, struct bkey_buf *, unsigned flags);
 
 static inline void bch2_read(struct bch_fs *c, struct bch_read_bio *rbio,
                             subvol_inum inum)
@@ -154,7 +155,7 @@ static inline void bch2_read(struct bch_fs *c, struct bch_read_bio *rbio,
        rbio->subvol = inum.subvol;
 
        bch2_trans_run(c,
-               __bch2_read(trans, rbio, rbio->bio.bi_iter, inum, NULL,
+               __bch2_read(trans, rbio, rbio->bio.bi_iter, inum, NULL, NULL,
                            BCH_READ_retry_if_stale|
                            BCH_READ_may_promote|
                            BCH_READ_user_mapped));