From: Kent Overstreet <kent.overstreet@linux.dev>
Date: Mon, 30 Dec 2024 21:32:57 +0000 (-0500)
Subject: bcachefs: Internal reads can now correct errors
X-Git-Tag: v6.15-rc1~146^2~152
X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=dff6de9518848b5afa0bc6fec57e657701be67ec;p=thirdparty%2Fkernel%2Flinux.git

bcachefs: Internal reads can now correct errors

Rework the read path so that BCH_READ_NODECODE reads now also self-heal
after a read error and a successful retry - a prerequisite for scrub.

- __bch2_read_endio() now handles a read that's both BCH_READ_NODECODE
  and a bounce.

  Normally, we don't want a BCH_READ_NODECODE read to ever allocate a
  split bch_read_bio: we want to maintain the relationship between the
  bch_read_bio and the data_update it's embedded in.

  But correcting read errors requires allocating a split/bounce rbio
  that's embedded in a promote_op. We do still have a 1-1 relationship,
  i.e. we only allocate a single split/bounce if it's a
  BCH_READ_NODECODE read, so things hopefully don't get too crazy.

- __bch2_read_extent() is now allowed to allocate the promote_op for
  rewriting after a failed read, even if it's BCH_READ_NODECODE.

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
---
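For orientation (a condensed sketch, not part of the original patch):
the reworked __bch2_read_endio() control flow boils down to the
following, with error paths and the encrypt/decompress details elided.
Every name here is taken from the diff below:

	if (likely(!(rbio->flags & BCH_READ_data_update))) {
		/* Normal read: adjust the crc to the subset of the extent
		 * we want, decrypt/decompress into the destination, and
		 * copy out of the bounce buffer if one was used. */
	} else {
		/*
		 * BCH_READ_NODECODE read for a data_update: the data stays
		 * encoded, but if this rbio was split off to retry after a
		 * read error, propagate the extent pointer we actually read
		 * from back to the parent, so the data_update knows which
		 * copy it now holds:
		 */
		if (rbio->split)
			rbio->parent->pick = rbio->pick;

		/* ...then copy the raw data back out of the bounce buffer. */
	}

This replaces the old scheme of recording orig->pick at submit time in
__bch2_read_extent(), which couldn't reflect a retry that read from a
different copy.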
diff --git a/fs/bcachefs/io_read.c b/fs/bcachefs/io_read.c
index bb5d1de25aa19..18c8e54f455e5 100644
--- a/fs/bcachefs/io_read.c
+++ b/fs/bcachefs/io_read.c
@@ -696,32 +696,40 @@ static void __bch2_read_endio(struct work_struct *work)
 	if (unlikely(rbio->narrow_crcs))
 		bch2_rbio_narrow_crcs(rbio);
 
-	if (rbio->flags & BCH_READ_data_update)
-		goto nodecode;
-
-	/* Adjust crc to point to subset of data we want: */
-	crc.offset += rbio->offset_into_extent;
-	crc.live_size = bvec_iter_sectors(rbio->bvec_iter);
+	if (likely(!(rbio->flags & BCH_READ_data_update))) {
+		/* Adjust crc to point to subset of data we want: */
+		crc.offset += rbio->offset_into_extent;
+		crc.live_size = bvec_iter_sectors(rbio->bvec_iter);
+
+		if (crc_is_compressed(crc)) {
+			ret = bch2_encrypt_bio(c, crc.csum_type, nonce, src);
+			if (ret)
+				goto decrypt_err;
+
+			if (bch2_bio_uncompress(c, src, dst, dst_iter, crc) &&
+			    !c->opts.no_data_io)
+				goto decompression_err;
+		} else {
+			/* don't need to decrypt the entire bio: */
+			nonce = nonce_add(nonce, crc.offset << 9);
+			bio_advance(src, crc.offset << 9);
 
-	if (crc_is_compressed(crc)) {
-		ret = bch2_encrypt_bio(c, crc.csum_type, nonce, src);
-		if (ret)
-			goto decrypt_err;
+			BUG_ON(src->bi_iter.bi_size < dst_iter.bi_size);
+			src->bi_iter.bi_size = dst_iter.bi_size;
 
-		if (bch2_bio_uncompress(c, src, dst, dst_iter, crc) &&
-		    !c->opts.no_data_io)
-			goto decompression_err;
-	} else {
-		/* don't need to decrypt the entire bio: */
-		nonce = nonce_add(nonce, crc.offset << 9);
-		bio_advance(src, crc.offset << 9);
+			ret = bch2_encrypt_bio(c, crc.csum_type, nonce, src);
+			if (ret)
+				goto decrypt_err;
 
-		BUG_ON(src->bi_iter.bi_size < dst_iter.bi_size);
-		src->bi_iter.bi_size = dst_iter.bi_size;
+			if (rbio->bounce) {
+				struct bvec_iter src_iter = src->bi_iter;
 
-		ret = bch2_encrypt_bio(c, crc.csum_type, nonce, src);
-		if (ret)
-			goto decrypt_err;
+				bio_copy_data_iter(dst, &dst_iter, src, &src_iter);
+			}
+		}
+	} else {
+		if (rbio->split)
+			rbio->parent->pick = rbio->pick;
 
 	if (rbio->bounce) {
 		struct bvec_iter src_iter = src->bi_iter;
@@ -739,7 +747,7 @@ static void __bch2_read_endio(struct work_struct *work)
 		if (ret)
 			goto decrypt_err;
 	}
-nodecode:
+
 	if (likely(!(rbio->flags & BCH_READ_in_retry))) {
 		rbio = bch2_rbio_free(rbio);
 		bch2_rbio_done(rbio);
@@ -931,13 +939,35 @@ retry_pick:
 		goto retry_pick;
 	}
 
-	if (flags & BCH_READ_data_update) {
-		struct data_update *u = container_of(orig, struct data_update, rbio);
+	if (!(flags & BCH_READ_data_update)) {
+		if (!(flags & BCH_READ_last_fragment) ||
+		    bio_flagged(&orig->bio, BIO_CHAIN))
+			flags |= BCH_READ_must_clone;
+
+		narrow_crcs = !(flags & BCH_READ_in_retry) &&
+			bch2_can_narrow_extent_crcs(k, pick.crc);
+
+		if (narrow_crcs && (flags & BCH_READ_user_mapped))
+			flags |= BCH_READ_must_bounce;
 
+		EBUG_ON(offset_into_extent + bvec_iter_sectors(iter) > k.k->size);
+
+		if (crc_is_compressed(pick.crc) ||
+		    (pick.crc.csum_type != BCH_CSUM_none &&
+		     (bvec_iter_sectors(iter) != pick.crc.uncompressed_size ||
+		      (bch2_csum_type_is_encryption(pick.crc.csum_type) &&
+		       (flags & BCH_READ_user_mapped)) ||
+		      (flags & BCH_READ_must_bounce)))) {
+			read_full = true;
+			bounce = true;
+		}
+	} else {
+		read_full = true;
 		/*
 		 * can happen if we retry, and the extent we were going to read
 		 * has been merged in the meantime:
 		 */
+		struct data_update *u = container_of(orig, struct data_update, rbio);
 		if (pick.crc.compressed_size > u->op.wbio.bio.bi_iter.bi_size) {
 			if (ca)
 				percpu_ref_put(&ca->io_ref);
@@ -945,29 +975,6 @@ retry_pick:
 		}
 
 		iter.bi_size = pick.crc.compressed_size << 9;
-		goto get_bio;
-	}
-
-	if (!(flags & BCH_READ_last_fragment) ||
-	    bio_flagged(&orig->bio, BIO_CHAIN))
-		flags |= BCH_READ_must_clone;
-
-	narrow_crcs = !(flags & BCH_READ_in_retry) &&
-		bch2_can_narrow_extent_crcs(k, pick.crc);
-
-	if (narrow_crcs && (flags & BCH_READ_user_mapped))
-		flags |= BCH_READ_must_bounce;
-
-	EBUG_ON(offset_into_extent + bvec_iter_sectors(iter) > k.k->size);
-
-	if (crc_is_compressed(pick.crc) ||
-	    (pick.crc.csum_type != BCH_CSUM_none &&
-	     (bvec_iter_sectors(iter) != pick.crc.uncompressed_size ||
-	      (bch2_csum_type_is_encryption(pick.crc.csum_type) &&
-	       (flags & BCH_READ_user_mapped)) ||
-	      (flags & BCH_READ_must_bounce)))) {
-		read_full = true;
-		bounce = true;
 	}
 
 	if (orig->opts.promote_target || have_io_error(failed))
@@ -991,7 +998,7 @@ retry_pick:
 		pick.crc.offset = 0;
 		pick.crc.live_size = bvec_iter_sectors(iter);
 	}
-get_bio:
+
 	if (rbio) {
 		/*
 		 * promote already allocated bounce rbio:
@@ -1055,9 +1062,6 @@ get_bio:
 	rbio->version = k.k->bversion;
 	INIT_WORK(&rbio->work, NULL);
 
-	if (flags & BCH_READ_data_update)
-		orig->pick = pick;
-
 	rbio->bio.bi_opf = orig->bio.bi_opf;
 	rbio->bio.bi_iter.bi_sector = pick.ptr.offset;
 	rbio->bio.bi_end_io = bch2_read_endio;
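Likewise for the second bullet (again a sketch, not part of the patch):
with the old "goto get_bio" early exit removed, BCH_READ_NODECODE
(data_update) reads now fall through to the promote/rewrite gate in
__bch2_read_extent(), visible as unchanged context in the hunks above.
The argument list of promote_alloc() is elided here rather than quoted
from this patch:

	/*
	 * Allocate a promote op either to promote to a faster device or,
	 * after an IO error, to rewrite the bad copy - the path that makes
	 * self-healing (and eventually scrub) work:
	 */
	if (orig->opts.promote_target || have_io_error(failed))
		rbio = promote_alloc(...);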