btrfs: concentrate highmem handling for data verification
author    Qu Wenruo <wqu@suse.com>
          Tue, 2 Sep 2025 01:21:25 +0000 (10:51 +0930)
committer David Sterba <dsterba@suse.com>
          Tue, 23 Sep 2025 06:49:16 +0000 (08:49 +0200)
Currently btrfs does its data checksum verification in the following
pattern:

kaddr = kmap_local_*();
ret = btrfs_check_sector_csum(kaddr);
kunmap_local(kaddr);

It's OK for now, but it still doesn't follow the pattern of the helpers
in linux/highmem.h, which never require a virtual memory address from
the caller.

Those highmem helpers mostly accept a folio plus an offset/length
inside the folio, check internally whether the folio needs a partial
kmap, and handle the mapping themselves.
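
For example, such a helper looks roughly like the following (a
simplified sketch in the spirit of memcpy_from_folio() from
linux/highmem.h, with a hypothetical name, not a verbatim copy of any
existing helper):

static void copy_block_from_folio(char *dst, struct folio *folio,
                                  size_t offset, size_t len)
{
        while (len) {
                /* Map only the page containing @offset if needed. */
                void *kaddr = kmap_local_folio(folio, offset);
                size_t chunk = len;

                if (folio_test_partial_kmap(folio) &&
                    chunk > PAGE_SIZE - offset_in_page(offset))
                        chunk = PAGE_SIZE - offset_in_page(offset);
                memcpy(dst, kaddr, chunk);
                kunmap_local(kaddr);
                dst += chunk;
                offset += chunk;
                len -= chunk;
        }
}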

Inspired by those highmem helpers, enhance the highmem handling of data
checksum verification by:

- Rename btrfs_check_sector_csum() to btrfs_check_block_csum()
  To follow the more common term "block" used in all other major
  filesystems.

- Pass a physical address into btrfs_check_block_csum() and
  btrfs_data_csum_ok()
  The physical address is always available, even for a highmem page,
  since it is just (page frame number << PAGE_SHIFT) + offset in page
  (see the short snippet after this list).

  With that physical address we can grab the folio covering the page,
  and do extra checks to ensure it covers at least one block.

  This also allows us to do the kmap inside btrfs_check_block_csum().
  This means all the extra HIGHMEM handling is concentrated in
  btrfs_check_block_csum(), and no caller needs to bother with highmem
  by itself.

- Properly zero out the block on csum mismatch
  Since btrfs_data_csum_ok() only gets a paddr, we can not and should
  not use memzero_bvec(), which only accepts a single-page bvec.
  Instead use the paddr to grab the folio and call folio_zero_range().
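
For reference, the paddr <-> folio conversion mentioned above is
conceptually the following (a simplified sketch mirroring the helpers
used in the diff below):

/* bvec_phys() is page_to_phys(bv->bv_page) + bv->bv_offset. */
phys_addr_t paddr = bvec_phys(bv);
struct folio *folio = page_folio(phys_to_page(paddr));
size_t offset = offset_in_folio(folio, paddr);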

Signed-off-by: Qu Wenruo <wqu@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
fs/btrfs/bio.c
fs/btrfs/btrfs_inode.h
fs/btrfs/inode.c
fs/btrfs/raid56.c
fs/btrfs/scrub.c

index ea7f7a17a3d5bbf709da7dd57060c5a103eac3b4..493135bfa518f1367ef34aeea5794eb1308f58dc 100644 (file)
@@ -167,7 +167,7 @@ static void btrfs_end_repair_bio(struct btrfs_bio *repair_bbio,
        int mirror = repair_bbio->mirror_num;
 
        if (repair_bbio->bio.bi_status ||
-           !btrfs_data_csum_ok(repair_bbio, dev, 0, bv)) {
+           !btrfs_data_csum_ok(repair_bbio, dev, 0, bvec_phys(bv))) {
                bio_reset(&repair_bbio->bio, NULL, REQ_OP_READ);
                repair_bbio->bio.bi_iter = repair_bbio->saved_iter;
 
@@ -280,7 +280,7 @@ static void btrfs_check_read_bio(struct btrfs_bio *bbio, struct btrfs_device *de
                struct bio_vec bv = bio_iter_iovec(&bbio->bio, *iter);
 
                bv.bv_len = min(bv.bv_len, sectorsize);
-               if (status || !btrfs_data_csum_ok(bbio, dev, offset, &bv))
+               if (status || !btrfs_data_csum_ok(bbio, dev, offset, bvec_phys(&bv)))
                        fbio = repair_one_sector(bbio, offset, &bv, fbio);
 
                bio_advance_iter_single(&bbio->bio, iter, sectorsize);
index df3445448b7d64e406f2764b232f42c6509bb0b6..077b2f178816f0b575aa660018b4337be2fc4489 100644 (file)
@@ -542,10 +542,10 @@ static inline void btrfs_set_inode_mapping_order(struct btrfs_inode *inode)
 #define CSUM_FMT                               "0x%*phN"
 #define CSUM_FMT_VALUE(size, bytes)            size, bytes
 
-int btrfs_check_sector_csum(struct btrfs_fs_info *fs_info, void *kaddr, u8 *csum,
-                           const u8 * const csum_expected);
+int btrfs_check_block_csum(struct btrfs_fs_info *fs_info, phys_addr_t paddr, u8 *csum,
+                          const u8 * const csum_expected);
 bool btrfs_data_csum_ok(struct btrfs_bio *bbio, struct btrfs_device *dev,
-                       u32 bio_offset, struct bio_vec *bv);
+                       u32 bio_offset, phys_addr_t paddr);
 noinline int can_nocow_extent(struct btrfs_inode *inode, u64 offset, u64 *len,
                              struct btrfs_file_extent *file_extent,
                              bool nowait);
index dd503dba33cf656440f3849d5447f96ebf9efffb..98877535f213e9d247c10ff960dd86bbe769c70c 100644 (file)
@@ -3334,13 +3334,35 @@ int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered)
  *
  * @kaddr must be a properly kmapped address.
  */
-int btrfs_check_sector_csum(struct btrfs_fs_info *fs_info, void *kaddr, u8 *csum,
-                           const u8 * const csum_expected)
+int btrfs_check_block_csum(struct btrfs_fs_info *fs_info, phys_addr_t paddr, u8 *csum,
+                          const u8 * const csum_expected)
 {
+       struct folio *folio = page_folio(phys_to_page(paddr));
+       const u32 blocksize = fs_info->sectorsize;
        SHASH_DESC_ON_STACK(shash, fs_info->csum_shash);
 
        shash->tfm = fs_info->csum_shash;
-       crypto_shash_digest(shash, kaddr, fs_info->sectorsize, csum);
+       /* The full block must be inside the folio. */
+       ASSERT(offset_in_folio(folio, paddr) + blocksize <= folio_size(folio));
+
+       if (folio_test_partial_kmap(folio)) {
+               size_t cur = paddr;
+
+               crypto_shash_init(shash);
+               while (cur < paddr + blocksize) {
+                       void *kaddr;
+                       size_t len = min(paddr + blocksize - cur,
+                                        PAGE_SIZE - offset_in_page(cur));
+
+                       kaddr = kmap_local_folio(folio, offset_in_folio(folio, cur));
+                       crypto_shash_update(shash, kaddr, len);
+                       kunmap_local(kaddr);
+                       cur += len;
+               }
+               crypto_shash_final(shash, csum);
+       } else {
+               crypto_shash_digest(shash, phys_to_virt(paddr), blocksize, csum);
+       }
 
        if (memcmp(csum, csum_expected, fs_info->csum_size))
                return -EIO;
@@ -3361,17 +3383,16 @@ int btrfs_check_sector_csum(struct btrfs_fs_info *fs_info, void *kaddr, u8 *csum
  * Return %true if the sector is ok or had no checksum to start with, else %false.
  */
 bool btrfs_data_csum_ok(struct btrfs_bio *bbio, struct btrfs_device *dev,
-                       u32 bio_offset, struct bio_vec *bv)
+                       u32 bio_offset, phys_addr_t paddr)
 {
        struct btrfs_inode *inode = bbio->inode;
        struct btrfs_fs_info *fs_info = inode->root->fs_info;
+       const u32 blocksize = fs_info->sectorsize;
+       struct folio *folio;
        u64 file_offset = bbio->file_offset + bio_offset;
-       u64 end = file_offset + bv->bv_len - 1;
+       u64 end = file_offset + blocksize - 1;
        u8 *csum_expected;
        u8 csum[BTRFS_CSUM_SIZE];
-       void *kaddr;
-
-       ASSERT(bv->bv_len == fs_info->sectorsize);
 
        if (!bbio->csum)
                return true;
@@ -3387,12 +3408,8 @@ bool btrfs_data_csum_ok(struct btrfs_bio *bbio, struct btrfs_device *dev,
 
        csum_expected = bbio->csum + (bio_offset >> fs_info->sectorsize_bits) *
                                fs_info->csum_size;
-       kaddr = bvec_kmap_local(bv);
-       if (btrfs_check_sector_csum(fs_info, kaddr, csum, csum_expected)) {
-               kunmap_local(kaddr);
+       if (btrfs_check_block_csum(fs_info, paddr, csum, csum_expected))
                goto zeroit;
-       }
-       kunmap_local(kaddr);
        return true;
 
 zeroit:
@@ -3400,7 +3417,9 @@ zeroit:
                                    bbio->mirror_num);
        if (dev)
                btrfs_dev_stat_inc_and_print(dev, BTRFS_DEV_STAT_CORRUPTION_ERRS);
-       memzero_bvec(bv);
+       folio = page_folio(phys_to_page(paddr));
+       ASSERT(offset_in_folio(folio, paddr) + blocksize <= folio_size(folio));
+       folio_zero_range(folio, offset_in_folio(folio, paddr), blocksize);
        return false;
 }
 
index 3ff2bedfb3a4c9a20f06d684d33a66d7ee20fb1c..e88699460dda6a7d74e47f766f25e18c34e3ea6d 100644 (file)
@@ -1585,9 +1585,6 @@ static void verify_bio_data_sectors(struct btrfs_raid_bio *rbio,
                return;
 
        bio_for_each_segment_all(bvec, bio, iter_all) {
-               void *kaddr;
-
-               kaddr = bvec_kmap_local(bvec);
                for (u32 off = 0; off < bvec->bv_len;
                     off += fs_info->sectorsize, total_sector_nr++) {
                        u8 csum_buf[BTRFS_CSUM_SIZE];
@@ -1599,12 +1596,11 @@ static void verify_bio_data_sectors(struct btrfs_raid_bio *rbio,
                        if (!test_bit(total_sector_nr, rbio->csum_bitmap))
                                continue;
 
-                       ret = btrfs_check_sector_csum(fs_info, kaddr + off,
-                                                     csum_buf, expected_csum);
+                       ret = btrfs_check_block_csum(fs_info, bvec_phys(bvec) + off,
+                                                    csum_buf, expected_csum);
                        if (ret < 0)
                                set_bit(total_sector_nr, rbio->error_bitmap);
                }
-               kunmap_local(kaddr);
        }
 }
 
@@ -1802,7 +1798,6 @@ static int verify_one_sector(struct btrfs_raid_bio *rbio,
        struct sector_ptr *sector;
        u8 csum_buf[BTRFS_CSUM_SIZE];
        u8 *csum_expected;
-       void *kaddr;
        int ret;
 
        if (!rbio->csum_bitmap || !rbio->csum_buf)
@@ -1824,9 +1819,7 @@ static int verify_one_sector(struct btrfs_raid_bio *rbio,
        csum_expected = rbio->csum_buf +
                        (stripe_nr * rbio->stripe_nsectors + sector_nr) *
                        fs_info->csum_size;
-       kaddr = kmap_local_sector(sector);
-       ret = btrfs_check_sector_csum(fs_info, kaddr, csum_buf, csum_expected);
-       kunmap_local(kaddr);
+       ret = btrfs_check_block_csum(fs_info, sector->paddr, csum_buf, csum_expected);
        return ret;
 }
 
index 2f10c65929dc9722707f5d966df902f29e7fed1d..cef260ed854c15cb4250915c7456229724c8641b 100644 (file)
@@ -696,6 +696,20 @@ static void *scrub_stripe_get_kaddr(struct scrub_stripe *stripe, int sector_nr)
        return page_address(page) + offset_in_page(offset);
 }
 
+static phys_addr_t scrub_stripe_get_paddr(struct scrub_stripe *stripe, int sector_nr)
+{
+       struct btrfs_fs_info *fs_info = stripe->bg->fs_info;
+       u32 offset = (sector_nr << fs_info->sectorsize_bits);
+       const struct page *page = stripe->pages[offset >> PAGE_SHIFT];
+
+       /* stripe->pages[] is allocated by us and no highmem is allowed. */
+       ASSERT(page);
+       ASSERT(!PageHighMem(page));
+       /* And the range must be contained inside the page. */
+       ASSERT(offset_in_page(offset) + fs_info->sectorsize <= PAGE_SIZE);
+       return page_to_phys(page) + offset_in_page(offset);
+}
+
 static void scrub_verify_one_metadata(struct scrub_stripe *stripe, int sector_nr)
 {
        struct btrfs_fs_info *fs_info = stripe->bg->fs_info;
@@ -788,7 +802,7 @@ static void scrub_verify_one_sector(struct scrub_stripe *stripe, int sector_nr)
        struct btrfs_fs_info *fs_info = stripe->bg->fs_info;
        struct scrub_sector_verification *sector = &stripe->sectors[sector_nr];
        const u32 sectors_per_tree = fs_info->nodesize >> fs_info->sectorsize_bits;
-       void *kaddr = scrub_stripe_get_kaddr(stripe, sector_nr);
+       phys_addr_t paddr = scrub_stripe_get_paddr(stripe, sector_nr);
        u8 csum_buf[BTRFS_CSUM_SIZE];
        int ret;
 
@@ -833,7 +847,7 @@ static void scrub_verify_one_sector(struct scrub_stripe *stripe, int sector_nr)
                return;
        }
 
-       ret = btrfs_check_sector_csum(fs_info, kaddr, csum_buf, sector->csum);
+       ret = btrfs_check_block_csum(fs_info, paddr, csum_buf, sector->csum);
        if (ret < 0) {
                scrub_bitmap_set_bit_csum_error(stripe, sector_nr);
                scrub_bitmap_set_bit_error(stripe, sector_nr);