btrfs: make btrfs_repair_io_failure() handle bs > ps cases without large folios

author Qu Wenruo <wqu@suse.com>

Mon, 10 Nov 2025 22:41:59 +0000 (09:11 +1030)

committer David Sterba <dsterba@suse.com>

Mon, 24 Nov 2025 21:42:23 +0000 (22:42 +0100)
author Qu Wenruo <wqu@suse.com>
Mon, 10 Nov 2025 22:41:59 +0000 (09:11 +1030)
committer David Sterba <dsterba@suse.com>
Mon, 24 Nov 2025 21:42:23 +0000 (22:42 +0100)
diff --git a/fs/btrfs/bio.c b/fs/btrfs/bio.c

index a73652b8724aca8be831789f27d8a304cccc6932..383ea6731b35e27c0493a948d155ca130af6c73a 100644 (file)
--- a/fs/btrfs/bio.c
+++ b/fs/btrfs/bio.c
@@ -172,7 +172,21 @@ static void btrfs_end_repair_bio(struct btrfs_bio *repair_bbio,
         struct btrfs_inode *inode = repair_bbio->inode;
         struct btrfs_fs_info *fs_info = inode->root->fs_info;
         struct bio_vec *bv = bio_first_bvec_all(&repair_bbio->bio);
+       /*
+        * Work on a copy of saved_iter: the original must not be advanced,
+        * as it will be used again later by repair_bbio.
+        */
+       struct bvec_iter saved_iter = repair_bbio->saved_iter;
+       const u32 step = min(fs_info->sectorsize, PAGE_SIZE);
+       const u64 logical = repair_bbio->saved_iter.bi_sector << SECTOR_SHIFT;
+       const u32 nr_steps = repair_bbio->saved_iter.bi_size / step;
         int mirror = repair_bbio->mirror_num;
+       phys_addr_t paddrs[BTRFS_MAX_BLOCKSIZE / PAGE_SIZE];
+       phys_addr_t paddr;
+       unsigned int slot = 0;
+
+       /* Repair bbio should be exactly one block sized. */
+       ASSERT(repair_bbio->saved_iter.bi_size == fs_info->sectorsize);
  
         if (repair_bbio->bio.bi_status ||
             !btrfs_data_csum_ok(repair_bbio, dev, 0, bvec_phys(bv))) {
@@ -190,12 +204,17 @@ static void btrfs_end_repair_bio(struct btrfs_bio *repair_bbio,
                 return;
         }
  
+       btrfs_bio_for_each_block(paddr, &repair_bbio->bio, &saved_iter, step) {
+               ASSERT(slot < nr_steps);
+               paddrs[slot] = paddr;
+               slot++;
+       }
+
         do {
                 mirror = prev_repair_mirror(fbio, mirror);
                 btrfs_repair_io_failure(fs_info, btrfs_ino(inode),
                                   repair_bbio->file_offset, fs_info->sectorsize,
-                                 repair_bbio->saved_iter.bi_sector << SECTOR_SHIFT,
-                                 bvec_phys(bv), mirror);
+                                 logical, paddrs, step, mirror);
         } while (mirror != fbio->bbio->mirror_num);
  
  done:
@@ -866,18 +885,36 @@ void btrfs_submit_bbio(struct btrfs_bio *bbio, int mirror_num)
   *
   * The I/O is issued synchronously to block the repair read completion from
   * freeing the bio.
+ *
+ * @ino:       Offending inode number
+ * @fileoff:   File offset inside the inode
+ * @length:    Length of the repair write
+ * @logical:   Logical address of the range
+ * @paddrs:    Physical address array of the content
+ * @step:      Length of each entry in @paddrs
+ * @mirror_num: Mirror number to write to. Must not be zero
   */
-int btrfs_repair_io_failure(struct btrfs_fs_info *fs_info, u64 ino, u64 start,
-                           u64 length, u64 logical, phys_addr_t paddr, int mirror_num)
+int btrfs_repair_io_failure(struct btrfs_fs_info *fs_info, u64 ino, u64 fileoff,
+                           u32 length, u64 logical, const phys_addr_t paddrs[],
+                           unsigned int step, int mirror_num)
  {
+       const u32 nr_steps = DIV_ROUND_UP_POW2(length, step);
         struct btrfs_io_stripe smap = { 0 };
-       struct bio_vec bvec;
-       struct bio bio;
+       struct bio *bio = NULL;
         int ret = 0;
  
         ASSERT(!(fs_info->sb->s_flags & SB_RDONLY));
         BUG_ON(!mirror_num);
  
+       /* Basic alignment checks. */
+       ASSERT(IS_ALIGNED(logical, fs_info->sectorsize));
+       ASSERT(IS_ALIGNED(length, fs_info->sectorsize));
+       ASSERT(IS_ALIGNED(fileoff, fs_info->sectorsize));
+       /* It's either a single data block or a single metadata block. */
+       ASSERT(length <= BTRFS_MAX_BLOCKSIZE);
+       ASSERT(step <= length);
+       ASSERT(is_power_of_2(step));
+
         if (btrfs_repair_one_zone(fs_info, logical))
                 return 0;
  
@@ -897,24 +934,27 @@ int btrfs_repair_io_failure(struct btrfs_fs_info *fs_info, u64 ino, u64 start,
                 goto out_counter_dec;
         }
  
-       bio_init(&bio, smap.dev->bdev, &bvec, 1, REQ_OP_WRITE | REQ_SYNC);
-       bio.bi_iter.bi_sector = smap.physical >> SECTOR_SHIFT;
-       __bio_add_page(&bio, phys_to_page(paddr), length, offset_in_page(paddr));
-       ret = submit_bio_wait(&bio);
+       bio = bio_alloc(smap.dev->bdev, nr_steps, REQ_OP_WRITE | REQ_SYNC, GFP_NOFS);
+       bio->bi_iter.bi_sector = smap.physical >> SECTOR_SHIFT;
+       for (int i = 0; i < nr_steps; i++) {
+               ret = bio_add_page(bio, phys_to_page(paddrs[i]), step, offset_in_page(paddrs[i]));
+               /* We should have allocated enough slots to contain all the different pages. */
+               ASSERT(ret == step);
+       }
+       ret = submit_bio_wait(bio);
+       bio_put(bio);
         if (ret) {
                 /* try to remap that extent elsewhere? */
                 btrfs_dev_stat_inc_and_print(smap.dev, BTRFS_DEV_STAT_WRITE_ERRS);
-               goto out_bio_uninit;
+               goto out_counter_dec;
         }
  
         btrfs_info_rl(fs_info,
                 "read error corrected: ino %llu off %llu (dev %s sector %llu)",
-                            ino, start, btrfs_dev_name(smap.dev),
+                            ino, fileoff, btrfs_dev_name(smap.dev),
                              smap.physical >> SECTOR_SHIFT);
         ret = 0;
  
-out_bio_uninit:
-       bio_uninit(&bio);
  out_counter_dec:
         btrfs_bio_counter_dec(fs_info);
         return ret;
diff --git a/fs/btrfs/bio.h b/fs/btrfs/bio.h

index deaeea3becf470a25bb2f284a8f1d2ed5927b3ab..035145909b003c9a0eb80ca09a953285a0674639 100644 (file)
--- a/fs/btrfs/bio.h
+++ b/fs/btrfs/bio.h
@@ -117,7 +117,8 @@ void btrfs_bio_end_io(struct btrfs_bio *bbio, blk_status_t status);
  
  void btrfs_submit_bbio(struct btrfs_bio *bbio, int mirror_num);
  void btrfs_submit_repair_write(struct btrfs_bio *bbio, int mirror_num, bool dev_replace);
-int btrfs_repair_io_failure(struct btrfs_fs_info *fs_info, u64 ino, u64 start,
-                           u64 length, u64 logical, phys_addr_t paddr, int mirror_num);
+int btrfs_repair_io_failure(struct btrfs_fs_info *fs_info, u64 ino, u64 fileoff,
+                           u32 length, u64 logical, const phys_addr_t paddrs[],
+                           unsigned int step, int mirror_num);
  
  #endif
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c

index 4764108b03381a726a841bd9ff40125b1401f38e..0df81a09a3d144cf57c1cde86a3335569219f86e 100644 (file)
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -183,26 +183,33 @@ static int btrfs_repair_eb_io_failure(const struct extent_buffer *eb,
                                       int mirror_num)
  {
         struct btrfs_fs_info *fs_info = eb->fs_info;
+       const u32 step = min(fs_info->nodesize, PAGE_SIZE);
+       const u32 nr_steps = eb->len / step;
+       phys_addr_t paddrs[BTRFS_MAX_BLOCKSIZE / PAGE_SIZE];
         int ret = 0;
  
         if (sb_rdonly(fs_info->sb))
                 return -EROFS;
  
-       for (int i = 0; i < num_extent_folios(eb); i++) {
+       for (int i = 0; i < num_extent_pages(eb); i++) {
                 struct folio *folio = eb->folios[i];
-               u64 start = max_t(u64, eb->start, folio_pos(folio));
-               u64 end = min_t(u64, eb->start + eb->len,
-                               folio_pos(folio) + eb->folio_size);
-               u32 len = end - start;
-               phys_addr_t paddr = PFN_PHYS(folio_pfn(folio)) +
-                                   offset_in_folio(folio, start);
-
-               ret = btrfs_repair_io_failure(fs_info, 0, start, len, start,
-                                             paddr, mirror_num);
-               if (ret)
-                       break;
+
+               /* No large folio support yet. */
+               ASSERT(folio_order(folio) == 0);
+               ASSERT(i < nr_steps);
+
+               /*
+                * For nodesize < page size, there is just one paddr, with some
+                * offset inside the page.
+                *
+                * For nodesize >= page size, it's one or more paddrs, and eb->start
+                * must be aligned to page boundary.
+                */
+               paddrs[i] = page_to_phys(&folio->page) + offset_in_page(eb->start);
         }
  
+       ret = btrfs_repair_io_failure(fs_info, 0, eb->start, eb->len, eb->start,
+                                     paddrs, step, mirror_num);
         return ret;
  }
author	Qu Wenruo <wqu@suse.com>
	Mon, 10 Nov 2025 22:41:59 +0000 (09:11 +1030)
committer	David Sterba <dsterba@suse.com>
	Mon, 24 Nov 2025 21:42:23 +0000 (22:42 +0100)
fs/btrfs/bio.c		patch \| blob \| blame \| history
fs/btrfs/bio.h		patch \| blob \| blame \| history
fs/btrfs/disk-io.c		patch \| blob \| blame \| history