git.ipfire.org Git - thirdparty/linux.git/commitdiff
btrfs: migrate btrfs_bio_ctrl::submit_bitmap to support larger bitmaps
author Qu Wenruo <wqu@suse.com>
Wed, 13 May 2026 04:36:20 +0000 (14:06 +0930)
committer Filipe Manana <fdmanana@suse.com>
Tue, 9 Jun 2026 10:49:26 +0000 (11:49 +0100)
[CURRENT LIMIT]
Btrfs currently only supports sub-bitmaps (e.g. dirty bitmap) no larger
than BITS_PER_LONG.

One call site that utilizes this limit is btrfs_bio_ctrl::submit_bitmap,
which makes it very simple and straightforward to just grab an unsigned
long value and assign it to submit_bitmap.

Unfortunately that limit prevents us from supporting huge folios.
For 4K page size and block size, a huge folio (order 9) means 512 blocks
inside a 2M folio.

[ENHANCEMENT]
Instead of using a fixed unsigned long value, change
btrfs_bio_ctrl::submit_bitmap to an unsigned long pointer.

And for cases where an unsigned long can hold the whole bitmap,
introduce @submit_bitmap_value, and just point that pointer to that
unsigned long.

Then update all direct users of bio_ctrl->submit_bitmap to use the
pointer version.

There are several call sites that get extra changes:

- @range_bitmap inside extent_writepage_io()
  It is only used to truncate the submit bitmap to the target range.
  Since we do not want to allocate new memory just for such temporary
  usage, replace the original bitmap_set() and bitmap_and() with
  bitmap_clear() calls on the ranges outside of the target range.

- Getting dirty subpage bitmap inside writepage_delalloc()
  Since we're passing an unsigned long pointer now, we need to handle
  three cases separately: bs == ps, blocks_per_folio <= BITS_PER_LONG,
  and blocks_per_folio > BITS_PER_LONG.

Signed-off-by: Qu Wenruo <wqu@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
fs/btrfs/extent_io.c
fs/btrfs/subpage.c
fs/btrfs/subpage.h

index 1dfa3152e4bd9dbb7a83ad4cd939b8995e640937..de0f37663790fd08a7e509a5e269eab48764659d 100644 (file)
@@ -130,7 +130,13 @@ struct btrfs_bio_ctrl {
         * extent_writepage_io().
         * This is to avoid touching ranges covered by compression/inline.
         */
-       unsigned long submit_bitmap;
+       unsigned long *submit_bitmap;
+       /*
+        * When blocks_per_folio <= BITS_PER_LONG, we can use the inline
+        * one without allocating memory.
+        */
+       unsigned long submit_bitmap_value;
+
        struct readahead_control *ractl;
 
        /*
@@ -1492,9 +1498,9 @@ static noinline_for_stack int writepage_delalloc(struct btrfs_inode *inode,
        int ret = 0;
 
        /* Save the dirty bitmap as our submission bitmap will be a subset of it. */
-       bio_ctrl->submit_bitmap = btrfs_get_subpage_dirty_bitmap_value(fs_info, folio);
+       btrfs_copy_subpage_dirty_bitmap(fs_info, folio, bio_ctrl->submit_bitmap);
 
-       for_each_set_bitrange(start_bit, end_bit, &bio_ctrl->submit_bitmap,
+       for_each_set_bitrange(start_bit, end_bit, bio_ctrl->submit_bitmap,
                              blocks_per_folio) {
                u64 start = page_start + (start_bit << fs_info->sectorsize_bits);
                u32 len = (end_bit - start_bit) << fs_info->sectorsize_bits;
@@ -1570,7 +1576,7 @@ static noinline_for_stack int writepage_delalloc(struct btrfs_inode *inode,
                                             btrfs_ino(inode),
                                             folio_pos(folio),
                                             blocks_per_folio,
-                                            &bio_ctrl->submit_bitmap,
+                                            bio_ctrl->submit_bitmap,
                                             found_start, found_len, ret);
                } else {
                        /*
@@ -1595,7 +1601,7 @@ static noinline_for_stack int writepage_delalloc(struct btrfs_inode *inode,
                                                 fs_info->sectorsize_bits;
                        unsigned int end_bit = (min(page_end + 1, found_start + found_len) -
                                                page_start) >> fs_info->sectorsize_bits;
-                       bitmap_clear(&bio_ctrl->submit_bitmap, start_bit, end_bit - start_bit);
+                       bitmap_clear(bio_ctrl->submit_bitmap, start_bit, end_bit - start_bit);
                }
                /*
                 * Above btrfs_run_delalloc_range() may have unlocked the folio,
@@ -1616,7 +1622,7 @@ static noinline_for_stack int writepage_delalloc(struct btrfs_inode *inode,
                                fs_info->sectorsize_bits,
                                blocks_per_folio);
 
-               for_each_set_bitrange(start_bit, end_bit, &bio_ctrl->submit_bitmap,
+               for_each_set_bitrange(start_bit, end_bit, bio_ctrl->submit_bitmap,
                                      bitmap_size) {
                        u64 start = page_start + (start_bit << fs_info->sectorsize_bits);
                        u32 len = (end_bit - start_bit) << fs_info->sectorsize_bits;
@@ -1641,7 +1647,7 @@ out:
         * If all ranges are submitted asynchronously, we just need to account
         * for them here.
         */
-       if (bitmap_empty(&bio_ctrl->submit_bitmap, blocks_per_folio)) {
+       if (bitmap_empty(bio_ctrl->submit_bitmap, blocks_per_folio)) {
                wbc->nr_to_write -= delalloc_to_write;
                return 1;
        }
@@ -1768,7 +1774,6 @@ static noinline_for_stack int extent_writepage_io(struct btrfs_inode *inode,
                                                  loff_t i_size)
 {
        struct btrfs_fs_info *fs_info = inode->root->fs_info;
-       unsigned long range_bitmap = 0;
        bool submitted_io = false;
        int found_error = 0;
        const u64 end = start + len;
@@ -1783,14 +1788,18 @@ static noinline_for_stack int extent_writepage_io(struct btrfs_inode *inode,
        ASSERT(end <= folio_end, "start=%llu len=%u folio_start=%llu folio_size=%zu",
               start, len, folio_start, folio_size(folio));
 
-       bitmap_set(&range_bitmap, (start - folio_pos(folio)) >> fs_info->sectorsize_bits,
-                  len >> fs_info->sectorsize_bits);
-       bitmap_and(&bio_ctrl->submit_bitmap, &bio_ctrl->submit_bitmap, &range_bitmap,
-                  blocks_per_folio);
+       /* Truncate the submit bitmap to the current range. */
+       if (start > folio_start)
+               bitmap_clear(bio_ctrl->submit_bitmap, 0,
+                            (start - folio_start) >> fs_info->sectorsize_bits);
+       if (start + len < folio_end)
+               bitmap_clear(bio_ctrl->submit_bitmap,
+                            (end - folio_start) >> fs_info->sectorsize_bits,
+                            (folio_end - end) >> fs_info->sectorsize_bits);
 
        bio_ctrl->end_io_func = end_bbio_data_write;
 
-       for_each_set_bit(bit, &bio_ctrl->submit_bitmap, blocks_per_folio) {
+       for_each_set_bit(bit, bio_ctrl->submit_bitmap, blocks_per_folio) {
                cur = folio_pos(folio) + (bit << fs_info->sectorsize_bits);
 
                if (cur >= i_size) {
@@ -1849,6 +1858,23 @@ static noinline_for_stack int extent_writepage_io(struct btrfs_inode *inode,
        return found_error;
 }
 
+static void bio_ctrl_init_submit_bitmap(struct btrfs_fs_info *fs_info,
+                                       struct folio *folio,
+                                       struct btrfs_bio_ctrl *bio_ctrl)
+{
+       const unsigned int blocks_per_folio = btrfs_blocks_per_folio(fs_info, folio);
+
+       /* Only supported for blocks per folio <= BITS_PER_LONG for now. */
+       ASSERT(blocks_per_folio <= BITS_PER_LONG);
+       bio_ctrl->submit_bitmap_value = 0;
+       bio_ctrl->submit_bitmap = &bio_ctrl->submit_bitmap_value;
+       /*
+        * Default to unlock the whole folio.
+        * The proper bitmap is not initialized until writepage_delalloc().
+        */
+       bitmap_set(bio_ctrl->submit_bitmap, 0, blocks_per_folio);
+}
+
 /*
  * the writepage semantics are similar to regular writepage.  extent
  * records are inserted to lock ranges in the tree, and as dirty areas
@@ -1883,12 +1909,7 @@ static int extent_writepage(struct folio *folio, struct btrfs_bio_ctrl *bio_ctrl
        if (folio_contains(folio, end_index))
                folio_zero_range(folio, pg_offset, folio_size(folio) - pg_offset);
 
-       /*
-        * Default to unlock the whole folio.
-        * The proper bitmap can only be initialized until writepage_delalloc().
-        */
-       bio_ctrl->submit_bitmap = (unsigned long)-1;
-
+       bio_ctrl_init_submit_bitmap(fs_info, folio, bio_ctrl);
        /*
         * If the page is dirty but without private set, it's marked dirty
         * without informing the fs.
@@ -1927,7 +1948,7 @@ static int extent_writepage(struct folio *folio, struct btrfs_bio_ctrl *bio_ctrl
 "failed to submit blocks, root=%lld inode=%llu folio=%llu submit_bitmap=%*pbl: %d",
                             btrfs_root_id(inode->root), btrfs_ino(inode),
                             folio_pos(folio), blocks_per_folio,
-                            &bio_ctrl->submit_bitmap, ret);
+                            bio_ctrl->submit_bitmap, ret);
 
        bio_ctrl->wbc->nr_to_write--;
 
@@ -2674,7 +2695,7 @@ void extent_write_locked_range(struct inode *inode, const struct folio *locked_f
                 * Set the submission bitmap to submit all sectors.
                 * extent_writepage_io() will do the truncation correctly.
                 */
-               bio_ctrl.submit_bitmap = (unsigned long)-1;
+               bio_ctrl_init_submit_bitmap(fs_info, folio, &bio_ctrl);
                ret = extent_writepage_io(BTRFS_I(inode), folio, cur, cur_len,
                                          &bio_ctrl, i_size);
                if (ret == 1)
index fb56eaf5232584a4c3130397406bc47797e7497e..df923009060df818fddb0ea5ecdcc334bf8c6e28 100644 (file)
@@ -268,11 +268,11 @@ void btrfs_folio_end_lock(const struct btrfs_fs_info *fs_info,
 }
 
 void btrfs_folio_end_lock_bitmap(const struct btrfs_fs_info *fs_info,
-                                struct folio *folio, unsigned long bitmap)
+                                struct folio *folio, unsigned long *bitmap)
 {
        struct btrfs_folio_state *bfs = folio_get_private(folio);
        const unsigned int blocks_per_folio = btrfs_blocks_per_folio(fs_info, folio);
-       const unsigned int nbits = bitmap_weight(&bitmap, blocks_per_folio);
+       const unsigned int nbits = bitmap_weight(bitmap, blocks_per_folio);
        unsigned long flags;
        bool last = false;
 
@@ -731,24 +731,35 @@ void __cold btrfs_subpage_dump_bitmap(const struct btrfs_fs_info *fs_info,
        spin_unlock_irqrestore(&bfs->lock, flags);
 }
 
-unsigned long btrfs_get_subpage_dirty_bitmap_value(struct btrfs_fs_info *fs_info,
-                                                  struct folio *folio)
+void btrfs_copy_subpage_dirty_bitmap(struct btrfs_fs_info *fs_info,
+                                    struct folio *folio,
+                                    unsigned long *dst)
 {
        struct btrfs_folio_state *bfs;
        const unsigned int blocks_per_folio = btrfs_blocks_per_folio(fs_info, folio);
        unsigned long flags;
        unsigned long value;
 
-       if (blocks_per_folio == 1)
-               return 1;
+       if (blocks_per_folio == 1) {
+               value = 1;
+               bitmap_copy(dst, &value, 1);
+               return;
+       }
 
        ASSERT(folio_test_private(folio) && folio_get_private(folio));
        ASSERT(blocks_per_folio > 1);
-       ASSERT(blocks_per_folio <= BITS_PER_LONG);
        bfs = folio_get_private(folio);
 
+       if (blocks_per_folio <= BITS_PER_LONG) {
+               spin_lock_irqsave(&bfs->lock, flags);
+               value = bitmap_read(bfs->bitmaps, btrfs_bitmap_nr_dirty * blocks_per_folio,
+                                   blocks_per_folio);
+               spin_unlock_irqrestore(&bfs->lock, flags);
+               bitmap_copy(dst, &value, blocks_per_folio);
+               return;
+       }
        spin_lock_irqsave(&bfs->lock, flags);
-       value = get_bitmap_value_dirty(fs_info, folio);
+       bitmap_copy(dst, get_bitmap_pointer_dirty(fs_info, folio),
+                   blocks_per_folio);
        spin_unlock_irqrestore(&bfs->lock, flags);
-       return value;
 }
index 756c05c89c11dfad40ddf6efae0bec6d47ee1251..c6d7394e6418af4973440fb3e7a89b37b21110db 100644 (file)
@@ -116,7 +116,7 @@ void btrfs_folio_end_lock(const struct btrfs_fs_info *fs_info,
 void btrfs_folio_set_lock(const struct btrfs_fs_info *fs_info,
                          struct folio *folio, u64 start, u32 len);
 void btrfs_folio_end_lock_bitmap(const struct btrfs_fs_info *fs_info,
-                                struct folio *folio, unsigned long bitmap);
+                                struct folio *folio, unsigned long *bitmap);
 /*
  * Template for subpage related operations.
  *
@@ -184,8 +184,9 @@ bool btrfs_subpage_clear_and_test_dirty(const struct btrfs_fs_info *fs_info,
 void btrfs_folio_assert_not_dirty(const struct btrfs_fs_info *fs_info,
                                  struct folio *folio, u64 start, u32 len);
 bool btrfs_meta_folio_clear_and_test_dirty(struct folio *folio, const struct extent_buffer *eb);
-unsigned long btrfs_get_subpage_dirty_bitmap_value(struct btrfs_fs_info *fs_info,
-                                                  struct folio *folio);
+void btrfs_copy_subpage_dirty_bitmap(struct btrfs_fs_info *fs_info,
+                                    struct folio *folio,
+                                    unsigned long *dst);
 void __cold btrfs_subpage_dump_bitmap(const struct btrfs_fs_info *fs_info,
                                      struct folio *folio, u64 start, u32 len);