]> git.ipfire.org Git - thirdparty/linux.git/commitdiff
btrfs: refactor alloc_extent_buffer() to allocate-then-attach method
authorQu Wenruo <wqu@suse.com>
Wed, 29 Nov 2023 22:32:08 +0000 (09:02 +1030)
committerDavid Sterba <dsterba@suse.com>
Fri, 15 Dec 2023 22:01:04 +0000 (23:01 +0100)
Currently alloc_extent_buffer() utilizes find_or_create_page() to
allocate one page a time for an extent buffer.

This method has the following disadvantages:

- find_or_create_page() is the legacy way of allocating new pages
  With the new folio infrastructure, find_or_create_page() is just
  redirected to filemap_get_folio().

- Lacks the way to support higher order (order >= 1) folios
  As we can not yet let filemap give us a higher order folio.

This patch would change the workflow by the following way:

Old    | new
-----------------------------------+-------------------------------------
                                   | ret = btrfs_alloc_page_array();
for (i = 0; i < num_pages; i++) {  | for (i = 0; i < num_pages; i++) {
    p = find_or_create_page();     |     ret = filemap_add_folio();
    /* Attach page private */      |     /* Reuse page cache if needed */
    /* Reused eb if needed */      |
   |     /* Attach page private and
   |        reuse eb if needed */
   | }

By this we split the page allocation and private attaching into two
parts, allowing future updates to each part more easily, and migrate to
folio interfaces (especially for possible higher order folios).

Signed-off-by: Qu Wenruo <wqu@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
fs/btrfs/compression.c
fs/btrfs/extent_io.c
fs/btrfs/extent_io.h
fs/btrfs/inode.c
fs/btrfs/raid56.c
fs/btrfs/scrub.c

index 2d9974c283c6cb14e1823514fdc08ad0a586535c..5678e73f1509f33571c47cce495a9966b2ee5ed2 100644 (file)
@@ -608,7 +608,7 @@ void btrfs_submit_compressed_read(struct btrfs_bio *bbio)
                goto out_free_bio;
        }
 
-       ret2 = btrfs_alloc_page_array(cb->nr_pages, cb->compressed_pages);
+       ret2 = btrfs_alloc_page_array(cb->nr_pages, cb->compressed_pages, 0);
        if (ret2) {
                ret = BLK_STS_RESOURCE;
                goto out_free_compressed_pages;
index 921e9e17fc65a41967be06175afc455eb5d91e17..cecd8939e99a96cd579be52c92f17a0ec62dfad4 100644 (file)
@@ -674,19 +674,22 @@ static void end_bio_extent_readpage(struct btrfs_bio *bbio)
  * @nr_pages:   number of pages to allocate
  * @page_array: the array to fill with pages; any existing non-null entries in
  *             the array will be skipped
+ * @extra_gfp: the extra GFP flags for the allocation.
  *
  * Return: 0        if all pages were able to be allocated;
  *         -ENOMEM  otherwise, the partially allocated pages would be freed and
  *                  the array slots zeroed
  */
-int btrfs_alloc_page_array(unsigned int nr_pages, struct page **page_array)
+int btrfs_alloc_page_array(unsigned int nr_pages, struct page **page_array,
+                          gfp_t extra_gfp)
 {
        unsigned int allocated;
 
        for (allocated = 0; allocated < nr_pages;) {
                unsigned int last = allocated;
 
-               allocated = alloc_pages_bulk_array(GFP_NOFS, nr_pages, page_array);
+               allocated = alloc_pages_bulk_array(GFP_NOFS | extra_gfp,
+                                                  nr_pages, page_array);
 
                if (allocated == nr_pages)
                        return 0;
@@ -3219,7 +3222,7 @@ struct extent_buffer *btrfs_clone_extent_buffer(const struct extent_buffer *src)
         */
        set_bit(EXTENT_BUFFER_UNMAPPED, &new->bflags);
 
-       ret = btrfs_alloc_page_array(num_pages, new->pages);
+       ret = btrfs_alloc_page_array(num_pages, new->pages, 0);
        if (ret) {
                btrfs_release_extent_buffer(new);
                return NULL;
@@ -3255,7 +3258,7 @@ struct extent_buffer *__alloc_dummy_extent_buffer(struct btrfs_fs_info *fs_info,
                return NULL;
 
        num_pages = num_extent_pages(eb);
-       ret = btrfs_alloc_page_array(num_pages, eb->pages);
+       ret = btrfs_alloc_page_array(num_pages, eb->pages, 0);
        if (ret)
                goto err;
 
@@ -3475,16 +3478,75 @@ static int check_eb_alignment(struct btrfs_fs_info *fs_info, u64 start)
        return 0;
 }
 
+
+/*
+ * Return 0 if eb->pages[i] is attached to btree inode successfully.
+ * Return >0 if there is already annother extent buffer for the range,
+ * and @found_eb_ret would be updated.
+ */
+static int attach_eb_page_to_filemap(struct extent_buffer *eb, int i,
+                                    struct extent_buffer **found_eb_ret)
+{
+
+       struct btrfs_fs_info *fs_info = eb->fs_info;
+       struct address_space *mapping = fs_info->btree_inode->i_mapping;
+       const unsigned long index = eb->start >> PAGE_SHIFT;
+       struct folio *existing_folio;
+       int ret;
+
+       ASSERT(found_eb_ret);
+
+       /* Caller should ensure the page exists. */
+       ASSERT(eb->pages[i]);
+
+retry:
+       ret = filemap_add_folio(mapping, page_folio(eb->pages[i]), index + i,
+                               GFP_NOFS | __GFP_NOFAIL);
+       if (!ret)
+               return 0;
+
+       existing_folio = filemap_lock_folio(mapping, index + i);
+       /* The page cache only exists for a very short time, just retry. */
+       if (IS_ERR(existing_folio))
+               goto retry;
+
+       /* For now, we should only have single-page folios for btree inode. */
+       ASSERT(folio_nr_pages(existing_folio) == 1);
+
+       if (fs_info->nodesize < PAGE_SIZE) {
+               /*
+                * We're going to reuse the existing page, can drop our page
+                * and subpage structure now.
+                */
+               __free_page(eb->pages[i]);
+               eb->pages[i] = folio_page(existing_folio, 0);
+       } else {
+               struct extent_buffer *existing_eb;
+
+               existing_eb = grab_extent_buffer(fs_info,
+                                                folio_page(existing_folio, 0));
+               if (existing_eb) {
+                       /* The extent buffer still exists, we can use it directly. */
+                       *found_eb_ret = existing_eb;
+                       folio_unlock(existing_folio);
+                       folio_put(existing_folio);
+                       return 1;
+               }
+               /* The extent buffer no longer exists, we can reuse the folio. */
+               __free_page(eb->pages[i]);
+               eb->pages[i] = folio_page(existing_folio, 0);
+       }
+       return 0;
+}
+
 struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info,
                                          u64 start, u64 owner_root, int level)
 {
        unsigned long len = fs_info->nodesize;
        int num_pages;
-       int i;
-       unsigned long index = start >> PAGE_SHIFT;
+       int attached = 0;
        struct extent_buffer *eb;
-       struct extent_buffer *exists = NULL;
-       struct page *p;
+       struct extent_buffer *existing_eb = NULL;
        struct address_space *mapping = fs_info->btree_inode->i_mapping;
        struct btrfs_subpage *prealloc = NULL;
        u64 lockdep_owner = owner_root;
@@ -3535,29 +3597,36 @@ struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info,
        if (fs_info->nodesize < PAGE_SIZE) {
                prealloc = btrfs_alloc_subpage(fs_info, BTRFS_SUBPAGE_METADATA);
                if (IS_ERR(prealloc)) {
-                       exists = ERR_CAST(prealloc);
-                       goto free_eb;
+                       ret = PTR_ERR(prealloc);
+                       goto out;
                }
        }
 
-       for (i = 0; i < num_pages; i++, index++) {
-               p = find_or_create_page(mapping, index, GFP_NOFS|__GFP_NOFAIL);
-               if (!p) {
-                       exists = ERR_PTR(-ENOMEM);
-                       btrfs_free_subpage(prealloc);
-                       goto free_eb;
+       /* Allocate all pages first. */
+       ret = btrfs_alloc_page_array(num_pages, eb->pages, __GFP_NOFAIL);
+       if (ret < 0) {
+               btrfs_free_subpage(prealloc);
+               goto out;
+       }
+
+       /* Attach all pages to the filemap. */
+       for (int i = 0; i < num_pages; i++) {
+               struct page *p;
+
+               ret = attach_eb_page_to_filemap(eb, i, &existing_eb);
+               if (ret > 0) {
+                       ASSERT(existing_eb);
+                       goto out;
                }
+               attached++;
 
+               /*
+                * Only after attach_eb_page_to_filemap(), eb->pages[] is
+                * reliable, as we may choose to reuse the existing page cache
+                * and free the allocated page.
+                */
+               p = eb->pages[i];
                spin_lock(&mapping->private_lock);
-               exists = grab_extent_buffer(fs_info, p);
-               if (exists) {
-                       spin_unlock(&mapping->private_lock);
-                       unlock_page(p);
-                       put_page(p);
-                       mark_extent_buffer_accessed(exists, p);
-                       btrfs_free_subpage(prealloc);
-                       goto free_eb;
-               }
                /* Should not fail, as we have preallocated the memory */
                ret = attach_extent_buffer_page(eb, p, prealloc);
                ASSERT(!ret);
@@ -3574,7 +3643,6 @@ struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info,
                spin_unlock(&mapping->private_lock);
 
                WARN_ON(btrfs_page_test_dirty(fs_info, p, eb->start, eb->len));
-               eb->pages[i] = p;
 
                /*
                 * Check if the current page is physically contiguous with previous eb
@@ -3601,10 +3669,8 @@ struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info,
                eb->addr = page_address(eb->pages[0]) + offset_in_page(eb->start);
 again:
        ret = radix_tree_preload(GFP_NOFS);
-       if (ret) {
-               exists = ERR_PTR(ret);
-               goto free_eb;
-       }
+       if (ret)
+               goto out;
 
        spin_lock(&fs_info->buffer_lock);
        ret = radix_tree_insert(&fs_info->buffer_radix,
@@ -3612,9 +3678,10 @@ again:
        spin_unlock(&fs_info->buffer_lock);
        radix_tree_preload_end();
        if (ret == -EEXIST) {
-               exists = find_extent_buffer(fs_info, start);
-               if (exists)
-                       goto free_eb;
+               ret = 0;
+               existing_eb = find_extent_buffer(fs_info, start);
+               if (existing_eb)
+                       goto out;
                else
                        goto again;
        }
@@ -3627,19 +3694,28 @@ again:
         * btree_release_folio will correctly detect that a page belongs to a
         * live buffer and won't free them prematurely.
         */
-       for (i = 0; i < num_pages; i++)
+       for (int i = 0; i < num_pages; i++)
                unlock_page(eb->pages[i]);
        return eb;
 
-free_eb:
+out:
        WARN_ON(!atomic_dec_and_test(&eb->refs));
-       for (i = 0; i < num_pages; i++) {
-               if (eb->pages[i])
-                       unlock_page(eb->pages[i]);
+       for (int i = 0; i < attached; i++) {
+               ASSERT(eb->pages[i]);
+               detach_extent_buffer_page(eb, eb->pages[i]);
+               unlock_page(eb->pages[i]);
        }
+       /*
+        * Now all pages of that extent buffer is unmapped, set UNMAPPED flag,
+        * so it can be cleaned up without utlizing page->mapping.
+        */
+       set_bit(EXTENT_BUFFER_UNMAPPED, &eb->bflags);
 
        btrfs_release_extent_buffer(eb);
-       return exists;
+       if (ret < 0)
+               return ERR_PTR(ret);
+       ASSERT(existing_eb);
+       return existing_eb;
 }
 
 static inline void btrfs_release_extent_buffer_rcu(struct rcu_head *head)
index c2c6bfba63c0365ed438f341657f3bdf6b123ca4..c73d53c22ec516dfd357c60fc62f33a1f032fcd9 100644 (file)
@@ -302,7 +302,8 @@ int extent_invalidate_folio(struct extent_io_tree *tree,
 void btrfs_clear_buffer_dirty(struct btrfs_trans_handle *trans,
                              struct extent_buffer *buf);
 
-int btrfs_alloc_page_array(unsigned int nr_pages, struct page **page_array);
+int btrfs_alloc_page_array(unsigned int nr_pages, struct page **page_array,
+                          gfp_t extra_gfp);
 
 #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
 bool find_lock_delalloc_range(struct inode *inode,
index 17cfba9ee2737ca7bf25b4c85d8e8766142c9793..4e8c82e5d7a622c3b1afd9b7d37d14f3e7f8a8ed 100644 (file)
@@ -10028,7 +10028,7 @@ static ssize_t btrfs_encoded_read_regular(struct kiocb *iocb,
        pages = kcalloc(nr_pages, sizeof(struct page *), GFP_NOFS);
        if (!pages)
                return -ENOMEM;
-       ret = btrfs_alloc_page_array(nr_pages, pages);
+       ret = btrfs_alloc_page_array(nr_pages, pages, 0);
        if (ret) {
                ret = -ENOMEM;
                goto out;
index 90f12c0e88a1e14e34e81157ed00a8a447ffcd99..792c8e17c31d76cba70a0702f8588120c6109389 100644 (file)
@@ -964,7 +964,7 @@ static int alloc_rbio_pages(struct btrfs_raid_bio *rbio)
 {
        int ret;
 
-       ret = btrfs_alloc_page_array(rbio->nr_pages, rbio->stripe_pages);
+       ret = btrfs_alloc_page_array(rbio->nr_pages, rbio->stripe_pages, 0);
        if (ret < 0)
                return ret;
        /* Mapping all sectors */
@@ -979,7 +979,7 @@ static int alloc_rbio_parity_pages(struct btrfs_raid_bio *rbio)
        int ret;
 
        ret = btrfs_alloc_page_array(rbio->nr_pages - data_pages,
-                                    rbio->stripe_pages + data_pages);
+                                    rbio->stripe_pages + data_pages, 0);
        if (ret < 0)
                return ret;
 
@@ -1530,7 +1530,7 @@ static int alloc_rbio_data_pages(struct btrfs_raid_bio *rbio)
        const int data_pages = rbio->nr_data * rbio->stripe_npages;
        int ret;
 
-       ret = btrfs_alloc_page_array(data_pages, rbio->stripe_pages);
+       ret = btrfs_alloc_page_array(data_pages, rbio->stripe_pages, 0);
        if (ret < 0)
                return ret;
 
index c0269e126f431f34fa0bce7d5beaff5e4d24ceef..a01807cbd4d44e4127c798e470cef51d8bfa13e6 100644 (file)
@@ -261,7 +261,7 @@ static int init_scrub_stripe(struct btrfs_fs_info *fs_info,
        atomic_set(&stripe->pending_io, 0);
        spin_lock_init(&stripe->write_error_lock);
 
-       ret = btrfs_alloc_page_array(SCRUB_STRIPE_PAGES, stripe->pages);
+       ret = btrfs_alloc_page_array(SCRUB_STRIPE_PAGES, stripe->pages, 0);
        if (ret < 0)
                goto error;