btrfs: refactor alloc_extent_buffer() to allocate-then-attach method

author Qu Wenruo <wqu@suse.com>

Wed, 29 Nov 2023 22:32:08 +0000 (09:02 +1030)

committer David Sterba <dsterba@suse.com>

Fri, 15 Dec 2023 22:01:04 +0000 (23:01 +0100)
author Qu Wenruo <wqu@suse.com>
Wed, 29 Nov 2023 22:32:08 +0000 (09:02 +1030)
committer David Sterba <dsterba@suse.com>
Fri, 15 Dec 2023 22:01:04 +0000 (23:01 +0100)
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c

index 2d9974c283c6cb14e1823514fdc08ad0a586535c..5678e73f1509f33571c47cce495a9966b2ee5ed2 100644 (file)
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -608,7 +608,7 @@ void btrfs_submit_compressed_read(struct btrfs_bio *bbio)
                 goto out_free_bio;
         }
  
-       ret2 = btrfs_alloc_page_array(cb->nr_pages, cb->compressed_pages);
+       ret2 = btrfs_alloc_page_array(cb->nr_pages, cb->compressed_pages, 0);
         if (ret2) {
                 ret = BLK_STS_RESOURCE;
                 goto out_free_compressed_pages;
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c

index 921e9e17fc65a41967be06175afc455eb5d91e17..cecd8939e99a96cd579be52c92f17a0ec62dfad4 100644 (file)
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -674,19 +674,22 @@ static void end_bio_extent_readpage(struct btrfs_bio *bbio)
   * @nr_pages:   number of pages to allocate
   * @page_array: the array to fill with pages; any existing non-null entries in
   *             the array will be skipped
+ * @extra_gfp: the extra GFP flags for the allocation.
   *
   * Return: 0        if all pages were able to be allocated;
   *         -ENOMEM  otherwise, the partially allocated pages would be freed and
   *                  the array slots zeroed
   */
-int btrfs_alloc_page_array(unsigned int nr_pages, struct page **page_array)
+int btrfs_alloc_page_array(unsigned int nr_pages, struct page **page_array,
+                          gfp_t extra_gfp)
  {
         unsigned int allocated;
  
         for (allocated = 0; allocated < nr_pages;) {
                 unsigned int last = allocated;
  
-               allocated = alloc_pages_bulk_array(GFP_NOFS, nr_pages, page_array);
+               allocated = alloc_pages_bulk_array(GFP_NOFS | extra_gfp,
+                                                  nr_pages, page_array);
  
                 if (allocated == nr_pages)
                         return 0;
@@ -3219,7 +3222,7 @@ struct extent_buffer *btrfs_clone_extent_buffer(const struct extent_buffer *src)
          */
         set_bit(EXTENT_BUFFER_UNMAPPED, &new->bflags);
  
-       ret = btrfs_alloc_page_array(num_pages, new->pages);
+       ret = btrfs_alloc_page_array(num_pages, new->pages, 0);
         if (ret) {
                 btrfs_release_extent_buffer(new);
                 return NULL;
@@ -3255,7 +3258,7 @@ struct extent_buffer *__alloc_dummy_extent_buffer(struct btrfs_fs_info *fs_info,
                 return NULL;
  
         num_pages = num_extent_pages(eb);
-       ret = btrfs_alloc_page_array(num_pages, eb->pages);
+       ret = btrfs_alloc_page_array(num_pages, eb->pages, 0);
         if (ret)
                 goto err;
  
@@ -3475,16 +3478,75 @@ static int check_eb_alignment(struct btrfs_fs_info *fs_info, u64 start)
         return 0;
  }
  
+
+/*
+ * Return 0 if eb->pages[i] is attached to btree inode successfully.
+ * Return >0 if there is already annother extent buffer for the range,
+ * and @found_eb_ret would be updated.
+ */
+static int attach_eb_page_to_filemap(struct extent_buffer *eb, int i,
+                                    struct extent_buffer **found_eb_ret)
+{
+
+       struct btrfs_fs_info *fs_info = eb->fs_info;
+       struct address_space *mapping = fs_info->btree_inode->i_mapping;
+       const unsigned long index = eb->start >> PAGE_SHIFT;
+       struct folio *existing_folio;
+       int ret;
+
+       ASSERT(found_eb_ret);
+
+       /* Caller should ensure the page exists. */
+       ASSERT(eb->pages[i]);
+
+retry:
+       ret = filemap_add_folio(mapping, page_folio(eb->pages[i]), index + i,
+                               GFP_NOFS | __GFP_NOFAIL);
+       if (!ret)
+               return 0;
+
+       existing_folio = filemap_lock_folio(mapping, index + i);
+       /* The page cache only exists for a very short time, just retry. */
+       if (IS_ERR(existing_folio))
+               goto retry;
+
+       /* For now, we should only have single-page folios for btree inode. */
+       ASSERT(folio_nr_pages(existing_folio) == 1);
+
+       if (fs_info->nodesize < PAGE_SIZE) {
+               /*
+                * We're going to reuse the existing page, can drop our page
+                * and subpage structure now.
+                */
+               __free_page(eb->pages[i]);
+               eb->pages[i] = folio_page(existing_folio, 0);
+       } else {
+               struct extent_buffer *existing_eb;
+
+               existing_eb = grab_extent_buffer(fs_info,
+                                                folio_page(existing_folio, 0));
+               if (existing_eb) {
+                       /* The extent buffer still exists, we can use it directly. */
+                       *found_eb_ret = existing_eb;
+                       folio_unlock(existing_folio);
+                       folio_put(existing_folio);
+                       return 1;
+               }
+               /* The extent buffer no longer exists, we can reuse the folio. */
+               __free_page(eb->pages[i]);
+               eb->pages[i] = folio_page(existing_folio, 0);
+       }
+       return 0;
+}
+
  struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info,
                                           u64 start, u64 owner_root, int level)
  {
         unsigned long len = fs_info->nodesize;
         int num_pages;
-       int i;
-       unsigned long index = start >> PAGE_SHIFT;
+       int attached = 0;
         struct extent_buffer *eb;
-       struct extent_buffer *exists = NULL;
-       struct page *p;
+       struct extent_buffer *existing_eb = NULL;
         struct address_space *mapping = fs_info->btree_inode->i_mapping;
         struct btrfs_subpage *prealloc = NULL;
         u64 lockdep_owner = owner_root;
@@ -3535,29 +3597,36 @@ struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info,
         if (fs_info->nodesize < PAGE_SIZE) {
                 prealloc = btrfs_alloc_subpage(fs_info, BTRFS_SUBPAGE_METADATA);
                 if (IS_ERR(prealloc)) {
-                       exists = ERR_CAST(prealloc);
-                       goto free_eb;
+                       ret = PTR_ERR(prealloc);
+                       goto out;
                 }
         }
  
-       for (i = 0; i < num_pages; i++, index++) {
-               p = find_or_create_page(mapping, index, GFP_NOFS|__GFP_NOFAIL);
-               if (!p) {
-                       exists = ERR_PTR(-ENOMEM);
-                       btrfs_free_subpage(prealloc);
-                       goto free_eb;
+       /* Allocate all pages first. */
+       ret = btrfs_alloc_page_array(num_pages, eb->pages, __GFP_NOFAIL);
+       if (ret < 0) {
+               btrfs_free_subpage(prealloc);
+               goto out;
+       }
+
+       /* Attach all pages to the filemap. */
+       for (int i = 0; i < num_pages; i++) {
+               struct page *p;
+
+               ret = attach_eb_page_to_filemap(eb, i, &existing_eb);
+               if (ret > 0) {
+                       ASSERT(existing_eb);
+                       goto out;
                 }
+               attached++;
  
+               /*
+                * Only after attach_eb_page_to_filemap(), eb->pages[] is
+                * reliable, as we may choose to reuse the existing page cache
+                * and free the allocated page.
+                */
+               p = eb->pages[i];
                 spin_lock(&mapping->private_lock);
-               exists = grab_extent_buffer(fs_info, p);
-               if (exists) {
-                       spin_unlock(&mapping->private_lock);
-                       unlock_page(p);
-                       put_page(p);
-                       mark_extent_buffer_accessed(exists, p);
-                       btrfs_free_subpage(prealloc);
-                       goto free_eb;
-               }
                 /* Should not fail, as we have preallocated the memory */
                 ret = attach_extent_buffer_page(eb, p, prealloc);
                 ASSERT(!ret);
@@ -3574,7 +3643,6 @@ struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info,
                 spin_unlock(&mapping->private_lock);
  
                 WARN_ON(btrfs_page_test_dirty(fs_info, p, eb->start, eb->len));
-               eb->pages[i] = p;
  
                 /*
                  * Check if the current page is physically contiguous with previous eb
@@ -3601,10 +3669,8 @@ struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info,
                 eb->addr = page_address(eb->pages[0]) + offset_in_page(eb->start);
  again:
         ret = radix_tree_preload(GFP_NOFS);
-       if (ret) {
-               exists = ERR_PTR(ret);
-               goto free_eb;
-       }
+       if (ret)
+               goto out;
  
         spin_lock(&fs_info->buffer_lock);
         ret = radix_tree_insert(&fs_info->buffer_radix,
@@ -3612,9 +3678,10 @@ again:
         spin_unlock(&fs_info->buffer_lock);
         radix_tree_preload_end();
         if (ret == -EEXIST) {
-               exists = find_extent_buffer(fs_info, start);
-               if (exists)
-                       goto free_eb;
+               ret = 0;
+               existing_eb = find_extent_buffer(fs_info, start);
+               if (existing_eb)
+                       goto out;
                 else
                         goto again;
         }
@@ -3627,19 +3694,28 @@ again:
          * btree_release_folio will correctly detect that a page belongs to a
          * live buffer and won't free them prematurely.
          */
-       for (i = 0; i < num_pages; i++)
+       for (int i = 0; i < num_pages; i++)
                 unlock_page(eb->pages[i]);
         return eb;
  
-free_eb:
+out:
         WARN_ON(!atomic_dec_and_test(&eb->refs));
-       for (i = 0; i < num_pages; i++) {
-               if (eb->pages[i])
-                       unlock_page(eb->pages[i]);
+       for (int i = 0; i < attached; i++) {
+               ASSERT(eb->pages[i]);
+               detach_extent_buffer_page(eb, eb->pages[i]);
+               unlock_page(eb->pages[i]);
         }
+       /*
+        * Now all pages of that extent buffer is unmapped, set UNMAPPED flag,
+        * so it can be cleaned up without utlizing page->mapping.
+        */
+       set_bit(EXTENT_BUFFER_UNMAPPED, &eb->bflags);
  
         btrfs_release_extent_buffer(eb);
-       return exists;
+       if (ret < 0)
+               return ERR_PTR(ret);
+       ASSERT(existing_eb);
+       return existing_eb;
  }
  
  static inline void btrfs_release_extent_buffer_rcu(struct rcu_head *head)
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h

index c2c6bfba63c0365ed438f341657f3bdf6b123ca4..c73d53c22ec516dfd357c60fc62f33a1f032fcd9 100644 (file)
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -302,7 +302,8 @@ int extent_invalidate_folio(struct extent_io_tree *tree,
  void btrfs_clear_buffer_dirty(struct btrfs_trans_handle *trans,
                               struct extent_buffer *buf);
  
-int btrfs_alloc_page_array(unsigned int nr_pages, struct page **page_array);
+int btrfs_alloc_page_array(unsigned int nr_pages, struct page **page_array,
+                          gfp_t extra_gfp);
  
  #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
  bool find_lock_delalloc_range(struct inode *inode,
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c

index 17cfba9ee2737ca7bf25b4c85d8e8766142c9793..4e8c82e5d7a622c3b1afd9b7d37d14f3e7f8a8ed 100644 (file)
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -10028,7 +10028,7 @@ static ssize_t btrfs_encoded_read_regular(struct kiocb *iocb,
         pages = kcalloc(nr_pages, sizeof(struct page *), GFP_NOFS);
         if (!pages)
                 return -ENOMEM;
-       ret = btrfs_alloc_page_array(nr_pages, pages);
+       ret = btrfs_alloc_page_array(nr_pages, pages, 0);
         if (ret) {
                 ret = -ENOMEM;
                 goto out;
diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c

index 90f12c0e88a1e14e34e81157ed00a8a447ffcd99..792c8e17c31d76cba70a0702f8588120c6109389 100644 (file)
--- a/fs/btrfs/raid56.c
+++ b/fs/btrfs/raid56.c
@@ -964,7 +964,7 @@ static int alloc_rbio_pages(struct btrfs_raid_bio *rbio)
  {
         int ret;
  
-       ret = btrfs_alloc_page_array(rbio->nr_pages, rbio->stripe_pages);
+       ret = btrfs_alloc_page_array(rbio->nr_pages, rbio->stripe_pages, 0);
         if (ret < 0)
                 return ret;
         /* Mapping all sectors */
@@ -979,7 +979,7 @@ static int alloc_rbio_parity_pages(struct btrfs_raid_bio *rbio)
         int ret;
  
         ret = btrfs_alloc_page_array(rbio->nr_pages - data_pages,
-                                    rbio->stripe_pages + data_pages);
+                                    rbio->stripe_pages + data_pages, 0);
         if (ret < 0)
                 return ret;
  
@@ -1530,7 +1530,7 @@ static int alloc_rbio_data_pages(struct btrfs_raid_bio *rbio)
         const int data_pages = rbio->nr_data * rbio->stripe_npages;
         int ret;
  
-       ret = btrfs_alloc_page_array(data_pages, rbio->stripe_pages);
+       ret = btrfs_alloc_page_array(data_pages, rbio->stripe_pages, 0);
         if (ret < 0)
                 return ret;
  
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c

index c0269e126f431f34fa0bce7d5beaff5e4d24ceef..a01807cbd4d44e4127c798e470cef51d8bfa13e6 100644 (file)
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -261,7 +261,7 @@ static int init_scrub_stripe(struct btrfs_fs_info *fs_info,
         atomic_set(&stripe->pending_io, 0);
         spin_lock_init(&stripe->write_error_lock);
  
-       ret = btrfs_alloc_page_array(SCRUB_STRIPE_PAGES, stripe->pages);
+       ret = btrfs_alloc_page_array(SCRUB_STRIPE_PAGES, stripe->pages, 0);
         if (ret < 0)
                 goto error;
author	Qu Wenruo <wqu@suse.com>
	Wed, 29 Nov 2023 22:32:08 +0000 (09:02 +1030)
committer	David Sterba <dsterba@suse.com>
	Fri, 15 Dec 2023 22:01:04 +0000 (23:01 +0100)
fs/btrfs/compression.c		patch \| blob \| blame \| history
fs/btrfs/extent_io.c		patch \| blob \| blame \| history
fs/btrfs/extent_io.h		patch \| blob \| blame \| history
fs/btrfs/inode.c		patch \| blob \| blame \| history
fs/btrfs/raid56.c		patch \| blob \| blame \| history
fs/btrfs/scrub.c		patch \| blob \| blame \| history