btrfs: switch btrfs_root::delayed_nodes_tree to xarray from radix-tree
author David Sterba <dsterba@suse.com>
Wed, 6 Dec 2023 14:16:03 +0000 (15:16 +0100)
committer David Sterba <dsterba@suse.com>
Fri, 15 Dec 2023 22:01:03 +0000 (23:01 +0100)
The radix-tree has been superseded by the xarray
(https://lwn.net/Articles/745073). This patch converts
btrfs_root::delayed_nodes_tree; the APIs are used in a simple way.

The first idea was to use xa_insert(), but this would require a
GFP_ATOMIC allocation, which we want to avoid if possible. The preload
mechanism of the radix-tree can be emulated within the xarray API:

- xa_reserve() with GFP_NOFS outside of the lock; the reserved entry
  is inserted atomically at most once

- xa_store() under the lock; in case something races in, we can detect
  that because xa_load() returns a valid pointer

All uses of xa_load() must check for a valid pointer in case they manage
to get between the xa_reserve() and xa_store(); this is handled in
btrfs_get_delayed_node().
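
As an illustration, here is a minimal self-contained sketch of the same
reserve-then-store pattern outside of btrfs. The names my_node, my_nodes,
my_lock and get_or_create_node() are made up for this example and are not
part of the patch; the real code uses the delayed node cache and
root->inode_lock.

/* Illustrative sketch only, not part of the patch. */
#include <linux/bug.h>
#include <linux/err.h>
#include <linux/refcount.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/xarray.h>

struct my_node {
        unsigned long id;
        refcount_t refs;
};

static DEFINE_XARRAY(my_nodes);
static DEFINE_SPINLOCK(my_lock);

static struct my_node *get_or_create_node(unsigned long id)
{
        struct my_node *node;
        void *old;
        int ret;

again:
        spin_lock(&my_lock);
        /* A reserved but not yet stored slot loads as NULL. */
        node = xa_load(&my_nodes, id);
        if (node) {
                refcount_inc(&node->refs);
                spin_unlock(&my_lock);
                return node;
        }
        spin_unlock(&my_lock);

        node = kzalloc(sizeof(*node), GFP_NOFS);
        if (!node)
                return ERR_PTR(-ENOMEM);
        node->id = id;
        refcount_set(&node->refs, 1);

        /* Allocate the slot outside the lock, like radix_tree_preload(). */
        ret = xa_reserve(&my_nodes, id, GFP_NOFS);
        if (ret) {
                kfree(node);
                return ERR_PTR(ret);
        }

        spin_lock(&my_lock);
        if (xa_load(&my_nodes, id)) {
                /* Somebody raced in and stored their node, retry the lookup. */
                spin_unlock(&my_lock);
                kfree(node);
                goto again;
        }
        /* The slot was reserved above, so this store does not allocate. */
        old = xa_store(&my_nodes, id, node, GFP_ATOMIC);
        WARN_ON(xa_err(old));
        spin_unlock(&my_lock);

        return node;
}

The conversion of btrfs_get_or_create_delayed_node() in the diff below
follows the same flow.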

Otherwise the functionality is equivalent; the xarray implements the
radix-tree and there should be no performance difference.

The patch continues the efforts started in 253bf57555e451 ("btrfs: turn
delayed_nodes_tree into an XArray") and fixes the problems with locking
and GFP flags that led to 088aea3b97e0ae ("Revert "btrfs: turn
delayed_nodes_tree into an XArray"").

Reviewed-by: Filipe Manana <fdmanana@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
fs/btrfs/ctree.h
fs/btrfs/delayed-inode.c
fs/btrfs/disk-io.c
fs/btrfs/inode.c

diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 54fd4eb92745c2947e071318a6293eb1efa3e14f..70e828d33177d6f9764b901681b5972d930c94fc 100644 (file)
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -227,10 +227,10 @@ struct btrfs_root {
        struct rb_root inode_tree;
 
        /*
-        * radix tree that keeps track of delayed nodes of every inode,
-        * protected by inode_lock
+        * Xarray that keeps track of delayed nodes of every inode, protected
+        * by @inode_lock.
         */
-       struct radix_tree_root delayed_nodes_tree;
+       struct xarray delayed_nodes;
        /*
         * right now this just gets used so that a root has its own devid
         * for stat.  It may be used for more later
diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c
index 91159dd7355b36dc97b73070bf4f3cb7abd45e9c..08102883f560a39372abe85fd855ac8d17577821 100644 (file)
--- a/fs/btrfs/delayed-inode.c
+++ b/fs/btrfs/delayed-inode.c
@@ -71,7 +71,7 @@ static struct btrfs_delayed_node *btrfs_get_delayed_node(
        }
 
        spin_lock(&root->inode_lock);
-       node = radix_tree_lookup(&root->delayed_nodes_tree, ino);
+       node = xa_load(&root->delayed_nodes, ino);
 
        if (node) {
                if (btrfs_inode->delayed_node) {
@@ -83,9 +83,9 @@ static struct btrfs_delayed_node *btrfs_get_delayed_node(
 
                /*
                 * It's possible that we're racing into the middle of removing
-                * this node from the radix tree.  In this case, the refcount
+                * this node from the xarray.  In this case, the refcount
                 * was zero and it should never go back to one.  Just return
-                * NULL like it was never in the radix at all; our release
+                * NULL like it was never in the xarray at all; our release
                 * function is in the process of removing it.
                 *
                 * Some implementations of refcount_inc refuse to bump the
@@ -93,7 +93,7 @@ static struct btrfs_delayed_node *btrfs_get_delayed_node(
                 * here, refcount_inc() may decide to just WARN_ONCE() instead
                 * of actually bumping the refcount.
                 *
-                * If this node is properly in the radix, we want to bump the
+                * If this node is properly in the xarray, we want to bump the
                 * refcount twice, once for the inode and once for this get
                 * operation.
                 */
@@ -120,6 +120,7 @@ static struct btrfs_delayed_node *btrfs_get_or_create_delayed_node(
        struct btrfs_root *root = btrfs_inode->root;
        u64 ino = btrfs_ino(btrfs_inode);
        int ret;
+       void *ptr;
 
 again:
        node = btrfs_get_delayed_node(btrfs_inode);
@@ -131,26 +132,30 @@ again:
                return ERR_PTR(-ENOMEM);
        btrfs_init_delayed_node(node, root, ino);
 
-       /* cached in the btrfs inode and can be accessed */
+       /* Cached in the inode and can be accessed. */
        refcount_set(&node->refs, 2);
 
-       ret = radix_tree_preload(GFP_NOFS);
-       if (ret) {
+       /* Allocate and reserve the slot, from now it can return a NULL from xa_load(). */
+       ret = xa_reserve(&root->delayed_nodes, ino, GFP_NOFS);
+       if (ret == -ENOMEM) {
                kmem_cache_free(delayed_node_cache, node);
-               return ERR_PTR(ret);
+               return ERR_PTR(-ENOMEM);
        }
-
        spin_lock(&root->inode_lock);
-       ret = radix_tree_insert(&root->delayed_nodes_tree, ino, node);
-       if (ret == -EEXIST) {
+       ptr = xa_load(&root->delayed_nodes, ino);
+       if (ptr) {
+               /* Somebody inserted it, go back and read it. */
                spin_unlock(&root->inode_lock);
                kmem_cache_free(delayed_node_cache, node);
-               radix_tree_preload_end();
+               node = NULL;
                goto again;
        }
+       ptr = xa_store(&root->delayed_nodes, ino, node, GFP_ATOMIC);
+       ASSERT(xa_err(ptr) != -EINVAL);
+       ASSERT(xa_err(ptr) != -ENOMEM);
+       ASSERT(ptr == NULL);
        btrfs_inode->delayed_node = node;
        spin_unlock(&root->inode_lock);
-       radix_tree_preload_end();
 
        return node;
 }
@@ -269,8 +274,7 @@ static void __btrfs_release_delayed_node(
                 * back up.  We can delete it now.
                 */
                ASSERT(refcount_read(&delayed_node->refs) == 0);
-               radix_tree_delete(&root->delayed_nodes_tree,
-                                 delayed_node->inode_id);
+               xa_erase(&root->delayed_nodes, delayed_node->inode_id);
                spin_unlock(&root->inode_lock);
                kmem_cache_free(delayed_node_cache, delayed_node);
        }
@@ -2038,34 +2042,36 @@ void btrfs_kill_delayed_inode_items(struct btrfs_inode *inode)
 
 void btrfs_kill_all_delayed_nodes(struct btrfs_root *root)
 {
-       u64 inode_id = 0;
+       unsigned long index = 0;
        struct btrfs_delayed_node *delayed_nodes[8];
-       int i, n;
 
        while (1) {
+               struct btrfs_delayed_node *node;
+               int count;
+
                spin_lock(&root->inode_lock);
-               n = radix_tree_gang_lookup(&root->delayed_nodes_tree,
-                                          (void **)delayed_nodes, inode_id,
-                                          ARRAY_SIZE(delayed_nodes));
-               if (!n) {
+               if (xa_empty(&root->delayed_nodes)) {
                        spin_unlock(&root->inode_lock);
-                       break;
+                       return;
                }
 
-               inode_id = delayed_nodes[n - 1]->inode_id + 1;
-               for (i = 0; i < n; i++) {
+               count = 0;
+               xa_for_each_start(&root->delayed_nodes, index, node, index) {
                        /*
                         * Don't increase refs in case the node is dead and
                         * about to be removed from the tree in the loop below
                         */
-                       if (!refcount_inc_not_zero(&delayed_nodes[i]->refs))
-                               delayed_nodes[i] = NULL;
+                       if (refcount_inc_not_zero(&node->refs)) {
+                               delayed_nodes[count] = node;
+                               count++;
+                       }
+                       if (count >= ARRAY_SIZE(delayed_nodes))
+                               break;
                }
                spin_unlock(&root->inode_lock);
+               index++;
 
-               for (i = 0; i < n; i++) {
-                       if (!delayed_nodes[i])
-                               continue;
+               for (int i = 0; i < count; i++) {
                        __btrfs_kill_delayed_node(delayed_nodes[i]);
                        btrfs_release_delayed_node(delayed_nodes[i]);
                }
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 00c9181fd356bfd71571ee949f86e7e8ca23f2c4..1b6afff66c328c2ee971f70acc504aa70674761b 100644 (file)
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -655,7 +655,8 @@ static void __setup_root(struct btrfs_root *root, struct btrfs_fs_info *fs_info,
        root->nr_delalloc_inodes = 0;
        root->nr_ordered_extents = 0;
        root->inode_tree = RB_ROOT;
-       INIT_RADIX_TREE(&root->delayed_nodes_tree, GFP_ATOMIC);
+       /* GFP flags are compatible with XA_FLAGS_*. */
+       xa_init_flags(&root->delayed_nodes, GFP_ATOMIC);
 
        btrfs_init_root_block_rsv(root);
 
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index a1f6e8d3b546af79b203f064b440b52fddea94c2..17cfba9ee2737ca7bf25b4c85d8e8766142c9793 100644 (file)
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -3805,7 +3805,7 @@ cache_index:
         * cache.
         *
         * This is required for both inode re-read from disk and delayed inode
-        * in delayed_nodes_tree.
+        * in the delayed_nodes xarray.
         */
        if (BTRFS_I(inode)->last_trans == btrfs_get_fs_generation(fs_info))
                set_bit(BTRFS_INODE_NEEDS_FULL_SYNC,