]> git.ipfire.org Git - thirdparty/kernel/linux.git/commitdiff
xattr: remove rbtree-based simple_xattr infrastructure
authorChristian Brauner <brauner@kernel.org>
Mon, 16 Feb 2026 13:32:02 +0000 (14:32 +0100)
committerChristian Brauner <brauner@kernel.org>
Mon, 2 Mar 2026 10:06:42 +0000 (11:06 +0100)
Now that all consumers (shmem, kernfs, pidfs) have been converted to
use the rhashtable-based simple_xattrs with pointer-based lazy
allocation, remove the legacy rbtree code path. The rhashtable
implementation provides O(1) average-case lookup with RCU-based lockless
reads, replacing the O(log n) rbtree with reader-writer spinlock
contention.

Link: https://patch.msgid.link/20260216-work-xattr-socket-v1-6-c2efa4f74cb7@kernel.org
Acked-by: Darrick J. Wong <djwong@kernel.org>
Signed-off-by: Christian Brauner <brauner@kernel.org>
fs/xattr.c
include/linux/xattr.h

index eb45ae0fd17f58fe263c35ed04c20b6b225cbf89..64803097e1dc8fd00bb3497a31548e8bafc4d25f 100644 (file)
@@ -1200,20 +1200,18 @@ void simple_xattr_free(struct simple_xattr *xattr)
 
 static void simple_xattr_rcu_free(struct rcu_head *head)
 {
-       struct simple_xattr *xattr;
+       struct simple_xattr *xattr = container_of(head, struct simple_xattr, rcu);
 
-       xattr = container_of(head, struct simple_xattr, rcu);
        simple_xattr_free(xattr);
 }
 
 /**
- * simple_xattr_free_rcu - free an xattr object after an RCU grace period
+ * simple_xattr_free_rcu - free an xattr object with RCU delay
  * @xattr: the xattr object
  *
- * Schedule RCU-deferred freeing of an xattr entry. This is used by
- * rhashtable-based callers of simple_xattr_set() that replace or remove
- * an existing entry while concurrent RCU readers may still be accessing
- * it.
+ * Free the xattr object after an RCU grace period. This must be used when
+ * the xattr was removed from a data structure that concurrent RCU readers
+ * may still be traversing. Can handle @xattr being NULL.
  */
 void simple_xattr_free_rcu(struct simple_xattr *xattr)
 {
@@ -1254,43 +1252,6 @@ struct simple_xattr *simple_xattr_alloc(const void *value, size_t size)
        return new_xattr;
 }
 
-/**
- * rbtree_simple_xattr_cmp - compare xattr name with current rbtree xattr entry
- * @key: xattr name
- * @node: current node
- *
- * Compare the xattr name with the xattr name attached to @node in the rbtree.
- *
- * Return: Negative value if continuing left, positive if continuing right, 0
- * if the xattr attached to @node matches @key.
- */
-static int rbtree_simple_xattr_cmp(const void *key, const struct rb_node *node)
-{
-       const char *xattr_name = key;
-       const struct simple_xattr *xattr;
-
-       xattr = rb_entry(node, struct simple_xattr, rb_node);
-       return strcmp(xattr->name, xattr_name);
-}
-
-/**
- * rbtree_simple_xattr_node_cmp - compare two xattr rbtree nodes
- * @new_node: new node
- * @node: current node
- *
- * Compare the xattr attached to @new_node with the xattr attached to @node.
- *
- * Return: Negative value if continuing left, positive if continuing right, 0
- * if the xattr attached to @new_node matches the xattr attached to @node.
- */
-static int rbtree_simple_xattr_node_cmp(struct rb_node *new_node,
-                                       const struct rb_node *node)
-{
-       struct simple_xattr *xattr;
-       xattr = rb_entry(new_node, struct simple_xattr, rb_node);
-       return rbtree_simple_xattr_cmp(xattr->name, node);
-}
-
 static u32 simple_xattr_hashfn(const void *data, u32 len, u32 seed)
 {
        const char *name = data;
@@ -1336,41 +1297,19 @@ static const struct rhashtable_params simple_xattr_params = {
 int simple_xattr_get(struct simple_xattrs *xattrs, const char *name,
                     void *buffer, size_t size)
 {
-       struct simple_xattr *xattr = NULL;
+       struct simple_xattr *xattr;
        int ret = -ENODATA;
 
-       if (xattrs->use_rhashtable) {
-               guard(rcu)();
-               xattr = rhashtable_lookup(&xattrs->ht, name,
-                                         simple_xattr_params);
-               if (xattr) {
-                       ret = xattr->size;
-                       if (buffer) {
-                               if (size < xattr->size)
-                                       ret = -ERANGE;
-                               else
-                                       memcpy(buffer, xattr->value,
-                                              xattr->size);
-                       }
-               }
-       } else {
-               struct rb_node *rbp;
-
-               read_lock(&xattrs->lock);
-               rbp = rb_find(name, &xattrs->rb_root,
-                             rbtree_simple_xattr_cmp);
-               if (rbp) {
-                       xattr = rb_entry(rbp, struct simple_xattr, rb_node);
-                       ret = xattr->size;
-                       if (buffer) {
-                               if (size < xattr->size)
-                                       ret = -ERANGE;
-                               else
-                                       memcpy(buffer, xattr->value,
-                                              xattr->size);
-                       }
+       guard(rcu)();
+       xattr = rhashtable_lookup(&xattrs->ht, name, simple_xattr_params);
+       if (xattr) {
+               ret = xattr->size;
+               if (buffer) {
+                       if (size < xattr->size)
+                               ret = -ERANGE;
+                       else
+                               memcpy(buffer, xattr->value, xattr->size);
                }
-               read_unlock(&xattrs->lock);
        }
        return ret;
 }
@@ -1398,6 +1337,11 @@ int simple_xattr_get(struct simple_xattrs *xattrs, const char *name,
  * nothing if XATTR_CREATE is specified in @flags or @flags is zero. For
  * XATTR_REPLACE we fail as mentioned above.
  *
+ * Note: Callers must externally serialize writes. All current callers hold
+ * the inode lock for write operations. The lookup->replace/remove sequence
+ * is not atomic with respect to the rhashtable's per-bucket locking, but
+ * is safe because writes are serialized by the caller.
+ *
  * Return: On success, the removed or replaced xattr is returned, to be freed
  * by the caller; or NULL if none. On failure a negative error code is returned.
  */
@@ -1406,7 +1350,7 @@ struct simple_xattr *simple_xattr_set(struct simple_xattrs *xattrs,
                                      size_t size, int flags)
 {
        struct simple_xattr *old_xattr = NULL;
-       int err = 0;
+       int err;
 
        CLASS(simple_xattr, new_xattr)(value, size);
        if (IS_ERR(new_xattr))
@@ -1418,119 +1362,52 @@ struct simple_xattr *simple_xattr_set(struct simple_xattrs *xattrs,
                        return ERR_PTR(-ENOMEM);
        }
 
-       if (xattrs->use_rhashtable) {
-               /*
-                * Lookup is safe without RCU here since writes are
-                * serialized by the caller.
-                */
-               old_xattr = rhashtable_lookup_fast(&xattrs->ht, name,
-                                                  simple_xattr_params);
-
-               if (old_xattr) {
-                       /* Fail if XATTR_CREATE is requested and the xattr exists. */
-                       if (flags & XATTR_CREATE)
-                               return ERR_PTR(-EEXIST);
-
-                       if (new_xattr) {
-                               err = rhashtable_replace_fast(&xattrs->ht,
-                                                            &old_xattr->hash_node,
-                                                            &new_xattr->hash_node,
-                                                            simple_xattr_params);
-                               if (err)
-                                       return ERR_PTR(err);
-                       } else {
-                               err = rhashtable_remove_fast(&xattrs->ht,
-                                                           &old_xattr->hash_node,
-                                                           simple_xattr_params);
-                               if (err)
-                                       return ERR_PTR(err);
-                       }
-               } else {
-                       /* Fail if XATTR_REPLACE is requested but no xattr is found. */
-                       if (flags & XATTR_REPLACE)
-                               return ERR_PTR(-ENODATA);
-
-                       /*
-                        * If XATTR_CREATE or no flags are specified together
-                        * with a new value simply insert it.
-                        */
-                       if (new_xattr) {
-                               err = rhashtable_insert_fast(&xattrs->ht,
-                                                           &new_xattr->hash_node,
-                                                           simple_xattr_params);
-                               if (err)
-                                       return ERR_PTR(err);
-                       }
-
-                       /*
-                        * If XATTR_CREATE or no flags are specified and
-                        * neither an old or new xattr exist then we don't
-                        * need to do anything.
-                        */
-               }
-       } else {
-               struct rb_node *parent = NULL, **rbp;
-               int ret;
-
-               write_lock(&xattrs->lock);
-               rbp = &xattrs->rb_root.rb_node;
-               while (*rbp) {
-                       parent = *rbp;
-                       ret = rbtree_simple_xattr_cmp(name, *rbp);
-                       if (ret < 0)
-                               rbp = &(*rbp)->rb_left;
-                       else if (ret > 0)
-                               rbp = &(*rbp)->rb_right;
-                       else
-                               old_xattr = rb_entry(*rbp, struct simple_xattr,
-                                                    rb_node);
-                       if (old_xattr)
-                               break;
-               }
+       /* Lookup is safe without RCU here since writes are serialized. */
+       old_xattr = rhashtable_lookup_fast(&xattrs->ht, name,
+                                          simple_xattr_params);
 
-               if (old_xattr) {
-                       /* Fail if XATTR_CREATE is requested and the xattr exists. */
-                       if (flags & XATTR_CREATE) {
-                               err = -EEXIST;
-                               goto out_unlock;
-                       }
+       if (old_xattr) {
+               /* Fail if XATTR_CREATE is requested and the xattr exists. */
+               if (flags & XATTR_CREATE)
+                       return ERR_PTR(-EEXIST);
 
-                       if (new_xattr)
-                               rb_replace_node(&old_xattr->rb_node,
-                                               &new_xattr->rb_node,
-                                               &xattrs->rb_root);
-                       else
-                               rb_erase(&old_xattr->rb_node,
-                                        &xattrs->rb_root);
+               if (new_xattr) {
+                       err = rhashtable_replace_fast(&xattrs->ht,
+                                                     &old_xattr->hash_node,
+                                                     &new_xattr->hash_node,
+                                                     simple_xattr_params);
+                       if (err)
+                               return ERR_PTR(err);
                } else {
-                       /* Fail if XATTR_REPLACE is requested but no xattr is found. */
-                       if (flags & XATTR_REPLACE) {
-                               err = -ENODATA;
-                               goto out_unlock;
-                       }
-
-                       /*
-                        * If XATTR_CREATE or no flags are specified together
-                        * with a new value simply insert it.
-                        */
-                       if (new_xattr) {
-                               rb_link_node(&new_xattr->rb_node, parent, rbp);
-                               rb_insert_color(&new_xattr->rb_node,
-                                               &xattrs->rb_root);
-                       }
+                       err = rhashtable_remove_fast(&xattrs->ht,
+                                                    &old_xattr->hash_node,
+                                                    simple_xattr_params);
+                       if (err)
+                               return ERR_PTR(err);
+               }
+       } else {
+               /* Fail if XATTR_REPLACE is requested but no xattr is found. */
+               if (flags & XATTR_REPLACE)
+                       return ERR_PTR(-ENODATA);
 
-                       /*
-                        * If XATTR_CREATE or no flags are specified and
-                        * neither an old or new xattr exist then we don't
-                        * need to do anything.
-                        */
+               /*
+                * If XATTR_CREATE or no flags are specified together with a
+                * new value simply insert it.
+                */
+               if (new_xattr) {
+                       err = rhashtable_insert_fast(&xattrs->ht,
+                                                    &new_xattr->hash_node,
+                                                    simple_xattr_params);
+                       if (err)
+                               return ERR_PTR(err);
                }
 
-out_unlock:
-               write_unlock(&xattrs->lock);
-               if (err)
-                       return ERR_PTR(err);
+               /*
+                * If XATTR_CREATE or no flags are specified and neither an
+                * old or new xattr exist then we don't need to do anything.
+                */
        }
+
        retain_and_null_ptr(new_xattr);
        return old_xattr;
 }
@@ -1572,6 +1449,7 @@ ssize_t simple_xattr_list(struct inode *inode, struct simple_xattrs *xattrs,
                          char *buffer, size_t size)
 {
        bool trusted = ns_capable_noaudit(&init_user_ns, CAP_SYS_ADMIN);
+       struct rhashtable_iter iter;
        struct simple_xattr *xattr;
        ssize_t remaining_size = size;
        int err = 0;
@@ -1595,77 +1473,34 @@ ssize_t simple_xattr_list(struct inode *inode, struct simple_xattrs *xattrs,
        if (!xattrs)
                return size - remaining_size;
 
-       if (xattrs->use_rhashtable) {
-               struct rhashtable_iter iter;
-
-               rhashtable_walk_enter(&xattrs->ht, &iter);
-               rhashtable_walk_start(&iter);
-
-               while ((xattr = rhashtable_walk_next(&iter)) != NULL) {
-                       if (IS_ERR(xattr)) {
-                               if (PTR_ERR(xattr) == -EAGAIN)
-                                       continue;
-                               err = PTR_ERR(xattr);
-                               break;
-                       }
-
-                       /* skip "trusted." attributes for unprivileged callers */
-                       if (!trusted && xattr_is_trusted(xattr->name))
-                               continue;
+       rhashtable_walk_enter(&xattrs->ht, &iter);
+       rhashtable_walk_start(&iter);
 
-                       /* skip MAC labels; these are provided by LSM above */
-                       if (xattr_is_maclabel(xattr->name))
+       while ((xattr = rhashtable_walk_next(&iter)) != NULL) {
+               if (IS_ERR(xattr)) {
+                       if (PTR_ERR(xattr) == -EAGAIN)
                                continue;
-
-                       err = xattr_list_one(&buffer, &remaining_size,
-                                            xattr->name);
-                       if (err)
-                               break;
+                       err = PTR_ERR(xattr);
+                       break;
                }
 
-               rhashtable_walk_stop(&iter);
-               rhashtable_walk_exit(&iter);
-       } else {
-               struct rb_node *rbp;
-
-               read_lock(&xattrs->lock);
-               for (rbp = rb_first(&xattrs->rb_root); rbp;
-                    rbp = rb_next(rbp)) {
-                       xattr = rb_entry(rbp, struct simple_xattr, rb_node);
-
-                       /* skip "trusted." attributes for unprivileged callers */
-                       if (!trusted && xattr_is_trusted(xattr->name))
-                               continue;
+               /* skip "trusted." attributes for unprivileged callers */
+               if (!trusted && xattr_is_trusted(xattr->name))
+                       continue;
 
-                       /* skip MAC labels; these are provided by LSM above */
-                       if (xattr_is_maclabel(xattr->name))
-                               continue;
+               /* skip MAC labels; these are provided by LSM above */
+               if (xattr_is_maclabel(xattr->name))
+                       continue;
 
-                       err = xattr_list_one(&buffer, &remaining_size,
-                                            xattr->name);
-                       if (err)
-                               break;
-               }
-               read_unlock(&xattrs->lock);
+               err = xattr_list_one(&buffer, &remaining_size, xattr->name);
+               if (err)
+                       break;
        }
 
-       return err ? err : size - remaining_size;
-}
+       rhashtable_walk_stop(&iter);
+       rhashtable_walk_exit(&iter);
 
-/**
- * rbtree_simple_xattr_less - compare two xattr rbtree nodes
- * @new_node: new node
- * @node: current node
- *
- * Compare the xattr attached to @new_node with the xattr attached to @node.
- * Note that this function technically tolerates duplicate entries.
- *
- * Return: True if insertion point in the rbtree is found.
- */
-static bool rbtree_simple_xattr_less(struct rb_node *new_node,
-                                    const struct rb_node *node)
-{
-       return rbtree_simple_xattr_node_cmp(new_node, node) < 0;
+       return err ? err : size - remaining_size;
 }
 
 /**
@@ -1676,33 +1511,29 @@ static bool rbtree_simple_xattr_less(struct rb_node *new_node,
  * Add an xattr object to @xattrs. This assumes no replacement or removal
  * of matching xattrs is wanted. Should only be called during inode
  * initialization when a few distinct initial xattrs are supposed to be set.
+ *
+ * Return: On success zero is returned. On failure a negative error code is
+ * returned.
  */
 int simple_xattr_add(struct simple_xattrs *xattrs,
                     struct simple_xattr *new_xattr)
 {
-       if (xattrs->use_rhashtable)
-               return rhashtable_insert_fast(&xattrs->ht,
-                                             &new_xattr->hash_node,
-                                             simple_xattr_params);
-
-       write_lock(&xattrs->lock);
-       rb_add(&new_xattr->rb_node, &xattrs->rb_root,
-              rbtree_simple_xattr_less);
-       write_unlock(&xattrs->lock);
-       return 0;
+       return rhashtable_insert_fast(&xattrs->ht, &new_xattr->hash_node,
+                                     simple_xattr_params);
 }
 
 /**
  * simple_xattrs_init - initialize new xattr header
  * @xattrs: header to initialize
  *
- * Initialize relevant fields of a an xattr header.
+ * Initialize the rhashtable used to store xattr objects.
+ *
+ * Return: On success zero is returned. On failure a negative error code is
+ * returned.
  */
-void simple_xattrs_init(struct simple_xattrs *xattrs)
+int simple_xattrs_init(struct simple_xattrs *xattrs)
 {
-       xattrs->use_rhashtable = false;
-       xattrs->rb_root = RB_ROOT;
-       rwlock_init(&xattrs->lock);
+       return rhashtable_init(&xattrs->ht, &simple_xattr_params);
 }
 
 /**
@@ -1710,7 +1541,8 @@ void simple_xattrs_init(struct simple_xattrs *xattrs)
  *
  * Dynamically allocate a simple_xattrs header and initialize the
  * underlying rhashtable. This is intended for consumers that want
- * rhashtable-based xattr storage.
+ * to lazily allocate xattr storage only when the first xattr is set,
+ * avoiding the per-inode rhashtable overhead when no xattrs are used.
  *
  * Return: On success a new simple_xattrs is returned. On failure an
  * ERR_PTR is returned.
@@ -1718,14 +1550,15 @@ void simple_xattrs_init(struct simple_xattrs *xattrs)
 struct simple_xattrs *simple_xattrs_alloc(void)
 {
        struct simple_xattrs *xattrs __free(kfree) = NULL;
+       int ret;
 
        xattrs = kzalloc(sizeof(*xattrs), GFP_KERNEL);
        if (!xattrs)
                return ERR_PTR(-ENOMEM);
 
-       xattrs->use_rhashtable = true;
-       if (rhashtable_init(&xattrs->ht, &simple_xattr_params))
-               return ERR_PTR(-ENOMEM);
+       ret = simple_xattrs_init(xattrs);
+       if (ret)
+               return ERR_PTR(ret);
 
        return no_free_ptr(xattrs);
 }
@@ -1784,28 +1617,10 @@ static void simple_xattr_ht_free(void *ptr, void *arg)
  */
 void simple_xattrs_free(struct simple_xattrs *xattrs, size_t *freed_space)
 {
+       might_sleep();
+
        if (freed_space)
                *freed_space = 0;
-
-       if (xattrs->use_rhashtable) {
-               rhashtable_free_and_destroy(&xattrs->ht,
-                                           simple_xattr_ht_free, freed_space);
-       } else {
-               struct rb_node *rbp;
-
-               rbp = rb_first(&xattrs->rb_root);
-               while (rbp) {
-                       struct simple_xattr *xattr;
-                       struct rb_node *rbp_next;
-
-                       rbp_next = rb_next(rbp);
-                       xattr = rb_entry(rbp, struct simple_xattr, rb_node);
-                       rb_erase(&xattr->rb_node, &xattrs->rb_root);
-                       if (freed_space)
-                               *freed_space += simple_xattr_space(xattr->name,
-                                                                  xattr->size);
-                       simple_xattr_free(xattr);
-                       rbp = rbp_next;
-               }
-       }
+       rhashtable_free_and_destroy(&xattrs->ht, simple_xattr_ht_free,
+                                   freed_space);
 }
index 6e619e185e90dd46a97509f81ef174dcb4d8aa0e..3b5a5fd684ebe8d6e5e490e0649b967f9d5f02f6 100644 (file)
@@ -107,18 +107,10 @@ static inline const char *xattr_prefix(const struct xattr_handler *handler)
 }
 
 struct simple_xattrs {
-       bool use_rhashtable;
-       union {
-               struct {
-                       struct rb_root rb_root;
-                       rwlock_t lock;
-               };
-               struct rhashtable ht;
-       };
+       struct rhashtable ht;
 };
 
 struct simple_xattr {
-       struct rb_node rb_node;
        struct rhash_head hash_node;
        struct rcu_head rcu;
        char *name;
@@ -126,7 +118,7 @@ struct simple_xattr {
        char value[] __counted_by(size);
 };
 
-void simple_xattrs_init(struct simple_xattrs *xattrs);
+int simple_xattrs_init(struct simple_xattrs *xattrs);
 struct simple_xattrs *simple_xattrs_alloc(void);
 struct simple_xattrs *simple_xattrs_lazy_alloc(struct simple_xattrs **xattrsp,
                                               const void *value, int flags);