bpf: Add simple xattr support to bpffs

author Daniel Borkmann <daniel@iogearbox.net>

Tue, 2 Jun 2026 07:40:12 +0000 (09:40 +0200)

committer Christian Brauner <brauner@kernel.org>

Sat, 6 Jun 2026 13:22:44 +0000 (15:22 +0200)
author Daniel Borkmann <daniel@iogearbox.net>
Tue, 2 Jun 2026 07:40:12 +0000 (09:40 +0200)
committer Christian Brauner <brauner@kernel.org>
Sat, 6 Jun 2026 13:22:44 +0000 (15:22 +0200)
diff --git a/fs/xattr.c b/fs/xattr.c

index 89374cd9029a773981e06987e64a3108da8a72fb..ec2a4f3759d8b68553ce02266549b303d823c9f7 100644 (file)
--- a/fs/xattr.c
+++ b/fs/xattr.c
@@ -1678,6 +1678,39 @@ int simple_xattr_add(struct simple_xattr_cache *cache, struct list_head *xattrs,
         return 0;
  }
  
+/**
+ * simple_xattr_add_limited - insert an xattr and charge it to per-inode limits
+ * @cache: anchor for the hash table
+ * @xattrs: the header of the xattr object
+ * @limits: per-inode limit counters to charge
+ * @new_xattr: the xattr object to add
+ *
+ * Charges @new_xattr->size to @limits via simple_xattr_limits_inc() and then
+ * inserts @new_xattr with simple_xattr_add(). If the insertion fails, the
+ * charge is dropped again with simple_xattr_limits_dec(), so @limits only
+ * accounts for xattrs that were actually added.
+ *
+ * Meant for seeding initial xattrs that share a namespace with the
+ * simple_xattr_set_limited() path, so that a later removal or replacement
+ * there decrements counters that were really incremented instead of
+ * underflowing them. Use it in place of simple_xattr_add() in that case.
+ *
+ * Return: Zero on success. On failure a negative error code is returned,
+ * either from the limit charge or from simple_xattr_add().
+ */
+int simple_xattr_add_limited(struct simple_xattr_cache *cache,
+                            struct list_head *xattrs,
+                            struct simple_xattr_limits *limits,
+                            struct simple_xattr *new_xattr)
+{
+       int ret = simple_xattr_limits_inc(limits, new_xattr->size);
+
+       if (ret)
+               return ret;
+
+       ret = simple_xattr_add(cache, xattrs, new_xattr);
+       if (!ret)
+               return 0;
+
+       /* Insertion failed: give back what was charged above. */
+       simple_xattr_limits_dec(limits, new_xattr->size);
+       return ret;
+}
+
  /**
   * simple_xattrs_free - free xattrs
   * @cache: anchor for the hash table
diff --git a/include/linux/bpf.h b/include/linux/bpf.h

index b4b703c90ca94f2528f04d87a9d429b7c6b70d6e..434ba91401c68ee74980d69aa95f6dcdc0938bac 100644 (file)
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -31,6 +31,7 @@
  #include <linux/static_call.h>
  #include <linux/memcontrol.h>
  #include <linux/cfi.h>
+#include <linux/xattr.h>
  #include <asm/rqspinlock.h>
  
  struct bpf_verifier_env;
@@ -1918,6 +1919,8 @@ struct bpf_mount_opts {
         u64 delegate_maps;
         u64 delegate_progs;
         u64 delegate_attachs;
+
+       struct simple_xattr_cache xa_cache;
  };
  
  struct bpf_token {
diff --git a/include/linux/xattr.h b/include/linux/xattr.h

index 7aaaf4f8aff5be658ef5c454b44208565e115236..54ac3cbc133f86fa31cb9666af9bee8a101a92c2 100644 (file)
--- a/include/linux/xattr.h
+++ b/include/linux/xattr.h
@@ -155,6 +155,10 @@ ssize_t simple_xattr_list(struct inode *inode, struct list_head *xattrs,
                           char *buffer, size_t size);
  int simple_xattr_add(struct simple_xattr_cache *cache, struct list_head *xattrs,
                      struct simple_xattr *new_xattr);
+int simple_xattr_add_limited(struct simple_xattr_cache *cache,
+                            struct list_head *xattrs,
+                            struct simple_xattr_limits *limits,
+                            struct simple_xattr *new_xattr);
  int xattr_list_one(char **buffer, ssize_t *remaining_size, const char *name);
  
  void simple_xattr_cache_cleanup(struct simple_xattr_cache *cache);
diff --git a/kernel/bpf/inode.c b/kernel/bpf/inode.c

index 25c06a0118258dcafd8af0fff63458ff7272e65d..c3f79b5a2f8c0ce439b87731f0e5d74dc56413af 100644 (file)
--- a/kernel/bpf/inode.c
+++ b/kernel/bpf/inode.c
@@ -21,6 +21,9 @@
  #include <linux/bpf.h>
  #include <linux/bpf_trace.h>
  #include <linux/kstrtox.h>
+#include <linux/xattr.h>
+#include <linux/security.h>
+
  #include "preload/bpf_preload.h"
  
  enum bpf_type {
@@ -30,6 +33,23 @@ enum bpf_type {
         BPF_TYPE_LINK,
  };
  
+/**
+ * struct bpf_fs_inode - bpffs inode with simple xattr state
+ * @xattrs: list head of the xattrs attached to this inode
+ * @xlimits: limit counters passed to the *_limited xattr helpers
+ * @vfs_inode: embedded VFS inode; BPF_FS_I() maps it back to this container
+ */
+struct bpf_fs_inode {
+       struct list_head                xattrs;
+       struct simple_xattr_limits      xlimits;
+       struct inode                    vfs_inode;
+};
+
+/* Map a VFS inode embedded in a struct bpf_fs_inode back to its container. */
+static inline struct bpf_fs_inode *BPF_FS_I(struct inode *inode)
+{
+       struct bpf_fs_inode *bi;
+
+       bi = container_of(inode, struct bpf_fs_inode, vfs_inode);
+       return bi;
+}
+
+/* Slab cache backing struct bpf_fs_inode; created in bpf_init(). */
+static struct kmem_cache *bpf_fs_inode_cachep __ro_after_init;
+
+/*
+ * Defined further down; declared here because the inode creation paths and
+ * the inode_operations tables reference them before their definitions.
+ */
+static int bpf_fs_initxattrs(struct inode *inode,
+                            const struct xattr *xattr_array, void *fs_info);
+static ssize_t bpf_fs_listxattr(struct dentry *dentry, char *buf, size_t size);
+
  static void *bpf_any_get(void *raw, enum bpf_type type)
  {
         switch (type) {
@@ -94,10 +114,17 @@ static void *bpf_fd_probe_obj(u32 ufd, enum bpf_type *type)
  }
  
  static const struct inode_operations bpf_dir_iops;
+static const struct inode_operations bpf_symlink_iops;
  
-static const struct inode_operations bpf_prog_iops = { };
-static const struct inode_operations bpf_map_iops  = { };
-static const struct inode_operations bpf_link_iops  = { };
+/*
+ * Inode operations for prog, map and link inodes: each table only provides
+ * xattr listing.
+ */
+static const struct inode_operations bpf_prog_iops = {
+       .listxattr      = bpf_fs_listxattr,
+};
+static const struct inode_operations bpf_map_iops  = {
+       .listxattr      = bpf_fs_listxattr,
+};
+static const struct inode_operations bpf_link_iops  = {
+       .listxattr      = bpf_fs_listxattr,
+};
  
  struct inode *bpf_get_inode(struct super_block *sb,
                             const struct inode *dir,
@@ -153,11 +180,19 @@ static struct dentry *bpf_mkdir(struct mnt_idmap *idmap, struct inode *dir,
                                 struct dentry *dentry, umode_t mode)
  {
         struct inode *inode;
+       int ret;
  
         inode = bpf_get_inode(dir->i_sb, dir, mode | S_IFDIR);
         if (IS_ERR(inode))
                 return ERR_CAST(inode);
  
+       ret = security_inode_init_security(inode, dir, &dentry->d_name,
+                                          bpf_fs_initxattrs, NULL);
+       if (ret && ret != -EOPNOTSUPP) {
+               iput(inode);
+               return ERR_PTR(ret);
+       }
+
         inode->i_op = &bpf_dir_iops;
         inode->i_fop = &simple_dir_operations;
  
@@ -330,10 +365,20 @@ static int bpf_mkobj_ops(struct dentry *dentry, umode_t mode, void *raw,
                          const struct file_operations *fops)
  {
         struct inode *dir = dentry->d_parent->d_inode;
-       struct inode *inode = bpf_get_inode(dir->i_sb, dir, mode);
+       struct inode *inode;
+       int ret;
+
+       inode = bpf_get_inode(dir->i_sb, dir, mode);
         if (IS_ERR(inode))
                 return PTR_ERR(inode);
  
+       ret = security_inode_init_security(inode, dir, &dentry->d_name,
+                                          bpf_fs_initxattrs, NULL);
+       if (ret && ret != -EOPNOTSUPP) {
+               iput(inode);
+               return ret;
+       }
+
         inode->i_op = iops;
         inode->i_fop = fops;
         inode->i_private = raw;
@@ -382,9 +427,11 @@ bpf_lookup(struct inode *dir, struct dentry *dentry, unsigned flags)
  static int bpf_symlink(struct mnt_idmap *idmap, struct inode *dir,
                        struct dentry *dentry, const char *target)
  {
-       char *link = kstrdup(target, GFP_USER | __GFP_NOWARN);
         struct inode *inode;
+       char *link;
+       int ret;
  
+       link = kstrdup(target, GFP_KERNEL_ACCOUNT | __GFP_NOWARN);
         if (!link)
                 return -ENOMEM;
  
@@ -394,13 +441,25 @@ static int bpf_symlink(struct mnt_idmap *idmap, struct inode *dir,
                 return PTR_ERR(inode);
         }
  
-       inode->i_op = &simple_symlink_inode_operations;
+       inode->i_op = &bpf_symlink_iops;
         inode->i_link = link;
  
+       ret = security_inode_init_security(inode, dir, &dentry->d_name,
+                                          bpf_fs_initxattrs, NULL);
+       if (ret && ret != -EOPNOTSUPP) {
+               iput(inode);
+               return ret;
+       }
+
         bpf_dentry_finalize(dentry, inode, dir);
         return 0;
  }
  
+/* Symlink inode operations: simple_get_link() plus xattr listing. */
+static const struct inode_operations bpf_symlink_iops = {
+       .get_link       = simple_get_link,
+       .listxattr      = bpf_fs_listxattr,
+};
+
  static const struct inode_operations bpf_dir_iops = {
         .lookup         = bpf_lookup,
         .mkdir          = bpf_mkdir,
@@ -409,6 +468,7 @@ static const struct inode_operations bpf_dir_iops = {
         .rename         = simple_rename,
         .link           = simple_link,
         .unlink         = simple_unlink,
+       .listxattr      = bpf_fs_listxattr,
  };
  
  /* pin iterator link into bpffs */
@@ -762,22 +822,147 @@ static int bpf_show_options(struct seq_file *m, struct dentry *root)
         return 0;
  }
  
+/*
+ * ->alloc_inode(): allocate a bpffs inode from bpf_fs_inode_cachep and set up
+ * its xattr list head and limit counters. Returns the embedded VFS inode, or
+ * NULL if the allocation failed.
+ */
+static struct inode *bpf_fs_alloc_inode(struct super_block *sb)
+{
+       struct bpf_fs_inode *node = alloc_inode_sb(sb, bpf_fs_inode_cachep,
+                                                  GFP_KERNEL);
+
+       if (!node)
+               return NULL;
+
+       simple_xattr_limits_init(&node->xlimits);
+       INIT_LIST_HEAD_RCU(&node->xattrs);
+       return &node->vfs_inode;
+}
+
  static void bpf_destroy_inode(struct inode *inode)
  {
+       struct bpf_mount_opts *opts = inode->i_sb->s_fs_info;
+       struct bpf_fs_inode *bi = BPF_FS_I(inode);
         enum bpf_type type;
  
-       if (S_ISLNK(inode->i_mode))
-               kfree(inode->i_link);
         if (!bpf_inode_type(inode, &type))
                 bpf_any_put(inode->i_private, type);
-       free_inode_nonrcu(inode);
+       /*
+        * Only the xattrs are released here; the symlink target and the
+        * inode memory itself are freed in bpf_free_inode().
+        */
+       simple_xattrs_free(&opts->xa_cache, &bi->xattrs, NULL);
+}
+
+/*
+ * ->free_inode(): free the symlink target if the inode is a symlink, then
+ * hand the containing bpf_fs_inode back to bpf_fs_inode_cachep.
+ */
+static void bpf_free_inode(struct inode *inode)
+{
+       struct bpf_fs_inode *bi = BPF_FS_I(inode);
+
+       if (S_ISLNK(inode->i_mode))
+               kfree(inode->i_link);
+
+       kmem_cache_free(bpf_fs_inode_cachep, bi);
+}
+
+/*
+ * ->get() callback shared by the bpffs xattr handlers: rebuild the full
+ * attribute name from @handler and @name, then look it up in the xattr list
+ * of @inode. The result of simple_xattr_get() is returned unchanged.
+ */
+static int bpf_fs_xattr_get(const struct xattr_handler *handler,
+                           struct dentry *unused, struct inode *inode,
+                           const char *name, void *value, size_t size)
+{
+       struct bpf_mount_opts *opts = inode->i_sb->s_fs_info;
+       const char *full_name = xattr_full_name(handler, name);
+
+       return simple_xattr_get(&opts->xa_cache, &BPF_FS_I(inode)->xattrs,
+                               full_name, value, size);
+}
+
+/* Values for xattr_handler::flags; bpf_fs_xattr_set() dispatches on them. */
+enum {
+       BPF_FS_XATTR_UNSPEC,
+       BPF_FS_XATTR_SECURITY,
+       BPF_FS_XATTR_TRUSTED,
+};
+
+/*
+ * ->set() callback shared by the bpffs xattr handlers.
+ *
+ * Handlers flagged BPF_FS_XATTR_SECURITY go through
+ * simple_xattr_set_limited() and are charged to the inode's xattr limits.
+ * Handlers flagged BPF_FS_XATTR_TRUSTED use plain simple_xattr_set() and
+ * free the replaced entry, if any. Any other flag value yields -EINVAL.
+ *
+ * Returns 0 on success, after updating the inode's ctime, or a negative
+ * error code.
+ */
+static int bpf_fs_xattr_set(const struct xattr_handler *handler,
+                           struct mnt_idmap *idmap, struct dentry *unused,
+                           struct inode *inode, const char *name,
+                           const void *value, size_t size, int flags)
+{
+       struct bpf_mount_opts *opts = inode->i_sb->s_fs_info;
+       struct bpf_fs_inode *bi = BPF_FS_I(inode);
+       const char *full_name = xattr_full_name(handler, name);
+       struct simple_xattr *prev;
+       int ret;
+
+       if (handler->flags == BPF_FS_XATTR_SECURITY) {
+               ret = simple_xattr_set_limited(&opts->xa_cache, &bi->xattrs,
+                                              &bi->xlimits, full_name, value,
+                                              size, flags);
+               if (ret)
+                       return ret;
+       } else if (handler->flags == BPF_FS_XATTR_TRUSTED) {
+               prev = simple_xattr_set(&opts->xa_cache, &bi->xattrs,
+                                       full_name, value, size, flags);
+               if (IS_ERR(prev))
+                       return PTR_ERR(prev);
+               simple_xattr_free_rcu(prev);
+       } else {
+               return -EINVAL;
+       }
+
+       inode_set_ctime_current(inode);
+       return 0;
+}
+
+/*
+ * Handler for the XATTR_TRUSTED_PREFIX namespace; sets take the
+ * simple_xattr_set() path in bpf_fs_xattr_set().
+ */
+static const struct xattr_handler bpf_fs_trusted_xattr_handler = {
+       .prefix = XATTR_TRUSTED_PREFIX,
+       .flags  = BPF_FS_XATTR_TRUSTED,
+       .get    = bpf_fs_xattr_get,
+       .set    = bpf_fs_xattr_set,
+};
+
+/*
+ * Handler for the XATTR_SECURITY_PREFIX namespace; sets take the
+ * simple_xattr_set_limited() path in bpf_fs_xattr_set().
+ */
+static const struct xattr_handler bpf_fs_security_xattr_handler = {
+       .prefix = XATTR_SECURITY_PREFIX,
+       .flags  = BPF_FS_XATTR_SECURITY,
+       .get    = bpf_fs_xattr_get,
+       .set    = bpf_fs_xattr_set,
+};
+
+/* NULL-terminated handler table installed as sb->s_xattr in bpf_fill_super(). */
+static const struct xattr_handler * const bpf_fs_xattr_handlers[] = {
+       &bpf_fs_trusted_xattr_handler,
+       &bpf_fs_security_xattr_handler,
+       NULL,
+};
+
+/*
+ * ->listxattr() for all bpffs inode types: list the xattrs kept on the
+ * bpf_fs_inode behind @dentry via simple_xattr_list().
+ */
+static ssize_t bpf_fs_listxattr(struct dentry *dentry, char *buf, size_t size)
+{
+       struct inode *inode = d_inode(dentry);
+       struct bpf_fs_inode *bi = BPF_FS_I(inode);
+
+       return simple_xattr_list(inode, &bi->xattrs, buf, size);
+}
+
+/*
+ * initxattrs callback handed to security_inode_init_security(): store every
+ * entry of @xattr_array (terminated by an entry whose name is NULL) on
+ * @inode under XATTR_SECURITY_PREFIX, charging each one to the inode's
+ * xattr limits. @fs_info is not used.
+ *
+ * Returns 0 on success or a negative error code. Entries added before a
+ * failure are not removed here.
+ */
+static int bpf_fs_initxattrs(struct inode *inode,
+                            const struct xattr *xattr_array, void *fs_info)
+{
+       struct bpf_mount_opts *opts = inode->i_sb->s_fs_info;
+       struct bpf_fs_inode *bi = BPF_FS_I(inode);
+       const struct xattr *xa;
+       int ret;
+
+       for (xa = xattr_array; xa->name; xa++) {
+               CLASS(simple_xattr, entry)(xa->value, xa->value_len);
+               if (IS_ERR(entry))
+                       return PTR_ERR(entry);
+
+               entry->name = kasprintf(GFP_KERNEL_ACCOUNT,
+                                       XATTR_SECURITY_PREFIX "%s",
+                                       xa->name);
+               if (!entry->name)
+                       return -ENOMEM;
+
+               ret = simple_xattr_add_limited(&opts->xa_cache, &bi->xattrs,
+                                              &bi->xlimits, entry);
+               if (ret)
+                       return ret;
+
+               /* The list owns the entry now; disarm the scope-based cleanup. */
+               retain_and_null_ptr(entry);
+       }
+       return 0;
  }
  
  const struct super_operations bpf_super_ops = {
         .statfs         = simple_statfs,
         .drop_inode     = inode_just_drop,
         .show_options   = bpf_show_options,
+       /*
+        * Inodes are allocated from bpf_fs_inode_cachep. Teardown is split:
+        * ->destroy_inode() releases the BPF object and the xattrs,
+        * ->free_inode() frees the symlink target and the inode memory.
+        */
+       .alloc_inode    = bpf_fs_alloc_inode,
         .destroy_inode  = bpf_destroy_inode,
+       .free_inode     = bpf_free_inode,
  };
  
  enum {
@@ -996,25 +1181,38 @@ out:
  
  static int bpf_fill_super(struct super_block *sb, struct fs_context *fc)
  {
-       static const struct tree_descr bpf_rfiles[] = { { "" } };
         struct bpf_mount_opts *opts = sb->s_fs_info;
         struct inode *inode;
-       int ret;
  
         /* Mounting an instance of BPF FS requires privileges */
         if (fc->user_ns != &init_user_ns && !capable(CAP_SYS_ADMIN))
                 return -EPERM;
  
-       ret = simple_fill_super(sb, BPF_FS_MAGIC, bpf_rfiles);
-       if (ret)
-               return ret;
-
+       sb->s_blocksize = PAGE_SIZE;
+       sb->s_blocksize_bits = PAGE_SHIFT;
+       sb->s_magic = BPF_FS_MAGIC;
         sb->s_op = &bpf_super_ops;
+       sb->s_xattr = bpf_fs_xattr_handlers;
+       sb->s_iflags |= SB_I_NOEXEC;
+       sb->s_iflags |= SB_I_NODEV;
+       sb->s_time_gran = 1;
+
+       inode = bpf_get_inode(sb, NULL, S_IFDIR | 0777);
+       if (IS_ERR(inode))
+               return PTR_ERR(inode);
+
+       inode->i_ino = 1;
+       inode->i_op = &bpf_dir_iops;
+       inode->i_fop = &simple_dir_operations;
+       set_nlink(inode, 2);
+
+       sb->s_root = d_make_root(inode);
+       if (!sb->s_root)
+               return -ENOMEM;
  
-       inode = sb->s_root->d_inode;
+       inode = d_inode(sb->s_root);
         inode->i_uid = opts->uid;
         inode->i_gid = opts->gid;
-       inode->i_op = &bpf_dir_iops;
         inode->i_mode &= ~S_IALLUGO;
         populate_bpffs(sb->s_root);
         inode->i_mode |= S_ISVTX | opts->mode;
@@ -1068,6 +1266,7 @@ static void bpf_kill_super(struct super_block *sb)
         struct bpf_mount_opts *opts = sb->s_fs_info;
  
         kill_anon_super(sb);
+       simple_xattr_cache_cleanup(&opts->xa_cache);
         kfree(opts);
  }
  
@@ -1080,18 +1279,37 @@ static struct file_system_type bpf_fs_type = {
         .fs_flags       = FS_USERNS_MOUNT,
  };
  
+/*
+ * Constructor passed to kmem_cache_create() for bpf_fs_inode_cachep: runs
+ * inode_init_once() on the VFS inode embedded in the object.
+ */
+static void bpf_fs_inode_init_once(void *foo)
+{
+       inode_init_once(&((struct bpf_fs_inode *)foo)->vfs_inode);
+}
+
  static int __init bpf_init(void)
  {
         int ret;
  
+       /*
+        * Create the inode cache before the filesystem is registered:
+        * bpf_fs_alloc_inode() allocates from it.
+        */
+       bpf_fs_inode_cachep = kmem_cache_create("bpf_fs_inode_cache",
+                                               sizeof(struct bpf_fs_inode),
+                                               0, SLAB_ACCOUNT,
+                                               bpf_fs_inode_init_once);
+       if (!bpf_fs_inode_cachep)
+               return -ENOMEM;
+
         ret = sysfs_create_mount_point(fs_kobj, "bpf");
         if (ret)
-               return ret;
+               goto out_cache;
  
         ret = register_filesystem(&bpf_fs_type);
-       if (ret)
+       if (ret) {
                 sysfs_remove_mount_point(fs_kobj, "bpf");
+               goto out_cache;
+       }
  
+       return 0;
+       /* Failure after the cache was created: destroy it again. */
+out_cache:
+       kmem_cache_destroy(bpf_fs_inode_cachep);
         return ret;
  }
  fs_initcall(bpf_init);
author	Daniel Borkmann <daniel@iogearbox.net>
	Tue, 2 Jun 2026 07:40:12 +0000 (09:40 +0200)
committer	Christian Brauner <brauner@kernel.org>
	Sat, 6 Jun 2026 13:22:44 +0000 (15:22 +0200)
fs/xattr.c		patch \| blob \| blame \| history
include/linux/bpf.h		patch \| blob \| blame \| history
include/linux/xattr.h		patch \| blob \| blame \| history
kernel/bpf/inode.c		patch \| blob \| blame \| history