#include <linux/shmem_fs.h>
#include <linux/mnt_idmapping.h>
#include <linux/pidfs.h>
+#include <linux/nstree.h>
#include "pnode.h"
#include "internal.h"
static HLIST_HEAD(unmounted); /* protected by namespace_sem */
static LIST_HEAD(ex_mountpoints); /* protected by namespace_sem */
static struct mnt_namespace *emptied_ns; /* protected by namespace_sem */
-static DEFINE_SEQLOCK(mnt_ns_tree_lock);
#ifdef CONFIG_FSNOTIFY
LIST_HEAD(notify_list); /* protected by namespace_sem */
#endif
-static struct rb_root mnt_ns_tree = RB_ROOT; /* protected by mnt_ns_tree_lock */
-static LIST_HEAD(mnt_ns_list); /* protected by mnt_ns_tree_lock */
enum mount_kattr_flags_t {
MOUNT_KATTR_RECURSE = (1 << 0),
static inline struct mnt_namespace *node_to_mnt_ns(const struct rb_node *node)
{
+ struct ns_common *ns;
+
if (!node)
return NULL;
- return rb_entry(node, struct mnt_namespace, mnt_ns_tree_node);
-}
-
-static int mnt_ns_cmp(struct rb_node *a, const struct rb_node *b)
-{
- struct mnt_namespace *ns_a = node_to_mnt_ns(a);
- struct mnt_namespace *ns_b = node_to_mnt_ns(b);
- u64 seq_a = ns_a->seq;
- u64 seq_b = ns_b->seq;
-
- if (seq_a < seq_b)
- return -1;
- if (seq_a > seq_b)
- return 1;
- return 0;
-}
-
-static inline void mnt_ns_tree_write_lock(void)
-{
- write_seqlock(&mnt_ns_tree_lock);
-}
-
-static inline void mnt_ns_tree_write_unlock(void)
-{
- write_sequnlock(&mnt_ns_tree_lock);
-}
-
-static void mnt_ns_tree_add(struct mnt_namespace *ns)
-{
- struct rb_node *node, *prev;
-
- mnt_ns_tree_write_lock();
- node = rb_find_add_rcu(&ns->mnt_ns_tree_node, &mnt_ns_tree, mnt_ns_cmp);
- /*
- * If there's no previous entry simply add it after the
- * head and if there is add it after the previous entry.
- */
- prev = rb_prev(&ns->mnt_ns_tree_node);
- if (!prev)
- list_add_rcu(&ns->mnt_ns_list, &mnt_ns_list);
- else
- list_add_rcu(&ns->mnt_ns_list, &node_to_mnt_ns(prev)->mnt_ns_list);
- mnt_ns_tree_write_unlock();
-
- WARN_ON_ONCE(node);
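+ /* The rb_node is embedded in struct ns_common; step to ns_common first, then to the surrounding mnt_namespace. */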
+ ns = rb_entry(node, struct ns_common, ns_tree_node);
+ return container_of(ns, struct mnt_namespace, ns);
}
static void mnt_ns_release(struct mnt_namespace *ns)
static void mnt_ns_release_rcu(struct rcu_head *rcu)
{
- mnt_ns_release(container_of(rcu, struct mnt_namespace, mnt_ns_rcu));
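+ /* The RCU head now lives in the embedded struct ns_common. */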
+ mnt_ns_release(container_of(rcu, struct mnt_namespace, ns.ns_rcu));
}
static void mnt_ns_tree_remove(struct mnt_namespace *ns)
{
- /* remove from global mount namespace list */
+ /* remove from the shared namespace tree */
- if (!RB_EMPTY_NODE(&ns->mnt_ns_tree_node)) {
- mnt_ns_tree_write_lock();
- rb_erase(&ns->mnt_ns_tree_node, &mnt_ns_tree);
- list_bidir_del_rcu(&ns->mnt_ns_list);
- mnt_ns_tree_write_unlock();
- }
-
- call_rcu(&ns->mnt_ns_rcu, mnt_ns_release_rcu);
-}
-
-static int mnt_ns_find(const void *key, const struct rb_node *node)
-{
- const u64 mnt_ns_id = *(u64 *)key;
- const struct mnt_namespace *ns = node_to_mnt_ns(node);
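+ /* Anonymous mount namespaces are never linked into the tree; only deactivate entries that are actually present. */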
+ if (ns_tree_active(ns))
+ ns_tree_remove(ns);
- if (mnt_ns_id < ns->seq)
- return -1;
- if (mnt_ns_id > ns->seq)
- return 1;
- return 0;
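+ /* Defer the final release by an RCU grace period so lockless lookups can still take a passive reference (see lookup_mnt_ns()). */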
+ call_rcu(&ns->ns.ns_rcu, mnt_ns_release_rcu);
}
/*
*/
static struct mnt_namespace *lookup_mnt_ns(u64 mnt_ns_id)
{
- struct mnt_namespace *ns;
- struct rb_node *node;
- unsigned int seq;
+ struct mnt_namespace *mnt_ns;
+ struct ns_common *ns;
guard(rcu)();
- do {
- seq = read_seqbegin(&mnt_ns_tree_lock);
- node = rb_find_rcu(&mnt_ns_id, &mnt_ns_tree, mnt_ns_find);
- if (node)
- break;
- } while (read_seqretry(&mnt_ns_tree_lock, seq));
-
- if (!node)
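+ /* Lockless lookup by id in the shared namespace tree; CLONE_NEWNS selects the mount namespace type. */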
+ ns = ns_tree_lookup_rcu(mnt_ns_id, CLONE_NEWNS);
+ if (!ns)
return NULL;
/*
- * The last reference count is put with RCU delay so we can
- * unconditonally acquire a reference here.
+ * The last passive reference count is put with RCU delay so
+ * we can unconditionally acquire a reference here.
*/
- ns = node_to_mnt_ns(node);
- refcount_inc(&ns->passive);
- return ns;
+ mnt_ns = container_of(ns, struct mnt_namespace, ns);
+ refcount_inc(&mnt_ns->passive);
+ return mnt_ns;
}
static inline void lock_mount_hash(void)
return false;
seq = mnt->mnt_ns->seq_origin;
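+ /* seq_origin now holds the ns id of the mount namespace this anonymous tree originated from; zero means it has no origin. */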
- return !seq || (seq == current->nsproxy->mnt_ns->seq);
+ return !seq || (seq == current->nsproxy->mnt_ns->ns.ns_id);
}
/*
struct mnt_namespace *get_sequential_mnt_ns(struct mnt_namespace *mntns, bool previous)
{
+ struct ns_common *ns;
+
guard(rcu)();
for (;;) {
- struct list_head *list;
-
- if (previous)
- list = rcu_dereference(list_bidir_prev_rcu(&mntns->mnt_ns_list));
- else
- list = rcu_dereference(list_next_rcu(&mntns->mnt_ns_list));
- if (list_is_head(list, &mnt_ns_list))
- return ERR_PTR(-ENOENT);
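+ /* Step to the adjacent namespace in id order; the helper's ERR_PTR is propagated once there is no neighbour left. */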
+ ns = ns_tree_adjoined_rcu(mntns, previous);
+ if (IS_ERR(ns))
+ return ERR_CAST(ns);
- mntns = list_entry_rcu(list, struct mnt_namespace, mnt_ns_list);
+ mntns = to_mnt_ns(ns);
/*
* The last passive reference count is put with RCU
if (!mnt_ns)
return false;
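+ /* Ids are handed out in strictly increasing order, so binding a namespace whose id is not greater than ours could create a reference counting loop. */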
- return current->nsproxy->mnt_ns->seq >= mnt_ns->seq;
+ return current->nsproxy->mnt_ns->ns.ns_id >= mnt_ns->ns.ns_id;
}
struct mount *copy_tree(struct mount *src_root, struct dentry *dentry,
if (is_anon_ns(src_mnt_ns))
ns->seq_origin = src_mnt_ns->seq_origin;
else
- ns->seq_origin = src_mnt_ns->seq;
+ ns->seq_origin = src_mnt_ns->ns.ns_id;
}
mnt = __do_loopback(path, recursive);
mnt_ns_tree_remove(ns);
}
-/*
- * Assign a sequence number so we can detect when we attempt to bind
- * mount a reference to an older mount namespace into the current
- * mount namespace, preventing reference counting loops. A 64bit
- * number incrementing at 10Ghz will take 12,427 years to wrap which
- * is effectively never, so we can ignore the possibility.
- */
-static atomic64_t mnt_ns_seq = ATOMIC64_INIT(1);
-
static struct mnt_namespace *alloc_mnt_ns(struct user_namespace *user_ns, bool anon)
{
struct mnt_namespace *new_ns;
return ERR_PTR(ret);
}
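+ /* Anonymous namespaces get no id and are never linked into the ns tree; they stay invisible to id-based lookups. */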
if (!anon)
- new_ns->seq = atomic64_inc_return(&mnt_ns_seq);
+ ns_tree_gen_id(&new_ns->ns);
refcount_set(&new_ns->passive, 1);
new_ns->mounts = RB_ROOT;
- INIT_LIST_HEAD(&new_ns->mnt_ns_list);
- RB_CLEAR_NODE(&new_ns->mnt_ns_tree_node);
init_waitqueue_head(&new_ns->poll);
new_ns->user_ns = get_user_ns(user_ns);
new_ns->ucounts = ucounts;
if (pwdmnt)
mntput(pwdmnt);
- mnt_ns_tree_add(new_ns);
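+ /* The id was already generated in alloc_mnt_ns(); the raw variant only links the namespace into the tree, while plain ns_tree_add() (used for the initial namespace below) also generates the id. */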
+ ns_tree_add_raw(new_ns);
return new_ns;
}
static void statmount_mnt_ns_id(struct kstatmount *s, struct mnt_namespace *ns)
{
s->sm.mask |= STATMOUNT_MNT_NS_ID;
- s->sm.mnt_ns_id = ns->seq;
+ s->sm.mnt_ns_id = ns->ns.ns_id;
}
static int statmount_mnt_opts(struct kstatmount *s, struct seq_file *seq)
ns = alloc_mnt_ns(&init_user_ns, true);
if (IS_ERR(ns))
panic("Can't allocate initial namespace");
- ns->seq = atomic64_inc_return(&mnt_ns_seq);
ns->ns.inum = PROC_MNT_INIT_INO;
m = real_mount(mnt);
ns->root = m;
set_fs_pwd(current->fs, &root);
set_fs_root(current->fs, &root);
- mnt_ns_tree_add(ns);
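+ /* The initial namespace was allocated as anonymous, so it has no id yet; ns_tree_add() generates one and links the namespace, replacing the manual mnt_ns_seq assignment removed above. */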
+ ns_tree_add(ns);
}
void __init mnt_init(void)