From 2ccaebc686e9ef7e94b3a8d89706daed6e696667 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Wed, 29 Oct 2025 13:20:26 +0100 Subject: [PATCH] nstree: introduce a unified tree This will allow userspace to look up and stat a namespace simply by its identifier without having to know what type of namespace it is. Link: https://patch.msgid.link/20251029-work-namespace-nstree-listns-v4-13-2e6f823ebdc0@kernel.org Signed-off-by: Christian Brauner --- include/linux/ns_common.h | 3 ++ kernel/nscommon.c | 1 + kernel/nstree.c | 96 +++++++++++++++++++++++++++++---------- 3 files changed, 76 insertions(+), 24 deletions(-) diff --git a/include/linux/ns_common.h b/include/linux/ns_common.h index 32463203c8244..7a3c71b3a76f4 100644 --- a/include/linux/ns_common.h +++ b/include/linux/ns_common.h @@ -109,6 +109,9 @@ struct ns_common { union { struct { u64 ns_id; + struct /* global namespace rbtree and list */ { + struct rb_node ns_unified_tree_node; + }; struct /* per type rbtree and list */ { struct rb_node ns_tree_node; struct list_head ns_list_node; diff --git a/kernel/nscommon.c b/kernel/nscommon.c index abd1ac1a2d02e..17a6ea44f0540 100644 --- a/kernel/nscommon.c +++ b/kernel/nscommon.c @@ -62,6 +62,7 @@ int __ns_common_init(struct ns_common *ns, u32 ns_type, const struct proc_ns_ope ns->ns_id = 0; ns->ns_type = ns_type; RB_CLEAR_NODE(&ns->ns_tree_node); + RB_CLEAR_NODE(&ns->ns_unified_tree_node); INIT_LIST_HEAD(&ns->ns_list_node); #ifdef CONFIG_DEBUG_VFS diff --git a/kernel/nstree.c b/kernel/nstree.c index e2a5377851281..bbb34b46b01b6 100644 --- a/kernel/nstree.c +++ b/kernel/nstree.c @@ -5,31 +5,30 @@ #include #include +static __cacheline_aligned_in_smp DEFINE_SEQLOCK(ns_tree_lock); +static struct rb_root ns_unified_tree = RB_ROOT; /* protected by ns_tree_lock */ + /** * struct ns_tree - Namespace tree * @ns_tree: Rbtree of namespaces of a particular type * @ns_list: Sequentially walkable list of all namespaces of this type - * @ns_tree_lock: Seqlock to protect the tree 
and list * @type: type of namespaces in this tree */ struct ns_tree { - struct rb_root ns_tree; - struct list_head ns_list; - seqlock_t ns_tree_lock; - int type; + struct rb_root ns_tree; + struct list_head ns_list; + int type; }; struct ns_tree mnt_ns_tree = { .ns_tree = RB_ROOT, .ns_list = LIST_HEAD_INIT(mnt_ns_tree.ns_list), - .ns_tree_lock = __SEQLOCK_UNLOCKED(mnt_ns_tree.ns_tree_lock), .type = CLONE_NEWNS, }; struct ns_tree net_ns_tree = { .ns_tree = RB_ROOT, .ns_list = LIST_HEAD_INIT(net_ns_tree.ns_list), - .ns_tree_lock = __SEQLOCK_UNLOCKED(net_ns_tree.ns_tree_lock), .type = CLONE_NEWNET, }; EXPORT_SYMBOL_GPL(net_ns_tree); @@ -37,42 +36,36 @@ EXPORT_SYMBOL_GPL(net_ns_tree); struct ns_tree uts_ns_tree = { .ns_tree = RB_ROOT, .ns_list = LIST_HEAD_INIT(uts_ns_tree.ns_list), - .ns_tree_lock = __SEQLOCK_UNLOCKED(uts_ns_tree.ns_tree_lock), .type = CLONE_NEWUTS, }; struct ns_tree user_ns_tree = { .ns_tree = RB_ROOT, .ns_list = LIST_HEAD_INIT(user_ns_tree.ns_list), - .ns_tree_lock = __SEQLOCK_UNLOCKED(user_ns_tree.ns_tree_lock), .type = CLONE_NEWUSER, }; struct ns_tree ipc_ns_tree = { .ns_tree = RB_ROOT, .ns_list = LIST_HEAD_INIT(ipc_ns_tree.ns_list), - .ns_tree_lock = __SEQLOCK_UNLOCKED(ipc_ns_tree.ns_tree_lock), .type = CLONE_NEWIPC, }; struct ns_tree pid_ns_tree = { .ns_tree = RB_ROOT, .ns_list = LIST_HEAD_INIT(pid_ns_tree.ns_list), - .ns_tree_lock = __SEQLOCK_UNLOCKED(pid_ns_tree.ns_tree_lock), .type = CLONE_NEWPID, }; struct ns_tree cgroup_ns_tree = { .ns_tree = RB_ROOT, .ns_list = LIST_HEAD_INIT(cgroup_ns_tree.ns_list), - .ns_tree_lock = __SEQLOCK_UNLOCKED(cgroup_ns_tree.ns_tree_lock), .type = CLONE_NEWCGROUP, }; struct ns_tree time_ns_tree = { .ns_tree = RB_ROOT, .ns_list = LIST_HEAD_INIT(time_ns_tree.ns_list), - .ns_tree_lock = __SEQLOCK_UNLOCKED(time_ns_tree.ns_tree_lock), .type = CLONE_NEWTIME, }; @@ -85,6 +78,13 @@ static inline struct ns_common *node_to_ns(const struct rb_node *node) return rb_entry(node, struct ns_common, ns_tree_node); } +static inline 
struct ns_common *node_to_ns_unified(const struct rb_node *node) +{ + if (!node) + return NULL; + return rb_entry(node, struct ns_common, ns_unified_tree_node); +} + static inline int ns_cmp(struct rb_node *a, const struct rb_node *b) { struct ns_common *ns_a = node_to_ns(a); @@ -99,15 +99,27 @@ static inline int ns_cmp(struct rb_node *a, const struct rb_node *b) return 0; } +static inline int ns_cmp_unified(struct rb_node *a, const struct rb_node *b) +{ + struct ns_common *ns_a = node_to_ns_unified(a); + struct ns_common *ns_b = node_to_ns_unified(b); + u64 ns_id_a = ns_a->ns_id; + u64 ns_id_b = ns_b->ns_id; + + if (ns_id_a < ns_id_b) + return -1; + if (ns_id_a > ns_id_b) + return 1; + return 0; +} + void __ns_tree_add_raw(struct ns_common *ns, struct ns_tree *ns_tree) { struct rb_node *node, *prev; VFS_WARN_ON_ONCE(!ns->ns_id); - write_seqlock(&ns_tree->ns_tree_lock); - - VFS_WARN_ON_ONCE(ns->ns_type != ns_tree->type); + write_seqlock(&ns_tree_lock); node = rb_find_add_rcu(&ns->ns_tree_node, &ns_tree->ns_tree, ns_cmp); /* @@ -120,7 +132,8 @@ void __ns_tree_add_raw(struct ns_common *ns, struct ns_tree *ns_tree) else list_add_rcu(&ns->ns_list_node, &node_to_ns(prev)->ns_list_node); - write_sequnlock(&ns_tree->ns_tree_lock); + rb_find_add_rcu(&ns->ns_unified_tree_node, &ns_unified_tree, ns_cmp_unified); + write_sequnlock(&ns_tree_lock); VFS_WARN_ON_ONCE(node); @@ -139,11 +152,12 @@ void __ns_tree_remove(struct ns_common *ns, struct ns_tree *ns_tree) VFS_WARN_ON_ONCE(list_empty(&ns->ns_list_node)); VFS_WARN_ON_ONCE(ns->ns_type != ns_tree->type); - write_seqlock(&ns_tree->ns_tree_lock); + write_seqlock(&ns_tree_lock); rb_erase(&ns->ns_tree_node, &ns_tree->ns_tree); + rb_erase(&ns->ns_unified_tree_node, &ns_unified_tree); list_bidir_del_rcu(&ns->ns_list_node); RB_CLEAR_NODE(&ns->ns_tree_node); - write_sequnlock(&ns_tree->ns_tree_lock); + write_sequnlock(&ns_tree_lock); } EXPORT_SYMBOL_GPL(__ns_tree_remove); @@ -159,6 +173,17 @@ static int ns_find(const void *key, const 
struct rb_node *node) return 0; } +static int ns_find_unified(const void *key, const struct rb_node *node) +{ + const u64 ns_id = *(u64 *)key; + const struct ns_common *ns = node_to_ns_unified(node); + + if (ns_id < ns->ns_id) + return -1; + if (ns_id > ns->ns_id) + return 1; + return 0; +} static struct ns_tree *ns_tree_from_type(int ns_type) { @@ -184,28 +209,51 @@ static struct ns_tree *ns_tree_from_type(int ns_type) return NULL; } -struct ns_common *ns_tree_lookup_rcu(u64 ns_id, int ns_type) +static struct ns_common *__ns_unified_tree_lookup_rcu(u64 ns_id) { - struct ns_tree *ns_tree; struct rb_node *node; unsigned int seq; - RCU_LOCKDEP_WARN(!rcu_read_lock_held(), "suspicious ns_tree_lookup_rcu() usage"); + do { + seq = read_seqbegin(&ns_tree_lock); + node = rb_find_rcu(&ns_id, &ns_unified_tree, ns_find_unified); + if (node) + break; + } while (read_seqretry(&ns_tree_lock, seq)); + + return node_to_ns_unified(node); +} + +static struct ns_common *__ns_tree_lookup_rcu(u64 ns_id, int ns_type) +{ + struct ns_tree *ns_tree; + struct rb_node *node; + unsigned int seq; ns_tree = ns_tree_from_type(ns_type); if (!ns_tree) return NULL; do { - seq = read_seqbegin(&ns_tree->ns_tree_lock); + seq = read_seqbegin(&ns_tree_lock); node = rb_find_rcu(&ns_id, &ns_tree->ns_tree, ns_find); if (node) break; - } while (read_seqretry(&ns_tree->ns_tree_lock, seq)); + } while (read_seqretry(&ns_tree_lock, seq)); return node_to_ns(node); } +struct ns_common *ns_tree_lookup_rcu(u64 ns_id, int ns_type) +{ + RCU_LOCKDEP_WARN(!rcu_read_lock_held(), "suspicious ns_tree_lookup_rcu() usage"); + + if (ns_type) + return __ns_tree_lookup_rcu(ns_id, ns_type); + + return __ns_unified_tree_lookup_rcu(ns_id); +} + /** * ns_tree_adjoined_rcu - find the next/previous namespace in the same * tree -- 2.47.3