]> git.ipfire.org Git - thirdparty/kernel/stable-queue.git/blob - releases/6.8.6/kernfs-rcu-protect-kernfs_nodes-and-avoid-kernfs_idr.patch
Linux 6.6.27
[thirdparty/kernel/stable-queue.git] / releases / 6.8.6 / kernfs-rcu-protect-kernfs_nodes-and-avoid-kernfs_idr.patch
1 From 8051496261c8fef0fbc63d14c4640e1cd5d8ceaf Mon Sep 17 00:00:00 2001
2 From: Sasha Levin <sashal@kernel.org>
3 Date: Tue, 9 Jan 2024 11:48:04 -1000
4 Subject: kernfs: RCU protect kernfs_nodes and avoid kernfs_idr_lock in
5 kernfs_find_and_get_node_by_id()
6
7 From: Tejun Heo <tj@kernel.org>
8
9 [ Upstream commit 4207b556e62f0a8915afc5da4c5d5ad915a253a5 ]
10
11 The BPF helper bpf_cgroup_from_id() calls kernfs_find_and_get_node_by_id()
12 which acquires kernfs_idr_lock, which is a non-raw non-IRQ-safe lock. This
13 can lead to deadlocks as bpf_cgroup_from_id() can be called from any BPF
14 programs including e.g. the ones that attach to functions which are holding
15 the scheduler rq lock.
16
17 Consider the following BPF program:
18
19 SEC("fentry/__set_cpus_allowed_ptr_locked")
20 int BPF_PROG(__set_cpus_allowed_ptr_locked, struct task_struct *p,
21 struct affinity_context *affn_ctx, struct rq *rq, struct rq_flags *rf)
22 {
23 struct cgroup *cgrp = bpf_cgroup_from_id(p->cgroups->dfl_cgrp->kn->id);
24
25 if (cgrp) {
26 bpf_printk("%d[%s] in %s", p->pid, p->comm, cgrp->kn->name);
27 bpf_cgroup_release(cgrp);
28 }
29 return 0;
30 }
31
32 __set_cpus_allowed_ptr_locked() is called with rq lock held and the above
33 BPF program calls bpf_cgroup_from_id() within, leading to the following
34 lockdep warning:
35
36 =====================================================
37 WARNING: HARDIRQ-safe -> HARDIRQ-unsafe lock order detected
38 6.7.0-rc3-work-00053-g07124366a1d7-dirty #147 Not tainted
39 -----------------------------------------------------
40 repro/1620 [HC0[0]:SC0[0]:HE0:SE1] is trying to acquire:
41 ffffffff833b3688 (kernfs_idr_lock){+.+.}-{2:2}, at: kernfs_find_and_get_node_by_id+0x1e/0x70
42
43 and this task is already holding:
44 ffff888237ced698 (&rq->__lock){-.-.}-{2:2}, at: task_rq_lock+0x4e/0xf0
45 which would create a new lock dependency:
46 (&rq->__lock){-.-.}-{2:2} -> (kernfs_idr_lock){+.+.}-{2:2}
47 ...
48 Possible interrupt unsafe locking scenario:
49
50 CPU0 CPU1
51 ---- ----
52 lock(kernfs_idr_lock);
53 local_irq_disable();
54 lock(&rq->__lock);
55 lock(kernfs_idr_lock);
56 <Interrupt>
57 lock(&rq->__lock);
58
59 *** DEADLOCK ***
60 ...
61 Call Trace:
62 dump_stack_lvl+0x55/0x70
63 dump_stack+0x10/0x20
64 __lock_acquire+0x781/0x2a40
65 lock_acquire+0xbf/0x1f0
66 _raw_spin_lock+0x2f/0x40
67 kernfs_find_and_get_node_by_id+0x1e/0x70
68 cgroup_get_from_id+0x21/0x240
69 bpf_cgroup_from_id+0xe/0x20
70 bpf_prog_98652316e9337a5a___set_cpus_allowed_ptr_locked+0x96/0x11a
71 bpf_trampoline_6442545632+0x4f/0x1000
72 __set_cpus_allowed_ptr_locked+0x5/0x5a0
73 sched_setaffinity+0x1b3/0x290
74 __x64_sys_sched_setaffinity+0x4f/0x60
75 do_syscall_64+0x40/0xe0
76 entry_SYSCALL_64_after_hwframe+0x46/0x4e
77
78 Let's fix it by protecting kernfs_node and kernfs_root with RCU and making
79 kernfs_find_and_get_node_by_id() acquire rcu_read_lock() instead of
80 kernfs_idr_lock.
81
82 This adds an rcu_head to kernfs_node making it larger by 16 bytes on 64bit.
83 Combined with the preceding rearrange patch, the net increase is 8 bytes.
84
85 Signed-off-by: Tejun Heo <tj@kernel.org>
86 Cc: Andrea Righi <andrea.righi@canonical.com>
87 Cc: Geert Uytterhoeven <geert@linux-m68k.org>
88 Link: https://lore.kernel.org/r/20240109214828.252092-4-tj@kernel.org
89 Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
90 Signed-off-by: Sasha Levin <sashal@kernel.org>
91 ---
92 fs/kernfs/dir.c | 31 ++++++++++++++++++++-----------
93 fs/kernfs/kernfs-internal.h | 2 ++
94 include/linux/kernfs.h | 2 ++
95 3 files changed, 24 insertions(+), 11 deletions(-)
96
97 diff --git a/fs/kernfs/dir.c b/fs/kernfs/dir.c
98 index bce1d7ac95caa..458519e416fe7 100644
99 --- a/fs/kernfs/dir.c
100 +++ b/fs/kernfs/dir.c
101 @@ -529,6 +529,20 @@ void kernfs_get(struct kernfs_node *kn)
102 }
103 EXPORT_SYMBOL_GPL(kernfs_get);
104
105 +static void kernfs_free_rcu(struct rcu_head *rcu)
106 +{
107 + struct kernfs_node *kn = container_of(rcu, struct kernfs_node, rcu);
108 +
109 + kfree_const(kn->name);
110 +
111 + if (kn->iattr) {
112 + simple_xattrs_free(&kn->iattr->xattrs, NULL);
113 + kmem_cache_free(kernfs_iattrs_cache, kn->iattr);
114 + }
115 +
116 + kmem_cache_free(kernfs_node_cache, kn);
117 +}
118 +
119 /**
120 * kernfs_put - put a reference count on a kernfs_node
121 * @kn: the target kernfs_node
122 @@ -557,16 +571,11 @@ void kernfs_put(struct kernfs_node *kn)
123 if (kernfs_type(kn) == KERNFS_LINK)
124 kernfs_put(kn->symlink.target_kn);
125
126 - kfree_const(kn->name);
127 -
128 - if (kn->iattr) {
129 - simple_xattrs_free(&kn->iattr->xattrs, NULL);
130 - kmem_cache_free(kernfs_iattrs_cache, kn->iattr);
131 - }
132 spin_lock(&kernfs_idr_lock);
133 idr_remove(&root->ino_idr, (u32)kernfs_ino(kn));
134 spin_unlock(&kernfs_idr_lock);
135 - kmem_cache_free(kernfs_node_cache, kn);
136 +
137 + call_rcu(&kn->rcu, kernfs_free_rcu);
138
139 kn = parent;
140 if (kn) {
141 @@ -575,7 +584,7 @@ void kernfs_put(struct kernfs_node *kn)
142 } else {
143 /* just released the root kn, free @root too */
144 idr_destroy(&root->ino_idr);
145 - kfree(root);
146 + kfree_rcu(root, rcu);
147 }
148 }
149 EXPORT_SYMBOL_GPL(kernfs_put);
150 @@ -715,7 +724,7 @@ struct kernfs_node *kernfs_find_and_get_node_by_id(struct kernfs_root *root,
151 ino_t ino = kernfs_id_ino(id);
152 u32 gen = kernfs_id_gen(id);
153
154 - spin_lock(&kernfs_idr_lock);
155 + rcu_read_lock();
156
157 kn = idr_find(&root->ino_idr, (u32)ino);
158 if (!kn)
159 @@ -739,10 +748,10 @@ struct kernfs_node *kernfs_find_and_get_node_by_id(struct kernfs_root *root,
160 if (unlikely(!__kernfs_active(kn) || !atomic_inc_not_zero(&kn->count)))
161 goto err_unlock;
162
163 - spin_unlock(&kernfs_idr_lock);
164 + rcu_read_unlock();
165 return kn;
166 err_unlock:
167 - spin_unlock(&kernfs_idr_lock);
168 + rcu_read_unlock();
169 return NULL;
170 }
171
172 diff --git a/fs/kernfs/kernfs-internal.h b/fs/kernfs/kernfs-internal.h
173 index 237f2764b9412..b42ee6547cdc1 100644
174 --- a/fs/kernfs/kernfs-internal.h
175 +++ b/fs/kernfs/kernfs-internal.h
176 @@ -49,6 +49,8 @@ struct kernfs_root {
177 struct rw_semaphore kernfs_rwsem;
178 struct rw_semaphore kernfs_iattr_rwsem;
179 struct rw_semaphore kernfs_supers_rwsem;
180 +
181 + struct rcu_head rcu;
182 };
183
184 /* +1 to avoid triggering overflow warning when negating it */
185 diff --git a/include/linux/kernfs.h b/include/linux/kernfs.h
186 index 99aaa050ccb76..e857a150babc6 100644
187 --- a/include/linux/kernfs.h
188 +++ b/include/linux/kernfs.h
189 @@ -223,6 +223,8 @@ struct kernfs_node {
190 unsigned short flags;
191 umode_t mode;
192 struct kernfs_iattrs *iattr;
193 +
194 + struct rcu_head rcu;
195 };
196
197 /*
198 --
199 2.43.0
200