From 1a80dbcb2dbaf6e4c216e62e30fa7d3daa8001ce Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andrii@kernel.org>
Date: Wed, 27 Mar 2024 22:24:26 -0700
Subject: bpf: support deferring bpf_link dealloc to after RCU grace period

From: Andrii Nakryiko <andrii@kernel.org>

commit 1a80dbcb2dbaf6e4c216e62e30fa7d3daa8001ce upstream.

For some program types, the BPF link is passed as a "context" which can
be used by those BPF programs to look up additional information. E.g.,
for multi-kprobes and multi-uprobes, the link is used to fetch BPF
cookie values.

Because of this runtime dependency, when the bpf_link refcnt drops to
zero there could still be active BPF programs running that access link
data.

This patch adds generic support for deferring the bpf_link dealloc
callback to after an RCU GP, if requested. This is done by exposing two
different deallocation callbacks, one synchronous and one deferred. If
the deferred one is provided, bpf_link_free() will schedule the
dealloc_deferred() callback to happen after an RCU GP.

BPF uses two flavors of RCU: the "classic" non-sleepable one and the
RCU tasks trace one. The latter is used when sleepable BPF programs are
involved. bpf_link_free() accommodates that by checking the underlying
BPF program's sleepable flag: it goes through only a normal RCU GP for
non-sleepable programs, or through an RCU tasks trace GP *and* then a
normal RCU GP (taking the rcu_trace_implies_rcu_gp() optimization into
account) if the BPF program is sleepable.

We use this for multi-kprobe and multi-uprobe links, which dereference
the link during program run. We also preventively switch the raw_tp
link to use the deferred dealloc callback, as upcoming changes in the
bpf-next tree expose raw_tp link data (specifically, the cookie value)
to BPF programs at runtime as well.

Fixes: 0dcac2725406 ("bpf: Add multi kprobe link")
Fixes: 89ae89f53d20 ("bpf: Add multi uprobe link")
Reported-by: syzbot+981935d9485a560bfbcb@syzkaller.appspotmail.com
Reported-by: syzbot+2cb5a6c573e98db598cc@syzkaller.appspotmail.com
Reported-by: syzbot+62d8b26793e8a2bd0516@syzkaller.appspotmail.com
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Acked-by: Jiri Olsa <jolsa@kernel.org>
Link: https://lore.kernel.org/r/20240328052426.3042617-2-andrii@kernel.org
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/bpf.h      | 16 +++++++++++++++-
 kernel/bpf/syscall.c     | 35 ++++++++++++++++++++++++++++++++---
 kernel/trace/bpf_trace.c |  4 ++--
 3 files changed, 49 insertions(+), 6 deletions(-)

--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -1524,12 +1524,26 @@ struct bpf_link {
 	enum bpf_link_type type;
 	const struct bpf_link_ops *ops;
 	struct bpf_prog *prog;
-	struct work_struct work;
+	/* rcu is used before freeing, work can be used to schedule that
+	 * RCU-based freeing before that, so they never overlap
+	 */
+	union {
+		struct rcu_head rcu;
+		struct work_struct work;
+	};
 };
 
 struct bpf_link_ops {
 	void (*release)(struct bpf_link *link);
+	/* deallocate link resources callback, called without RCU grace period
+	 * waiting
+	 */
 	void (*dealloc)(struct bpf_link *link);
+	/* deallocate link resources callback, called after RCU grace period;
+	 * if underlying BPF program is sleepable we go through tasks trace
+	 * RCU GP and then "classic" RCU GP
+	 */
+	void (*dealloc_deferred)(struct bpf_link *link);
 	int (*detach)(struct bpf_link *link);
 	int (*update_prog)(struct bpf_link *link, struct bpf_prog *new_prog,
 			   struct bpf_prog *old_prog);
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -2866,17 +2866,46 @@ void bpf_link_inc(struct bpf_link *link)
 	atomic64_inc(&link->refcnt);
 }
 
+static void bpf_link_defer_dealloc_rcu_gp(struct rcu_head *rcu)
+{
+	struct bpf_link *link = container_of(rcu, struct bpf_link, rcu);
+
+	/* free bpf_link and its containing memory */
+	link->ops->dealloc_deferred(link);
+}
+
+static void bpf_link_defer_dealloc_mult_rcu_gp(struct rcu_head *rcu)
+{
+	if (rcu_trace_implies_rcu_gp())
+		bpf_link_defer_dealloc_rcu_gp(rcu);
+	else
+		call_rcu(rcu, bpf_link_defer_dealloc_rcu_gp);
+}
+
 /* bpf_link_free is guaranteed to be called from process context */
 static void bpf_link_free(struct bpf_link *link)
 {
+	bool sleepable = false;
+
 	bpf_link_free_id(link->id);
 	if (link->prog) {
+		sleepable = link->prog->aux->sleepable;
 		/* detach BPF program, clean up used resources */
 		link->ops->release(link);
 		bpf_prog_put(link->prog);
 	}
-	/* free bpf_link and its containing memory */
-	link->ops->dealloc(link);
+	if (link->ops->dealloc_deferred) {
+		/* schedule BPF link deallocation; if underlying BPF program
+		 * is sleepable, we need to first wait for RCU tasks trace
+		 * sync, then go through "classic" RCU grace period
+		 */
+		if (sleepable)
+			call_rcu_tasks_trace(&link->rcu, bpf_link_defer_dealloc_mult_rcu_gp);
+		else
+			call_rcu(&link->rcu, bpf_link_defer_dealloc_rcu_gp);
+	}
+	if (link->ops->dealloc)
+		link->ops->dealloc(link);
 }
 
 static void bpf_link_put_deferred(struct work_struct *work)
@@ -3381,7 +3410,7 @@ static int bpf_raw_tp_link_fill_link_inf
 
 static const struct bpf_link_ops bpf_raw_tp_link_lops = {
 	.release = bpf_raw_tp_link_release,
-	.dealloc = bpf_raw_tp_link_dealloc,
+	.dealloc_deferred = bpf_raw_tp_link_dealloc,
 	.show_fdinfo = bpf_raw_tp_link_show_fdinfo,
 	.fill_link_info = bpf_raw_tp_link_fill_link_info,
 };
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -2639,7 +2639,7 @@ static int bpf_kprobe_multi_link_fill_li
 
 static const struct bpf_link_ops bpf_kprobe_multi_link_lops = {
 	.release = bpf_kprobe_multi_link_release,
-	.dealloc = bpf_kprobe_multi_link_dealloc,
+	.dealloc_deferred = bpf_kprobe_multi_link_dealloc,
 	.fill_link_info = bpf_kprobe_multi_link_fill_link_info,
 };
 
@@ -3081,7 +3081,7 @@ static void bpf_uprobe_multi_link_deallo
 
 static const struct bpf_link_ops bpf_uprobe_multi_link_lops = {
 	.release = bpf_uprobe_multi_link_release,
-	.dealloc = bpf_uprobe_multi_link_dealloc,
+	.dealloc_deferred = bpf_uprobe_multi_link_dealloc,
 };
 
 static int uprobe_prog_run(struct bpf_uprobe *uprobe,
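
For reference, a minimal sketch (not part of the patch, using a hypothetical
"foo" link type) of how a link implementation opts into the new deferred path:
it fills in .dealloc_deferred instead of .dealloc and frees its memory there,
relying on bpf_link_free() to wait for the required RCU grace period(s),
chosen according to the underlying program's sleepable flag, before invoking
the callback. Only struct bpf_link, struct bpf_link_ops, container_of() and
kfree() are real kernel interfaces; everything named bpf_foo_* is illustrative.

struct bpf_foo_link {
	struct bpf_link link;	/* embedded link; container_of() recovers the outer struct */
	u64 cookie;		/* example of per-link data read while programs run */
};

static void bpf_foo_link_release(struct bpf_link *link)
{
	/* detach from the attach point; in-flight programs may still use the link */
}

static void bpf_foo_link_dealloc_deferred(struct bpf_link *link)
{
	/* runs only after the RCU GP(s), so no program can still dereference the link */
	kfree(container_of(link, struct bpf_foo_link, link));
}

static const struct bpf_link_ops bpf_foo_link_lops = {
	.release = bpf_foo_link_release,
	.dealloc_deferred = bpf_foo_link_dealloc_deferred,
};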