// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2011-2015 PLUMgrid, http://plumgrid.com
 * Copyright (c) 2016 Facebook
 */
#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/slab.h>
#include <linux/bpf_verifier.h>
#include <linux/bpf_perf_event.h>
#include <linux/btf.h>
#include <linux/filter.h>
#include <linux/uaccess.h>
#include <linux/ctype.h>
#include <linux/kprobes.h>
#include <linux/spinlock.h>
#include <linux/syscalls.h>
#include <linux/error-injection.h>
#include <linux/btf_ids.h>
#include <linux/bpf_lsm.h>
#include <linux/fprobe.h>
#include <linux/bsearch.h>
#include <linux/sort.h>
#include <linux/key.h>
#include <linux/verification.h>
#include <linux/namei.h>

#include <net/bpf_sk_storage.h>

#include <uapi/linux/bpf.h>
#include <uapi/linux/btf.h>

#include "trace_probe.h"

#define CREATE_TRACE_POINTS
#include "bpf_trace.h"

#define bpf_event_rcu_dereference(p) \
    rcu_dereference_protected(p, lockdep_is_held(&bpf_event_mutex))
#ifdef CONFIG_MODULES
struct bpf_trace_module {
    struct module *module;
    struct list_head list;
};

static LIST_HEAD(bpf_trace_modules);
static DEFINE_MUTEX(bpf_module_mutex);
static struct bpf_raw_event_map *bpf_get_raw_tracepoint_module(const char *name)
{
    struct bpf_raw_event_map *btp, *ret = NULL;
    struct bpf_trace_module *btm;
    unsigned int i;

    mutex_lock(&bpf_module_mutex);
    list_for_each_entry(btm, &bpf_trace_modules, list) {
        for (i = 0; i < btm->module->num_bpf_raw_events; ++i) {
            btp = &btm->module->bpf_raw_events[i];
            if (!strcmp(btp->tp->name, name)) {
                if (try_module_get(btm->module))
                    ret = btp;
                goto out;
            }
        }
    }
out:
    mutex_unlock(&bpf_module_mutex);
    return ret;
}
#else
static struct bpf_raw_event_map *bpf_get_raw_tracepoint_module(const char *name)
{
    return NULL;
}
#endif /* CONFIG_MODULES */
u64 bpf_get_stackid(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5);
u64 bpf_get_stack(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5);

static int bpf_btf_printf_prepare(struct btf_ptr *ptr, u32 btf_ptr_size,
                                  u64 flags, const struct btf **btf,
                                  s32 *btf_id);
static u64 bpf_kprobe_multi_cookie(struct bpf_run_ctx *ctx);
static u64 bpf_kprobe_multi_entry_ip(struct bpf_run_ctx *ctx);

static u64 bpf_uprobe_multi_cookie(struct bpf_run_ctx *ctx);
static u64 bpf_uprobe_multi_entry_ip(struct bpf_run_ctx *ctx);
/**
 * trace_call_bpf - invoke BPF program
 * @call: tracepoint event
 * @ctx: opaque context pointer
 *
 * kprobe handlers execute BPF programs via this helper.
 * Can be used from static tracepoints in the future.
 *
 * Return: BPF programs always return an integer which is interpreted by
 * kprobe handlers as:
 * 0 - return from kprobe (event is filtered out)
 * 1 - store kprobe event into ring buffer
 * Other values are reserved and currently alias to 1
 */
unsigned int trace_call_bpf(struct trace_event_call *call, void *ctx)
{
    unsigned int ret;

    cant_sleep();

    if (unlikely(__this_cpu_inc_return(bpf_prog_active) != 1)) {
        /*
         * since some bpf program is already running on this cpu,
         * don't call into another bpf program (same or different)
         * and don't send kprobe event into ring-buffer,
         * so return zero here
         */
        bpf_prog_inc_misses_counters(rcu_dereference(call->prog_array));
        ret = 0;
        goto out;
    }

    /*
     * Instead of moving rcu_read_lock/rcu_dereference/rcu_read_unlock
     * to all call sites, we did a bpf_prog_array_valid() there to check
     * whether call->prog_array is empty or not, which is
     * a heuristic to speed up execution.
     *
     * If the bpf_prog_array_valid() fetched prog_array was
     * non-NULL, we go into trace_call_bpf() and do the actual
     * proper rcu_dereference() under RCU lock.
     * If it turns out that prog_array is NULL, then we bail out.
     * For the opposite, if the bpf_prog_array_valid() fetched pointer
     * was NULL, you'll skip the prog_array with the risk of missing
     * out on events when it was updated in between this and the
     * rcu_dereference(), which is an accepted risk.
     */
    rcu_read_lock();
    ret = bpf_prog_run_array(rcu_dereference(call->prog_array),
                             ctx, bpf_prog_run);
    rcu_read_unlock();

out:
    __this_cpu_dec(bpf_prog_active);

    return ret;
}
#ifdef CONFIG_BPF_KPROBE_OVERRIDE
BPF_CALL_2(bpf_override_return, struct pt_regs *, regs, unsigned long, rc)
{
    regs_set_return_value(regs, rc);
    override_function_with_return(regs);
    return 0;
}

static const struct bpf_func_proto bpf_override_return_proto = {
    .func       = bpf_override_return,
    .ret_type   = RET_INTEGER,
    .arg1_type  = ARG_PTR_TO_CTX,
    .arg2_type  = ARG_ANYTHING,
};
#endif
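/*
 * Hedged usage sketch (editor's illustration, not part of the kernel source):
 * a BPF kprobe program forcing an error return from an allowlisted function
 * via bpf_override_return(). Assumes libbpf's SEC()/BPF_KPROBE() conventions
 * and CONFIG_BPF_KPROBE_OVERRIDE; the probed symbol is only an example.
 *
 *    SEC("kprobe/should_failslab")
 *    int BPF_KPROBE(fail_slab_alloc)
 *    {
 *        bpf_override_return(ctx, -ENOMEM);
 *        return 0;
 *    }
 */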
static __always_inline int
bpf_probe_read_user_common(void *dst, u32 size, const void __user *unsafe_ptr)
{
    int ret;

    ret = copy_from_user_nofault(dst, unsafe_ptr, size);
    if (unlikely(ret < 0))
        memset(dst, 0, size);

    return ret;
}

BPF_CALL_3(bpf_probe_read_user, void *, dst, u32, size,
           const void __user *, unsafe_ptr)
{
    return bpf_probe_read_user_common(dst, size, unsafe_ptr);
}

const struct bpf_func_proto bpf_probe_read_user_proto = {
    .func       = bpf_probe_read_user,
    .ret_type   = RET_INTEGER,
    .arg1_type  = ARG_PTR_TO_UNINIT_MEM,
    .arg2_type  = ARG_CONST_SIZE_OR_ZERO,
    .arg3_type  = ARG_ANYTHING,
};
static __always_inline int
bpf_probe_read_user_str_common(void *dst, u32 size,
                               const void __user *unsafe_ptr)
{
    int ret;

    /*
     * NB: We rely on strncpy_from_user() not copying junk past the NUL
     * terminator into `dst`.
     *
     * strncpy_from_user() does long-sized strides in the fast path. If the
     * strncpy does not mask out the bytes after the NUL in `unsafe_ptr`,
     * then there could be junk after the NUL in `dst`. If the user takes
     * `dst` and keys a hash map with it, then semantically identical
     * strings can occupy multiple entries in the map.
     */
    ret = strncpy_from_user_nofault(dst, unsafe_ptr, size);
    if (unlikely(ret < 0))
        memset(dst, 0, size);

    return ret;
}

BPF_CALL_3(bpf_probe_read_user_str, void *, dst, u32, size,
           const void __user *, unsafe_ptr)
{
    return bpf_probe_read_user_str_common(dst, size, unsafe_ptr);
}

const struct bpf_func_proto bpf_probe_read_user_str_proto = {
    .func       = bpf_probe_read_user_str,
    .ret_type   = RET_INTEGER,
    .arg1_type  = ARG_PTR_TO_UNINIT_MEM,
    .arg2_type  = ARG_CONST_SIZE_OR_ZERO,
    .arg3_type  = ARG_ANYTHING,
};
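/*
 * Hedged usage sketch (editor's illustration, not part of the kernel source):
 * reading a NUL-terminated user string into a fixed buffer from a BPF
 * program, e.g. the filename argument of a syscall tracepoint. Assumes
 * libbpf SEC() conventions; the attach point is only an example.
 *
 *    SEC("tracepoint/syscalls/sys_enter_openat")
 *    int trace_openat(struct trace_event_raw_sys_enter *ctx)
 *    {
 *        char fname[256];
 *        long n;
 *
 *        n = bpf_probe_read_user_str(fname, sizeof(fname),
 *                                    (const char *)ctx->args[1]);
 *        if (n > 0)
 *            bpf_printk("openat: %s", fname);
 *        return 0;
 *    }
 */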
BPF_CALL_3(bpf_probe_read_kernel, void *, dst, u32, size,
           const void *, unsafe_ptr)
{
    return bpf_probe_read_kernel_common(dst, size, unsafe_ptr);
}

const struct bpf_func_proto bpf_probe_read_kernel_proto = {
    .func       = bpf_probe_read_kernel,
    .ret_type   = RET_INTEGER,
    .arg1_type  = ARG_PTR_TO_UNINIT_MEM,
    .arg2_type  = ARG_CONST_SIZE_OR_ZERO,
    .arg3_type  = ARG_ANYTHING,
};
static __always_inline int
bpf_probe_read_kernel_str_common(void *dst, u32 size, const void *unsafe_ptr)
{
    int ret;

    /*
     * The strncpy_from_kernel_nofault() call will likely not fill the
     * entire buffer, but that's okay in this circumstance as we're probing
     * arbitrary memory anyway similar to bpf_probe_read_*() and might
     * as well probe the stack. Thus, memory is explicitly cleared
     * only in the error case, so that improper users ignoring the return
     * code altogether don't copy garbage; otherwise the length of the
     * string is returned that can be used for bpf_perf_event_output() et al.
     */
    ret = strncpy_from_kernel_nofault(dst, unsafe_ptr, size);
    if (unlikely(ret < 0))
        memset(dst, 0, size);

    return ret;
}

BPF_CALL_3(bpf_probe_read_kernel_str, void *, dst, u32, size,
           const void *, unsafe_ptr)
{
    return bpf_probe_read_kernel_str_common(dst, size, unsafe_ptr);
}

const struct bpf_func_proto bpf_probe_read_kernel_str_proto = {
    .func       = bpf_probe_read_kernel_str,
    .ret_type   = RET_INTEGER,
    .arg1_type  = ARG_PTR_TO_UNINIT_MEM,
    .arg2_type  = ARG_CONST_SIZE_OR_ZERO,
    .arg3_type  = ARG_ANYTHING,
};
#ifdef CONFIG_ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE
BPF_CALL_3(bpf_probe_read_compat, void *, dst, u32, size,
           const void *, unsafe_ptr)
{
    if ((unsigned long)unsafe_ptr < TASK_SIZE) {
        return bpf_probe_read_user_common(dst, size,
                (__force void __user *)unsafe_ptr);
    }
    return bpf_probe_read_kernel_common(dst, size, unsafe_ptr);
}

static const struct bpf_func_proto bpf_probe_read_compat_proto = {
    .func       = bpf_probe_read_compat,
    .ret_type   = RET_INTEGER,
    .arg1_type  = ARG_PTR_TO_UNINIT_MEM,
    .arg2_type  = ARG_CONST_SIZE_OR_ZERO,
    .arg3_type  = ARG_ANYTHING,
};

BPF_CALL_3(bpf_probe_read_compat_str, void *, dst, u32, size,
           const void *, unsafe_ptr)
{
    if ((unsigned long)unsafe_ptr < TASK_SIZE) {
        return bpf_probe_read_user_str_common(dst, size,
                (__force void __user *)unsafe_ptr);
    }
    return bpf_probe_read_kernel_str_common(dst, size, unsafe_ptr);
}

static const struct bpf_func_proto bpf_probe_read_compat_str_proto = {
    .func       = bpf_probe_read_compat_str,
    .ret_type   = RET_INTEGER,
    .arg1_type  = ARG_PTR_TO_UNINIT_MEM,
    .arg2_type  = ARG_CONST_SIZE_OR_ZERO,
    .arg3_type  = ARG_ANYTHING,
};
#endif /* CONFIG_ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE */
BPF_CALL_3(bpf_probe_write_user, void __user *, unsafe_ptr, const void *, src,
           u32, size)
{
    /*
     * Ensure we're in user context which is safe for the helper to
     * run. This helper has no business in a kthread.
     *
     * access_ok() should prevent writing to non-user memory, but in
     * some situations (nommu, temporary switch, etc) access_ok() does
     * not provide enough validation, hence the check on KERNEL_DS.
     *
     * nmi_uaccess_okay() ensures the probe is not run in an interim
     * state, when the task or mm are switched. This is specifically
     * required to prevent the use of a temporary mm.
     */
    if (unlikely(in_interrupt() ||
                 current->flags & (PF_KTHREAD | PF_EXITING)))
        return -EPERM;
    if (unlikely(!nmi_uaccess_okay()))
        return -EPERM;

    return copy_to_user_nofault(unsafe_ptr, src, size);
}

static const struct bpf_func_proto bpf_probe_write_user_proto = {
    .func       = bpf_probe_write_user,
    .ret_type   = RET_INTEGER,
    .arg1_type  = ARG_ANYTHING,
    .arg2_type  = ARG_PTR_TO_MEM | MEM_RDONLY,
    .arg3_type  = ARG_CONST_SIZE,
};

static const struct bpf_func_proto *bpf_get_probe_write_proto(void)
{
    if (!capable(CAP_SYS_ADMIN))
        return NULL;

    pr_warn_ratelimited("%s[%d] is installing a program with bpf_probe_write_user helper that may corrupt user memory!",
                        current->comm, task_pid_nr(current));

    return &bpf_probe_write_user_proto;
}
#define MAX_TRACE_PRINTK_VARARGS    3
#define BPF_TRACE_PRINTK_SIZE       1024

BPF_CALL_5(bpf_trace_printk, char *, fmt, u32, fmt_size, u64, arg1,
           u64, arg2, u64, arg3)
{
    u64 args[MAX_TRACE_PRINTK_VARARGS] = { arg1, arg2, arg3 };
    struct bpf_bprintf_data data = {
        .get_bin_args = true,
        .get_buf      = true,
    };
    int ret;

    ret = bpf_bprintf_prepare(fmt, fmt_size, args,
                              MAX_TRACE_PRINTK_VARARGS, &data);
    if (ret < 0)
        return ret;

    ret = bstr_printf(data.buf, MAX_BPRINTF_BUF, fmt, data.bin_args);

    trace_bpf_trace_printk(data.buf);

    bpf_bprintf_cleanup(&data);

    return ret;
}

static const struct bpf_func_proto bpf_trace_printk_proto = {
    .func       = bpf_trace_printk,
    .ret_type   = RET_INTEGER,
    .arg1_type  = ARG_PTR_TO_MEM | MEM_RDONLY,
    .arg2_type  = ARG_CONST_SIZE,
};

static void __set_printk_clr_event(void)
{
    /*
     * This program might be calling bpf_trace_printk,
     * so enable the associated bpf_trace/bpf_trace_printk event.
     * Repeat this each time as it is possible a user has
     * disabled bpf_trace_printk events. By loading a program
     * calling bpf_trace_printk() the user has, however, expressed
     * the intent to see such events.
     */
    if (trace_set_clr_event("bpf_trace", "bpf_trace_printk", 1))
        pr_warn_ratelimited("could not enable bpf_trace_printk events");
}

const struct bpf_func_proto *bpf_get_trace_printk_proto(void)
{
    __set_printk_clr_event();
    return &bpf_trace_printk_proto;
}
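/*
 * Hedged usage sketch (editor's illustration, not part of the kernel source):
 * BPF programs usually reach bpf_trace_printk() through libbpf's bpf_printk()
 * convenience macro; the output lands in the bpf_trace/bpf_trace_printk event
 * enabled above and can be read from tracefs' trace_pipe. Assumes libbpf
 * SEC() conventions; the attach point is only an example.
 *
 *    SEC("tracepoint/syscalls/sys_enter_execve")
 *    int trace_execve(void *ctx)
 *    {
 *        bpf_printk("execve by pid %d",
 *                   (int)(bpf_get_current_pid_tgid() >> 32));
 *        return 0;
 *    }
 *
 *    // then: cat /sys/kernel/debug/tracing/trace_pipe
 */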
BPF_CALL_4(bpf_trace_vprintk, char *, fmt, u32, fmt_size, const void *, args,
           u32, data_len)
{
    struct bpf_bprintf_data data = {
        .get_bin_args = true,
        .get_buf      = true,
    };
    int ret, num_args;

    if (data_len & 7 || data_len > MAX_BPRINTF_VARARGS * 8 ||
        (data_len && !args))
        return -EINVAL;
    num_args = data_len / 8;

    ret = bpf_bprintf_prepare(fmt, fmt_size, args, num_args, &data);
    if (ret < 0)
        return ret;

    ret = bstr_printf(data.buf, MAX_BPRINTF_BUF, fmt, data.bin_args);

    trace_bpf_trace_printk(data.buf);

    bpf_bprintf_cleanup(&data);

    return ret;
}

static const struct bpf_func_proto bpf_trace_vprintk_proto = {
    .func       = bpf_trace_vprintk,
    .ret_type   = RET_INTEGER,
    .arg1_type  = ARG_PTR_TO_MEM | MEM_RDONLY,
    .arg2_type  = ARG_CONST_SIZE,
    .arg3_type  = ARG_PTR_TO_MEM | PTR_MAYBE_NULL | MEM_RDONLY,
    .arg4_type  = ARG_CONST_SIZE_OR_ZERO,
};

const struct bpf_func_proto *bpf_get_trace_vprintk_proto(void)
{
    __set_printk_clr_event();
    return &bpf_trace_vprintk_proto;
}
BPF_CALL_5(bpf_seq_printf, struct seq_file *, m, char *, fmt, u32, fmt_size,
           const void *, args, u32, data_len)
{
    struct bpf_bprintf_data data = {
        .get_bin_args = true,
    };
    int err, num_args;

    if (data_len & 7 || data_len > MAX_BPRINTF_VARARGS * 8 ||
        (data_len && !args))
        return -EINVAL;
    num_args = data_len / 8;

    err = bpf_bprintf_prepare(fmt, fmt_size, args, num_args, &data);
    if (err < 0)
        return err;

    seq_bprintf(m, fmt, data.bin_args);

    bpf_bprintf_cleanup(&data);

    return seq_has_overflowed(m) ? -EOVERFLOW : 0;
}

BTF_ID_LIST_SINGLE(btf_seq_file_ids, struct, seq_file)

static const struct bpf_func_proto bpf_seq_printf_proto = {
    .func        = bpf_seq_printf,
    .ret_type    = RET_INTEGER,
    .arg1_type   = ARG_PTR_TO_BTF_ID,
    .arg1_btf_id = &btf_seq_file_ids[0],
    .arg2_type   = ARG_PTR_TO_MEM | MEM_RDONLY,
    .arg3_type   = ARG_CONST_SIZE,
    .arg4_type   = ARG_PTR_TO_MEM | PTR_MAYBE_NULL | MEM_RDONLY,
    .arg5_type   = ARG_CONST_SIZE_OR_ZERO,
};
BPF_CALL_3(bpf_seq_write, struct seq_file *, m, const void *, data, u32, len)
{
    return seq_write(m, data, len) ? -EOVERFLOW : 0;
}

static const struct bpf_func_proto bpf_seq_write_proto = {
    .func        = bpf_seq_write,
    .ret_type    = RET_INTEGER,
    .arg1_type   = ARG_PTR_TO_BTF_ID,
    .arg1_btf_id = &btf_seq_file_ids[0],
    .arg2_type   = ARG_PTR_TO_MEM | MEM_RDONLY,
    .arg3_type   = ARG_CONST_SIZE_OR_ZERO,
};
BPF_CALL_4(bpf_seq_printf_btf, struct seq_file *, m, struct btf_ptr *, ptr,
           u32, btf_ptr_size, u64, flags)
{
    const struct btf *btf;
    s32 btf_id;
    int ret;

    ret = bpf_btf_printf_prepare(ptr, btf_ptr_size, flags, &btf, &btf_id);
    if (ret)
        return ret;

    return btf_type_seq_show_flags(btf, btf_id, ptr->ptr, m, flags);
}

static const struct bpf_func_proto bpf_seq_printf_btf_proto = {
    .func        = bpf_seq_printf_btf,
    .ret_type    = RET_INTEGER,
    .arg1_type   = ARG_PTR_TO_BTF_ID,
    .arg1_btf_id = &btf_seq_file_ids[0],
    .arg2_type   = ARG_PTR_TO_MEM | MEM_RDONLY,
    .arg3_type   = ARG_CONST_SIZE_OR_ZERO,
    .arg4_type   = ARG_ANYTHING,
};
static __always_inline int
get_map_perf_counter(struct bpf_map *map, u64 flags,
                     u64 *value, u64 *enabled, u64 *running)
{
    struct bpf_array *array = container_of(map, struct bpf_array, map);
    unsigned int cpu = smp_processor_id();
    u64 index = flags & BPF_F_INDEX_MASK;
    struct bpf_event_entry *ee;

    if (unlikely(flags & ~(BPF_F_INDEX_MASK)))
        return -EINVAL;
    if (index == BPF_F_CURRENT_CPU)
        index = cpu;
    if (unlikely(index >= array->map.max_entries))
        return -E2BIG;

    ee = READ_ONCE(array->ptrs[index]);
    if (!ee)
        return -ENOENT;

    return perf_event_read_local(ee->event, value, enabled, running);
}

BPF_CALL_2(bpf_perf_event_read, struct bpf_map *, map, u64, flags)
{
    u64 value = 0;
    int err;

    err = get_map_perf_counter(map, flags, &value, NULL, NULL);
    /*
     * this api is ugly since we miss [-22..-2] range of valid
     * counter values, but that's uapi
     */
    if (err)
        return err;
    return value;
}

static const struct bpf_func_proto bpf_perf_event_read_proto = {
    .func       = bpf_perf_event_read,
    .ret_type   = RET_INTEGER,
    .arg1_type  = ARG_CONST_MAP_PTR,
    .arg2_type  = ARG_ANYTHING,
};
BPF_CALL_4(bpf_perf_event_read_value, struct bpf_map *, map, u64, flags,
           struct bpf_perf_event_value *, buf, u32, size)
{
    int err = -EINVAL;

    if (unlikely(size != sizeof(struct bpf_perf_event_value)))
        goto clear;
    err = get_map_perf_counter(map, flags, &buf->counter, &buf->enabled,
                               &buf->running);
    if (unlikely(err))
        goto clear;
    return 0;
clear:
    memset(buf, 0, size);
    return err;
}

static const struct bpf_func_proto bpf_perf_event_read_value_proto = {
    .func       = bpf_perf_event_read_value,
    .ret_type   = RET_INTEGER,
    .arg1_type  = ARG_CONST_MAP_PTR,
    .arg2_type  = ARG_ANYTHING,
    .arg3_type  = ARG_PTR_TO_UNINIT_MEM,
    .arg4_type  = ARG_CONST_SIZE,
};
static __always_inline u64
__bpf_perf_event_output(struct pt_regs *regs, struct bpf_map *map,
                        u64 flags, struct perf_sample_data *sd)
{
    struct bpf_array *array = container_of(map, struct bpf_array, map);
    unsigned int cpu = smp_processor_id();
    u64 index = flags & BPF_F_INDEX_MASK;
    struct bpf_event_entry *ee;
    struct perf_event *event;

    if (index == BPF_F_CURRENT_CPU)
        index = cpu;
    if (unlikely(index >= array->map.max_entries))
        return -E2BIG;

    ee = READ_ONCE(array->ptrs[index]);
    if (!ee)
        return -ENOENT;

    event = ee->event;
    if (unlikely(event->attr.type != PERF_TYPE_SOFTWARE ||
                 event->attr.config != PERF_COUNT_SW_BPF_OUTPUT))
        return -EINVAL;

    if (unlikely(event->oncpu != cpu))
        return -EOPNOTSUPP;

    return perf_event_output(event, sd, regs);
}
/*
 * Support executing tracepoints in normal, irq, and nmi context that each call
 * bpf_perf_event_output
 */
struct bpf_trace_sample_data {
    struct perf_sample_data sds[3];
};

static DEFINE_PER_CPU(struct bpf_trace_sample_data, bpf_trace_sds);
static DEFINE_PER_CPU(int, bpf_trace_nest_level);

BPF_CALL_5(bpf_perf_event_output, struct pt_regs *, regs, struct bpf_map *, map,
           u64, flags, void *, data, u64, size)
{
    struct bpf_trace_sample_data *sds;
    struct perf_raw_record raw = {
        .frag = {
            .size = size,
            .data = data,
        },
    };
    struct perf_sample_data *sd;
    int nest_level, err;

    sds = this_cpu_ptr(&bpf_trace_sds);
    nest_level = this_cpu_inc_return(bpf_trace_nest_level);

    if (WARN_ON_ONCE(nest_level > ARRAY_SIZE(sds->sds))) {
        err = -EBUSY;
        goto out;
    }

    sd = &sds->sds[nest_level - 1];

    if (unlikely(flags & ~(BPF_F_INDEX_MASK))) {
        err = -EINVAL;
        goto out;
    }

    perf_sample_data_init(sd, 0, 0);
    perf_sample_save_raw_data(sd, &raw);

    err = __bpf_perf_event_output(regs, map, flags, sd);
out:
    this_cpu_dec(bpf_trace_nest_level);
    return err;
}

static const struct bpf_func_proto bpf_perf_event_output_proto = {
    .func       = bpf_perf_event_output,
    .ret_type   = RET_INTEGER,
    .arg1_type  = ARG_PTR_TO_CTX,
    .arg2_type  = ARG_CONST_MAP_PTR,
    .arg3_type  = ARG_ANYTHING,
    .arg4_type  = ARG_PTR_TO_MEM | MEM_RDONLY,
    .arg5_type  = ARG_CONST_SIZE_OR_ZERO,
};
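/*
 * Hedged usage sketch (editor's illustration, not part of the kernel source):
 * a BPF program streaming an event record to user space through a
 * BPF_MAP_TYPE_PERF_EVENT_ARRAY, the map type this helper expects. Assumes
 * libbpf SEC() conventions; the map name and attach point are only examples.
 *
 *    struct {
 *        __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
 *        __uint(key_size, sizeof(__u32));
 *        __uint(value_size, sizeof(__u32));
 *    } events SEC(".maps");
 *
 *    SEC("kprobe/do_sys_openat2")
 *    int probe_open(struct pt_regs *ctx)
 *    {
 *        __u32 pid = bpf_get_current_pid_tgid() >> 32;
 *
 *        bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU,
 *                              &pid, sizeof(pid));
 *        return 0;
 *    }
 */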
static DEFINE_PER_CPU(int, bpf_event_output_nest_level);
struct bpf_nested_pt_regs {
    struct pt_regs regs[3];
};
static DEFINE_PER_CPU(struct bpf_nested_pt_regs, bpf_pt_regs);
static DEFINE_PER_CPU(struct bpf_trace_sample_data, bpf_misc_sds);

u64 bpf_event_output(struct bpf_map *map, u64 flags, void *meta, u64 meta_size,
                     void *ctx, u64 ctx_size, bpf_ctx_copy_t ctx_copy)
{
    struct perf_raw_frag frag = {
        .copy = ctx_copy,
        .size = ctx_size,
        .data = ctx,
    };
    struct perf_raw_record raw = {
        .frag = {
            {
                .next = ctx_size ? &frag : NULL,
            },
            .size = meta_size,
            .data = meta,
        },
    };
    struct perf_sample_data *sd;
    struct pt_regs *regs;
    int nest_level;
    u64 ret;

    nest_level = this_cpu_inc_return(bpf_event_output_nest_level);

    if (WARN_ON_ONCE(nest_level > ARRAY_SIZE(bpf_misc_sds.sds))) {
        ret = -EBUSY;
        goto out;
    }
    sd = this_cpu_ptr(&bpf_misc_sds.sds[nest_level - 1]);
    regs = this_cpu_ptr(&bpf_pt_regs.regs[nest_level - 1]);

    perf_fetch_caller_regs(regs);
    perf_sample_data_init(sd, 0, 0);
    perf_sample_save_raw_data(sd, &raw);

    ret = __bpf_perf_event_output(regs, map, flags, sd);
out:
    this_cpu_dec(bpf_event_output_nest_level);
    return ret;
}
BPF_CALL_0(bpf_get_current_task)
{
    return (long) current;
}

const struct bpf_func_proto bpf_get_current_task_proto = {
    .func       = bpf_get_current_task,
    .ret_type   = RET_INTEGER,
};

BPF_CALL_0(bpf_get_current_task_btf)
{
    return (unsigned long) current;
}

const struct bpf_func_proto bpf_get_current_task_btf_proto = {
    .func       = bpf_get_current_task_btf,
    .ret_type   = RET_PTR_TO_BTF_ID_TRUSTED,
    .ret_btf_id = &btf_tracing_ids[BTF_TRACING_TYPE_TASK],
};
BPF_CALL_1(bpf_task_pt_regs, struct task_struct *, task)
{
    return (unsigned long) task_pt_regs(task);
}

BTF_ID_LIST(bpf_task_pt_regs_ids)
BTF_ID(struct, pt_regs)

const struct bpf_func_proto bpf_task_pt_regs_proto = {
    .func        = bpf_task_pt_regs,
    .arg1_type   = ARG_PTR_TO_BTF_ID,
    .arg1_btf_id = &btf_tracing_ids[BTF_TRACING_TYPE_TASK],
    .ret_type    = RET_PTR_TO_BTF_ID,
    .ret_btf_id  = &bpf_task_pt_regs_ids[0],
};
BPF_CALL_2(bpf_current_task_under_cgroup, struct bpf_map *, map, u32, idx)
{
    struct bpf_array *array = container_of(map, struct bpf_array, map);
    struct cgroup *cgrp;

    if (unlikely(idx >= array->map.max_entries))
        return -E2BIG;

    cgrp = READ_ONCE(array->ptrs[idx]);
    if (unlikely(!cgrp))
        return -EAGAIN;

    return task_under_cgroup_hierarchy(current, cgrp);
}

static const struct bpf_func_proto bpf_current_task_under_cgroup_proto = {
    .func       = bpf_current_task_under_cgroup,
    .ret_type   = RET_INTEGER,
    .arg1_type  = ARG_CONST_MAP_PTR,
    .arg2_type  = ARG_ANYTHING,
};
struct send_signal_irq_work {
    struct irq_work irq_work;
    struct task_struct *task;
    u32 sig;
    enum pid_type type;
};

static DEFINE_PER_CPU(struct send_signal_irq_work, send_signal_work);

static void do_bpf_send_signal(struct irq_work *entry)
{
    struct send_signal_irq_work *work;

    work = container_of(entry, struct send_signal_irq_work, irq_work);
    group_send_sig_info(work->sig, SEND_SIG_PRIV, work->task, work->type);
    put_task_struct(work->task);
}

static int bpf_send_signal_common(u32 sig, enum pid_type type)
{
    struct send_signal_irq_work *work = NULL;

    /* Similar to bpf_probe_write_user, task needs to be
     * in a sound condition and kernel memory access be
     * permitted in order to send signal to the current
     * task.
     */
    if (unlikely(current->flags & (PF_KTHREAD | PF_EXITING)))
        return -EPERM;
    if (unlikely(!nmi_uaccess_okay()))
        return -EPERM;
    /* Task should not be pid=1 to avoid kernel panic. */
    if (unlikely(is_global_init(current)))
        return -EPERM;

    if (irqs_disabled()) {
        /* Do an early check on signal validity. Otherwise,
         * the error is lost in deferred irq_work.
         */
        if (unlikely(!valid_signal(sig)))
            return -EINVAL;

        work = this_cpu_ptr(&send_signal_work);
        if (irq_work_is_busy(&work->irq_work))
            return -EBUSY;

        /* Add the current task, which is the target of sending signal,
         * to the irq_work. The current task may change when queued
         * irq works get executed.
         */
        work->task = get_task_struct(current);
        work->sig = sig;
        work->type = type;
        irq_work_queue(&work->irq_work);
        return 0;
    }

    return group_send_sig_info(sig, SEND_SIG_PRIV, current, type);
}
BPF_CALL_1(bpf_send_signal, u32, sig)
{
    return bpf_send_signal_common(sig, PIDTYPE_TGID);
}

static const struct bpf_func_proto bpf_send_signal_proto = {
    .func       = bpf_send_signal,
    .ret_type   = RET_INTEGER,
    .arg1_type  = ARG_ANYTHING,
};

BPF_CALL_1(bpf_send_signal_thread, u32, sig)
{
    return bpf_send_signal_common(sig, PIDTYPE_PID);
}

static const struct bpf_func_proto bpf_send_signal_thread_proto = {
    .func       = bpf_send_signal_thread,
    .ret_type   = RET_INTEGER,
    .arg1_type  = ARG_ANYTHING,
};
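/*
 * Hedged usage sketch (editor's illustration, not part of the kernel source):
 * sending SIGUSR1 to the whole thread group of the current task from a
 * kprobe program; bpf_send_signal_thread() would target only the current
 * thread instead. Assumes libbpf SEC() conventions and that the SIGUSR1
 * constant is available to the program; the probed symbol is only an example.
 *
 *    SEC("kprobe/do_sys_openat2")
 *    int notify_opener(struct pt_regs *ctx)
 *    {
 *        bpf_send_signal(SIGUSR1);
 *        return 0;
 *    }
 */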
BPF_CALL_3(bpf_d_path, struct path *, path, char *, buf, u32, sz)
{
    struct path copy;
    long len;
    char *p;

    if (!sz)
        return 0;

    /*
     * The path pointer is verified as trusted and safe to use,
     * but let's double check it's valid anyway to workaround
     * potentially broken verifier.
     */
    len = copy_from_kernel_nofault(&copy, path, sizeof(*path));
    if (len < 0)
        return len;

    p = d_path(&copy, buf, sz);
    if (IS_ERR(p)) {
        len = PTR_ERR(p);
    } else {
        len = buf + sz - p;
        memmove(buf, p, len);
    }

    return len;
}
BTF_SET_START(btf_allowlist_d_path)
#ifdef CONFIG_SECURITY
BTF_ID(func, security_file_permission)
BTF_ID(func, security_inode_getattr)
BTF_ID(func, security_file_open)
#endif
#ifdef CONFIG_SECURITY_PATH
BTF_ID(func, security_path_truncate)
#endif
BTF_ID(func, vfs_truncate)
BTF_ID(func, vfs_fallocate)
BTF_ID(func, dentry_open)
BTF_ID(func, vfs_getattr)
BTF_ID(func, filp_close)
BTF_SET_END(btf_allowlist_d_path)

static bool bpf_d_path_allowed(const struct bpf_prog *prog)
{
    if (prog->type == BPF_PROG_TYPE_TRACING &&
        prog->expected_attach_type == BPF_TRACE_ITER)
        return true;

    if (prog->type == BPF_PROG_TYPE_LSM)
        return bpf_lsm_is_sleepable_hook(prog->aux->attach_btf_id);

    return btf_id_set_contains(&btf_allowlist_d_path,
                               prog->aux->attach_btf_id);
}

BTF_ID_LIST_SINGLE(bpf_d_path_btf_ids, struct, path)

static const struct bpf_func_proto bpf_d_path_proto = {
    .func        = bpf_d_path,
    .ret_type    = RET_INTEGER,
    .arg1_type   = ARG_PTR_TO_BTF_ID,
    .arg1_btf_id = &bpf_d_path_btf_ids[0],
    .arg2_type   = ARG_PTR_TO_MEM,
    .arg3_type   = ARG_CONST_SIZE_OR_ZERO,
    .allowed     = bpf_d_path_allowed,
};
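/*
 * Hedged usage sketch (editor's illustration, not part of the kernel source):
 * resolving a file's path from one of the allowlisted attach points above,
 * here an fentry program on security_file_open. Assumes libbpf SEC() and
 * BPF_PROG() conventions plus vmlinux BTF for struct file.
 *
 *    SEC("fentry/security_file_open")
 *    int BPF_PROG(on_file_open, struct file *file)
 *    {
 *        char path[256];
 *
 *        if (bpf_d_path(&file->f_path, path, sizeof(path)) > 0)
 *            bpf_printk("open: %s", path);
 *        return 0;
 *    }
 */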
#define BTF_F_ALL    (BTF_F_COMPACT | BTF_F_NONAME | \
                      BTF_F_PTR_RAW | BTF_F_ZERO)

static int bpf_btf_printf_prepare(struct btf_ptr *ptr, u32 btf_ptr_size,
                                  u64 flags, const struct btf **btf,
                                  s32 *btf_id)
{
    const struct btf_type *t;

    if (unlikely(flags & ~(BTF_F_ALL)))
        return -EINVAL;

    if (btf_ptr_size != sizeof(struct btf_ptr))
        return -EINVAL;

    *btf = bpf_get_btf_vmlinux();

    if (IS_ERR_OR_NULL(*btf))
        return IS_ERR(*btf) ? PTR_ERR(*btf) : -EINVAL;

    if (ptr->type_id > 0)
        *btf_id = ptr->type_id;
    else
        return -EINVAL;

    t = btf_type_by_id(*btf, *btf_id);
    if (*btf_id <= 0 || !t)
        return -ENOENT;

    return 0;
}
BPF_CALL_5(bpf_snprintf_btf, char *, str, u32, str_size, struct btf_ptr *, ptr,
           u32, btf_ptr_size, u64, flags)
{
    const struct btf *btf;
    s32 btf_id;
    int ret;

    ret = bpf_btf_printf_prepare(ptr, btf_ptr_size, flags, &btf, &btf_id);
    if (ret)
        return ret;

    return btf_type_snprintf_show(btf, btf_id, ptr->ptr, str, str_size,
                                  flags);
}

const struct bpf_func_proto bpf_snprintf_btf_proto = {
    .func       = bpf_snprintf_btf,
    .ret_type   = RET_INTEGER,
    .arg1_type  = ARG_PTR_TO_MEM,
    .arg2_type  = ARG_CONST_SIZE,
    .arg3_type  = ARG_PTR_TO_MEM | MEM_RDONLY,
    .arg4_type  = ARG_CONST_SIZE,
    .arg5_type  = ARG_ANYTHING,
};
BPF_CALL_1(bpf_get_func_ip_tracing, void *, ctx)
{
    /* This helper call is inlined by verifier. */
    return ((u64 *)ctx)[-2];
}

static const struct bpf_func_proto bpf_get_func_ip_proto_tracing = {
    .func       = bpf_get_func_ip_tracing,
    .ret_type   = RET_INTEGER,
    .arg1_type  = ARG_PTR_TO_CTX,
};
#ifdef CONFIG_X86_KERNEL_IBT
static unsigned long get_entry_ip(unsigned long fentry_ip)
{
    u32 instr;

    /* Being extra safe in here in case entry ip is on the page-edge. */
    if (get_kernel_nofault(instr, (u32 *) fentry_ip - 1))
        return fentry_ip;
    if (is_endbr(instr))
        fentry_ip -= ENDBR_INSN_SIZE;
    return fentry_ip;
}
#else
#define get_entry_ip(fentry_ip) fentry_ip
#endif
BPF_CALL_1(bpf_get_func_ip_kprobe, struct pt_regs *, regs)
{
    struct bpf_trace_run_ctx *run_ctx __maybe_unused;
    struct kprobe *kp;

#ifdef CONFIG_UPROBES
    run_ctx = container_of(current->bpf_ctx, struct bpf_trace_run_ctx, run_ctx);
    if (run_ctx->is_uprobe)
        return ((struct uprobe_dispatch_data *)current->utask->vaddr)->bp_addr;
#endif

    kp = kprobe_running();

    if (!kp || !(kp->flags & KPROBE_FLAG_ON_FUNC_ENTRY))
        return 0;

    return get_entry_ip((uintptr_t)kp->addr);
}

static const struct bpf_func_proto bpf_get_func_ip_proto_kprobe = {
    .func       = bpf_get_func_ip_kprobe,
    .ret_type   = RET_INTEGER,
    .arg1_type  = ARG_PTR_TO_CTX,
};
BPF_CALL_1(bpf_get_func_ip_kprobe_multi, struct pt_regs *, regs)
{
    return bpf_kprobe_multi_entry_ip(current->bpf_ctx);
}

static const struct bpf_func_proto bpf_get_func_ip_proto_kprobe_multi = {
    .func       = bpf_get_func_ip_kprobe_multi,
    .ret_type   = RET_INTEGER,
    .arg1_type  = ARG_PTR_TO_CTX,
};

BPF_CALL_1(bpf_get_attach_cookie_kprobe_multi, struct pt_regs *, regs)
{
    return bpf_kprobe_multi_cookie(current->bpf_ctx);
}

static const struct bpf_func_proto bpf_get_attach_cookie_proto_kmulti = {
    .func       = bpf_get_attach_cookie_kprobe_multi,
    .ret_type   = RET_INTEGER,
    .arg1_type  = ARG_PTR_TO_CTX,
};

BPF_CALL_1(bpf_get_func_ip_uprobe_multi, struct pt_regs *, regs)
{
    return bpf_uprobe_multi_entry_ip(current->bpf_ctx);
}

static const struct bpf_func_proto bpf_get_func_ip_proto_uprobe_multi = {
    .func       = bpf_get_func_ip_uprobe_multi,
    .ret_type   = RET_INTEGER,
    .arg1_type  = ARG_PTR_TO_CTX,
};

BPF_CALL_1(bpf_get_attach_cookie_uprobe_multi, struct pt_regs *, regs)
{
    return bpf_uprobe_multi_cookie(current->bpf_ctx);
}

static const struct bpf_func_proto bpf_get_attach_cookie_proto_umulti = {
    .func       = bpf_get_attach_cookie_uprobe_multi,
    .ret_type   = RET_INTEGER,
    .arg1_type  = ARG_PTR_TO_CTX,
};
BPF_CALL_1(bpf_get_attach_cookie_trace, void *, ctx)
{
    struct bpf_trace_run_ctx *run_ctx;

    run_ctx = container_of(current->bpf_ctx, struct bpf_trace_run_ctx, run_ctx);
    return run_ctx->bpf_cookie;
}

static const struct bpf_func_proto bpf_get_attach_cookie_proto_trace = {
    .func       = bpf_get_attach_cookie_trace,
    .ret_type   = RET_INTEGER,
    .arg1_type  = ARG_PTR_TO_CTX,
};

BPF_CALL_1(bpf_get_attach_cookie_pe, struct bpf_perf_event_data_kern *, ctx)
{
    return ctx->event->bpf_cookie;
}

static const struct bpf_func_proto bpf_get_attach_cookie_proto_pe = {
    .func       = bpf_get_attach_cookie_pe,
    .ret_type   = RET_INTEGER,
    .arg1_type  = ARG_PTR_TO_CTX,
};

BPF_CALL_1(bpf_get_attach_cookie_tracing, void *, ctx)
{
    struct bpf_trace_run_ctx *run_ctx;

    run_ctx = container_of(current->bpf_ctx, struct bpf_trace_run_ctx, run_ctx);
    return run_ctx->bpf_cookie;
}

static const struct bpf_func_proto bpf_get_attach_cookie_proto_tracing = {
    .func       = bpf_get_attach_cookie_tracing,
    .ret_type   = RET_INTEGER,
    .arg1_type  = ARG_PTR_TO_CTX,
};
BPF_CALL_3(bpf_get_branch_snapshot, void *, buf, u32, size, u64, flags)
{
#ifndef CONFIG_X86
    return -ENOENT;
#else
    static const u32 br_entry_size = sizeof(struct perf_branch_entry);
    u32 entry_cnt = size / br_entry_size;

    entry_cnt = static_call(perf_snapshot_branch_stack)(buf, entry_cnt);

    if (unlikely(flags))
        return -EINVAL;

    if (!entry_cnt)
        return -ENOENT;

    return entry_cnt * br_entry_size;
#endif
}

static const struct bpf_func_proto bpf_get_branch_snapshot_proto = {
    .func       = bpf_get_branch_snapshot,
    .ret_type   = RET_INTEGER,
    .arg1_type  = ARG_PTR_TO_UNINIT_MEM,
    .arg2_type  = ARG_CONST_SIZE_OR_ZERO,
};
BPF_CALL_3(get_func_arg, void *, ctx, u32, n, u64 *, value)
{
    /* This helper call is inlined by verifier. */
    u64 nr_args = ((u64 *)ctx)[-1];

    if ((u64) n >= nr_args)
        return -EINVAL;
    *value = ((u64 *)ctx)[n];
    return 0;
}

static const struct bpf_func_proto bpf_get_func_arg_proto = {
    .func       = get_func_arg,
    .ret_type   = RET_INTEGER,
    .arg1_type  = ARG_PTR_TO_CTX,
    .arg2_type  = ARG_ANYTHING,
    .arg3_type  = ARG_PTR_TO_LONG,
};

BPF_CALL_2(get_func_ret, void *, ctx, u64 *, value)
{
    /* This helper call is inlined by verifier. */
    u64 nr_args = ((u64 *)ctx)[-1];

    *value = ((u64 *)ctx)[nr_args];
    return 0;
}

static const struct bpf_func_proto bpf_get_func_ret_proto = {
    .func       = get_func_ret,
    .ret_type   = RET_INTEGER,
    .arg1_type  = ARG_PTR_TO_CTX,
    .arg2_type  = ARG_PTR_TO_LONG,
};

BPF_CALL_1(get_func_arg_cnt, void *, ctx)
{
    /* This helper call is inlined by verifier. */
    return ((u64 *)ctx)[-1];
}

static const struct bpf_func_proto bpf_get_func_arg_cnt_proto = {
    .func       = get_func_arg_cnt,
    .ret_type   = RET_INTEGER,
    .arg1_type  = ARG_PTR_TO_CTX,
};
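/*
 * Hedged usage sketch (editor's illustration, not part of the kernel source):
 * inspecting a traced function's arguments and return value generically from
 * an fexit program via bpf_get_func_arg_cnt(), bpf_get_func_arg() and
 * bpf_get_func_ret(). Assumes libbpf SEC()/BPF_PROG() conventions; the attach
 * target is only an example.
 *
 *    SEC("fexit/do_unlinkat")
 *    int BPF_PROG(unlink_exit)
 *    {
 *        u64 nargs = bpf_get_func_arg_cnt(ctx);
 *        u64 arg0 = 0, retval = 0;
 *
 *        if (nargs > 0)
 *            bpf_get_func_arg(ctx, 0, &arg0);
 *        bpf_get_func_ret(ctx, &retval);
 *        bpf_printk("do_unlinkat(%llu, ...) = %lld", arg0, (s64)retval);
 *        return 0;
 *    }
 */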
#ifdef CONFIG_KEYS
__diag_push();
__diag_ignore_all("-Wmissing-prototypes",
                  "kfuncs which will be used in BPF programs");

/**
 * bpf_lookup_user_key - lookup a key by its serial
 * @serial: key handle serial number
 * @flags: lookup-specific flags
 *
 * Search a key with a given *serial* and the provided *flags*.
 * If found, increment the reference count of the key by one, and
 * return it in the bpf_key structure.
 *
 * The bpf_key structure must be passed to bpf_key_put() when done
 * with it, so that the key reference count is decremented and the
 * bpf_key structure is freed.
 *
 * Permission checks are deferred to the time the key is used by
 * one of the available key-specific kfuncs.
 *
 * Set *flags* with KEY_LOOKUP_CREATE, to attempt creating a requested
 * special keyring (e.g. session keyring), if it doesn't yet exist.
 * Set *flags* with KEY_LOOKUP_PARTIAL, to lookup a key without waiting
 * for the key construction, and to retrieve uninstantiated keys (keys
 * without data attached to them).
 *
 * Return: a bpf_key pointer with a valid key pointer if the key is found, a
 *         NULL pointer otherwise.
 */
__bpf_kfunc struct bpf_key *bpf_lookup_user_key(u32 serial, u64 flags)
{
    key_ref_t key_ref;
    struct bpf_key *bkey;

    if (flags & ~KEY_LOOKUP_ALL)
        return NULL;

    /*
     * Permission check is deferred until the key is used, as the
     * intent of the caller is unknown here.
     */
    key_ref = lookup_user_key(serial, flags, KEY_DEFER_PERM_CHECK);
    if (IS_ERR(key_ref))
        return NULL;

    bkey = kmalloc(sizeof(*bkey), GFP_KERNEL);
    if (!bkey) {
        key_put(key_ref_to_ptr(key_ref));
        return NULL;
    }

    bkey->key = key_ref_to_ptr(key_ref);
    bkey->has_ref = true;

    return bkey;
}
/**
 * bpf_lookup_system_key - lookup a key by a system-defined ID
 * @id: key ID
 *
 * Obtain a bpf_key structure with a key pointer set to the passed key ID.
 * The key pointer is marked as invalid, to prevent bpf_key_put() from
 * attempting to decrement the key reference count on that pointer. The key
 * pointer set in such way is currently understood only by
 * verify_pkcs7_signature().
 *
 * Set *id* to one of the values defined in include/linux/verification.h:
 * 0 for the primary keyring (immutable keyring of system keys);
 * VERIFY_USE_SECONDARY_KEYRING for both the primary and secondary keyring
 * (where keys can be added only if they are vouched for by existing keys
 * in those keyrings); VERIFY_USE_PLATFORM_KEYRING for the platform
 * keyring (primarily used by the integrity subsystem to verify a kexec'ed
 * kernel image and, possibly, the initramfs signature).
 *
 * Return: a bpf_key pointer with an invalid key pointer set from the
 *         pre-determined ID on success, a NULL pointer otherwise
 */
__bpf_kfunc struct bpf_key *bpf_lookup_system_key(u64 id)
{
    struct bpf_key *bkey;

    if (system_keyring_id_check(id) < 0)
        return NULL;

    bkey = kmalloc(sizeof(*bkey), GFP_ATOMIC);
    if (!bkey)
        return NULL;

    bkey->key = (struct key *)(unsigned long)id;
    bkey->has_ref = false;

    return bkey;
}
/**
 * bpf_key_put - decrement key reference count if key is valid and free bpf_key
 * @bkey: bpf_key structure
 *
 * Decrement the reference count of the key inside *bkey*, if the pointer
 * is valid, and free *bkey*.
 */
__bpf_kfunc void bpf_key_put(struct bpf_key *bkey)
{
    if (bkey->has_ref)
        key_put(bkey->key);

    kfree(bkey);
}
#ifdef CONFIG_SYSTEM_DATA_VERIFICATION
/**
 * bpf_verify_pkcs7_signature - verify a PKCS#7 signature
 * @data_ptr: data to verify
 * @sig_ptr: signature of the data
 * @trusted_keyring: keyring with keys trusted for signature verification
 *
 * Verify the PKCS#7 signature *sig_ptr* against the supplied *data_ptr*
 * with keys in a keyring referenced by *trusted_keyring*.
 *
 * Return: 0 on success, a negative value on error.
 */
__bpf_kfunc int bpf_verify_pkcs7_signature(struct bpf_dynptr_kern *data_ptr,
                                           struct bpf_dynptr_kern *sig_ptr,
                                           struct bpf_key *trusted_keyring)
{
    int ret;

    if (trusted_keyring->has_ref) {
        /*
         * Do the permission check deferred in bpf_lookup_user_key().
         * See bpf_lookup_user_key() for more details.
         *
         * A call to key_task_permission() here would be redundant, as
         * it is already done by keyring_search() called by
         * find_asymmetric_key().
         */
        ret = key_validate(trusted_keyring->key);
        if (ret < 0)
            return ret;
    }

    return verify_pkcs7_signature(data_ptr->data,
                                  __bpf_dynptr_size(data_ptr),
                                  sig_ptr->data,
                                  __bpf_dynptr_size(sig_ptr),
                                  trusted_keyring->key,
                                  VERIFYING_UNSPECIFIED_SIGNATURE, NULL,
                                  NULL);
}
#endif /* CONFIG_SYSTEM_DATA_VERIFICATION */
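/*
 * Hedged usage sketch (editor's illustration, not part of the kernel source):
 * a sleepable LSM program following the bpf_lookup_user_key() /
 * bpf_verify_pkcs7_signature() / bpf_key_put() sequence described above.
 * The data/signature buffers, their lengths and the attach point are
 * simplified assumptions (e.g. global arrays populated by user space).
 *
 *    char data[4096], sig[1024];        // assumed, filled in by user space
 *    u32 data_len, sig_len;             // assumed
 *
 *    SEC("lsm.s/bpf")
 *    int BPF_PROG(check_sig, int cmd, union bpf_attr *attr, unsigned int size)
 *    {
 *        struct bpf_dynptr data_ptr, sig_ptr;
 *        struct bpf_key *trusted_keyring;
 *        int ret;
 *
 *        bpf_dynptr_from_mem(data, data_len, 0, &data_ptr);
 *        bpf_dynptr_from_mem(sig, sig_len, 0, &sig_ptr);
 *
 *        trusted_keyring = bpf_lookup_user_key(KEY_SPEC_SESSION_KEYRING, 0);
 *        if (!trusted_keyring)
 *            return 0;
 *        ret = bpf_verify_pkcs7_signature(&data_ptr, &sig_ptr, trusted_keyring);
 *        bpf_key_put(trusted_keyring);
 *        return ret;
 *    }
 */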
__diag_pop();

BTF_SET8_START(key_sig_kfunc_set)
BTF_ID_FLAGS(func, bpf_lookup_user_key, KF_ACQUIRE | KF_RET_NULL | KF_SLEEPABLE)
BTF_ID_FLAGS(func, bpf_lookup_system_key, KF_ACQUIRE | KF_RET_NULL)
BTF_ID_FLAGS(func, bpf_key_put, KF_RELEASE)
#ifdef CONFIG_SYSTEM_DATA_VERIFICATION
BTF_ID_FLAGS(func, bpf_verify_pkcs7_signature, KF_SLEEPABLE)
#endif
BTF_SET8_END(key_sig_kfunc_set)

static const struct btf_kfunc_id_set bpf_key_sig_kfunc_set = {
    .owner = THIS_MODULE,
    .set   = &key_sig_kfunc_set,
};

static int __init bpf_key_sig_kfuncs_init(void)
{
    return register_btf_kfunc_id_set(BPF_PROG_TYPE_TRACING,
                                     &bpf_key_sig_kfunc_set);
}

late_initcall(bpf_key_sig_kfuncs_init);
#endif /* CONFIG_KEYS */
1432 static const struct bpf_func_proto
*
1433 bpf_tracing_func_proto(enum bpf_func_id func_id
, const struct bpf_prog
*prog
)
1436 case BPF_FUNC_map_lookup_elem
:
1437 return &bpf_map_lookup_elem_proto
;
1438 case BPF_FUNC_map_update_elem
:
1439 return &bpf_map_update_elem_proto
;
1440 case BPF_FUNC_map_delete_elem
:
1441 return &bpf_map_delete_elem_proto
;
1442 case BPF_FUNC_map_push_elem
:
1443 return &bpf_map_push_elem_proto
;
1444 case BPF_FUNC_map_pop_elem
:
1445 return &bpf_map_pop_elem_proto
;
1446 case BPF_FUNC_map_peek_elem
:
1447 return &bpf_map_peek_elem_proto
;
1448 case BPF_FUNC_map_lookup_percpu_elem
:
1449 return &bpf_map_lookup_percpu_elem_proto
;
1450 case BPF_FUNC_ktime_get_ns
:
1451 return &bpf_ktime_get_ns_proto
;
1452 case BPF_FUNC_ktime_get_boot_ns
:
1453 return &bpf_ktime_get_boot_ns_proto
;
1454 case BPF_FUNC_tail_call
:
1455 return &bpf_tail_call_proto
;
1456 case BPF_FUNC_get_current_pid_tgid
:
1457 return &bpf_get_current_pid_tgid_proto
;
1458 case BPF_FUNC_get_current_task
:
1459 return &bpf_get_current_task_proto
;
1460 case BPF_FUNC_get_current_task_btf
:
1461 return &bpf_get_current_task_btf_proto
;
1462 case BPF_FUNC_task_pt_regs
:
1463 return &bpf_task_pt_regs_proto
;
1464 case BPF_FUNC_get_current_uid_gid
:
1465 return &bpf_get_current_uid_gid_proto
;
1466 case BPF_FUNC_get_current_comm
:
1467 return &bpf_get_current_comm_proto
;
1468 case BPF_FUNC_trace_printk
:
1469 return bpf_get_trace_printk_proto();
1470 case BPF_FUNC_get_smp_processor_id
:
1471 return &bpf_get_smp_processor_id_proto
;
1472 case BPF_FUNC_get_numa_node_id
:
1473 return &bpf_get_numa_node_id_proto
;
1474 case BPF_FUNC_perf_event_read
:
1475 return &bpf_perf_event_read_proto
;
1476 case BPF_FUNC_current_task_under_cgroup
:
1477 return &bpf_current_task_under_cgroup_proto
;
1478 case BPF_FUNC_get_prandom_u32
:
1479 return &bpf_get_prandom_u32_proto
;
1480 case BPF_FUNC_probe_write_user
:
1481 return security_locked_down(LOCKDOWN_BPF_WRITE_USER
) < 0 ?
1482 NULL
: bpf_get_probe_write_proto();
1483 case BPF_FUNC_probe_read_user
:
1484 return &bpf_probe_read_user_proto
;
1485 case BPF_FUNC_probe_read_kernel
:
1486 return security_locked_down(LOCKDOWN_BPF_READ_KERNEL
) < 0 ?
1487 NULL
: &bpf_probe_read_kernel_proto
;
1488 case BPF_FUNC_probe_read_user_str
:
1489 return &bpf_probe_read_user_str_proto
;
1490 case BPF_FUNC_probe_read_kernel_str
:
1491 return security_locked_down(LOCKDOWN_BPF_READ_KERNEL
) < 0 ?
1492 NULL
: &bpf_probe_read_kernel_str_proto
;
1493 #ifdef CONFIG_ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE
1494 case BPF_FUNC_probe_read
:
1495 return security_locked_down(LOCKDOWN_BPF_READ_KERNEL
) < 0 ?
1496 NULL
: &bpf_probe_read_compat_proto
;
1497 case BPF_FUNC_probe_read_str
:
1498 return security_locked_down(LOCKDOWN_BPF_READ_KERNEL
) < 0 ?
1499 NULL
: &bpf_probe_read_compat_str_proto
;
1501 #ifdef CONFIG_CGROUPS
1502 case BPF_FUNC_cgrp_storage_get
:
1503 return &bpf_cgrp_storage_get_proto
;
1504 case BPF_FUNC_cgrp_storage_delete
:
1505 return &bpf_cgrp_storage_delete_proto
;
1507 case BPF_FUNC_send_signal
:
1508 return &bpf_send_signal_proto
;
1509 case BPF_FUNC_send_signal_thread
:
1510 return &bpf_send_signal_thread_proto
;
1511 case BPF_FUNC_perf_event_read_value
:
1512 return &bpf_perf_event_read_value_proto
;
1513 case BPF_FUNC_get_ns_current_pid_tgid
:
1514 return &bpf_get_ns_current_pid_tgid_proto
;
1515 case BPF_FUNC_ringbuf_output
:
1516 return &bpf_ringbuf_output_proto
;
1517 case BPF_FUNC_ringbuf_reserve
:
1518 return &bpf_ringbuf_reserve_proto
;
1519 case BPF_FUNC_ringbuf_submit
:
1520 return &bpf_ringbuf_submit_proto
;
1521 case BPF_FUNC_ringbuf_discard
:
1522 return &bpf_ringbuf_discard_proto
;
1523 case BPF_FUNC_ringbuf_query
:
1524 return &bpf_ringbuf_query_proto
;
1525 case BPF_FUNC_jiffies64
:
1526 return &bpf_jiffies64_proto
;
1527 case BPF_FUNC_get_task_stack
:
1528 return &bpf_get_task_stack_proto
;
1529 case BPF_FUNC_copy_from_user
:
1530 return &bpf_copy_from_user_proto
;
1531 case BPF_FUNC_copy_from_user_task
:
1532 return &bpf_copy_from_user_task_proto
;
1533 case BPF_FUNC_snprintf_btf
:
1534 return &bpf_snprintf_btf_proto
;
1535 case BPF_FUNC_per_cpu_ptr
:
1536 return &bpf_per_cpu_ptr_proto
;
1537 case BPF_FUNC_this_cpu_ptr
:
1538 return &bpf_this_cpu_ptr_proto
;
1539 case BPF_FUNC_task_storage_get
:
1540 if (bpf_prog_check_recur(prog
))
1541 return &bpf_task_storage_get_recur_proto
;
1542 return &bpf_task_storage_get_proto
;
1543 case BPF_FUNC_task_storage_delete
:
1544 if (bpf_prog_check_recur(prog
))
1545 return &bpf_task_storage_delete_recur_proto
;
1546 return &bpf_task_storage_delete_proto
;
1547 case BPF_FUNC_for_each_map_elem
:
1548 return &bpf_for_each_map_elem_proto
;
1549 case BPF_FUNC_snprintf
:
1550 return &bpf_snprintf_proto
;
1551 case BPF_FUNC_get_func_ip
:
1552 return &bpf_get_func_ip_proto_tracing
;
1553 case BPF_FUNC_get_branch_snapshot
:
1554 return &bpf_get_branch_snapshot_proto
;
1555 case BPF_FUNC_find_vma
:
1556 return &bpf_find_vma_proto
;
1557 case BPF_FUNC_trace_vprintk
:
1558 return bpf_get_trace_vprintk_proto();
1560 return bpf_base_func_proto(func_id
);
1564 static const struct bpf_func_proto
*
1565 kprobe_prog_func_proto(enum bpf_func_id func_id
, const struct bpf_prog
*prog
)
1568 case BPF_FUNC_perf_event_output
:
1569 return &bpf_perf_event_output_proto
;
1570 case BPF_FUNC_get_stackid
:
1571 return &bpf_get_stackid_proto
;
1572 case BPF_FUNC_get_stack
:
1573 return &bpf_get_stack_proto
;
1574 #ifdef CONFIG_BPF_KPROBE_OVERRIDE
1575 case BPF_FUNC_override_return
:
1576 return &bpf_override_return_proto
;
1578 case BPF_FUNC_get_func_ip
:
1579 if (prog
->expected_attach_type
== BPF_TRACE_KPROBE_MULTI
)
1580 return &bpf_get_func_ip_proto_kprobe_multi
;
1581 if (prog
->expected_attach_type
== BPF_TRACE_UPROBE_MULTI
)
1582 return &bpf_get_func_ip_proto_uprobe_multi
;
1583 return &bpf_get_func_ip_proto_kprobe
;
1584 case BPF_FUNC_get_attach_cookie
:
1585 if (prog
->expected_attach_type
== BPF_TRACE_KPROBE_MULTI
)
1586 return &bpf_get_attach_cookie_proto_kmulti
;
1587 if (prog
->expected_attach_type
== BPF_TRACE_UPROBE_MULTI
)
1588 return &bpf_get_attach_cookie_proto_umulti
;
1589 return &bpf_get_attach_cookie_proto_trace
;
1591 return bpf_tracing_func_proto(func_id
, prog
);
1595 /* bpf+kprobe programs can access fields of 'struct pt_regs' */
1596 static bool kprobe_prog_is_valid_access(int off
, int size
, enum bpf_access_type type
,
1597 const struct bpf_prog
*prog
,
1598 struct bpf_insn_access_aux
*info
)
1600 if (off
< 0 || off
>= sizeof(struct pt_regs
))
1602 if (type
!= BPF_READ
)
1604 if (off
% size
!= 0)
1607 * Assertion for 32 bit to make sure last 8 byte access
1608 * (BPF_DW) to the last 4 byte member is disallowed.
1610 if (off
+ size
> sizeof(struct pt_regs
))
1616 const struct bpf_verifier_ops kprobe_verifier_ops
= {
1617 .get_func_proto
= kprobe_prog_func_proto
,
1618 .is_valid_access
= kprobe_prog_is_valid_access
,
1621 const struct bpf_prog_ops kprobe_prog_ops
= {
1624 BPF_CALL_5(bpf_perf_event_output_tp
, void *, tp_buff
, struct bpf_map
*, map
,
1625 u64
, flags
, void *, data
, u64
, size
)
1627 struct pt_regs
*regs
= *(struct pt_regs
**)tp_buff
;
1630 * r1 points to perf tracepoint buffer where first 8 bytes are hidden
1631 * from bpf program and contain a pointer to 'struct pt_regs'. Fetch it
1632 * from there and call the same bpf_perf_event_output() helper inline.
1634 return ____bpf_perf_event_output(regs
, map
, flags
, data
, size
);
1637 static const struct bpf_func_proto bpf_perf_event_output_proto_tp
= {
1638 .func
= bpf_perf_event_output_tp
,
1640 .ret_type
= RET_INTEGER
,
1641 .arg1_type
= ARG_PTR_TO_CTX
,
1642 .arg2_type
= ARG_CONST_MAP_PTR
,
1643 .arg3_type
= ARG_ANYTHING
,
1644 .arg4_type
= ARG_PTR_TO_MEM
| MEM_RDONLY
,
1645 .arg5_type
= ARG_CONST_SIZE_OR_ZERO
,
1648 BPF_CALL_3(bpf_get_stackid_tp
, void *, tp_buff
, struct bpf_map
*, map
,
1651 struct pt_regs
*regs
= *(struct pt_regs
**)tp_buff
;
1654 * Same comment as in bpf_perf_event_output_tp(), only that this time
1655 * the other helper's function body cannot be inlined due to being
1656 * external, thus we need to call raw helper function.
1658 return bpf_get_stackid((unsigned long) regs
, (unsigned long) map
,
1662 static const struct bpf_func_proto bpf_get_stackid_proto_tp
= {
1663 .func
= bpf_get_stackid_tp
,
1665 .ret_type
= RET_INTEGER
,
1666 .arg1_type
= ARG_PTR_TO_CTX
,
1667 .arg2_type
= ARG_CONST_MAP_PTR
,
1668 .arg3_type
= ARG_ANYTHING
,
1671 BPF_CALL_4(bpf_get_stack_tp
, void *, tp_buff
, void *, buf
, u32
, size
,
1674 struct pt_regs
*regs
= *(struct pt_regs
**)tp_buff
;
1676 return bpf_get_stack((unsigned long) regs
, (unsigned long) buf
,
1677 (unsigned long) size
, flags
, 0);
1680 static const struct bpf_func_proto bpf_get_stack_proto_tp
= {
1681 .func
= bpf_get_stack_tp
,
1683 .ret_type
= RET_INTEGER
,
1684 .arg1_type
= ARG_PTR_TO_CTX
,
1685 .arg2_type
= ARG_PTR_TO_UNINIT_MEM
,
1686 .arg3_type
= ARG_CONST_SIZE_OR_ZERO
,
1687 .arg4_type
= ARG_ANYTHING
,
1690 static const struct bpf_func_proto
*
1691 tp_prog_func_proto(enum bpf_func_id func_id
, const struct bpf_prog
*prog
)
1694 case BPF_FUNC_perf_event_output
:
1695 return &bpf_perf_event_output_proto_tp
;
1696 case BPF_FUNC_get_stackid
:
1697 return &bpf_get_stackid_proto_tp
;
1698 case BPF_FUNC_get_stack
:
1699 return &bpf_get_stack_proto_tp
;
1700 case BPF_FUNC_get_attach_cookie
:
1701 return &bpf_get_attach_cookie_proto_trace
;
1703 return bpf_tracing_func_proto(func_id
, prog
);
1707 static bool tp_prog_is_valid_access(int off
, int size
, enum bpf_access_type type
,
1708 const struct bpf_prog
*prog
,
1709 struct bpf_insn_access_aux
*info
)
1711 if (off
< sizeof(void *) || off
>= PERF_MAX_TRACE_SIZE
)
1713 if (type
!= BPF_READ
)
1715 if (off
% size
!= 0)
1718 BUILD_BUG_ON(PERF_MAX_TRACE_SIZE
% sizeof(__u64
));
1722 const struct bpf_verifier_ops tracepoint_verifier_ops
= {
1723 .get_func_proto
= tp_prog_func_proto
,
1724 .is_valid_access
= tp_prog_is_valid_access
,
1727 const struct bpf_prog_ops tracepoint_prog_ops
= {
1730 BPF_CALL_3(bpf_perf_prog_read_value
, struct bpf_perf_event_data_kern
*, ctx
,
1731 struct bpf_perf_event_value
*, buf
, u32
, size
)
1735 if (unlikely(size
!= sizeof(struct bpf_perf_event_value
)))
1737 err
= perf_event_read_local(ctx
->event
, &buf
->counter
, &buf
->enabled
,
1743 memset(buf
, 0, size
);
1747 static const struct bpf_func_proto bpf_perf_prog_read_value_proto
= {
1748 .func
= bpf_perf_prog_read_value
,
1750 .ret_type
= RET_INTEGER
,
1751 .arg1_type
= ARG_PTR_TO_CTX
,
1752 .arg2_type
= ARG_PTR_TO_UNINIT_MEM
,
1753 .arg3_type
= ARG_CONST_SIZE
,
1756 BPF_CALL_4(bpf_read_branch_records
, struct bpf_perf_event_data_kern
*, ctx
,
1757 void *, buf
, u32
, size
, u64
, flags
)
1759 static const u32 br_entry_size
= sizeof(struct perf_branch_entry
);
1760 struct perf_branch_stack
*br_stack
= ctx
->data
->br_stack
;
1763 if (unlikely(flags
& ~BPF_F_GET_BRANCH_RECORDS_SIZE
))
1766 if (unlikely(!(ctx
->data
->sample_flags
& PERF_SAMPLE_BRANCH_STACK
)))
1769 if (unlikely(!br_stack
))
1772 if (flags
& BPF_F_GET_BRANCH_RECORDS_SIZE
)
1773 return br_stack
->nr
* br_entry_size
;
1775 if (!buf
|| (size
% br_entry_size
!= 0))
1778 to_copy
= min_t(u32
, br_stack
->nr
* br_entry_size
, size
);
1779 memcpy(buf
, br_stack
->entries
, to_copy
);
1784 static const struct bpf_func_proto bpf_read_branch_records_proto
= {
1785 .func
= bpf_read_branch_records
,
1787 .ret_type
= RET_INTEGER
,
1788 .arg1_type
= ARG_PTR_TO_CTX
,
1789 .arg2_type
= ARG_PTR_TO_MEM_OR_NULL
,
1790 .arg3_type
= ARG_CONST_SIZE_OR_ZERO
,
1791 .arg4_type
= ARG_ANYTHING
,
1794 static const struct bpf_func_proto
*
1795 pe_prog_func_proto(enum bpf_func_id func_id
, const struct bpf_prog
*prog
)
1798 case BPF_FUNC_perf_event_output
:
1799 return &bpf_perf_event_output_proto_tp
;
1800 case BPF_FUNC_get_stackid
:
1801 return &bpf_get_stackid_proto_pe
;
1802 case BPF_FUNC_get_stack
:
1803 return &bpf_get_stack_proto_pe
;
1804 case BPF_FUNC_perf_prog_read_value
:
1805 return &bpf_perf_prog_read_value_proto
;
1806 case BPF_FUNC_read_branch_records
:
1807 return &bpf_read_branch_records_proto
;
1808 case BPF_FUNC_get_attach_cookie
:
1809 return &bpf_get_attach_cookie_proto_pe
;
1811 return bpf_tracing_func_proto(func_id
, prog
);
1816 * bpf_raw_tp_regs are separate from bpf_pt_regs used from skb/xdp
1817 * to avoid potential recursive reuse issue when/if tracepoints are added
1818 * inside bpf_*_event_output, bpf_get_stackid and/or bpf_get_stack.
1820 * Since raw tracepoints run despite bpf_prog_active, support concurrent usage
1821 * in normal, irq, and nmi context.
1823 struct bpf_raw_tp_regs
{
1824 struct pt_regs regs
[3];
1826 static DEFINE_PER_CPU(struct bpf_raw_tp_regs
, bpf_raw_tp_regs
);
1827 static DEFINE_PER_CPU(int, bpf_raw_tp_nest_level
);
1828 static struct pt_regs
*get_bpf_raw_tp_regs(void)
1830 struct bpf_raw_tp_regs
*tp_regs
= this_cpu_ptr(&bpf_raw_tp_regs
);
1831 int nest_level
= this_cpu_inc_return(bpf_raw_tp_nest_level
);
1833 if (WARN_ON_ONCE(nest_level
> ARRAY_SIZE(tp_regs
->regs
))) {
1834 this_cpu_dec(bpf_raw_tp_nest_level
);
1835 return ERR_PTR(-EBUSY
);
1838 return &tp_regs
->regs
[nest_level
- 1];
1841 static void put_bpf_raw_tp_regs(void)
1843 this_cpu_dec(bpf_raw_tp_nest_level
);
1846 BPF_CALL_5(bpf_perf_event_output_raw_tp
, struct bpf_raw_tracepoint_args
*, args
,
1847 struct bpf_map
*, map
, u64
, flags
, void *, data
, u64
, size
)
1849 struct pt_regs
*regs
= get_bpf_raw_tp_regs();
1853 return PTR_ERR(regs
);
1855 perf_fetch_caller_regs(regs
);
1856 ret
= ____bpf_perf_event_output(regs
, map
, flags
, data
, size
);
1858 put_bpf_raw_tp_regs();
1862 static const struct bpf_func_proto bpf_perf_event_output_proto_raw_tp
= {
1863 .func
= bpf_perf_event_output_raw_tp
,
1865 .ret_type
= RET_INTEGER
,
1866 .arg1_type
= ARG_PTR_TO_CTX
,
1867 .arg2_type
= ARG_CONST_MAP_PTR
,
1868 .arg3_type
= ARG_ANYTHING
,
1869 .arg4_type
= ARG_PTR_TO_MEM
| MEM_RDONLY
,
1870 .arg5_type
= ARG_CONST_SIZE_OR_ZERO
,
1873 extern const struct bpf_func_proto bpf_skb_output_proto
;
1874 extern const struct bpf_func_proto bpf_xdp_output_proto
;
1875 extern const struct bpf_func_proto bpf_xdp_get_buff_len_trace_proto
;
1877 BPF_CALL_3(bpf_get_stackid_raw_tp
, struct bpf_raw_tracepoint_args
*, args
,
1878 struct bpf_map
*, map
, u64
, flags
)
1880 struct pt_regs
*regs
= get_bpf_raw_tp_regs();
1884 return PTR_ERR(regs
);
1886 perf_fetch_caller_regs(regs
);
1887 /* similar to bpf_perf_event_output_tp, but pt_regs fetched differently */
1888 ret
= bpf_get_stackid((unsigned long) regs
, (unsigned long) map
,
1890 put_bpf_raw_tp_regs();
1894 static const struct bpf_func_proto bpf_get_stackid_proto_raw_tp
= {
1895 .func
= bpf_get_stackid_raw_tp
,
1897 .ret_type
= RET_INTEGER
,
1898 .arg1_type
= ARG_PTR_TO_CTX
,
1899 .arg2_type
= ARG_CONST_MAP_PTR
,
1900 .arg3_type
= ARG_ANYTHING
,
1903 BPF_CALL_4(bpf_get_stack_raw_tp
, struct bpf_raw_tracepoint_args
*, args
,
1904 void *, buf
, u32
, size
, u64
, flags
)
1906 struct pt_regs
*regs
= get_bpf_raw_tp_regs();
1910 return PTR_ERR(regs
);
1912 perf_fetch_caller_regs(regs
);
1913 ret
= bpf_get_stack((unsigned long) regs
, (unsigned long) buf
,
1914 (unsigned long) size
, flags
, 0);
1915 put_bpf_raw_tp_regs();
1919 static const struct bpf_func_proto bpf_get_stack_proto_raw_tp
= {
1920 .func
= bpf_get_stack_raw_tp
,
1922 .ret_type
= RET_INTEGER
,
1923 .arg1_type
= ARG_PTR_TO_CTX
,
1924 .arg2_type
= ARG_PTR_TO_MEM
| MEM_RDONLY
,
1925 .arg3_type
= ARG_CONST_SIZE_OR_ZERO
,
1926 .arg4_type
= ARG_ANYTHING
,
1929 static const struct bpf_func_proto
*
1930 raw_tp_prog_func_proto(enum bpf_func_id func_id
, const struct bpf_prog
*prog
)
1933 case BPF_FUNC_perf_event_output
:
1934 return &bpf_perf_event_output_proto_raw_tp
;
1935 case BPF_FUNC_get_stackid
:
1936 return &bpf_get_stackid_proto_raw_tp
;
1937 case BPF_FUNC_get_stack
:
1938 return &bpf_get_stack_proto_raw_tp
;
1940 return bpf_tracing_func_proto(func_id
, prog
);
1944 const struct bpf_func_proto
*
1945 tracing_prog_func_proto(enum bpf_func_id func_id
, const struct bpf_prog
*prog
)
1947 const struct bpf_func_proto
*fn
;
1951 case BPF_FUNC_skb_output
:
1952 return &bpf_skb_output_proto
;
1953 case BPF_FUNC_xdp_output
:
1954 return &bpf_xdp_output_proto
;
1955 case BPF_FUNC_skc_to_tcp6_sock
:
1956 return &bpf_skc_to_tcp6_sock_proto
;
1957 case BPF_FUNC_skc_to_tcp_sock
:
1958 return &bpf_skc_to_tcp_sock_proto
;
1959 case BPF_FUNC_skc_to_tcp_timewait_sock
:
1960 return &bpf_skc_to_tcp_timewait_sock_proto
;
1961 case BPF_FUNC_skc_to_tcp_request_sock
:
1962 return &bpf_skc_to_tcp_request_sock_proto
;
1963 case BPF_FUNC_skc_to_udp6_sock
:
1964 return &bpf_skc_to_udp6_sock_proto
;
1965 case BPF_FUNC_skc_to_unix_sock
:
1966 return &bpf_skc_to_unix_sock_proto
;
1967 case BPF_FUNC_skc_to_mptcp_sock
:
1968 return &bpf_skc_to_mptcp_sock_proto
;
1969 case BPF_FUNC_sk_storage_get
:
1970 return &bpf_sk_storage_get_tracing_proto
;
1971 case BPF_FUNC_sk_storage_delete
:
1972 return &bpf_sk_storage_delete_tracing_proto
;
1973 case BPF_FUNC_sock_from_file
:
1974 return &bpf_sock_from_file_proto
;
1975 case BPF_FUNC_get_socket_cookie
:
1976 return &bpf_get_socket_ptr_cookie_proto
;
1977 case BPF_FUNC_xdp_get_buff_len
:
1978 return &bpf_xdp_get_buff_len_trace_proto
;
1980 case BPF_FUNC_seq_printf
:
1981 return prog
->expected_attach_type
== BPF_TRACE_ITER
?
1982 &bpf_seq_printf_proto
:
1984 case BPF_FUNC_seq_write
:
1985 return prog
->expected_attach_type
== BPF_TRACE_ITER
?
1986 &bpf_seq_write_proto
:
1988 case BPF_FUNC_seq_printf_btf
:
1989 return prog
->expected_attach_type
== BPF_TRACE_ITER
?
1990 &bpf_seq_printf_btf_proto
:
1992 case BPF_FUNC_d_path
:
1993 return &bpf_d_path_proto
;
1994 case BPF_FUNC_get_func_arg
:
1995 return bpf_prog_has_trampoline(prog
) ? &bpf_get_func_arg_proto
: NULL
;
1996 case BPF_FUNC_get_func_ret
:
1997 return bpf_prog_has_trampoline(prog
) ? &bpf_get_func_ret_proto
: NULL
;
1998 case BPF_FUNC_get_func_arg_cnt
:
1999 return bpf_prog_has_trampoline(prog
) ? &bpf_get_func_arg_cnt_proto
: NULL
;
2000 case BPF_FUNC_get_attach_cookie
:
2001 return bpf_prog_has_trampoline(prog
) ? &bpf_get_attach_cookie_proto_tracing
: NULL
;
2003 fn
= raw_tp_prog_func_proto(func_id
, prog
);
2004 if (!fn
&& prog
->expected_attach_type
== BPF_TRACE_ITER
)
2005 fn
= bpf_iter_get_func_proto(func_id
, prog
);
2010 static bool raw_tp_prog_is_valid_access(int off
, int size
,
2011 enum bpf_access_type type
,
2012 const struct bpf_prog
*prog
,
2013 struct bpf_insn_access_aux
*info
)
2015 return bpf_tracing_ctx_access(off
, size
, type
);
2018 static bool tracing_prog_is_valid_access(int off
, int size
,
2019 enum bpf_access_type type
,
2020 const struct bpf_prog
*prog
,
2021 struct bpf_insn_access_aux
*info
)
2023 return bpf_tracing_btf_ctx_access(off
, size
, type
, prog
, info
);
2026 int __weak
bpf_prog_test_run_tracing(struct bpf_prog
*prog
,
2027 const union bpf_attr
*kattr
,
2028 union bpf_attr __user
*uattr
)
2033 const struct bpf_verifier_ops raw_tracepoint_verifier_ops
= {
2034 .get_func_proto
= raw_tp_prog_func_proto
,
2035 .is_valid_access
= raw_tp_prog_is_valid_access
,
2038 const struct bpf_prog_ops raw_tracepoint_prog_ops
= {
2040 .test_run
= bpf_prog_test_run_raw_tp
,
2044 const struct bpf_verifier_ops tracing_verifier_ops
= {
2045 .get_func_proto
= tracing_prog_func_proto
,
2046 .is_valid_access
= tracing_prog_is_valid_access
,
2049 const struct bpf_prog_ops tracing_prog_ops
= {
2050 .test_run
= bpf_prog_test_run_tracing
,
2053 static bool raw_tp_writable_prog_is_valid_access(int off
, int size
,
2054 enum bpf_access_type type
,
2055 const struct bpf_prog
*prog
,
2056 struct bpf_insn_access_aux
*info
)
2059 if (size
!= sizeof(u64
) || type
!= BPF_READ
)
2061 info
->reg_type
= PTR_TO_TP_BUFFER
;
2063 return raw_tp_prog_is_valid_access(off
, size
, type
, prog
, info
);
2066 const struct bpf_verifier_ops raw_tracepoint_writable_verifier_ops
= {
2067 .get_func_proto
= raw_tp_prog_func_proto
,
2068 .is_valid_access
= raw_tp_writable_prog_is_valid_access
,
2071 const struct bpf_prog_ops raw_tracepoint_writable_prog_ops
= {
static bool pe_prog_is_valid_access(int off, int size, enum bpf_access_type type,
				    const struct bpf_prog *prog,
				    struct bpf_insn_access_aux *info)
{
	const int size_u64 = sizeof(u64);

	if (off < 0 || off >= sizeof(struct bpf_perf_event_data))
		return false;
	if (type != BPF_READ)
		return false;
	if (off % size != 0) {
		if (sizeof(unsigned long) != 4)
			return false;
		if (size != 8)
			return false;
		if (off % size != 4)
			return false;
	}

	switch (off) {
	case bpf_ctx_range(struct bpf_perf_event_data, sample_period):
		bpf_ctx_record_field_size(info, size_u64);
		if (!bpf_ctx_narrow_access_ok(off, size, size_u64))
			return false;
		break;
	case bpf_ctx_range(struct bpf_perf_event_data, addr):
		bpf_ctx_record_field_size(info, size_u64);
		if (!bpf_ctx_narrow_access_ok(off, size, size_u64))
			return false;
		break;
	default:
		if (size != sizeof(long))
			return false;
	}

	return true;
}
static u32 pe_prog_convert_ctx_access(enum bpf_access_type type,
				      const struct bpf_insn *si,
				      struct bpf_insn *insn_buf,
				      struct bpf_prog *prog, u32 *target_size)
{
	struct bpf_insn *insn = insn_buf;

	switch (si->off) {
	case offsetof(struct bpf_perf_event_data, sample_period):
		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_perf_event_data_kern,
						       data), si->dst_reg, si->src_reg,
				      offsetof(struct bpf_perf_event_data_kern, data));
		*insn++ = BPF_LDX_MEM(BPF_DW, si->dst_reg, si->dst_reg,
				      bpf_target_off(struct perf_sample_data, period, 8,
						     target_size));
		break;
	case offsetof(struct bpf_perf_event_data, addr):
		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_perf_event_data_kern,
						       data), si->dst_reg, si->src_reg,
				      offsetof(struct bpf_perf_event_data_kern, data));
		*insn++ = BPF_LDX_MEM(BPF_DW, si->dst_reg, si->dst_reg,
				      bpf_target_off(struct perf_sample_data, addr, 8,
						     target_size));
		break;
	default:
		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_perf_event_data_kern,
						       regs), si->dst_reg, si->src_reg,
				      offsetof(struct bpf_perf_event_data_kern, regs));
		*insn++ = BPF_LDX_MEM(BPF_SIZEOF(long), si->dst_reg, si->dst_reg,
				      si->off);
		break;
	}

	return insn - insn_buf;
}
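
/*
 * Illustrative sketch (not part of the original file): for a perf_event
 * program that does
 *
 *	u64 period = ctx->sample_period;
 *
 * the conversion above replaces the single context load with two loads
 * through the kernel-side structure, roughly:
 *
 *	dst = *(struct perf_sample_data **)(ctx_kern + offsetof(..., data));
 *	dst = *(u64 *)(dst + offsetof(struct perf_sample_data, period));
 *
 * so the UAPI layout of struct bpf_perf_event_data never has to match
 * struct bpf_perf_event_data_kern.
 */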
const struct bpf_verifier_ops perf_event_verifier_ops = {
	.get_func_proto		= pe_prog_func_proto,
	.is_valid_access	= pe_prog_is_valid_access,
	.convert_ctx_access	= pe_prog_convert_ctx_access,
};

const struct bpf_prog_ops perf_event_prog_ops = {
};

static DEFINE_MUTEX(bpf_event_mutex);

#define BPF_TRACE_MAX_PROGS 64
int perf_event_attach_bpf_prog(struct perf_event *event,
			       struct bpf_prog *prog,
			       u64 bpf_cookie)
{
	struct bpf_prog_array *old_array;
	struct bpf_prog_array *new_array;
	int ret = -EEXIST;

	/*
	 * Kprobe override only works if they are on the function entry,
	 * and only if they are on the opt-in list.
	 */
	if (prog->kprobe_override &&
	    (!trace_kprobe_on_func_entry(event->tp_event) ||
	     !trace_kprobe_error_injectable(event->tp_event)))
		return -EINVAL;

	mutex_lock(&bpf_event_mutex);

	if (event->prog)
		goto unlock;

	old_array = bpf_event_rcu_dereference(event->tp_event->prog_array);
	if (old_array &&
	    bpf_prog_array_length(old_array) >= BPF_TRACE_MAX_PROGS) {
		ret = -E2BIG;
		goto unlock;
	}

	ret = bpf_prog_array_copy(old_array, NULL, prog, bpf_cookie, &new_array);
	if (ret < 0)
		goto unlock;

	/* set the new array to event->tp_event and set event->prog */
	event->prog = prog;
	event->bpf_cookie = bpf_cookie;
	rcu_assign_pointer(event->tp_event->prog_array, new_array);
	bpf_prog_array_free_sleepable(old_array);

unlock:
	mutex_unlock(&bpf_event_mutex);
	return ret;
}
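
/*
 * Illustrative sketch (userspace, not part of this file): the attach above
 * is typically reached by opening a tracepoint/kprobe perf event and then
 * setting a BPF program fd on it, e.g.:
 *
 *	struct perf_event_attr attr = {
 *		.type = PERF_TYPE_TRACEPOINT,
 *		.size = sizeof(attr),
 *		.config = tracepoint_id,	// id read from tracefs
 *	};
 *	int efd = syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
 *
 *	ioctl(efd, PERF_EVENT_IOC_SET_BPF, prog_fd);
 *	ioctl(efd, PERF_EVENT_IOC_ENABLE, 0);
 *
 * Setting a second program on the same event fails with -EEXIST, and one
 * tp_event carries at most BPF_TRACE_MAX_PROGS programs.
 */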
void perf_event_detach_bpf_prog(struct perf_event *event)
{
	struct bpf_prog_array *old_array;
	struct bpf_prog_array *new_array;
	int ret;

	mutex_lock(&bpf_event_mutex);

	if (!event->prog)
		goto unlock;

	old_array = bpf_event_rcu_dereference(event->tp_event->prog_array);
	ret = bpf_prog_array_copy(old_array, event->prog, NULL, 0, &new_array);
	if (ret < 0) {
		bpf_prog_array_delete_safe(old_array, event->prog);
	} else {
		rcu_assign_pointer(event->tp_event->prog_array, new_array);
		bpf_prog_array_free_sleepable(old_array);
	}

	bpf_prog_put(event->prog);
	event->prog = NULL;

unlock:
	mutex_unlock(&bpf_event_mutex);
}
int perf_event_query_prog_array(struct perf_event *event, void __user *info)
{
	struct perf_event_query_bpf __user *uquery = info;
	struct perf_event_query_bpf query = {};
	struct bpf_prog_array *progs;
	u32 *ids, prog_cnt, ids_len;
	int ret;

	if (!perfmon_capable())
		return -EPERM;
	if (event->attr.type != PERF_TYPE_TRACEPOINT)
		return -EINVAL;
	if (copy_from_user(&query, uquery, sizeof(query)))
		return -EFAULT;

	ids_len = query.ids_len;
	if (ids_len > BPF_TRACE_MAX_PROGS)
		return -E2BIG;
	ids = kcalloc(ids_len, sizeof(u32), GFP_USER | __GFP_NOWARN);
	if (!ids)
		return -ENOMEM;
	/*
	 * The above kcalloc returns ZERO_SIZE_PTR when ids_len = 0, which
	 * is required when user only wants to check for uquery->prog_cnt.
	 * There is no need to check for it since the case is handled
	 * gracefully in bpf_prog_array_copy_info.
	 */

	mutex_lock(&bpf_event_mutex);
	progs = bpf_event_rcu_dereference(event->tp_event->prog_array);
	ret = bpf_prog_array_copy_info(progs, ids, ids_len, &prog_cnt);
	mutex_unlock(&bpf_event_mutex);

	if (copy_to_user(&uquery->prog_cnt, &prog_cnt, sizeof(prog_cnt)) ||
	    copy_to_user(uquery->ids, ids, ids_len * sizeof(u32)))
		ret = -EFAULT;

	kfree(ids);
	return ret;
}
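
/*
 * Illustrative sketch (userspace, not part of this file): the query above
 * is driven by the PERF_EVENT_IOC_QUERY_BPF ioctl, e.g.:
 *
 *	struct perf_event_query_bpf *q;
 *
 *	q = calloc(1, sizeof(*q) + 64 * sizeof(__u32));
 *	q->ids_len = 64;
 *	if (ioctl(efd, PERF_EVENT_IOC_QUERY_BPF, q) == 0)
 *		printf("%u programs attached\n", q->prog_cnt);
 *
 * Passing ids_len == 0 is a valid way to read just prog_cnt, while
 * anything above BPF_TRACE_MAX_PROGS is rejected with -E2BIG.
 */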
extern struct bpf_raw_event_map __start__bpf_raw_tp[];
extern struct bpf_raw_event_map __stop__bpf_raw_tp[];

struct bpf_raw_event_map *bpf_get_raw_tracepoint(const char *name)
{
	struct bpf_raw_event_map *btp = __start__bpf_raw_tp;

	for (; btp < __stop__bpf_raw_tp; btp++) {
		if (!strcmp(btp->tp->name, name))
			return btp;
	}

	return bpf_get_raw_tracepoint_module(name);
}

void bpf_put_raw_tracepoint(struct bpf_raw_event_map *btp)
{
	struct module *mod;

	preempt_disable();
	mod = __module_address((unsigned long)btp);
	module_put(mod);
	preempt_enable();
}
static __always_inline
void __bpf_trace_run(struct bpf_prog *prog, u64 *args)
{
	cant_sleep();
	if (unlikely(this_cpu_inc_return(*(prog->active)) != 1)) {
		bpf_prog_inc_misses_counter(prog);
		goto out;
	}
	rcu_read_lock();
	(void) bpf_prog_run(prog, args);
	rcu_read_unlock();
out:
	this_cpu_dec(*(prog->active));
}
#define UNPACK(...)			__VA_ARGS__
#define REPEAT_1(FN, DL, X, ...)	FN(X)
#define REPEAT_2(FN, DL, X, ...)	FN(X) UNPACK DL REPEAT_1(FN, DL, __VA_ARGS__)
#define REPEAT_3(FN, DL, X, ...)	FN(X) UNPACK DL REPEAT_2(FN, DL, __VA_ARGS__)
#define REPEAT_4(FN, DL, X, ...)	FN(X) UNPACK DL REPEAT_3(FN, DL, __VA_ARGS__)
#define REPEAT_5(FN, DL, X, ...)	FN(X) UNPACK DL REPEAT_4(FN, DL, __VA_ARGS__)
#define REPEAT_6(FN, DL, X, ...)	FN(X) UNPACK DL REPEAT_5(FN, DL, __VA_ARGS__)
#define REPEAT_7(FN, DL, X, ...)	FN(X) UNPACK DL REPEAT_6(FN, DL, __VA_ARGS__)
#define REPEAT_8(FN, DL, X, ...)	FN(X) UNPACK DL REPEAT_7(FN, DL, __VA_ARGS__)
#define REPEAT_9(FN, DL, X, ...)	FN(X) UNPACK DL REPEAT_8(FN, DL, __VA_ARGS__)
#define REPEAT_10(FN, DL, X, ...)	FN(X) UNPACK DL REPEAT_9(FN, DL, __VA_ARGS__)
#define REPEAT_11(FN, DL, X, ...)	FN(X) UNPACK DL REPEAT_10(FN, DL, __VA_ARGS__)
#define REPEAT_12(FN, DL, X, ...)	FN(X) UNPACK DL REPEAT_11(FN, DL, __VA_ARGS__)
#define REPEAT(X, FN, DL, ...)		REPEAT_##X(FN, DL, __VA_ARGS__)

#define SARG(X)		u64 arg##X
#define COPY(X)		args[X] = arg##X

#define __DL_COM	(,)
#define __DL_SEM	(;)

#define __SEQ_0_11	0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11
#define BPF_TRACE_DEFN_x(x)						\
	void bpf_trace_run##x(struct bpf_prog *prog,			\
			      REPEAT(x, SARG, __DL_COM, __SEQ_0_11))	\
	{								\
		u64 args[x];						\
		REPEAT(x, COPY, __DL_SEM, __SEQ_0_11);			\
		__bpf_trace_run(prog, args);				\
	}								\
	EXPORT_SYMBOL_GPL(bpf_trace_run##x)
BPF_TRACE_DEFN_x(1);
BPF_TRACE_DEFN_x(2);
BPF_TRACE_DEFN_x(3);
BPF_TRACE_DEFN_x(4);
BPF_TRACE_DEFN_x(5);
BPF_TRACE_DEFN_x(6);
BPF_TRACE_DEFN_x(7);
BPF_TRACE_DEFN_x(8);
BPF_TRACE_DEFN_x(9);
BPF_TRACE_DEFN_x(10);
BPF_TRACE_DEFN_x(11);
BPF_TRACE_DEFN_x(12);
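
/*
 * Illustrative expansion (not part of the original file): for x == 2 the
 * macro above generates roughly
 *
 *	void bpf_trace_run2(struct bpf_prog *prog, u64 arg0, u64 arg1)
 *	{
 *		u64 args[2];
 *
 *		args[0] = arg0; args[1] = arg1;
 *		__bpf_trace_run(prog, args);
 *	}
 *	EXPORT_SYMBOL_GPL(bpf_trace_run2);
 *
 * i.e. one exported thunk per argument count, each packing its scalar
 * arguments into the u64 array that becomes the raw tracepoint program's
 * context.
 */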
static int __bpf_probe_register(struct bpf_raw_event_map *btp, struct bpf_prog *prog)
{
	struct tracepoint *tp = btp->tp;

	/*
	 * check that program doesn't access arguments beyond what's
	 * available in this tracepoint
	 */
	if (prog->aux->max_ctx_offset > btp->num_args * sizeof(u64))
		return -EINVAL;

	if (prog->aux->max_tp_access > btp->writable_size)
		return -EINVAL;

	return tracepoint_probe_register_may_exist(tp, (void *)btp->bpf_func,
						   prog);
}

int bpf_probe_register(struct bpf_raw_event_map *btp, struct bpf_prog *prog)
{
	return __bpf_probe_register(btp, prog);
}

int bpf_probe_unregister(struct bpf_raw_event_map *btp, struct bpf_prog *prog)
{
	return tracepoint_probe_unregister(btp->tp, (void *)btp->bpf_func, prog);
}
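
/*
 * Illustrative note (not part of the original file): a raw tracepoint
 * context is just the u64 argument array built by the bpf_trace_run*()
 * thunks above, so for a tracepoint with e.g. num_args == 3 the valid
 * context window is 3 * sizeof(u64) == 24 bytes. A program whose
 * verifier-computed prog->aux->max_ctx_offset exceeds that is refused
 * here at attach time instead of reading garbage at run time.
 */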
int bpf_get_perf_event_info(const struct perf_event *event, u32 *prog_id,
			    u32 *fd_type, const char **buf,
			    u64 *probe_offset, u64 *probe_addr,
			    unsigned long *missed)
{
	bool is_tracepoint, is_syscall_tp;
	struct bpf_prog *prog;
	int flags, err = 0;

	prog = event->prog;
	if (!prog)
		return -ENOENT;

	/* not supporting BPF_PROG_TYPE_PERF_EVENT yet */
	if (prog->type == BPF_PROG_TYPE_PERF_EVENT)
		return -EOPNOTSUPP;

	*prog_id = prog->aux->id;
	flags = event->tp_event->flags;
	is_tracepoint = flags & TRACE_EVENT_FL_TRACEPOINT;
	is_syscall_tp = is_syscall_trace_event(event->tp_event);

	if (is_tracepoint || is_syscall_tp) {
		*buf = is_tracepoint ? event->tp_event->tp->name
				     : event->tp_event->name;
		/* We allow NULL pointer for tracepoint */
		if (fd_type)
			*fd_type = BPF_FD_TYPE_TRACEPOINT;
		if (probe_offset)
			*probe_offset = 0x0;
		if (probe_addr)
			*probe_addr = 0x0;
	} else {
		/* kprobe/uprobe */
		err = -EOPNOTSUPP;
#ifdef CONFIG_KPROBE_EVENTS
		if (flags & TRACE_EVENT_FL_KPROBE)
			err = bpf_get_kprobe_info(event, fd_type, buf,
						  probe_offset, probe_addr, missed,
						  event->attr.type == PERF_TYPE_TRACEPOINT);
#endif
#ifdef CONFIG_UPROBE_EVENTS
		if (flags & TRACE_EVENT_FL_UPROBE)
			err = bpf_get_uprobe_info(event, fd_type, buf,
						  probe_offset, probe_addr,
						  event->attr.type == PERF_TYPE_TRACEPOINT);
#endif
	}

	return err;
}
static int __init send_signal_irq_work_init(void)
{
	int cpu;
	struct send_signal_irq_work *work;

	for_each_possible_cpu(cpu) {
		work = per_cpu_ptr(&send_signal_work, cpu);
		init_irq_work(&work->irq_work, do_bpf_send_signal);
	}
	return 0;
}

subsys_initcall(send_signal_irq_work_init);
#ifdef CONFIG_MODULES
static int bpf_event_notify(struct notifier_block *nb, unsigned long op,
			    void *module)
{
	struct bpf_trace_module *btm, *tmp;
	struct module *mod = module;
	int ret = 0;

	if (mod->num_bpf_raw_events == 0 ||
	    (op != MODULE_STATE_COMING && op != MODULE_STATE_GOING))
		goto out;

	mutex_lock(&bpf_module_mutex);

	switch (op) {
	case MODULE_STATE_COMING:
		btm = kzalloc(sizeof(*btm), GFP_KERNEL);
		if (btm) {
			btm->module = module;
			list_add(&btm->list, &bpf_trace_modules);
		} else {
			ret = -ENOMEM;
		}
		break;
	case MODULE_STATE_GOING:
		list_for_each_entry_safe(btm, tmp, &bpf_trace_modules, list) {
			if (btm->module == module) {
				list_del(&btm->list);
				kfree(btm);
				break;
			}
		}
		break;
	}

	mutex_unlock(&bpf_module_mutex);

out:
	return notifier_from_errno(ret);
}

static struct notifier_block bpf_module_nb = {
	.notifier_call = bpf_event_notify,
};

static int __init bpf_event_init(void)
{
	register_module_notifier(&bpf_module_nb);
	return 0;
}

fs_initcall(bpf_event_init);
#endif /* CONFIG_MODULES */
#ifdef CONFIG_FPROBE
struct bpf_kprobe_multi_link {
	struct bpf_link link;
	struct fprobe fp;
	unsigned long *addrs;
	u64 *cookies;
	u32 cnt;
	u32 mods_cnt;
	struct module **mods;
	u32 flags;
};

struct bpf_kprobe_multi_run_ctx {
	struct bpf_run_ctx run_ctx;
	struct bpf_kprobe_multi_link *link;
	unsigned long entry_ip;
};

struct user_syms {
	const char **syms;
	char *buf;
};

static int copy_user_syms(struct user_syms *us, unsigned long __user *usyms, u32 cnt)
{
	unsigned long __user usymbol;
	const char **syms = NULL;
	char *buf = NULL, *p;
	int err = -ENOMEM;
	unsigned int i;

	syms = kvmalloc_array(cnt, sizeof(*syms), GFP_KERNEL);
	if (!syms)
		goto error;

	buf = kvmalloc_array(cnt, KSYM_NAME_LEN, GFP_KERNEL);
	if (!buf)
		goto error;

	for (p = buf, i = 0; i < cnt; i++) {
		if (__get_user(usymbol, usyms + i)) {
			err = -EFAULT;
			goto error;
		}
		err = strncpy_from_user(p, (const char __user *) usymbol, KSYM_NAME_LEN);
		if (err == KSYM_NAME_LEN)
			err = -E2BIG;
		if (err < 0)
			goto error;
		syms[i] = p;
		p += err + 1;
	}

	us->syms = syms;
	us->buf = buf;
	return 0;

error:
	if (err) {
		kvfree(syms);
		kvfree(buf);
	}
	return err;
}
static void kprobe_multi_put_modules(struct module **mods, u32 cnt)
{
	u32 i;

	for (i = 0; i < cnt; i++)
		module_put(mods[i]);
}

static void free_user_syms(struct user_syms *us)
{
	kvfree(us->syms);
	kvfree(us->buf);
}
static void bpf_kprobe_multi_link_release(struct bpf_link *link)
{
	struct bpf_kprobe_multi_link *kmulti_link;

	kmulti_link = container_of(link, struct bpf_kprobe_multi_link, link);
	unregister_fprobe(&kmulti_link->fp);
	kprobe_multi_put_modules(kmulti_link->mods, kmulti_link->mods_cnt);
}

static void bpf_kprobe_multi_link_dealloc(struct bpf_link *link)
{
	struct bpf_kprobe_multi_link *kmulti_link;

	kmulti_link = container_of(link, struct bpf_kprobe_multi_link, link);
	kvfree(kmulti_link->addrs);
	kvfree(kmulti_link->cookies);
	kfree(kmulti_link->mods);
	kfree(kmulti_link);
}
static int bpf_kprobe_multi_link_fill_link_info(const struct bpf_link *link,
						struct bpf_link_info *info)
{
	u64 __user *uaddrs = u64_to_user_ptr(info->kprobe_multi.addrs);
	struct bpf_kprobe_multi_link *kmulti_link;
	u32 ucount = info->kprobe_multi.count;
	int err = 0, i;

	if (!uaddrs ^ !ucount)
		return -EINVAL;

	kmulti_link = container_of(link, struct bpf_kprobe_multi_link, link);
	info->kprobe_multi.count = kmulti_link->cnt;
	info->kprobe_multi.flags = kmulti_link->flags;
	info->kprobe_multi.missed = kmulti_link->fp.nmissed;

	if (!uaddrs)
		return 0;

	if (ucount < kmulti_link->cnt)
		err = -ENOSPC;
	else
		ucount = kmulti_link->cnt;

	if (kallsyms_show_value(current_cred())) {
		if (copy_to_user(uaddrs, kmulti_link->addrs, ucount * sizeof(u64)))
			return -EFAULT;
	} else {
		for (i = 0; i < ucount; i++) {
			if (put_user(0, uaddrs + i))
				return -EFAULT;
		}
	}
	return err;
}

static const struct bpf_link_ops bpf_kprobe_multi_link_lops = {
	.release = bpf_kprobe_multi_link_release,
	.dealloc = bpf_kprobe_multi_link_dealloc,
	.fill_link_info = bpf_kprobe_multi_link_fill_link_info,
};
static void bpf_kprobe_multi_cookie_swap(void *a, void *b, int size, const void *priv)
{
	const struct bpf_kprobe_multi_link *link = priv;
	unsigned long *addr_a = a, *addr_b = b;
	u64 *cookie_a, *cookie_b;

	cookie_a = link->cookies + (addr_a - link->addrs);
	cookie_b = link->cookies + (addr_b - link->addrs);

	/* swap addr_a/addr_b and cookie_a/cookie_b values */
	swap(*addr_a, *addr_b);
	swap(*cookie_a, *cookie_b);
}

static int bpf_kprobe_multi_addrs_cmp(const void *a, const void *b)
{
	const unsigned long *addr_a = a, *addr_b = b;

	if (*addr_a == *addr_b)
		return 0;
	return *addr_a < *addr_b ? -1 : 1;
}

static int bpf_kprobe_multi_cookie_cmp(const void *a, const void *b, const void *priv)
{
	return bpf_kprobe_multi_addrs_cmp(a, b);
}
static u64 bpf_kprobe_multi_cookie(struct bpf_run_ctx *ctx)
{
	struct bpf_kprobe_multi_run_ctx *run_ctx;
	struct bpf_kprobe_multi_link *link;
	u64 *cookie, entry_ip;
	unsigned long *addr;

	if (WARN_ON_ONCE(!ctx))
		return 0;
	run_ctx = container_of(current->bpf_ctx, struct bpf_kprobe_multi_run_ctx, run_ctx);
	link = run_ctx->link;
	if (!link->cookies)
		return 0;
	entry_ip = run_ctx->entry_ip;
	addr = bsearch(&entry_ip, link->addrs, link->cnt, sizeof(entry_ip),
		       bpf_kprobe_multi_addrs_cmp);
	if (!addr)
		return 0;
	cookie = link->cookies + (addr - link->addrs);
	return *cookie;
}

static u64 bpf_kprobe_multi_entry_ip(struct bpf_run_ctx *ctx)
{
	struct bpf_kprobe_multi_run_ctx *run_ctx;

	run_ctx = container_of(current->bpf_ctx, struct bpf_kprobe_multi_run_ctx, run_ctx);
	return run_ctx->entry_ip;
}
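
/*
 * Illustrative sketch (not part of the original file) of the parallel
 * array layout the cookie lookup above relies on. After the attach-time
 * sort, addrs[] is ascending and cookies[] was permuted in lockstep, e.g.:
 *
 *	addrs[]   = { 0xffff0010, 0xffff0040, 0xffff0090 }
 *	cookies[] = {       0xaa,       0xbb,       0xcc }
 *
 * so the bsearch() on the fentry address yields an index
 * (addr - link->addrs) that is reused directly as the index into
 * cookies[].
 */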
static int
kprobe_multi_link_prog_run(struct bpf_kprobe_multi_link *link,
			   unsigned long entry_ip, struct pt_regs *regs)
{
	struct bpf_kprobe_multi_run_ctx run_ctx = {
		.link = link,
		.entry_ip = entry_ip,
	};
	struct bpf_run_ctx *old_run_ctx;
	int err;

	if (unlikely(__this_cpu_inc_return(bpf_prog_active) != 1)) {
		bpf_prog_inc_misses_counter(link->link.prog);
		err = 0;
		goto out;
	}

	migrate_disable();
	rcu_read_lock();
	old_run_ctx = bpf_set_run_ctx(&run_ctx.run_ctx);
	err = bpf_prog_run(link->link.prog, regs);
	bpf_reset_run_ctx(old_run_ctx);
	rcu_read_unlock();
	migrate_enable();

out:
	__this_cpu_dec(bpf_prog_active);
	return err;
}
static int
kprobe_multi_link_handler(struct fprobe *fp, unsigned long fentry_ip,
			  unsigned long ret_ip, struct pt_regs *regs,
			  void *data)
{
	struct bpf_kprobe_multi_link *link;

	link = container_of(fp, struct bpf_kprobe_multi_link, fp);
	kprobe_multi_link_prog_run(link, get_entry_ip(fentry_ip), regs);
	return 0;
}

static void
kprobe_multi_link_exit_handler(struct fprobe *fp, unsigned long fentry_ip,
			       unsigned long ret_ip, struct pt_regs *regs,
			       void *data)
{
	struct bpf_kprobe_multi_link *link;

	link = container_of(fp, struct bpf_kprobe_multi_link, fp);
	kprobe_multi_link_prog_run(link, get_entry_ip(fentry_ip), regs);
}
static int symbols_cmp_r(const void *a, const void *b, const void *priv)
{
	const char **str_a = (const char **) a;
	const char **str_b = (const char **) b;

	return strcmp(*str_a, *str_b);
}

struct multi_symbols_sort {
	const char **funcs;
	u64 *cookies;
};

static void symbols_swap_r(void *a, void *b, int size, const void *priv)
{
	const struct multi_symbols_sort *data = priv;
	const char **name_a = a, **name_b = b;

	swap(*name_a, *name_b);

	/* If defined, swap also related cookies. */
	if (data->cookies) {
		u64 *cookie_a, *cookie_b;

		cookie_a = data->cookies + (name_a - data->funcs);
		cookie_b = data->cookies + (name_b - data->funcs);
		swap(*cookie_a, *cookie_b);
	}
}
struct modules_array {
	struct module **mods;
	int mods_cnt;
	int mods_cap;
};

static int add_module(struct modules_array *arr, struct module *mod)
{
	struct module **mods;

	if (arr->mods_cnt == arr->mods_cap) {
		arr->mods_cap = max(16, arr->mods_cap * 3 / 2);
		mods = krealloc_array(arr->mods, arr->mods_cap, sizeof(*mods), GFP_KERNEL);
		if (!mods)
			return -ENOMEM;
		arr->mods = mods;
	}

	arr->mods[arr->mods_cnt] = mod;
	arr->mods_cnt++;
	return 0;
}

static bool has_module(struct modules_array *arr, struct module *mod)
{
	int i;

	for (i = arr->mods_cnt - 1; i >= 0; i--) {
		if (arr->mods[i] == mod)
			return true;
	}
	return false;
}
static int get_modules_for_addrs(struct module ***mods, unsigned long *addrs, u32 addrs_cnt)
{
	struct modules_array arr = {};
	u32 i;
	int err = 0;

	for (i = 0; i < addrs_cnt; i++) {
		struct module *mod;

		preempt_disable();
		mod = __module_address(addrs[i]);
		/* Either no module or it's already stored */
		if (!mod || has_module(&arr, mod)) {
			preempt_enable();
			continue;
		}
		if (!try_module_get(mod))
			err = -EINVAL;
		preempt_enable();
		if (err)
			break;
		err = add_module(&arr, mod);
		if (err) {
			module_put(mod);
			break;
		}
	}

	/* We return either err < 0 in case of error, ... */
	if (err) {
		kprobe_multi_put_modules(arr.mods, arr.mods_cnt);
		kfree(arr.mods);
		return err;
	}

	/* or number of modules found if everything is ok. */
	*mods = arr.mods;
	return arr.mods_cnt;
}
static int addrs_check_error_injection_list(unsigned long *addrs, u32 cnt)
{
	u32 i;

	for (i = 0; i < cnt; i++) {
		if (!within_error_injection_list(addrs[i]))
			return -EINVAL;
	}
	return 0;
}
int bpf_kprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
{
	struct bpf_kprobe_multi_link *link = NULL;
	struct bpf_link_primer link_primer;
	void __user *ucookies;
	unsigned long *addrs;
	u32 flags, cnt, size;
	void __user *uaddrs;
	u64 *cookies = NULL;
	void __user *usyms;
	int err;

	/* no support for 32bit archs yet */
	if (sizeof(u64) != sizeof(void *))
		return -EOPNOTSUPP;

	if (prog->expected_attach_type != BPF_TRACE_KPROBE_MULTI)
		return -EINVAL;

	flags = attr->link_create.kprobe_multi.flags;
	if (flags & ~BPF_F_KPROBE_MULTI_RETURN)
		return -EINVAL;

	uaddrs = u64_to_user_ptr(attr->link_create.kprobe_multi.addrs);
	usyms = u64_to_user_ptr(attr->link_create.kprobe_multi.syms);
	if (!!uaddrs == !!usyms)
		return -EINVAL;

	cnt = attr->link_create.kprobe_multi.cnt;
	if (!cnt)
		return -EINVAL;

	size = cnt * sizeof(*addrs);
	addrs = kvmalloc_array(cnt, sizeof(*addrs), GFP_KERNEL);
	if (!addrs)
		return -ENOMEM;

	ucookies = u64_to_user_ptr(attr->link_create.kprobe_multi.cookies);
	if (ucookies) {
		cookies = kvmalloc_array(cnt, sizeof(*addrs), GFP_KERNEL);
		if (!cookies) {
			err = -ENOMEM;
			goto error;
		}
		if (copy_from_user(cookies, ucookies, size)) {
			err = -EFAULT;
			goto error;
		}
	}

	if (uaddrs) {
		if (copy_from_user(addrs, uaddrs, size)) {
			err = -EFAULT;
			goto error;
		}
	} else {
		struct multi_symbols_sort data = {
			.cookies = cookies,
		};
		struct user_syms us;

		err = copy_user_syms(&us, usyms, cnt);
		if (err)
			goto error;

		if (cookies)
			data.funcs = us.syms;

		sort_r(us.syms, cnt, sizeof(*us.syms), symbols_cmp_r,
		       symbols_swap_r, &data);

		err = ftrace_lookup_symbols(us.syms, cnt, addrs);
		free_user_syms(&us);
		if (err)
			goto error;
	}

	if (prog->kprobe_override && addrs_check_error_injection_list(addrs, cnt)) {
		err = -EINVAL;
		goto error;
	}

	link = kzalloc(sizeof(*link), GFP_KERNEL);
	if (!link) {
		err = -ENOMEM;
		goto error;
	}

	bpf_link_init(&link->link, BPF_LINK_TYPE_KPROBE_MULTI,
		      &bpf_kprobe_multi_link_lops, prog);

	err = bpf_link_prime(&link->link, &link_primer);
	if (err)
		goto error;

	if (flags & BPF_F_KPROBE_MULTI_RETURN)
		link->fp.exit_handler = kprobe_multi_link_exit_handler;
	else
		link->fp.entry_handler = kprobe_multi_link_handler;

	link->addrs = addrs;
	link->cookies = cookies;
	link->cnt = cnt;
	link->flags = flags;

	if (cookies) {
		/*
		 * Sorting addresses will trigger sorting cookies as well
		 * (check bpf_kprobe_multi_cookie_swap). This way we can
		 * find cookie based on the address in bpf_get_attach_cookie
		 * helper.
		 */
		sort_r(addrs, cnt, sizeof(*addrs),
		       bpf_kprobe_multi_cookie_cmp,
		       bpf_kprobe_multi_cookie_swap,
		       link);
	}

	err = get_modules_for_addrs(&link->mods, addrs, cnt);
	if (err < 0) {
		bpf_link_cleanup(&link_primer);
		return err;
	}
	link->mods_cnt = err;

	err = register_fprobe_ips(&link->fp, addrs, cnt);
	if (err) {
		kprobe_multi_put_modules(link->mods, link->mods_cnt);
		bpf_link_cleanup(&link_primer);
		return err;
	}

	return bpf_link_settle(&link_primer);

error:
	kfree(link);
	kvfree(addrs);
	kvfree(cookies);
	return err;
}
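
/*
 * Illustrative sketch (userspace, not part of this file): with libbpf this
 * attach path is typically reached through
 * bpf_program__attach_kprobe_multi_opts(), e.g.:
 *
 *	LIBBPF_OPTS(bpf_kprobe_multi_opts, opts,
 *		.retprobe = false,	// BPF_F_KPROBE_MULTI_RETURN off
 *	);
 *	struct bpf_link *link;
 *
 *	link = bpf_program__attach_kprobe_multi_opts(skel->progs.kmulti,
 *						     "tcp_*", &opts);
 *
 * which fills link_create.kprobe_multi with either syms or addrs (never
 * both, matching the !!uaddrs == !!usyms check above) plus optional
 * per-symbol cookies.
 */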
#else /* !CONFIG_FPROBE */
int bpf_kprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
{
	return -EOPNOTSUPP;
}
static u64 bpf_kprobe_multi_cookie(struct bpf_run_ctx *ctx)
{
	return 0;
}
static u64 bpf_kprobe_multi_entry_ip(struct bpf_run_ctx *ctx)
{
	return 0;
}
#endif
#ifdef CONFIG_UPROBES
struct bpf_uprobe_multi_link;

struct bpf_uprobe {
	struct bpf_uprobe_multi_link *link;
	loff_t offset;
	u64 cookie;
	struct uprobe_consumer consumer;
};

struct bpf_uprobe_multi_link {
	struct path path;
	struct bpf_link link;
	u32 cnt;
	struct bpf_uprobe *uprobes;
	struct task_struct *task;
};

struct bpf_uprobe_multi_run_ctx {
	struct bpf_run_ctx run_ctx;
	unsigned long entry_ip;
	struct bpf_uprobe *uprobe;
};
static void bpf_uprobe_unregister(struct path *path, struct bpf_uprobe *uprobes,
				  u32 cnt)
{
	u32 i;

	for (i = 0; i < cnt; i++) {
		uprobe_unregister(d_real_inode(path->dentry), uprobes[i].offset,
				  &uprobes[i].consumer);
	}
}

static void bpf_uprobe_multi_link_release(struct bpf_link *link)
{
	struct bpf_uprobe_multi_link *umulti_link;

	umulti_link = container_of(link, struct bpf_uprobe_multi_link, link);
	bpf_uprobe_unregister(&umulti_link->path, umulti_link->uprobes, umulti_link->cnt);
}

static void bpf_uprobe_multi_link_dealloc(struct bpf_link *link)
{
	struct bpf_uprobe_multi_link *umulti_link;

	umulti_link = container_of(link, struct bpf_uprobe_multi_link, link);
	if (umulti_link->task)
		put_task_struct(umulti_link->task);
	path_put(&umulti_link->path);
	kvfree(umulti_link->uprobes);
	kfree(umulti_link);
}

static const struct bpf_link_ops bpf_uprobe_multi_link_lops = {
	.release = bpf_uprobe_multi_link_release,
	.dealloc = bpf_uprobe_multi_link_dealloc,
};
static int uprobe_prog_run(struct bpf_uprobe *uprobe,
			   unsigned long entry_ip,
			   struct pt_regs *regs)
{
	struct bpf_uprobe_multi_link *link = uprobe->link;
	struct bpf_uprobe_multi_run_ctx run_ctx = {
		.entry_ip = entry_ip,
		.uprobe = uprobe,
	};
	struct bpf_prog *prog = link->link.prog;
	bool sleepable = prog->aux->sleepable;
	struct bpf_run_ctx *old_run_ctx;
	int err = 0;

	if (link->task && current != link->task)
		return 0;

	if (sleepable)
		rcu_read_lock_trace();
	else
		rcu_read_lock();

	migrate_disable();

	old_run_ctx = bpf_set_run_ctx(&run_ctx.run_ctx);
	err = bpf_prog_run(link->link.prog, regs);
	bpf_reset_run_ctx(old_run_ctx);

	migrate_enable();

	if (sleepable)
		rcu_read_unlock_trace();
	else
		rcu_read_unlock();
	return err;
}
static bool
uprobe_multi_link_filter(struct uprobe_consumer *con, enum uprobe_filter_ctx ctx,
			 struct mm_struct *mm)
{
	struct bpf_uprobe *uprobe;

	uprobe = container_of(con, struct bpf_uprobe, consumer);
	return uprobe->link->task->mm == mm;
}

static int
uprobe_multi_link_handler(struct uprobe_consumer *con, struct pt_regs *regs)
{
	struct bpf_uprobe *uprobe;

	uprobe = container_of(con, struct bpf_uprobe, consumer);
	return uprobe_prog_run(uprobe, instruction_pointer(regs), regs);
}

static int
uprobe_multi_link_ret_handler(struct uprobe_consumer *con, unsigned long func, struct pt_regs *regs)
{
	struct bpf_uprobe *uprobe;

	uprobe = container_of(con, struct bpf_uprobe, consumer);
	return uprobe_prog_run(uprobe, func, regs);
}
static u64 bpf_uprobe_multi_entry_ip(struct bpf_run_ctx *ctx)
{
	struct bpf_uprobe_multi_run_ctx *run_ctx;

	run_ctx = container_of(current->bpf_ctx, struct bpf_uprobe_multi_run_ctx, run_ctx);
	return run_ctx->entry_ip;
}

static u64 bpf_uprobe_multi_cookie(struct bpf_run_ctx *ctx)
{
	struct bpf_uprobe_multi_run_ctx *run_ctx;

	run_ctx = container_of(current->bpf_ctx, struct bpf_uprobe_multi_run_ctx, run_ctx);
	return run_ctx->uprobe->cookie;
}
int bpf_uprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
{
	struct bpf_uprobe_multi_link *link = NULL;
	unsigned long __user *uref_ctr_offsets;
	unsigned long *ref_ctr_offsets = NULL;
	struct bpf_link_primer link_primer;
	struct bpf_uprobe *uprobes = NULL;
	struct task_struct *task = NULL;
	unsigned long __user *uoffsets;
	u64 __user *ucookies;
	void __user *upath;
	u32 flags, cnt, i;
	struct path path;
	char *name;
	pid_t pid;
	int err;

	/* no support for 32bit archs yet */
	if (sizeof(u64) != sizeof(void *))
		return -EOPNOTSUPP;

	if (prog->expected_attach_type != BPF_TRACE_UPROBE_MULTI)
		return -EINVAL;

	flags = attr->link_create.uprobe_multi.flags;
	if (flags & ~BPF_F_UPROBE_MULTI_RETURN)
		return -EINVAL;

	/*
	 * path, offsets and cnt are mandatory,
	 * ref_ctr_offsets and cookies are optional
	 */
	upath = u64_to_user_ptr(attr->link_create.uprobe_multi.path);
	uoffsets = u64_to_user_ptr(attr->link_create.uprobe_multi.offsets);
	cnt = attr->link_create.uprobe_multi.cnt;

	if (!upath || !uoffsets || !cnt)
		return -EINVAL;

	uref_ctr_offsets = u64_to_user_ptr(attr->link_create.uprobe_multi.ref_ctr_offsets);
	ucookies = u64_to_user_ptr(attr->link_create.uprobe_multi.cookies);

	name = strndup_user(upath, PATH_MAX);
	if (IS_ERR(name)) {
		err = PTR_ERR(name);
		return err;
	}

	err = kern_path(name, LOOKUP_FOLLOW, &path);
	kfree(name);
	if (err)
		return err;

	if (!d_is_reg(path.dentry)) {
		err = -EBADF;
		goto error_path_put;
	}

	pid = attr->link_create.uprobe_multi.pid;
	if (pid) {
		rcu_read_lock();
		task = get_pid_task(find_vpid(pid), PIDTYPE_PID);
		rcu_read_unlock();
		if (!task) {
			err = -ESRCH;
			goto error_path_put;
		}
	}

	err = -ENOMEM;

	link = kzalloc(sizeof(*link), GFP_KERNEL);
	uprobes = kvcalloc(cnt, sizeof(*uprobes), GFP_KERNEL);

	if (!uprobes || !link)
		goto error_free;

	if (uref_ctr_offsets) {
		ref_ctr_offsets = kvcalloc(cnt, sizeof(*ref_ctr_offsets), GFP_KERNEL);
		if (!ref_ctr_offsets)
			goto error_free;
	}

	for (i = 0; i < cnt; i++) {
		if (ucookies && __get_user(uprobes[i].cookie, ucookies + i)) {
			err = -EFAULT;
			goto error_free;
		}
		if (uref_ctr_offsets && __get_user(ref_ctr_offsets[i], uref_ctr_offsets + i)) {
			err = -EFAULT;
			goto error_free;
		}
		if (__get_user(uprobes[i].offset, uoffsets + i)) {
			err = -EFAULT;
			goto error_free;
		}

		uprobes[i].link = link;

		if (flags & BPF_F_UPROBE_MULTI_RETURN)
			uprobes[i].consumer.ret_handler = uprobe_multi_link_ret_handler;
		else
			uprobes[i].consumer.handler = uprobe_multi_link_handler;

		if (pid)
			uprobes[i].consumer.filter = uprobe_multi_link_filter;
	}

	link->cnt = cnt;
	link->uprobes = uprobes;
	link->path = path;
	link->task = task;

	bpf_link_init(&link->link, BPF_LINK_TYPE_UPROBE_MULTI,
		      &bpf_uprobe_multi_link_lops, prog);

	for (i = 0; i < cnt; i++) {
		err = uprobe_register_refctr(d_real_inode(link->path.dentry),
					     uprobes[i].offset,
					     ref_ctr_offsets ? ref_ctr_offsets[i] : 0,
					     &uprobes[i].consumer);
		if (err) {
			bpf_uprobe_unregister(&path, uprobes, i);
			goto error_free;
		}
	}

	err = bpf_link_prime(&link->link, &link_primer);
	if (err)
		goto error_free;

	kvfree(ref_ctr_offsets);
	return bpf_link_settle(&link_primer);

error_free:
	kvfree(ref_ctr_offsets);
	kvfree(uprobes);
	kfree(link);
	if (task)
		put_task_struct(task);
error_path_put:
	path_put(&path);
	return err;
}
#else /* !CONFIG_UPROBES */
int bpf_uprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
{
	return -EOPNOTSUPP;
}
static u64 bpf_uprobe_multi_cookie(struct bpf_run_ctx *ctx)
{
	return 0;
}
static u64 bpf_uprobe_multi_entry_ip(struct bpf_run_ctx *ctx)
{
	return 0;
}
#endif /* CONFIG_UPROBES */
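
/*
 * Illustrative sketch (userspace, not part of this file): recent libbpf
 * exposes the uprobe_multi attach path as
 * bpf_program__attach_uprobe_multi(), e.g.:
 *
 *	LIBBPF_OPTS(bpf_uprobe_multi_opts, opts);
 *	struct bpf_link *link;
 *
 *	link = bpf_program__attach_uprobe_multi(skel->progs.umulti,
 *						-1, "/usr/lib64/libc.so.6",
 *						"malloc*", &opts);
 *
 * (pid == -1 means all processes). The helper resolves the function
 * pattern to file offsets and fills link_create.uprobe_multi (path,
 * offsets, optional ref_ctr_offsets and cookies) consumed above.
 */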