1 // SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
4 * Common eBPF ELF object loading operations.
6 * Copyright (C) 2013-2015 Alexei Starovoitov <ast@kernel.org>
7 * Copyright (C) 2015 Wang Nan <wangnan0@huawei.com>
8 * Copyright (C) 2015 Huawei Inc.
9 * Copyright (C) 2017 Nicira, Inc.
10 * Copyright (C) 2019 Isovalent, Inc.
28 #include <asm/unistd.h>
29 #include <linux/err.h>
30 #include <linux/kernel.h>
31 #include <linux/bpf.h>
32 #include <linux/btf.h>
33 #include <linux/filter.h>
34 #include <linux/limits.h>
35 #include <linux/perf_event.h>
36 #include <linux/bpf_perf_event.h>
37 #include <linux/ring_buffer.h>
38 #include <sys/epoll.h>
39 #include <sys/ioctl.h>
42 #include <sys/types.h>
44 #include <sys/utsname.h>
45 #include <sys/resource.h>
53 #include "str_error.h"
54 #include "libbpf_internal.h"
56 #include "bpf_gen_internal.h"
60 #define BPF_FS_MAGIC 0xcafe4a11
63 #define BPF_FS_DEFAULT_PATH "/sys/fs/bpf"
65 #define BPF_INSN_SZ (sizeof(struct bpf_insn))
67 /* vsprintf() in __base_pr() uses nonliteral format string. It may break
68 * compilation if user enables corresponding warning. Disable it explicitly.
70 #pragma GCC diagnostic ignored "-Wformat-nonliteral"
72 #define __printf(a, b) __attribute__((format(printf, a, b)))
74 static struct bpf_map
*bpf_object__add_map(struct bpf_object
*obj
);
75 static bool prog_is_subprog(const struct bpf_object
*obj
, const struct bpf_program
*prog
);
76 static int map_set_def_max_entries(struct bpf_map
*map
);
78 static const char * const attach_type_name
[] = {
79 [BPF_CGROUP_INET_INGRESS
] = "cgroup_inet_ingress",
80 [BPF_CGROUP_INET_EGRESS
] = "cgroup_inet_egress",
81 [BPF_CGROUP_INET_SOCK_CREATE
] = "cgroup_inet_sock_create",
82 [BPF_CGROUP_INET_SOCK_RELEASE
] = "cgroup_inet_sock_release",
83 [BPF_CGROUP_SOCK_OPS
] = "cgroup_sock_ops",
84 [BPF_CGROUP_DEVICE
] = "cgroup_device",
85 [BPF_CGROUP_INET4_BIND
] = "cgroup_inet4_bind",
86 [BPF_CGROUP_INET6_BIND
] = "cgroup_inet6_bind",
87 [BPF_CGROUP_INET4_CONNECT
] = "cgroup_inet4_connect",
88 [BPF_CGROUP_INET6_CONNECT
] = "cgroup_inet6_connect",
89 [BPF_CGROUP_UNIX_CONNECT
] = "cgroup_unix_connect",
90 [BPF_CGROUP_INET4_POST_BIND
] = "cgroup_inet4_post_bind",
91 [BPF_CGROUP_INET6_POST_BIND
] = "cgroup_inet6_post_bind",
92 [BPF_CGROUP_INET4_GETPEERNAME
] = "cgroup_inet4_getpeername",
93 [BPF_CGROUP_INET6_GETPEERNAME
] = "cgroup_inet6_getpeername",
94 [BPF_CGROUP_UNIX_GETPEERNAME
] = "cgroup_unix_getpeername",
95 [BPF_CGROUP_INET4_GETSOCKNAME
] = "cgroup_inet4_getsockname",
96 [BPF_CGROUP_INET6_GETSOCKNAME
] = "cgroup_inet6_getsockname",
97 [BPF_CGROUP_UNIX_GETSOCKNAME
] = "cgroup_unix_getsockname",
98 [BPF_CGROUP_UDP4_SENDMSG
] = "cgroup_udp4_sendmsg",
99 [BPF_CGROUP_UDP6_SENDMSG
] = "cgroup_udp6_sendmsg",
100 [BPF_CGROUP_UNIX_SENDMSG
] = "cgroup_unix_sendmsg",
101 [BPF_CGROUP_SYSCTL
] = "cgroup_sysctl",
102 [BPF_CGROUP_UDP4_RECVMSG
] = "cgroup_udp4_recvmsg",
103 [BPF_CGROUP_UDP6_RECVMSG
] = "cgroup_udp6_recvmsg",
104 [BPF_CGROUP_UNIX_RECVMSG
] = "cgroup_unix_recvmsg",
105 [BPF_CGROUP_GETSOCKOPT
] = "cgroup_getsockopt",
106 [BPF_CGROUP_SETSOCKOPT
] = "cgroup_setsockopt",
107 [BPF_SK_SKB_STREAM_PARSER
] = "sk_skb_stream_parser",
108 [BPF_SK_SKB_STREAM_VERDICT
] = "sk_skb_stream_verdict",
109 [BPF_SK_SKB_VERDICT
] = "sk_skb_verdict",
110 [BPF_SK_MSG_VERDICT
] = "sk_msg_verdict",
111 [BPF_LIRC_MODE2
] = "lirc_mode2",
112 [BPF_FLOW_DISSECTOR
] = "flow_dissector",
113 [BPF_TRACE_RAW_TP
] = "trace_raw_tp",
114 [BPF_TRACE_FENTRY
] = "trace_fentry",
115 [BPF_TRACE_FEXIT
] = "trace_fexit",
116 [BPF_MODIFY_RETURN
] = "modify_return",
117 [BPF_LSM_MAC
] = "lsm_mac",
118 [BPF_LSM_CGROUP
] = "lsm_cgroup",
119 [BPF_SK_LOOKUP
] = "sk_lookup",
120 [BPF_TRACE_ITER
] = "trace_iter",
121 [BPF_XDP_DEVMAP
] = "xdp_devmap",
122 [BPF_XDP_CPUMAP
] = "xdp_cpumap",
124 [BPF_SK_REUSEPORT_SELECT
] = "sk_reuseport_select",
125 [BPF_SK_REUSEPORT_SELECT_OR_MIGRATE
] = "sk_reuseport_select_or_migrate",
126 [BPF_PERF_EVENT
] = "perf_event",
127 [BPF_TRACE_KPROBE_MULTI
] = "trace_kprobe_multi",
128 [BPF_STRUCT_OPS
] = "struct_ops",
129 [BPF_NETFILTER
] = "netfilter",
130 [BPF_TCX_INGRESS
] = "tcx_ingress",
131 [BPF_TCX_EGRESS
] = "tcx_egress",
132 [BPF_TRACE_UPROBE_MULTI
] = "trace_uprobe_multi",
133 [BPF_NETKIT_PRIMARY
] = "netkit_primary",
134 [BPF_NETKIT_PEER
] = "netkit_peer",
137 static const char * const link_type_name
[] = {
138 [BPF_LINK_TYPE_UNSPEC
] = "unspec",
139 [BPF_LINK_TYPE_RAW_TRACEPOINT
] = "raw_tracepoint",
140 [BPF_LINK_TYPE_TRACING
] = "tracing",
141 [BPF_LINK_TYPE_CGROUP
] = "cgroup",
142 [BPF_LINK_TYPE_ITER
] = "iter",
143 [BPF_LINK_TYPE_NETNS
] = "netns",
144 [BPF_LINK_TYPE_XDP
] = "xdp",
145 [BPF_LINK_TYPE_PERF_EVENT
] = "perf_event",
146 [BPF_LINK_TYPE_KPROBE_MULTI
] = "kprobe_multi",
147 [BPF_LINK_TYPE_STRUCT_OPS
] = "struct_ops",
148 [BPF_LINK_TYPE_NETFILTER
] = "netfilter",
149 [BPF_LINK_TYPE_TCX
] = "tcx",
150 [BPF_LINK_TYPE_UPROBE_MULTI
] = "uprobe_multi",
151 [BPF_LINK_TYPE_NETKIT
] = "netkit",
154 static const char * const map_type_name
[] = {
155 [BPF_MAP_TYPE_UNSPEC
] = "unspec",
156 [BPF_MAP_TYPE_HASH
] = "hash",
157 [BPF_MAP_TYPE_ARRAY
] = "array",
158 [BPF_MAP_TYPE_PROG_ARRAY
] = "prog_array",
159 [BPF_MAP_TYPE_PERF_EVENT_ARRAY
] = "perf_event_array",
160 [BPF_MAP_TYPE_PERCPU_HASH
] = "percpu_hash",
161 [BPF_MAP_TYPE_PERCPU_ARRAY
] = "percpu_array",
162 [BPF_MAP_TYPE_STACK_TRACE
] = "stack_trace",
163 [BPF_MAP_TYPE_CGROUP_ARRAY
] = "cgroup_array",
164 [BPF_MAP_TYPE_LRU_HASH
] = "lru_hash",
165 [BPF_MAP_TYPE_LRU_PERCPU_HASH
] = "lru_percpu_hash",
166 [BPF_MAP_TYPE_LPM_TRIE
] = "lpm_trie",
167 [BPF_MAP_TYPE_ARRAY_OF_MAPS
] = "array_of_maps",
168 [BPF_MAP_TYPE_HASH_OF_MAPS
] = "hash_of_maps",
169 [BPF_MAP_TYPE_DEVMAP
] = "devmap",
170 [BPF_MAP_TYPE_DEVMAP_HASH
] = "devmap_hash",
171 [BPF_MAP_TYPE_SOCKMAP
] = "sockmap",
172 [BPF_MAP_TYPE_CPUMAP
] = "cpumap",
173 [BPF_MAP_TYPE_XSKMAP
] = "xskmap",
174 [BPF_MAP_TYPE_SOCKHASH
] = "sockhash",
175 [BPF_MAP_TYPE_CGROUP_STORAGE
] = "cgroup_storage",
176 [BPF_MAP_TYPE_REUSEPORT_SOCKARRAY
] = "reuseport_sockarray",
177 [BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE
] = "percpu_cgroup_storage",
178 [BPF_MAP_TYPE_QUEUE
] = "queue",
179 [BPF_MAP_TYPE_STACK
] = "stack",
180 [BPF_MAP_TYPE_SK_STORAGE
] = "sk_storage",
181 [BPF_MAP_TYPE_STRUCT_OPS
] = "struct_ops",
182 [BPF_MAP_TYPE_RINGBUF
] = "ringbuf",
183 [BPF_MAP_TYPE_INODE_STORAGE
] = "inode_storage",
184 [BPF_MAP_TYPE_TASK_STORAGE
] = "task_storage",
185 [BPF_MAP_TYPE_BLOOM_FILTER
] = "bloom_filter",
186 [BPF_MAP_TYPE_USER_RINGBUF
] = "user_ringbuf",
187 [BPF_MAP_TYPE_CGRP_STORAGE
] = "cgrp_storage",
190 static const char * const prog_type_name
[] = {
191 [BPF_PROG_TYPE_UNSPEC
] = "unspec",
192 [BPF_PROG_TYPE_SOCKET_FILTER
] = "socket_filter",
193 [BPF_PROG_TYPE_KPROBE
] = "kprobe",
194 [BPF_PROG_TYPE_SCHED_CLS
] = "sched_cls",
195 [BPF_PROG_TYPE_SCHED_ACT
] = "sched_act",
196 [BPF_PROG_TYPE_TRACEPOINT
] = "tracepoint",
197 [BPF_PROG_TYPE_XDP
] = "xdp",
198 [BPF_PROG_TYPE_PERF_EVENT
] = "perf_event",
199 [BPF_PROG_TYPE_CGROUP_SKB
] = "cgroup_skb",
200 [BPF_PROG_TYPE_CGROUP_SOCK
] = "cgroup_sock",
201 [BPF_PROG_TYPE_LWT_IN
] = "lwt_in",
202 [BPF_PROG_TYPE_LWT_OUT
] = "lwt_out",
203 [BPF_PROG_TYPE_LWT_XMIT
] = "lwt_xmit",
204 [BPF_PROG_TYPE_SOCK_OPS
] = "sock_ops",
205 [BPF_PROG_TYPE_SK_SKB
] = "sk_skb",
206 [BPF_PROG_TYPE_CGROUP_DEVICE
] = "cgroup_device",
207 [BPF_PROG_TYPE_SK_MSG
] = "sk_msg",
208 [BPF_PROG_TYPE_RAW_TRACEPOINT
] = "raw_tracepoint",
209 [BPF_PROG_TYPE_CGROUP_SOCK_ADDR
] = "cgroup_sock_addr",
210 [BPF_PROG_TYPE_LWT_SEG6LOCAL
] = "lwt_seg6local",
211 [BPF_PROG_TYPE_LIRC_MODE2
] = "lirc_mode2",
212 [BPF_PROG_TYPE_SK_REUSEPORT
] = "sk_reuseport",
213 [BPF_PROG_TYPE_FLOW_DISSECTOR
] = "flow_dissector",
214 [BPF_PROG_TYPE_CGROUP_SYSCTL
] = "cgroup_sysctl",
215 [BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE
] = "raw_tracepoint_writable",
216 [BPF_PROG_TYPE_CGROUP_SOCKOPT
] = "cgroup_sockopt",
217 [BPF_PROG_TYPE_TRACING
] = "tracing",
218 [BPF_PROG_TYPE_STRUCT_OPS
] = "struct_ops",
219 [BPF_PROG_TYPE_EXT
] = "ext",
220 [BPF_PROG_TYPE_LSM
] = "lsm",
221 [BPF_PROG_TYPE_SK_LOOKUP
] = "sk_lookup",
222 [BPF_PROG_TYPE_SYSCALL
] = "syscall",
223 [BPF_PROG_TYPE_NETFILTER
] = "netfilter",
226 static int __base_pr(enum libbpf_print_level level
, const char *format
,
229 if (level
== LIBBPF_DEBUG
)
232 return vfprintf(stderr
, format
, args
);
235 static libbpf_print_fn_t __libbpf_pr
= __base_pr
;
237 libbpf_print_fn_t
libbpf_set_print(libbpf_print_fn_t fn
)
239 libbpf_print_fn_t old_print_fn
;
241 old_print_fn
= __atomic_exchange_n(&__libbpf_pr
, fn
, __ATOMIC_RELAXED
);
247 void libbpf_print(enum libbpf_print_level level
, const char *format
, ...)
251 libbpf_print_fn_t print_fn
;
253 print_fn
= __atomic_load_n(&__libbpf_pr
, __ATOMIC_RELAXED
);
259 va_start(args
, format
);
260 __libbpf_pr(level
, format
, args
);
266 static void pr_perm_msg(int err
)
271 if (err
!= -EPERM
|| geteuid() != 0)
274 err
= getrlimit(RLIMIT_MEMLOCK
, &limit
);
278 if (limit
.rlim_cur
== RLIM_INFINITY
)
281 if (limit
.rlim_cur
< 1024)
282 snprintf(buf
, sizeof(buf
), "%zu bytes", (size_t)limit
.rlim_cur
);
283 else if (limit
.rlim_cur
< 1024*1024)
284 snprintf(buf
, sizeof(buf
), "%.1f KiB", (double)limit
.rlim_cur
/ 1024);
286 snprintf(buf
, sizeof(buf
), "%.1f MiB", (double)limit
.rlim_cur
/ (1024*1024));
288 pr_warn("permission error while running as root; try raising 'ulimit -l'? current value: %s\n",
292 #define STRERR_BUFSIZE 128
294 /* Copied from tools/perf/util/util.h */
296 # define zfree(ptr) ({ free(*ptr); *ptr = NULL; })
300 # define zclose(fd) ({ \
303 ___err = close((fd)); \
308 static inline __u64
ptr_to_u64(const void *ptr
)
310 return (__u64
) (unsigned long) ptr
;
313 int libbpf_set_strict_mode(enum libbpf_strict_mode mode
)
315 /* as of v1.0 libbpf_set_strict_mode() is a no-op */
319 __u32
libbpf_major_version(void)
321 return LIBBPF_MAJOR_VERSION
;
324 __u32
libbpf_minor_version(void)
326 return LIBBPF_MINOR_VERSION
;
329 const char *libbpf_version_string(void)
333 return "v" _S(LIBBPF_MAJOR_VERSION
) "." _S(LIBBPF_MINOR_VERSION
);
349 enum reloc_type type
;
352 const struct bpf_core_relo
*core_relo
; /* used when type == RELO_CORE */
361 /* stored as sec_def->cookie for all libbpf-supported SEC()s */
364 /* expected_attach_type is optional, if kernel doesn't support that */
365 SEC_EXP_ATTACH_OPT
= 1,
366 /* legacy, only used by libbpf_get_type_names() and
367 * libbpf_attach_type_by_name(), not used by libbpf itself at all.
368 * This used to be associated with cgroup (and few other) BPF programs
369 * that were attachable through BPF_PROG_ATTACH command. Pretty
370 * meaningless nowadays, though.
373 SEC_ATTACHABLE_OPT
= SEC_ATTACHABLE
| SEC_EXP_ATTACH_OPT
,
374 /* attachment target is specified through BTF ID in either kernel or
375 * other BPF program's BTF object
378 /* BPF program type allows sleeping/blocking in kernel */
380 /* BPF program support non-linear XDP buffer */
382 /* Setup proper attach type for usdt probes. */
388 enum bpf_prog_type prog_type
;
389 enum bpf_attach_type expected_attach_type
;
393 libbpf_prog_setup_fn_t prog_setup_fn
;
394 libbpf_prog_prepare_load_fn_t prog_prepare_load_fn
;
395 libbpf_prog_attach_fn_t prog_attach_fn
;
399 * bpf_prog should be a better name but it has been used in
406 const struct bpf_sec_def
*sec_def
;
407 /* this program's instruction offset (in number of instructions)
408 * within its containing ELF section
411 /* number of original instructions in ELF section belonging to this
412 * program, not taking into account subprogram instructions possible
413 * appended later during relocation
416 /* Offset (in number of instructions) of the start of instruction
417 * belonging to this BPF program within its containing main BPF
418 * program. For the entry-point (main) BPF program, this is always
419 * zero. For a sub-program, this gets reset before each of main BPF
420 * programs are processed and relocated and is used to determined
421 * whether sub-program was already appended to the main program, and
422 * if yes, at which instruction offset.
426 /* instructions that belong to BPF program; insns[0] is located at
427 * sec_insn_off instruction within its ELF section in ELF file, so
428 * when mapping ELF file instruction index to the local instruction,
429 * one needs to subtract sec_insn_off; and vice versa.
431 struct bpf_insn
*insns
;
432 /* actual number of instruction in this BPF program's image; for
433 * entry-point BPF programs this includes the size of main program
434 * itself plus all the used sub-programs, appended at the end
438 struct reloc_desc
*reloc_desc
;
441 /* BPF verifier log settings */
446 struct bpf_object
*obj
;
452 bool mark_btf_static
;
453 enum bpf_prog_type type
;
454 enum bpf_attach_type expected_attach_type
;
455 int exception_cb_idx
;
458 __u32 attach_btf_obj_fd
;
460 __u32 attach_prog_fd
;
463 __u32 func_info_rec_size
;
467 __u32 line_info_rec_size
;
472 struct bpf_struct_ops
{
474 const struct btf_type
*type
;
475 struct bpf_program
**progs
;
476 __u32
*kern_func_off
;
477 /* e.g. struct tcp_congestion_ops in bpf_prog's btf format */
479 /* e.g. struct bpf_struct_ops_tcp_congestion_ops in
480 * btf_vmlinux's format.
481 * struct bpf_struct_ops_tcp_congestion_ops {
482 * [... some other kernel fields ...]
483 * struct tcp_congestion_ops data;
485 * kern_vdata-size == sizeof(struct bpf_struct_ops_tcp_congestion_ops)
486 * bpf_map__init_kern_struct_ops() will populate the "kern_vdata"
493 #define DATA_SEC ".data"
494 #define BSS_SEC ".bss"
495 #define RODATA_SEC ".rodata"
496 #define KCONFIG_SEC ".kconfig"
497 #define KSYMS_SEC ".ksyms"
498 #define STRUCT_OPS_SEC ".struct_ops"
499 #define STRUCT_OPS_LINK_SEC ".struct_ops.link"
501 enum libbpf_map_type
{
511 unsigned int key_size
;
512 unsigned int value_size
;
513 unsigned int max_entries
;
514 unsigned int map_flags
;
518 struct bpf_object
*obj
;
520 /* real_name is defined for special internal maps (.rodata*,
521 * .data*, .bss, .kconfig) and preserves their original ELF section
522 * name. This is important to be able to find corresponding BTF
523 * DATASEC information.
531 struct bpf_map_def def
;
535 __u32 btf_key_type_id
;
536 __u32 btf_value_type_id
;
537 __u32 btf_vmlinux_value_type_id
;
538 enum libbpf_map_type libbpf_type
;
540 struct bpf_struct_ops
*st_ops
;
541 struct bpf_map
*inner_map
;
567 enum extern_type type
;
584 unsigned long long addr
;
586 /* target btf_id of the corresponding kernel var. */
587 int kernel_btf_obj_fd
;
590 /* local btf_id of the ksym extern's type. */
592 /* BTF fd index to be patched in for insn->off, this is
593 * 0 for vmlinux BTF, index in obj->fd_array for module
618 struct elf_sec_desc
{
619 enum sec_type sec_type
;
631 size_t shstrndx
; /* section index for section name strings */
633 struct elf_sec_desc
*secs
;
636 __u32 btf_maps_sec_btf_id
;
645 char name
[BPF_OBJ_NAME_LEN
];
649 struct bpf_program
*programs
;
651 struct bpf_map
*maps
;
656 struct extern_desc
*externs
;
664 struct bpf_gen
*gen_loader
;
666 /* Information when doing ELF related work. Only valid if efile.elf is not NULL */
667 struct elf_state efile
;
670 struct btf_ext
*btf_ext
;
672 /* Parse and load BTF vmlinux if any of the programs in the object need
675 struct btf
*btf_vmlinux
;
676 /* Path to the custom BTF to be used for BPF CO-RE relocations as an
677 * override for vmlinux BTF.
679 char *btf_custom_path
;
680 /* vmlinux BTF override for CO-RE relocations */
681 struct btf
*btf_vmlinux_override
;
682 /* Lazily initialized kernel module BTFs */
683 struct module_btf
*btf_modules
;
684 bool btf_modules_loaded
;
685 size_t btf_module_cnt
;
686 size_t btf_module_cap
;
688 /* optional log settings passed to BPF_BTF_LOAD and BPF_PROG_LOAD commands */
697 struct usdt_manager
*usdt_man
;
699 struct kern_feature_cache
*feat_cache
;
706 static const char *elf_sym_str(const struct bpf_object
*obj
, size_t off
);
707 static const char *elf_sec_str(const struct bpf_object
*obj
, size_t off
);
708 static Elf_Scn
*elf_sec_by_idx(const struct bpf_object
*obj
, size_t idx
);
709 static Elf_Scn
*elf_sec_by_name(const struct bpf_object
*obj
, const char *name
);
710 static Elf64_Shdr
*elf_sec_hdr(const struct bpf_object
*obj
, Elf_Scn
*scn
);
711 static const char *elf_sec_name(const struct bpf_object
*obj
, Elf_Scn
*scn
);
712 static Elf_Data
*elf_sec_data(const struct bpf_object
*obj
, Elf_Scn
*scn
);
713 static Elf64_Sym
*elf_sym_by_idx(const struct bpf_object
*obj
, size_t idx
);
714 static Elf64_Rel
*elf_rel_by_idx(Elf_Data
*data
, size_t idx
);
716 void bpf_program__unload(struct bpf_program
*prog
)
723 zfree(&prog
->func_info
);
724 zfree(&prog
->line_info
);
727 static void bpf_program__exit(struct bpf_program
*prog
)
732 bpf_program__unload(prog
);
734 zfree(&prog
->sec_name
);
736 zfree(&prog
->reloc_desc
);
743 static bool insn_is_subprog_call(const struct bpf_insn
*insn
)
745 return BPF_CLASS(insn
->code
) == BPF_JMP
&&
746 BPF_OP(insn
->code
) == BPF_CALL
&&
747 BPF_SRC(insn
->code
) == BPF_K
&&
748 insn
->src_reg
== BPF_PSEUDO_CALL
&&
749 insn
->dst_reg
== 0 &&
753 static bool is_call_insn(const struct bpf_insn
*insn
)
755 return insn
->code
== (BPF_JMP
| BPF_CALL
);
758 static bool insn_is_pseudo_func(struct bpf_insn
*insn
)
760 return is_ldimm64_insn(insn
) && insn
->src_reg
== BPF_PSEUDO_FUNC
;
764 bpf_object__init_prog(struct bpf_object
*obj
, struct bpf_program
*prog
,
765 const char *name
, size_t sec_idx
, const char *sec_name
,
766 size_t sec_off
, void *insn_data
, size_t insn_data_sz
)
768 if (insn_data_sz
== 0 || insn_data_sz
% BPF_INSN_SZ
|| sec_off
% BPF_INSN_SZ
) {
769 pr_warn("sec '%s': corrupted program '%s', offset %zu, size %zu\n",
770 sec_name
, name
, sec_off
, insn_data_sz
);
774 memset(prog
, 0, sizeof(*prog
));
777 prog
->sec_idx
= sec_idx
;
778 prog
->sec_insn_off
= sec_off
/ BPF_INSN_SZ
;
779 prog
->sec_insn_cnt
= insn_data_sz
/ BPF_INSN_SZ
;
780 /* insns_cnt can later be increased by appending used subprograms */
781 prog
->insns_cnt
= prog
->sec_insn_cnt
;
783 prog
->type
= BPF_PROG_TYPE_UNSPEC
;
785 prog
->exception_cb_idx
= -1;
787 /* libbpf's convention for SEC("?abc...") is that it's just like
788 * SEC("abc...") but the corresponding bpf_program starts out with
789 * autoload set to false.
791 if (sec_name
[0] == '?') {
792 prog
->autoload
= false;
793 /* from now on forget there was ? in section name */
796 prog
->autoload
= true;
799 prog
->autoattach
= true;
801 /* inherit object's log_level */
802 prog
->log_level
= obj
->log_level
;
804 prog
->sec_name
= strdup(sec_name
);
808 prog
->name
= strdup(name
);
812 prog
->insns
= malloc(insn_data_sz
);
815 memcpy(prog
->insns
, insn_data
, insn_data_sz
);
819 pr_warn("sec '%s': failed to allocate memory for prog '%s'\n", sec_name
, name
);
820 bpf_program__exit(prog
);
825 bpf_object__add_programs(struct bpf_object
*obj
, Elf_Data
*sec_data
,
826 const char *sec_name
, int sec_idx
)
828 Elf_Data
*symbols
= obj
->efile
.symbols
;
829 struct bpf_program
*prog
, *progs
;
830 void *data
= sec_data
->d_buf
;
831 size_t sec_sz
= sec_data
->d_size
, sec_off
, prog_sz
, nr_syms
;
832 int nr_progs
, err
, i
;
836 progs
= obj
->programs
;
837 nr_progs
= obj
->nr_programs
;
838 nr_syms
= symbols
->d_size
/ sizeof(Elf64_Sym
);
840 for (i
= 0; i
< nr_syms
; i
++) {
841 sym
= elf_sym_by_idx(obj
, i
);
843 if (sym
->st_shndx
!= sec_idx
)
845 if (ELF64_ST_TYPE(sym
->st_info
) != STT_FUNC
)
848 prog_sz
= sym
->st_size
;
849 sec_off
= sym
->st_value
;
851 name
= elf_sym_str(obj
, sym
->st_name
);
853 pr_warn("sec '%s': failed to get symbol name for offset %zu\n",
855 return -LIBBPF_ERRNO__FORMAT
;
858 if (sec_off
+ prog_sz
> sec_sz
) {
859 pr_warn("sec '%s': program at offset %zu crosses section boundary\n",
861 return -LIBBPF_ERRNO__FORMAT
;
864 if (sec_idx
!= obj
->efile
.text_shndx
&& ELF64_ST_BIND(sym
->st_info
) == STB_LOCAL
) {
865 pr_warn("sec '%s': program '%s' is static and not supported\n", sec_name
, name
);
869 pr_debug("sec '%s': found program '%s' at insn offset %zu (%zu bytes), code size %zu insns (%zu bytes)\n",
870 sec_name
, name
, sec_off
/ BPF_INSN_SZ
, sec_off
, prog_sz
/ BPF_INSN_SZ
, prog_sz
);
872 progs
= libbpf_reallocarray(progs
, nr_progs
+ 1, sizeof(*progs
));
875 * In this case the original obj->programs
876 * is still valid, so don't need special treat for
877 * bpf_close_object().
879 pr_warn("sec '%s': failed to alloc memory for new program '%s'\n",
883 obj
->programs
= progs
;
885 prog
= &progs
[nr_progs
];
887 err
= bpf_object__init_prog(obj
, prog
, name
, sec_idx
, sec_name
,
888 sec_off
, data
+ sec_off
, prog_sz
);
892 if (ELF64_ST_BIND(sym
->st_info
) != STB_LOCAL
)
893 prog
->sym_global
= true;
895 /* if function is a global/weak symbol, but has restricted
896 * (STV_HIDDEN or STV_INTERNAL) visibility, mark its BTF FUNC
897 * as static to enable more permissive BPF verification mode
898 * with more outside context available to BPF verifier
900 if (prog
->sym_global
&& (ELF64_ST_VISIBILITY(sym
->st_other
) == STV_HIDDEN
901 || ELF64_ST_VISIBILITY(sym
->st_other
) == STV_INTERNAL
))
902 prog
->mark_btf_static
= true;
905 obj
->nr_programs
= nr_progs
;
911 static const struct btf_member
*
912 find_member_by_offset(const struct btf_type
*t
, __u32 bit_offset
)
914 struct btf_member
*m
;
917 for (i
= 0, m
= btf_members(t
); i
< btf_vlen(t
); i
++, m
++) {
918 if (btf_member_bit_offset(t
, i
) == bit_offset
)
925 static const struct btf_member
*
926 find_member_by_name(const struct btf
*btf
, const struct btf_type
*t
,
929 struct btf_member
*m
;
932 for (i
= 0, m
= btf_members(t
); i
< btf_vlen(t
); i
++, m
++) {
933 if (!strcmp(btf__name_by_offset(btf
, m
->name_off
), name
))
940 static int find_ksym_btf_id(struct bpf_object
*obj
, const char *ksym_name
,
941 __u16 kind
, struct btf
**res_btf
,
942 struct module_btf
**res_mod_btf
);
944 #define STRUCT_OPS_VALUE_PREFIX "bpf_struct_ops_"
945 static int find_btf_by_prefix_kind(const struct btf
*btf
, const char *prefix
,
946 const char *name
, __u32 kind
);
949 find_struct_ops_kern_types(struct bpf_object
*obj
, const char *tname_raw
,
950 struct module_btf
**mod_btf
,
951 const struct btf_type
**type
, __u32
*type_id
,
952 const struct btf_type
**vtype
, __u32
*vtype_id
,
953 const struct btf_member
**data_member
)
955 const struct btf_type
*kern_type
, *kern_vtype
;
956 const struct btf_member
*kern_data_member
;
958 __s32 kern_vtype_id
, kern_type_id
;
962 snprintf(tname
, sizeof(tname
), "%.*s",
963 (int)bpf_core_essential_name_len(tname_raw
), tname_raw
);
965 kern_type_id
= find_ksym_btf_id(obj
, tname
, BTF_KIND_STRUCT
,
967 if (kern_type_id
< 0) {
968 pr_warn("struct_ops init_kern: struct %s is not found in kernel BTF\n",
972 kern_type
= btf__type_by_id(btf
, kern_type_id
);
974 /* Find the corresponding "map_value" type that will be used
975 * in map_update(BPF_MAP_TYPE_STRUCT_OPS). For example,
976 * find "struct bpf_struct_ops_tcp_congestion_ops" from the
979 kern_vtype_id
= find_btf_by_prefix_kind(btf
, STRUCT_OPS_VALUE_PREFIX
,
980 tname
, BTF_KIND_STRUCT
);
981 if (kern_vtype_id
< 0) {
982 pr_warn("struct_ops init_kern: struct %s%s is not found in kernel BTF\n",
983 STRUCT_OPS_VALUE_PREFIX
, tname
);
984 return kern_vtype_id
;
986 kern_vtype
= btf__type_by_id(btf
, kern_vtype_id
);
988 /* Find "struct tcp_congestion_ops" from
989 * struct bpf_struct_ops_tcp_congestion_ops {
991 * struct tcp_congestion_ops data;
994 kern_data_member
= btf_members(kern_vtype
);
995 for (i
= 0; i
< btf_vlen(kern_vtype
); i
++, kern_data_member
++) {
996 if (kern_data_member
->type
== kern_type_id
)
999 if (i
== btf_vlen(kern_vtype
)) {
1000 pr_warn("struct_ops init_kern: struct %s data is not found in struct %s%s\n",
1001 tname
, STRUCT_OPS_VALUE_PREFIX
, tname
);
1006 *type_id
= kern_type_id
;
1007 *vtype
= kern_vtype
;
1008 *vtype_id
= kern_vtype_id
;
1009 *data_member
= kern_data_member
;
1014 static bool bpf_map__is_struct_ops(const struct bpf_map
*map
)
1016 return map
->def
.type
== BPF_MAP_TYPE_STRUCT_OPS
;
1019 static bool is_valid_st_ops_program(struct bpf_object
*obj
,
1020 const struct bpf_program
*prog
)
1024 for (i
= 0; i
< obj
->nr_programs
; i
++) {
1025 if (&obj
->programs
[i
] == prog
)
1026 return prog
->type
== BPF_PROG_TYPE_STRUCT_OPS
;
1032 /* For each struct_ops program P, referenced from some struct_ops map M,
1033 * enable P.autoload if there are Ms for which M.autocreate is true,
1034 * disable P.autoload if for all Ms M.autocreate is false.
1035 * Don't change P.autoload for programs that are not referenced from any maps.
1037 static int bpf_object_adjust_struct_ops_autoload(struct bpf_object
*obj
)
1039 struct bpf_program
*prog
, *slot_prog
;
1040 struct bpf_map
*map
;
1043 for (i
= 0; i
< obj
->nr_programs
; ++i
) {
1044 int should_load
= false;
1047 prog
= &obj
->programs
[i
];
1048 if (prog
->type
!= BPF_PROG_TYPE_STRUCT_OPS
)
1051 for (j
= 0; j
< obj
->nr_maps
; ++j
) {
1052 map
= &obj
->maps
[j
];
1053 if (!bpf_map__is_struct_ops(map
))
1056 vlen
= btf_vlen(map
->st_ops
->type
);
1057 for (k
= 0; k
< vlen
; ++k
) {
1058 slot_prog
= map
->st_ops
->progs
[k
];
1059 if (prog
!= slot_prog
)
1063 if (map
->autocreate
)
1068 prog
->autoload
= should_load
;
1074 /* Init the map's fields that depend on kern_btf */
1075 static int bpf_map__init_kern_struct_ops(struct bpf_map
*map
)
1077 const struct btf_member
*member
, *kern_member
, *kern_data_member
;
1078 const struct btf_type
*type
, *kern_type
, *kern_vtype
;
1079 __u32 i
, kern_type_id
, kern_vtype_id
, kern_data_off
;
1080 struct bpf_object
*obj
= map
->obj
;
1081 const struct btf
*btf
= obj
->btf
;
1082 struct bpf_struct_ops
*st_ops
;
1083 const struct btf
*kern_btf
;
1084 struct module_btf
*mod_btf
;
1085 void *data
, *kern_data
;
1089 st_ops
= map
->st_ops
;
1090 type
= st_ops
->type
;
1091 tname
= st_ops
->tname
;
1092 err
= find_struct_ops_kern_types(obj
, tname
, &mod_btf
,
1093 &kern_type
, &kern_type_id
,
1094 &kern_vtype
, &kern_vtype_id
,
1099 kern_btf
= mod_btf
? mod_btf
->btf
: obj
->btf_vmlinux
;
1101 pr_debug("struct_ops init_kern %s: type_id:%u kern_type_id:%u kern_vtype_id:%u\n",
1102 map
->name
, st_ops
->type_id
, kern_type_id
, kern_vtype_id
);
1104 map
->mod_btf_fd
= mod_btf
? mod_btf
->fd
: -1;
1105 map
->def
.value_size
= kern_vtype
->size
;
1106 map
->btf_vmlinux_value_type_id
= kern_vtype_id
;
1108 st_ops
->kern_vdata
= calloc(1, kern_vtype
->size
);
1109 if (!st_ops
->kern_vdata
)
1112 data
= st_ops
->data
;
1113 kern_data_off
= kern_data_member
->offset
/ 8;
1114 kern_data
= st_ops
->kern_vdata
+ kern_data_off
;
1116 member
= btf_members(type
);
1117 for (i
= 0; i
< btf_vlen(type
); i
++, member
++) {
1118 const struct btf_type
*mtype
, *kern_mtype
;
1119 __u32 mtype_id
, kern_mtype_id
;
1120 void *mdata
, *kern_mdata
;
1121 __s64 msize
, kern_msize
;
1122 __u32 moff
, kern_moff
;
1123 __u32 kern_member_idx
;
1126 mname
= btf__name_by_offset(btf
, member
->name_off
);
1127 kern_member
= find_member_by_name(kern_btf
, kern_type
, mname
);
1129 pr_warn("struct_ops init_kern %s: Cannot find member %s in kernel BTF\n",
1134 kern_member_idx
= kern_member
- btf_members(kern_type
);
1135 if (btf_member_bitfield_size(type
, i
) ||
1136 btf_member_bitfield_size(kern_type
, kern_member_idx
)) {
1137 pr_warn("struct_ops init_kern %s: bitfield %s is not supported\n",
1142 moff
= member
->offset
/ 8;
1143 kern_moff
= kern_member
->offset
/ 8;
1145 mdata
= data
+ moff
;
1146 kern_mdata
= kern_data
+ kern_moff
;
1148 mtype
= skip_mods_and_typedefs(btf
, member
->type
, &mtype_id
);
1149 kern_mtype
= skip_mods_and_typedefs(kern_btf
, kern_member
->type
,
1151 if (BTF_INFO_KIND(mtype
->info
) !=
1152 BTF_INFO_KIND(kern_mtype
->info
)) {
1153 pr_warn("struct_ops init_kern %s: Unmatched member type %s %u != %u(kernel)\n",
1154 map
->name
, mname
, BTF_INFO_KIND(mtype
->info
),
1155 BTF_INFO_KIND(kern_mtype
->info
));
1159 if (btf_is_ptr(mtype
)) {
1160 struct bpf_program
*prog
;
1162 /* Update the value from the shadow type */
1163 prog
= *(void **)mdata
;
1164 st_ops
->progs
[i
] = prog
;
1167 if (!is_valid_st_ops_program(obj
, prog
)) {
1168 pr_warn("struct_ops init_kern %s: member %s is not a struct_ops program\n",
1173 kern_mtype
= skip_mods_and_typedefs(kern_btf
,
1177 /* mtype->type must be a func_proto which was
1178 * guaranteed in bpf_object__collect_st_ops_relos(),
1179 * so only check kern_mtype for func_proto here.
1181 if (!btf_is_func_proto(kern_mtype
)) {
1182 pr_warn("struct_ops init_kern %s: kernel member %s is not a func ptr\n",
1188 prog
->attach_btf_obj_fd
= mod_btf
->fd
;
1190 /* if we haven't yet processed this BPF program, record proper
1191 * attach_btf_id and member_idx
1193 if (!prog
->attach_btf_id
) {
1194 prog
->attach_btf_id
= kern_type_id
;
1195 prog
->expected_attach_type
= kern_member_idx
;
1198 /* struct_ops BPF prog can be re-used between multiple
1199 * .struct_ops & .struct_ops.link as long as it's the
1200 * same struct_ops struct definition and the same
1201 * function pointer field
1203 if (prog
->attach_btf_id
!= kern_type_id
) {
1204 pr_warn("struct_ops init_kern %s func ptr %s: invalid reuse of prog %s in sec %s with type %u: attach_btf_id %u != kern_type_id %u\n",
1205 map
->name
, mname
, prog
->name
, prog
->sec_name
, prog
->type
,
1206 prog
->attach_btf_id
, kern_type_id
);
1209 if (prog
->expected_attach_type
!= kern_member_idx
) {
1210 pr_warn("struct_ops init_kern %s func ptr %s: invalid reuse of prog %s in sec %s with type %u: expected_attach_type %u != kern_member_idx %u\n",
1211 map
->name
, mname
, prog
->name
, prog
->sec_name
, prog
->type
,
1212 prog
->expected_attach_type
, kern_member_idx
);
1216 st_ops
->kern_func_off
[i
] = kern_data_off
+ kern_moff
;
1218 pr_debug("struct_ops init_kern %s: func ptr %s is set to prog %s from data(+%u) to kern_data(+%u)\n",
1219 map
->name
, mname
, prog
->name
, moff
,
1225 msize
= btf__resolve_size(btf
, mtype_id
);
1226 kern_msize
= btf__resolve_size(kern_btf
, kern_mtype_id
);
1227 if (msize
< 0 || kern_msize
< 0 || msize
!= kern_msize
) {
1228 pr_warn("struct_ops init_kern %s: Error in size of member %s: %zd != %zd(kernel)\n",
1229 map
->name
, mname
, (ssize_t
)msize
,
1230 (ssize_t
)kern_msize
);
1234 pr_debug("struct_ops init_kern %s: copy %s %u bytes from data(+%u) to kern_data(+%u)\n",
1235 map
->name
, mname
, (unsigned int)msize
,
1237 memcpy(kern_mdata
, mdata
, msize
);
1243 static int bpf_object__init_kern_struct_ops_maps(struct bpf_object
*obj
)
1245 struct bpf_map
*map
;
1249 for (i
= 0; i
< obj
->nr_maps
; i
++) {
1250 map
= &obj
->maps
[i
];
1252 if (!bpf_map__is_struct_ops(map
))
1255 if (!map
->autocreate
)
1258 err
= bpf_map__init_kern_struct_ops(map
);
1266 static int init_struct_ops_maps(struct bpf_object
*obj
, const char *sec_name
,
1267 int shndx
, Elf_Data
*data
)
1269 const struct btf_type
*type
, *datasec
;
1270 const struct btf_var_secinfo
*vsi
;
1271 struct bpf_struct_ops
*st_ops
;
1272 const char *tname
, *var_name
;
1273 __s32 type_id
, datasec_id
;
1274 const struct btf
*btf
;
1275 struct bpf_map
*map
;
1282 datasec_id
= btf__find_by_name_kind(btf
, sec_name
,
1284 if (datasec_id
< 0) {
1285 pr_warn("struct_ops init: DATASEC %s not found\n",
1290 datasec
= btf__type_by_id(btf
, datasec_id
);
1291 vsi
= btf_var_secinfos(datasec
);
1292 for (i
= 0; i
< btf_vlen(datasec
); i
++, vsi
++) {
1293 type
= btf__type_by_id(obj
->btf
, vsi
->type
);
1294 var_name
= btf__name_by_offset(obj
->btf
, type
->name_off
);
1296 type_id
= btf__resolve_type(obj
->btf
, vsi
->type
);
1298 pr_warn("struct_ops init: Cannot resolve var type_id %u in DATASEC %s\n",
1299 vsi
->type
, sec_name
);
1303 type
= btf__type_by_id(obj
->btf
, type_id
);
1304 tname
= btf__name_by_offset(obj
->btf
, type
->name_off
);
1306 pr_warn("struct_ops init: anonymous type is not supported\n");
1309 if (!btf_is_struct(type
)) {
1310 pr_warn("struct_ops init: %s is not a struct\n", tname
);
1314 map
= bpf_object__add_map(obj
);
1316 return PTR_ERR(map
);
1318 map
->sec_idx
= shndx
;
1319 map
->sec_offset
= vsi
->offset
;
1320 map
->name
= strdup(var_name
);
1323 map
->btf_value_type_id
= type_id
;
1325 /* Follow same convention as for programs autoload:
1326 * SEC("?.struct_ops") means map is not created by default.
1328 if (sec_name
[0] == '?') {
1329 map
->autocreate
= false;
1330 /* from now on forget there was ? in section name */
1334 map
->def
.type
= BPF_MAP_TYPE_STRUCT_OPS
;
1335 map
->def
.key_size
= sizeof(int);
1336 map
->def
.value_size
= type
->size
;
1337 map
->def
.max_entries
= 1;
1338 map
->def
.map_flags
= strcmp(sec_name
, STRUCT_OPS_LINK_SEC
) == 0 ? BPF_F_LINK
: 0;
1340 map
->st_ops
= calloc(1, sizeof(*map
->st_ops
));
1343 st_ops
= map
->st_ops
;
1344 st_ops
->data
= malloc(type
->size
);
1345 st_ops
->progs
= calloc(btf_vlen(type
), sizeof(*st_ops
->progs
));
1346 st_ops
->kern_func_off
= malloc(btf_vlen(type
) *
1347 sizeof(*st_ops
->kern_func_off
));
1348 if (!st_ops
->data
|| !st_ops
->progs
|| !st_ops
->kern_func_off
)
1351 if (vsi
->offset
+ type
->size
> data
->d_size
) {
1352 pr_warn("struct_ops init: var %s is beyond the end of DATASEC %s\n",
1353 var_name
, sec_name
);
1357 memcpy(st_ops
->data
,
1358 data
->d_buf
+ vsi
->offset
,
1360 st_ops
->tname
= tname
;
1361 st_ops
->type
= type
;
1362 st_ops
->type_id
= type_id
;
1364 pr_debug("struct_ops init: struct %s(type_id=%u) %s found at offset %u\n",
1365 tname
, type_id
, var_name
, vsi
->offset
);
1371 static int bpf_object_init_struct_ops(struct bpf_object
*obj
)
1373 const char *sec_name
;
1376 for (sec_idx
= 0; sec_idx
< obj
->efile
.sec_cnt
; ++sec_idx
) {
1377 struct elf_sec_desc
*desc
= &obj
->efile
.secs
[sec_idx
];
1379 if (desc
->sec_type
!= SEC_ST_OPS
)
1382 sec_name
= elf_sec_name(obj
, elf_sec_by_idx(obj
, sec_idx
));
1384 return -LIBBPF_ERRNO__FORMAT
;
1386 err
= init_struct_ops_maps(obj
, sec_name
, sec_idx
, desc
->data
);
1394 static struct bpf_object
*bpf_object__new(const char *path
,
1395 const void *obj_buf
,
1397 const char *obj_name
)
1399 struct bpf_object
*obj
;
1402 obj
= calloc(1, sizeof(struct bpf_object
) + strlen(path
) + 1);
1404 pr_warn("alloc memory failed for %s\n", path
);
1405 return ERR_PTR(-ENOMEM
);
1408 strcpy(obj
->path
, path
);
1410 libbpf_strlcpy(obj
->name
, obj_name
, sizeof(obj
->name
));
1412 /* Using basename() GNU version which doesn't modify arg. */
1413 libbpf_strlcpy(obj
->name
, basename((void *)path
), sizeof(obj
->name
));
1414 end
= strchr(obj
->name
, '.');
1421 * Caller of this function should also call
1422 * bpf_object__elf_finish() after data collection to return
1423 * obj_buf to user. If not, we should duplicate the buffer to
1424 * avoid user freeing them before elf finish.
1426 obj
->efile
.obj_buf
= obj_buf
;
1427 obj
->efile
.obj_buf_sz
= obj_buf_sz
;
1428 obj
->efile
.btf_maps_shndx
= -1;
1429 obj
->kconfig_map_idx
= -1;
1431 obj
->kern_version
= get_kernel_version();
1432 obj
->loaded
= false;
1437 static void bpf_object__elf_finish(struct bpf_object
*obj
)
1439 if (!obj
->efile
.elf
)
1442 elf_end(obj
->efile
.elf
);
1443 obj
->efile
.elf
= NULL
;
1444 obj
->efile
.symbols
= NULL
;
1446 zfree(&obj
->efile
.secs
);
1447 obj
->efile
.sec_cnt
= 0;
1448 zclose(obj
->efile
.fd
);
1449 obj
->efile
.obj_buf
= NULL
;
1450 obj
->efile
.obj_buf_sz
= 0;
1453 static int bpf_object__elf_init(struct bpf_object
*obj
)
1459 if (obj
->efile
.elf
) {
1460 pr_warn("elf: init internal error\n");
1461 return -LIBBPF_ERRNO__LIBELF
;
1464 if (obj
->efile
.obj_buf_sz
> 0) {
1465 /* obj_buf should have been validated by bpf_object__open_mem(). */
1466 elf
= elf_memory((char *)obj
->efile
.obj_buf
, obj
->efile
.obj_buf_sz
);
1468 obj
->efile
.fd
= open(obj
->path
, O_RDONLY
| O_CLOEXEC
);
1469 if (obj
->efile
.fd
< 0) {
1470 char errmsg
[STRERR_BUFSIZE
], *cp
;
1473 cp
= libbpf_strerror_r(err
, errmsg
, sizeof(errmsg
));
1474 pr_warn("elf: failed to open %s: %s\n", obj
->path
, cp
);
1478 elf
= elf_begin(obj
->efile
.fd
, ELF_C_READ_MMAP
, NULL
);
1482 pr_warn("elf: failed to open %s as ELF file: %s\n", obj
->path
, elf_errmsg(-1));
1483 err
= -LIBBPF_ERRNO__LIBELF
;
1487 obj
->efile
.elf
= elf
;
1489 if (elf_kind(elf
) != ELF_K_ELF
) {
1490 err
= -LIBBPF_ERRNO__FORMAT
;
1491 pr_warn("elf: '%s' is not a proper ELF object\n", obj
->path
);
1495 if (gelf_getclass(elf
) != ELFCLASS64
) {
1496 err
= -LIBBPF_ERRNO__FORMAT
;
1497 pr_warn("elf: '%s' is not a 64-bit ELF object\n", obj
->path
);
1501 obj
->efile
.ehdr
= ehdr
= elf64_getehdr(elf
);
1502 if (!obj
->efile
.ehdr
) {
1503 pr_warn("elf: failed to get ELF header from %s: %s\n", obj
->path
, elf_errmsg(-1));
1504 err
= -LIBBPF_ERRNO__FORMAT
;
1508 if (elf_getshdrstrndx(elf
, &obj
->efile
.shstrndx
)) {
1509 pr_warn("elf: failed to get section names section index for %s: %s\n",
1510 obj
->path
, elf_errmsg(-1));
1511 err
= -LIBBPF_ERRNO__FORMAT
;
1515 /* ELF is corrupted/truncated, avoid calling elf_strptr. */
1516 if (!elf_rawdata(elf_getscn(elf
, obj
->efile
.shstrndx
), NULL
)) {
1517 pr_warn("elf: failed to get section names strings from %s: %s\n",
1518 obj
->path
, elf_errmsg(-1));
1519 err
= -LIBBPF_ERRNO__FORMAT
;
1523 /* Old LLVM set e_machine to EM_NONE */
1524 if (ehdr
->e_type
!= ET_REL
|| (ehdr
->e_machine
&& ehdr
->e_machine
!= EM_BPF
)) {
1525 pr_warn("elf: %s is not a valid eBPF object file\n", obj
->path
);
1526 err
= -LIBBPF_ERRNO__FORMAT
;
1532 bpf_object__elf_finish(obj
);
1536 static int bpf_object__check_endianness(struct bpf_object
*obj
)
1538 #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
1539 if (obj
->efile
.ehdr
->e_ident
[EI_DATA
] == ELFDATA2LSB
)
1541 #elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
1542 if (obj
->efile
.ehdr
->e_ident
[EI_DATA
] == ELFDATA2MSB
)
1545 # error "Unrecognized __BYTE_ORDER__"
1547 pr_warn("elf: endianness mismatch in %s.\n", obj
->path
);
1548 return -LIBBPF_ERRNO__ENDIAN
;
1552 bpf_object__init_license(struct bpf_object
*obj
, void *data
, size_t size
)
1555 pr_warn("invalid license section in %s\n", obj
->path
);
1556 return -LIBBPF_ERRNO__FORMAT
;
1558 /* libbpf_strlcpy() only copies first N - 1 bytes, so size + 1 won't
1559 * go over allowed ELF data section buffer
1561 libbpf_strlcpy(obj
->license
, data
, min(size
+ 1, sizeof(obj
->license
)));
1562 pr_debug("license of %s is %s\n", obj
->path
, obj
->license
);
1567 bpf_object__init_kversion(struct bpf_object
*obj
, void *data
, size_t size
)
1571 if (!data
|| size
!= sizeof(kver
)) {
1572 pr_warn("invalid kver section in %s\n", obj
->path
);
1573 return -LIBBPF_ERRNO__FORMAT
;
1575 memcpy(&kver
, data
, sizeof(kver
));
1576 obj
->kern_version
= kver
;
1577 pr_debug("kernel version of %s is %x\n", obj
->path
, obj
->kern_version
);
1581 static bool bpf_map_type__is_map_in_map(enum bpf_map_type type
)
1583 if (type
== BPF_MAP_TYPE_ARRAY_OF_MAPS
||
1584 type
== BPF_MAP_TYPE_HASH_OF_MAPS
)
1589 static int find_elf_sec_sz(const struct bpf_object
*obj
, const char *name
, __u32
*size
)
1597 scn
= elf_sec_by_name(obj
, name
);
1598 data
= elf_sec_data(obj
, scn
);
1600 *size
= data
->d_size
;
1601 return 0; /* found it */
1607 static Elf64_Sym
*find_elf_var_sym(const struct bpf_object
*obj
, const char *name
)
1609 Elf_Data
*symbols
= obj
->efile
.symbols
;
1613 for (si
= 0; si
< symbols
->d_size
/ sizeof(Elf64_Sym
); si
++) {
1614 Elf64_Sym
*sym
= elf_sym_by_idx(obj
, si
);
1616 if (ELF64_ST_TYPE(sym
->st_info
) != STT_OBJECT
)
1619 if (ELF64_ST_BIND(sym
->st_info
) != STB_GLOBAL
&&
1620 ELF64_ST_BIND(sym
->st_info
) != STB_WEAK
)
1623 sname
= elf_sym_str(obj
, sym
->st_name
);
1625 pr_warn("failed to get sym name string for var %s\n", name
);
1626 return ERR_PTR(-EIO
);
1628 if (strcmp(name
, sname
) == 0)
1632 return ERR_PTR(-ENOENT
);
1635 /* Some versions of Android don't provide memfd_create() in their libc
1636 * implementation, so avoid complications and just go straight to Linux
1639 static int sys_memfd_create(const char *name
, unsigned flags
)
1641 return syscall(__NR_memfd_create
, name
, flags
);
1644 static int create_placeholder_fd(void)
1648 fd
= ensure_good_fd(sys_memfd_create("libbpf-placeholder-fd", MFD_CLOEXEC
));
1654 static struct bpf_map
*bpf_object__add_map(struct bpf_object
*obj
)
1656 struct bpf_map
*map
;
1659 err
= libbpf_ensure_mem((void **)&obj
->maps
, &obj
->maps_cap
,
1660 sizeof(*obj
->maps
), obj
->nr_maps
+ 1);
1662 return ERR_PTR(err
);
1664 map
= &obj
->maps
[obj
->nr_maps
++];
1666 /* Preallocate map FD without actually creating BPF map just yet.
1667 * These map FD "placeholders" will be reused later without changing
1668 * FD value when map is actually created in the kernel.
1670 * This is useful to be able to perform BPF program relocations
1671 * without having to create BPF maps before that step. This allows us
1672 * to finalize and load BTF very late in BPF object's loading phase,
1673 * right before BPF maps have to be created and BPF programs have to
1674 * be loaded. By having these map FD placeholders we can perform all
1675 * the sanitizations, relocations, and any other adjustments before we
1676 * start creating actual BPF kernel objects (BTF, maps, progs).
1678 map
->fd
= create_placeholder_fd();
1680 return ERR_PTR(map
->fd
);
1681 map
->inner_map_fd
= -1;
1682 map
->autocreate
= true;
1687 static size_t bpf_map_mmap_sz(unsigned int value_sz
, unsigned int max_entries
)
1689 const long page_sz
= sysconf(_SC_PAGE_SIZE
);
1692 map_sz
= (size_t)roundup(value_sz
, 8) * max_entries
;
1693 map_sz
= roundup(map_sz
, page_sz
);
1697 static int bpf_map_mmap_resize(struct bpf_map
*map
, size_t old_sz
, size_t new_sz
)
1704 if (old_sz
== new_sz
)
1707 mmaped
= mmap(NULL
, new_sz
, PROT_READ
| PROT_WRITE
, MAP_SHARED
| MAP_ANONYMOUS
, -1, 0);
1708 if (mmaped
== MAP_FAILED
)
1711 memcpy(mmaped
, map
->mmaped
, min(old_sz
, new_sz
));
1712 munmap(map
->mmaped
, old_sz
);
1713 map
->mmaped
= mmaped
;
1717 static char *internal_map_name(struct bpf_object
*obj
, const char *real_name
)
1719 char map_name
[BPF_OBJ_NAME_LEN
], *p
;
1720 int pfx_len
, sfx_len
= max((size_t)7, strlen(real_name
));
1722 /* This is one of the more confusing parts of libbpf for various
1723 * reasons, some of which are historical. The original idea for naming
1724 * internal names was to include as much of BPF object name prefix as
1725 * possible, so that it can be distinguished from similar internal
1726 * maps of a different BPF object.
1727 * As an example, let's say we have bpf_object named 'my_object_name'
1728 * and internal map corresponding to '.rodata' ELF section. The final
1729 * map name advertised to user and to the kernel will be
1730 * 'my_objec.rodata', taking first 8 characters of object name and
1731 * entire 7 characters of '.rodata'.
1732 * Somewhat confusingly, if internal map ELF section name is shorter
1733 * than 7 characters, e.g., '.bss', we still reserve 7 characters
1734 * for the suffix, even though we only have 4 actual characters, and
1735 * resulting map will be called 'my_objec.bss', not even using all 15
1736 * characters allowed by the kernel. Oh well, at least the truncated
1737 * object name is somewhat consistent in this case. But if the map
1738 * name is '.kconfig', we'll still have entirety of '.kconfig' added
1739 * (8 chars) and thus will be left with only first 7 characters of the
1740 * object name ('my_obje'). Happy guessing, user, that the final map
1741 * name will be "my_obje.kconfig".
1742 * Now, with libbpf starting to support arbitrarily named .rodata.*
1743 * and .data.* data sections, it's possible that ELF section name is
1744 * longer than allowed 15 chars, so we now need to be careful to take
1745 * only up to 15 first characters of ELF name, taking no BPF object
1746 * name characters at all. So '.rodata.abracadabra' will result in
1747 * '.rodata.abracad' kernel and user-visible name.
1748 * We need to keep this convoluted logic intact for .data, .bss and
1749 * .rodata maps, but for new custom .data.custom and .rodata.custom
1750 * maps we use their ELF names as is, not prepending bpf_object name
1751 * in front. We still need to truncate them to 15 characters for the
1752 * kernel. Full name can be recovered for such maps by using DATASEC
1753 * BTF type associated with such map's value type, though.
1755 if (sfx_len
>= BPF_OBJ_NAME_LEN
)
1756 sfx_len
= BPF_OBJ_NAME_LEN
- 1;
1758 /* if there are two or more dots in map name, it's a custom dot map */
1759 if (strchr(real_name
+ 1, '.') != NULL
)
1762 pfx_len
= min((size_t)BPF_OBJ_NAME_LEN
- sfx_len
- 1, strlen(obj
->name
));
1764 snprintf(map_name
, sizeof(map_name
), "%.*s%.*s", pfx_len
, obj
->name
,
1765 sfx_len
, real_name
);
1767 /* sanitise map name to characters allowed by kernel */
1768 for (p
= map_name
; *p
&& p
< map_name
+ sizeof(map_name
); p
++)
1769 if (!isalnum(*p
) && *p
!= '_' && *p
!= '.')
1772 return strdup(map_name
);
1776 map_fill_btf_type_info(struct bpf_object
*obj
, struct bpf_map
*map
);
1778 /* Internal BPF map is mmap()'able only if at least one of corresponding
1779 * DATASEC's VARs are to be exposed through BPF skeleton. I.e., it's a GLOBAL
1780 * variable and it's not marked as __hidden (which turns it into, effectively,
1781 * a STATIC variable).
1783 static bool map_is_mmapable(struct bpf_object
*obj
, struct bpf_map
*map
)
1785 const struct btf_type
*t
, *vt
;
1786 struct btf_var_secinfo
*vsi
;
1789 if (!map
->btf_value_type_id
)
1792 t
= btf__type_by_id(obj
->btf
, map
->btf_value_type_id
);
1793 if (!btf_is_datasec(t
))
1796 vsi
= btf_var_secinfos(t
);
1797 for (i
= 0, n
= btf_vlen(t
); i
< n
; i
++, vsi
++) {
1798 vt
= btf__type_by_id(obj
->btf
, vsi
->type
);
1799 if (!btf_is_var(vt
))
1802 if (btf_var(vt
)->linkage
!= BTF_VAR_STATIC
)
1810 bpf_object__init_internal_map(struct bpf_object
*obj
, enum libbpf_map_type type
,
1811 const char *real_name
, int sec_idx
, void *data
, size_t data_sz
)
1813 struct bpf_map_def
*def
;
1814 struct bpf_map
*map
;
1818 map
= bpf_object__add_map(obj
);
1820 return PTR_ERR(map
);
1822 map
->libbpf_type
= type
;
1823 map
->sec_idx
= sec_idx
;
1824 map
->sec_offset
= 0;
1825 map
->real_name
= strdup(real_name
);
1826 map
->name
= internal_map_name(obj
, real_name
);
1827 if (!map
->real_name
|| !map
->name
) {
1828 zfree(&map
->real_name
);
1834 def
->type
= BPF_MAP_TYPE_ARRAY
;
1835 def
->key_size
= sizeof(int);
1836 def
->value_size
= data_sz
;
1837 def
->max_entries
= 1;
1838 def
->map_flags
= type
== LIBBPF_MAP_RODATA
|| type
== LIBBPF_MAP_KCONFIG
1839 ? BPF_F_RDONLY_PROG
: 0;
1841 /* failures are fine because of maps like .rodata.str1.1 */
1842 (void) map_fill_btf_type_info(obj
, map
);
1844 if (map_is_mmapable(obj
, map
))
1845 def
->map_flags
|= BPF_F_MMAPABLE
;
1847 pr_debug("map '%s' (global data): at sec_idx %d, offset %zu, flags %x.\n",
1848 map
->name
, map
->sec_idx
, map
->sec_offset
, def
->map_flags
);
1850 mmap_sz
= bpf_map_mmap_sz(map
->def
.value_size
, map
->def
.max_entries
);
1851 map
->mmaped
= mmap(NULL
, mmap_sz
, PROT_READ
| PROT_WRITE
,
1852 MAP_SHARED
| MAP_ANONYMOUS
, -1, 0);
1853 if (map
->mmaped
== MAP_FAILED
) {
1856 pr_warn("failed to alloc map '%s' content buffer: %d\n",
1858 zfree(&map
->real_name
);
1864 memcpy(map
->mmaped
, data
, data_sz
);
1866 pr_debug("map %td is \"%s\"\n", map
- obj
->maps
, map
->name
);
1870 static int bpf_object__init_global_data_maps(struct bpf_object
*obj
)
1872 struct elf_sec_desc
*sec_desc
;
1873 const char *sec_name
;
1874 int err
= 0, sec_idx
;
1877 * Populate obj->maps with libbpf internal maps.
1879 for (sec_idx
= 1; sec_idx
< obj
->efile
.sec_cnt
; sec_idx
++) {
1880 sec_desc
= &obj
->efile
.secs
[sec_idx
];
1882 /* Skip recognized sections with size 0. */
1883 if (!sec_desc
->data
|| sec_desc
->data
->d_size
== 0)
1886 switch (sec_desc
->sec_type
) {
1888 sec_name
= elf_sec_name(obj
, elf_sec_by_idx(obj
, sec_idx
));
1889 err
= bpf_object__init_internal_map(obj
, LIBBPF_MAP_DATA
,
1891 sec_desc
->data
->d_buf
,
1892 sec_desc
->data
->d_size
);
1895 obj
->has_rodata
= true;
1896 sec_name
= elf_sec_name(obj
, elf_sec_by_idx(obj
, sec_idx
));
1897 err
= bpf_object__init_internal_map(obj
, LIBBPF_MAP_RODATA
,
1899 sec_desc
->data
->d_buf
,
1900 sec_desc
->data
->d_size
);
1903 sec_name
= elf_sec_name(obj
, elf_sec_by_idx(obj
, sec_idx
));
1904 err
= bpf_object__init_internal_map(obj
, LIBBPF_MAP_BSS
,
1907 sec_desc
->data
->d_size
);
1920 static struct extern_desc
*find_extern_by_name(const struct bpf_object
*obj
,
1925 for (i
= 0; i
< obj
->nr_extern
; i
++) {
1926 if (strcmp(obj
->externs
[i
].name
, name
) == 0)
1927 return &obj
->externs
[i
];
1932 static int set_kcfg_value_tri(struct extern_desc
*ext
, void *ext_val
,
1935 switch (ext
->kcfg
.type
) {
1938 pr_warn("extern (kcfg) '%s': value '%c' implies tristate or char type\n",
1942 *(bool *)ext_val
= value
== 'y' ? true : false;
1946 *(enum libbpf_tristate
*)ext_val
= TRI_YES
;
1947 else if (value
== 'm')
1948 *(enum libbpf_tristate
*)ext_val
= TRI_MODULE
;
1949 else /* value == 'n' */
1950 *(enum libbpf_tristate
*)ext_val
= TRI_NO
;
1953 *(char *)ext_val
= value
;
1959 pr_warn("extern (kcfg) '%s': value '%c' implies bool, tristate, or char type\n",
1967 static int set_kcfg_value_str(struct extern_desc
*ext
, char *ext_val
,
1972 if (ext
->kcfg
.type
!= KCFG_CHAR_ARR
) {
1973 pr_warn("extern (kcfg) '%s': value '%s' implies char array type\n",
1978 len
= strlen(value
);
1979 if (value
[len
- 1] != '"') {
1980 pr_warn("extern (kcfg) '%s': invalid string config '%s'\n",
1987 if (len
>= ext
->kcfg
.sz
) {
1988 pr_warn("extern (kcfg) '%s': long string '%s' of (%zu bytes) truncated to %d bytes\n",
1989 ext
->name
, value
, len
, ext
->kcfg
.sz
- 1);
1990 len
= ext
->kcfg
.sz
- 1;
1992 memcpy(ext_val
, value
+ 1, len
);
1993 ext_val
[len
] = '\0';
1998 static int parse_u64(const char *value
, __u64
*res
)
2004 *res
= strtoull(value
, &value_end
, 0);
2007 pr_warn("failed to parse '%s' as integer: %d\n", value
, err
);
2011 pr_warn("failed to parse '%s' as integer completely\n", value
);
2017 static bool is_kcfg_value_in_range(const struct extern_desc
*ext
, __u64 v
)
2019 int bit_sz
= ext
->kcfg
.sz
* 8;
2021 if (ext
->kcfg
.sz
== 8)
2024 /* Validate that value stored in u64 fits in integer of `ext->sz`
2025 * bytes size without any loss of information. If the target integer
2026 * is signed, we rely on the following limits of integer type of
2027 * Y bits and subsequent transformation:
2029 * -2^(Y-1) <= X <= 2^(Y-1) - 1
2030 * 0 <= X + 2^(Y-1) <= 2^Y - 1
2031 * 0 <= X + 2^(Y-1) < 2^Y
2033 * For unsigned target integer, check that all the (64 - Y) bits are
2036 if (ext
->kcfg
.is_signed
)
2037 return v
+ (1ULL << (bit_sz
- 1)) < (1ULL << bit_sz
);
2039 return (v
>> bit_sz
) == 0;
2042 static int set_kcfg_value_num(struct extern_desc
*ext
, void *ext_val
,
2045 if (ext
->kcfg
.type
!= KCFG_INT
&& ext
->kcfg
.type
!= KCFG_CHAR
&&
2046 ext
->kcfg
.type
!= KCFG_BOOL
) {
2047 pr_warn("extern (kcfg) '%s': value '%llu' implies integer, char, or boolean type\n",
2048 ext
->name
, (unsigned long long)value
);
2051 if (ext
->kcfg
.type
== KCFG_BOOL
&& value
> 1) {
2052 pr_warn("extern (kcfg) '%s': value '%llu' isn't boolean compatible\n",
2053 ext
->name
, (unsigned long long)value
);
2057 if (!is_kcfg_value_in_range(ext
, value
)) {
2058 pr_warn("extern (kcfg) '%s': value '%llu' doesn't fit in %d bytes\n",
2059 ext
->name
, (unsigned long long)value
, ext
->kcfg
.sz
);
2062 switch (ext
->kcfg
.sz
) {
2064 *(__u8
*)ext_val
= value
;
2067 *(__u16
*)ext_val
= value
;
2070 *(__u32
*)ext_val
= value
;
2073 *(__u64
*)ext_val
= value
;
2082 static int bpf_object__process_kconfig_line(struct bpf_object
*obj
,
2083 char *buf
, void *data
)
2085 struct extern_desc
*ext
;
2091 if (!str_has_pfx(buf
, "CONFIG_"))
2094 sep
= strchr(buf
, '=');
2096 pr_warn("failed to parse '%s': no separator\n", buf
);
2100 /* Trim ending '\n' */
2102 if (buf
[len
- 1] == '\n')
2103 buf
[len
- 1] = '\0';
2104 /* Split on '=' and ensure that a value is present. */
2108 pr_warn("failed to parse '%s': no value\n", buf
);
2112 ext
= find_extern_by_name(obj
, buf
);
2113 if (!ext
|| ext
->is_set
)
2116 ext_val
= data
+ ext
->kcfg
.data_off
;
2120 case 'y': case 'n': case 'm':
2121 err
= set_kcfg_value_tri(ext
, ext_val
, *value
);
2124 err
= set_kcfg_value_str(ext
, ext_val
, value
);
2127 /* assume integer */
2128 err
= parse_u64(value
, &num
);
2130 pr_warn("extern (kcfg) '%s': value '%s' isn't a valid integer\n", ext
->name
, value
);
2133 if (ext
->kcfg
.type
!= KCFG_INT
&& ext
->kcfg
.type
!= KCFG_CHAR
) {
2134 pr_warn("extern (kcfg) '%s': value '%s' implies integer type\n", ext
->name
, value
);
2137 err
= set_kcfg_value_num(ext
, ext_val
, num
);
2142 pr_debug("extern (kcfg) '%s': set to %s\n", ext
->name
, value
);
2146 static int bpf_object__read_kconfig_file(struct bpf_object
*obj
, void *data
)
2154 len
= snprintf(buf
, PATH_MAX
, "/boot/config-%s", uts
.release
);
2157 else if (len
>= PATH_MAX
)
2158 return -ENAMETOOLONG
;
2160 /* gzopen also accepts uncompressed files. */
2161 file
= gzopen(buf
, "re");
2163 file
= gzopen("/proc/config.gz", "re");
2166 pr_warn("failed to open system Kconfig\n");
2170 while (gzgets(file
, buf
, sizeof(buf
))) {
2171 err
= bpf_object__process_kconfig_line(obj
, buf
, data
);
2173 pr_warn("error parsing system Kconfig line '%s': %d\n",
2184 static int bpf_object__read_kconfig_mem(struct bpf_object
*obj
,
2185 const char *config
, void *data
)
2191 file
= fmemopen((void *)config
, strlen(config
), "r");
2194 pr_warn("failed to open in-memory Kconfig: %d\n", err
);
2198 while (fgets(buf
, sizeof(buf
), file
)) {
2199 err
= bpf_object__process_kconfig_line(obj
, buf
, data
);
2201 pr_warn("error parsing in-memory Kconfig line '%s': %d\n",
2211 static int bpf_object__init_kconfig_map(struct bpf_object
*obj
)
2213 struct extern_desc
*last_ext
= NULL
, *ext
;
2217 for (i
= 0; i
< obj
->nr_extern
; i
++) {
2218 ext
= &obj
->externs
[i
];
2219 if (ext
->type
== EXT_KCFG
)
2226 map_sz
= last_ext
->kcfg
.data_off
+ last_ext
->kcfg
.sz
;
2227 err
= bpf_object__init_internal_map(obj
, LIBBPF_MAP_KCONFIG
,
2228 ".kconfig", obj
->efile
.symbols_shndx
,
2233 obj
->kconfig_map_idx
= obj
->nr_maps
- 1;
2238 const struct btf_type
*
2239 skip_mods_and_typedefs(const struct btf
*btf
, __u32 id
, __u32
*res_id
)
2241 const struct btf_type
*t
= btf__type_by_id(btf
, id
);
2246 while (btf_is_mod(t
) || btf_is_typedef(t
)) {
2249 t
= btf__type_by_id(btf
, t
->type
);
2255 static const struct btf_type
*
2256 resolve_func_ptr(const struct btf
*btf
, __u32 id
, __u32
*res_id
)
2258 const struct btf_type
*t
;
2260 t
= skip_mods_and_typedefs(btf
, id
, NULL
);
2264 t
= skip_mods_and_typedefs(btf
, t
->type
, res_id
);
2266 return btf_is_func_proto(t
) ? t
: NULL
;
2269 static const char *__btf_kind_str(__u16 kind
)
2272 case BTF_KIND_UNKN
: return "void";
2273 case BTF_KIND_INT
: return "int";
2274 case BTF_KIND_PTR
: return "ptr";
2275 case BTF_KIND_ARRAY
: return "array";
2276 case BTF_KIND_STRUCT
: return "struct";
2277 case BTF_KIND_UNION
: return "union";
2278 case BTF_KIND_ENUM
: return "enum";
2279 case BTF_KIND_FWD
: return "fwd";
2280 case BTF_KIND_TYPEDEF
: return "typedef";
2281 case BTF_KIND_VOLATILE
: return "volatile";
2282 case BTF_KIND_CONST
: return "const";
2283 case BTF_KIND_RESTRICT
: return "restrict";
2284 case BTF_KIND_FUNC
: return "func";
2285 case BTF_KIND_FUNC_PROTO
: return "func_proto";
2286 case BTF_KIND_VAR
: return "var";
2287 case BTF_KIND_DATASEC
: return "datasec";
2288 case BTF_KIND_FLOAT
: return "float";
2289 case BTF_KIND_DECL_TAG
: return "decl_tag";
2290 case BTF_KIND_TYPE_TAG
: return "type_tag";
2291 case BTF_KIND_ENUM64
: return "enum64";
2292 default: return "unknown";
2296 const char *btf_kind_str(const struct btf_type
*t
)
2298 return __btf_kind_str(btf_kind(t
));
/*
 * Fetch integer attribute of BTF map definition. Such attributes are
 * represented using a pointer to an array, in which dimensionality of array
 * encodes specified integer value. E.g., int (*type)[BPF_MAP_TYPE_ARRAY];
 * encodes `type => BPF_MAP_TYPE_ARRAY` key/value pair completely using BTF
 * type definition, while using only sizeof(void *) space in ELF data section.
 */
static bool get_map_field_int(const char *map_name, const struct btf *btf,
			      const struct btf_member *m, __u32 *res)
{
	const struct btf_type *t = skip_mods_and_typedefs(btf, m->type, NULL);
	const char *name = btf__name_by_offset(btf, m->name_off);
	const struct btf_array *arr_info;
	const struct btf_type *arr_t;

	if (!btf_is_ptr(t)) {
		pr_warn("map '%s': attr '%s': expected PTR, got %s.\n",
			map_name, name, btf_kind_str(t));
		return false;
	}

	arr_t = btf__type_by_id(btf, t->type);
	if (!arr_t) {
		pr_warn("map '%s': attr '%s': type [%u] not found.\n",
			map_name, name, t->type);
		return false;
	}
	if (!btf_is_array(arr_t)) {
		pr_warn("map '%s': attr '%s': expected ARRAY, got %s.\n",
			map_name, name, btf_kind_str(arr_t));
		return false;
	}
	arr_info = btf_array(arr_t);
	*res = arr_info->nelems;
	return true;
}
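/* Illustrative example (not part of libbpf): with the __uint() convenience
 * macro from bpf_helpers.h, a BPF program author writes
 *
 *   __uint(max_entries, 64);
 *
 * which expands to roughly
 *
 *   int (*max_entries)[64];
 *
 * i.e. a pointer to a 64-element array. get_map_field_int() above walks
 * PTR -> ARRAY and recovers the value 64 from the array's nelems, so the
 * integer costs only sizeof(void *) in the ELF .maps section.
 */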
static int pathname_concat(char *buf, size_t buf_sz, const char *path, const char *name)
{
	int len;

	len = snprintf(buf, buf_sz, "%s/%s", path, name);
	if (len < 0)
		return -EINVAL;
	if (len >= buf_sz)
		return -ENAMETOOLONG;

	return 0;
}
static int build_map_pin_path(struct bpf_map *map, const char *path)
{
	char buf[PATH_MAX];
	int err;

	if (!path)
		path = BPF_FS_DEFAULT_PATH;

	err = pathname_concat(buf, sizeof(buf), path, bpf_map__name(map));
	if (err)
		return err;

	return bpf_map__set_pin_path(map, buf);
}
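/* Illustrative example (not part of libbpf): for a map named "my_stats" and
 * no user-provided pin_root_path, build_map_pin_path() produces
 * "/sys/fs/bpf/my_stats" (BPF_FS_DEFAULT_PATH plus "/" plus the map name)
 * and records it as the map's pin path.
 */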
/* should match definition in bpf_helpers.h */
enum libbpf_pin_type {
	LIBBPF_PIN_NONE,
	/* PIN_BY_NAME: pin maps by name (in /sys/fs/bpf by default) */
	LIBBPF_PIN_BY_NAME,
};
int parse_btf_map_def(const char *map_name, struct btf *btf,
		      const struct btf_type *def_t, bool strict,
		      struct btf_map_def *map_def, struct btf_map_def *inner_def)
{
	const struct btf_type *t;
	const struct btf_member *m;
	bool is_inner = inner_def == NULL;
	int vlen, i;

	vlen = btf_vlen(def_t);
	m = btf_members(def_t);
	for (i = 0; i < vlen; i++, m++) {
		const char *name = btf__name_by_offset(btf, m->name_off);

		if (!name) {
			pr_warn("map '%s': invalid field #%d.\n", map_name, i);
			return -EINVAL;
		}
		if (strcmp(name, "type") == 0) {
			if (!get_map_field_int(map_name, btf, m, &map_def->map_type))
				return -EINVAL;
			map_def->parts |= MAP_DEF_MAP_TYPE;
		} else if (strcmp(name, "max_entries") == 0) {
			if (!get_map_field_int(map_name, btf, m, &map_def->max_entries))
				return -EINVAL;
			map_def->parts |= MAP_DEF_MAX_ENTRIES;
		} else if (strcmp(name, "map_flags") == 0) {
			if (!get_map_field_int(map_name, btf, m, &map_def->map_flags))
				return -EINVAL;
			map_def->parts |= MAP_DEF_MAP_FLAGS;
		} else if (strcmp(name, "numa_node") == 0) {
			if (!get_map_field_int(map_name, btf, m, &map_def->numa_node))
				return -EINVAL;
			map_def->parts |= MAP_DEF_NUMA_NODE;
		} else if (strcmp(name, "key_size") == 0) {
			__u32 sz;

			if (!get_map_field_int(map_name, btf, m, &sz))
				return -EINVAL;
			if (map_def->key_size && map_def->key_size != sz) {
				pr_warn("map '%s': conflicting key size %u != %u.\n",
					map_name, map_def->key_size, sz);
				return -EINVAL;
			}
			map_def->key_size = sz;
			map_def->parts |= MAP_DEF_KEY_SIZE;
		} else if (strcmp(name, "key") == 0) {
			__s64 sz;

			t = btf__type_by_id(btf, m->type);
			if (!t) {
				pr_warn("map '%s': key type [%d] not found.\n",
					map_name, m->type);
				return -EINVAL;
			}
			if (!btf_is_ptr(t)) {
				pr_warn("map '%s': key spec is not PTR: %s.\n",
					map_name, btf_kind_str(t));
				return -EINVAL;
			}
			sz = btf__resolve_size(btf, t->type);
			if (sz < 0) {
				pr_warn("map '%s': can't determine key size for type [%u]: %zd.\n",
					map_name, t->type, (ssize_t)sz);
				return sz;
			}
			if (map_def->key_size && map_def->key_size != sz) {
				pr_warn("map '%s': conflicting key size %u != %zd.\n",
					map_name, map_def->key_size, (ssize_t)sz);
				return -EINVAL;
			}
			map_def->key_size = sz;
			map_def->key_type_id = t->type;
			map_def->parts |= MAP_DEF_KEY_SIZE | MAP_DEF_KEY_TYPE;
		} else if (strcmp(name, "value_size") == 0) {
			__u32 sz;

			if (!get_map_field_int(map_name, btf, m, &sz))
				return -EINVAL;
			if (map_def->value_size && map_def->value_size != sz) {
				pr_warn("map '%s': conflicting value size %u != %u.\n",
					map_name, map_def->value_size, sz);
				return -EINVAL;
			}
			map_def->value_size = sz;
			map_def->parts |= MAP_DEF_VALUE_SIZE;
		} else if (strcmp(name, "value") == 0) {
			__s64 sz;

			t = btf__type_by_id(btf, m->type);
			if (!t) {
				pr_warn("map '%s': value type [%d] not found.\n",
					map_name, m->type);
				return -EINVAL;
			}
			if (!btf_is_ptr(t)) {
				pr_warn("map '%s': value spec is not PTR: %s.\n",
					map_name, btf_kind_str(t));
				return -EINVAL;
			}
			sz = btf__resolve_size(btf, t->type);
			if (sz < 0) {
				pr_warn("map '%s': can't determine value size for type [%u]: %zd.\n",
					map_name, t->type, (ssize_t)sz);
				return sz;
			}
			if (map_def->value_size && map_def->value_size != sz) {
				pr_warn("map '%s': conflicting value size %u != %zd.\n",
					map_name, map_def->value_size, (ssize_t)sz);
				return -EINVAL;
			}
			map_def->value_size = sz;
			map_def->value_type_id = t->type;
			map_def->parts |= MAP_DEF_VALUE_SIZE | MAP_DEF_VALUE_TYPE;
		}
		else if (strcmp(name, "values") == 0) {
			bool is_map_in_map = bpf_map_type__is_map_in_map(map_def->map_type);
			bool is_prog_array = map_def->map_type == BPF_MAP_TYPE_PROG_ARRAY;
			const char *desc = is_map_in_map ? "map-in-map inner" : "prog-array value";
			char inner_map_name[128];
			int err;

			if (is_inner) {
				pr_warn("map '%s': multi-level inner maps not supported.\n",
					map_name);
				return -ENOTSUP;
			}
			if (i != vlen - 1) {
				pr_warn("map '%s': '%s' member should be last.\n",
					map_name, name);
				return -EINVAL;
			}
			if (!is_map_in_map && !is_prog_array) {
				pr_warn("map '%s': should be map-in-map or prog-array.\n",
					map_name);
				return -ENOTSUP;
			}
			if (map_def->value_size && map_def->value_size != 4) {
				pr_warn("map '%s': conflicting value size %u != 4.\n",
					map_name, map_def->value_size);
				return -EINVAL;
			}
			map_def->value_size = 4;
			t = btf__type_by_id(btf, m->type);
			if (!t) {
				pr_warn("map '%s': %s type [%d] not found.\n",
					map_name, desc, m->type);
				return -EINVAL;
			}
			if (!btf_is_array(t) || btf_array(t)->nelems) {
				pr_warn("map '%s': %s spec is not a zero-sized array.\n",
					map_name, desc);
				return -EINVAL;
			}
			t = skip_mods_and_typedefs(btf, btf_array(t)->type, NULL);
			if (!btf_is_ptr(t)) {
				pr_warn("map '%s': %s def is of unexpected kind %s.\n",
					map_name, desc, btf_kind_str(t));
				return -EINVAL;
			}
			t = skip_mods_and_typedefs(btf, t->type, NULL);
			if (is_prog_array) {
				if (!btf_is_func_proto(t)) {
					pr_warn("map '%s': prog-array value def is of unexpected kind %s.\n",
						map_name, btf_kind_str(t));
					return -EINVAL;
				}
				continue;
			}
			if (!btf_is_struct(t)) {
				pr_warn("map '%s': map-in-map inner def is of unexpected kind %s.\n",
					map_name, btf_kind_str(t));
				return -EINVAL;
			}

			snprintf(inner_map_name, sizeof(inner_map_name), "%s.inner", map_name);
			err = parse_btf_map_def(inner_map_name, btf, t, strict, inner_def, NULL);
			if (err)
				return err;

			map_def->parts |= MAP_DEF_INNER_MAP;
		} else if (strcmp(name, "pinning") == 0) {
			__u32 val;

			if (is_inner) {
				pr_warn("map '%s': inner def can't be pinned.\n", map_name);
				return -EINVAL;
			}
			if (!get_map_field_int(map_name, btf, m, &val))
				return -EINVAL;
			if (val != LIBBPF_PIN_NONE && val != LIBBPF_PIN_BY_NAME) {
				pr_warn("map '%s': invalid pinning value %u.\n",
					map_name, val);
				return -EINVAL;
			}
			map_def->pinning = val;
			map_def->parts |= MAP_DEF_PINNING;
		} else if (strcmp(name, "map_extra") == 0) {
			__u32 map_extra;

			if (!get_map_field_int(map_name, btf, m, &map_extra))
				return -EINVAL;
			map_def->map_extra = map_extra;
			map_def->parts |= MAP_DEF_MAP_EXTRA;
		} else {
			if (strict) {
				pr_warn("map '%s': unknown field '%s'.\n", map_name, name);
				return -ENOTSUP;
			}
			pr_debug("map '%s': ignoring unknown field '%s'.\n", map_name, name);
		}
	}

	if (map_def->map_type == BPF_MAP_TYPE_UNSPEC) {
		pr_warn("map '%s': map type isn't specified.\n", map_name);
		return -EINVAL;
	}

	return 0;
}
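/* Illustrative example (not part of libbpf): a BTF-defined map written in
 * BPF program C with the bpf_helpers.h macros, whose fields map 1:1 onto the
 * member names parsed by parse_btf_map_def() above:
 *
 *   struct {
 *       __uint(type, BPF_MAP_TYPE_HASH);     // "type"
 *       __uint(max_entries, 1024);           // "max_entries"
 *       __type(key, __u32);                  // "key" (PTR to key type)
 *       __type(value, struct my_val);        // "value" (PTR to value type)
 *       __uint(pinning, LIBBPF_PIN_BY_NAME); // "pinning"
 *   } my_map SEC(".maps");
 *
 * struct my_val is assumed to be defined by the program itself.
 */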
static size_t adjust_ringbuf_sz(size_t sz)
{
	__u32 page_sz = sysconf(_SC_PAGE_SIZE);
	__u32 mul;

	/* if user forgot to set any size, make sure they see error */
	if (!sz)
		return 0;
	/* Kernel expects BPF_MAP_TYPE_RINGBUF's max_entries to be
	 * a power-of-2 multiple of kernel's page size. If user diligently
	 * satisfied these conditions, pass the size through.
	 */
	if ((sz % page_sz) == 0 && is_pow_of_2(sz / page_sz))
		return sz;

	/* Otherwise find closest (page_sz * power_of_2) product bigger than
	 * user-set size to satisfy both user size request and kernel
	 * requirements and substitute correct max_entries for map creation.
	 */
	for (mul = 1; mul <= UINT_MAX / page_sz; mul <<= 1) {
		if (mul * page_sz > sz)
			return mul * page_sz;
	}

	/* if it's impossible to satisfy the conditions (i.e., user size is
	 * very close to UINT_MAX but is not a power-of-2 multiple of
	 * page_size) then just return original size and let kernel reject it
	 */
	return sz;
}
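/* Illustrative example (not part of libbpf): with a 4096-byte page size, a
 * requested ringbuf size of 1,000,000 bytes is neither page-aligned nor a
 * power-of-2 multiple of 4096, so the loop above rounds it up to the next
 * page_sz * 2^n product: 4096 * 256 = 1,048,576 bytes. A request of exactly
 * 262144 bytes (4096 * 64) passes through unchanged.
 */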
static bool map_is_ringbuf(const struct bpf_map *map)
{
	return map->def.type == BPF_MAP_TYPE_RINGBUF ||
	       map->def.type == BPF_MAP_TYPE_USER_RINGBUF;
}
static void fill_map_from_def(struct bpf_map *map, const struct btf_map_def *def)
{
	map->def.type = def->map_type;
	map->def.key_size = def->key_size;
	map->def.value_size = def->value_size;
	map->def.max_entries = def->max_entries;
	map->def.map_flags = def->map_flags;
	map->map_extra = def->map_extra;

	map->numa_node = def->numa_node;
	map->btf_key_type_id = def->key_type_id;
	map->btf_value_type_id = def->value_type_id;

	/* auto-adjust BPF ringbuf map max_entries to be a multiple of page size */
	if (map_is_ringbuf(map))
		map->def.max_entries = adjust_ringbuf_sz(map->def.max_entries);

	if (def->parts & MAP_DEF_MAP_TYPE)
		pr_debug("map '%s': found type = %u.\n", map->name, def->map_type);

	if (def->parts & MAP_DEF_KEY_TYPE)
		pr_debug("map '%s': found key [%u], sz = %u.\n",
			 map->name, def->key_type_id, def->key_size);
	else if (def->parts & MAP_DEF_KEY_SIZE)
		pr_debug("map '%s': found key_size = %u.\n", map->name, def->key_size);

	if (def->parts & MAP_DEF_VALUE_TYPE)
		pr_debug("map '%s': found value [%u], sz = %u.\n",
			 map->name, def->value_type_id, def->value_size);
	else if (def->parts & MAP_DEF_VALUE_SIZE)
		pr_debug("map '%s': found value_size = %u.\n", map->name, def->value_size);

	if (def->parts & MAP_DEF_MAX_ENTRIES)
		pr_debug("map '%s': found max_entries = %u.\n", map->name, def->max_entries);
	if (def->parts & MAP_DEF_MAP_FLAGS)
		pr_debug("map '%s': found map_flags = 0x%x.\n", map->name, def->map_flags);
	if (def->parts & MAP_DEF_MAP_EXTRA)
		pr_debug("map '%s': found map_extra = 0x%llx.\n", map->name,
			 (unsigned long long)def->map_extra);
	if (def->parts & MAP_DEF_PINNING)
		pr_debug("map '%s': found pinning = %u.\n", map->name, def->pinning);
	if (def->parts & MAP_DEF_NUMA_NODE)
		pr_debug("map '%s': found numa_node = %u.\n", map->name, def->numa_node);

	if (def->parts & MAP_DEF_INNER_MAP)
		pr_debug("map '%s': found inner map definition.\n", map->name);
}

static const char *btf_var_linkage_str(__u32 linkage)
{
	switch (linkage) {
	case BTF_VAR_STATIC: return "static";
	case BTF_VAR_GLOBAL_ALLOCATED: return "global";
	case BTF_VAR_GLOBAL_EXTERN: return "extern";
	default: return "unknown";
	}
}
static int bpf_object__init_user_btf_map(struct bpf_object *obj,
					 const struct btf_type *sec,
					 int var_idx, int sec_idx,
					 const Elf_Data *data, bool strict,
					 const char *pin_root_path)
{
	struct btf_map_def map_def = {}, inner_def = {};
	const struct btf_type *var, *def;
	const struct btf_var_secinfo *vi;
	const struct btf_var *var_extra;
	const char *map_name;
	struct bpf_map *map;
	int err;

	vi = btf_var_secinfos(sec) + var_idx;
	var = btf__type_by_id(obj->btf, vi->type);
	var_extra = btf_var(var);
	map_name = btf__name_by_offset(obj->btf, var->name_off);

	if (map_name == NULL || map_name[0] == '\0') {
		pr_warn("map #%d: empty name.\n", var_idx);
		return -EINVAL;
	}
	if ((__u64)vi->offset + vi->size > data->d_size) {
		pr_warn("map '%s' BTF data is corrupted.\n", map_name);
		return -EINVAL;
	}
	if (!btf_is_var(var)) {
		pr_warn("map '%s': unexpected var kind %s.\n",
			map_name, btf_kind_str(var));
		return -EINVAL;
	}
	if (var_extra->linkage != BTF_VAR_GLOBAL_ALLOCATED) {
		pr_warn("map '%s': unsupported map linkage %s.\n",
			map_name, btf_var_linkage_str(var_extra->linkage));
		return -EOPNOTSUPP;
	}

	def = skip_mods_and_typedefs(obj->btf, var->type, NULL);
	if (!btf_is_struct(def)) {
		pr_warn("map '%s': unexpected def kind %s.\n",
			map_name, btf_kind_str(var));
		return -EINVAL;
	}
	if (def->size > vi->size) {
		pr_warn("map '%s': invalid def size.\n", map_name);
		return -EINVAL;
	}

	map = bpf_object__add_map(obj);
	if (IS_ERR(map))
		return PTR_ERR(map);
	map->name = strdup(map_name);
	if (!map->name) {
		pr_warn("map '%s': failed to alloc map name.\n", map_name);
		return -ENOMEM;
	}
	map->libbpf_type = LIBBPF_MAP_UNSPEC;
	map->def.type = BPF_MAP_TYPE_UNSPEC;
	map->sec_idx = sec_idx;
	map->sec_offset = vi->offset;
	map->btf_var_idx = var_idx;
	pr_debug("map '%s': at sec_idx %d, offset %zu.\n",
		 map_name, map->sec_idx, map->sec_offset);

	err = parse_btf_map_def(map->name, obj->btf, def, strict, &map_def, &inner_def);
	if (err)
		return err;

	fill_map_from_def(map, &map_def);

	if (map_def.pinning == LIBBPF_PIN_BY_NAME) {
		err = build_map_pin_path(map, pin_root_path);
		if (err) {
			pr_warn("map '%s': couldn't build pin path.\n", map->name);
			return err;
		}
	}

	if (map_def.parts & MAP_DEF_INNER_MAP) {
		map->inner_map = calloc(1, sizeof(*map->inner_map));
		if (!map->inner_map)
			return -ENOMEM;
		map->inner_map->fd = create_placeholder_fd();
		if (map->inner_map->fd < 0)
			return map->inner_map->fd;
		map->inner_map->sec_idx = sec_idx;
		map->inner_map->name = malloc(strlen(map_name) + sizeof(".inner") + 1);
		if (!map->inner_map->name)
			return -ENOMEM;
		sprintf(map->inner_map->name, "%s.inner", map_name);

		fill_map_from_def(map->inner_map, &inner_def);
	}

	err = map_fill_btf_type_info(obj, map);
	if (err)
		return err;

	return 0;
}
static int bpf_object__init_user_btf_maps(struct bpf_object *obj, bool strict,
					  const char *pin_root_path)
{
	const struct btf_type *sec = NULL;
	int nr_types, i, vlen, err;
	const struct btf_type *t;
	const char *name;
	Elf_Data *data;
	Elf_Scn *scn;

	if (obj->efile.btf_maps_shndx < 0)
		return 0;

	scn = elf_sec_by_idx(obj, obj->efile.btf_maps_shndx);
	data = elf_sec_data(obj, scn);
	if (!scn || !data) {
		pr_warn("elf: failed to get %s map definitions for %s\n",
			MAPS_ELF_SEC, obj->path);
		return -EINVAL;
	}

	nr_types = btf__type_cnt(obj->btf);
	for (i = 1; i < nr_types; i++) {
		t = btf__type_by_id(obj->btf, i);
		if (!btf_is_datasec(t))
			continue;
		name = btf__name_by_offset(obj->btf, t->name_off);
		if (strcmp(name, MAPS_ELF_SEC) == 0) {
			sec = t;
			obj->efile.btf_maps_sec_btf_id = i;
			break;
		}
	}

	if (!sec) {
		pr_warn("DATASEC '%s' not found.\n", MAPS_ELF_SEC);
		return -ENOENT;
	}

	vlen = btf_vlen(sec);
	for (i = 0; i < vlen; i++) {
		err = bpf_object__init_user_btf_map(obj, sec, i,
						    obj->efile.btf_maps_shndx,
						    data, strict, pin_root_path);
		if (err)
			return err;
	}

	return 0;
}

static int bpf_object__init_maps(struct bpf_object *obj,
				 const struct bpf_object_open_opts *opts)
{
	const char *pin_root_path;
	bool strict;
	int err = 0;

	strict = !OPTS_GET(opts, relaxed_maps, false);
	pin_root_path = OPTS_GET(opts, pin_root_path, NULL);

	err = bpf_object__init_user_btf_maps(obj, strict, pin_root_path);
	err = err ?: bpf_object__init_global_data_maps(obj);
	err = err ?: bpf_object__init_kconfig_map(obj);
	err = err ?: bpf_object_init_struct_ops(obj);

	return err;
}
static bool section_have_execinstr(struct bpf_object *obj, int idx)
{
	Elf64_Shdr *sh;

	sh = elf_sec_hdr(obj, elf_sec_by_idx(obj, idx));
	if (!sh)
		return false;

	return sh->sh_flags & SHF_EXECINSTR;
}

static bool starts_with_qmark(const char *s)
{
	return s && s[0] == '?';
}

static bool btf_needs_sanitization(struct bpf_object *obj)
{
	bool has_func_global = kernel_supports(obj, FEAT_BTF_GLOBAL_FUNC);
	bool has_datasec = kernel_supports(obj, FEAT_BTF_DATASEC);
	bool has_float = kernel_supports(obj, FEAT_BTF_FLOAT);
	bool has_func = kernel_supports(obj, FEAT_BTF_FUNC);
	bool has_decl_tag = kernel_supports(obj, FEAT_BTF_DECL_TAG);
	bool has_type_tag = kernel_supports(obj, FEAT_BTF_TYPE_TAG);
	bool has_enum64 = kernel_supports(obj, FEAT_BTF_ENUM64);
	bool has_qmark_datasec = kernel_supports(obj, FEAT_BTF_QMARK_DATASEC);

	return !has_func || !has_datasec || !has_func_global || !has_float ||
	       !has_decl_tag || !has_type_tag || !has_enum64 || !has_qmark_datasec;
}
static int bpf_object__sanitize_btf(struct bpf_object *obj, struct btf *btf)
{
	bool has_func_global = kernel_supports(obj, FEAT_BTF_GLOBAL_FUNC);
	bool has_datasec = kernel_supports(obj, FEAT_BTF_DATASEC);
	bool has_float = kernel_supports(obj, FEAT_BTF_FLOAT);
	bool has_func = kernel_supports(obj, FEAT_BTF_FUNC);
	bool has_decl_tag = kernel_supports(obj, FEAT_BTF_DECL_TAG);
	bool has_type_tag = kernel_supports(obj, FEAT_BTF_TYPE_TAG);
	bool has_enum64 = kernel_supports(obj, FEAT_BTF_ENUM64);
	bool has_qmark_datasec = kernel_supports(obj, FEAT_BTF_QMARK_DATASEC);
	int enum64_placeholder_id = 0;
	struct btf_type *t;
	int i, j, vlen;

	for (i = 1; i < btf__type_cnt(btf); i++) {
		t = (struct btf_type *)btf__type_by_id(btf, i);

		if ((!has_datasec && btf_is_var(t)) || (!has_decl_tag && btf_is_decl_tag(t))) {
			/* replace VAR/DECL_TAG with INT */
			t->info = BTF_INFO_ENC(BTF_KIND_INT, 0, 0);
			/*
			 * using size = 1 is the safest choice, 4 will be too
			 * big and cause kernel BTF validation failure if
			 * original variable took less than 4 bytes
			 */
			t->size = 1;
			*(int *)(t + 1) = BTF_INT_ENC(0, 0, 8);
		} else if (!has_datasec && btf_is_datasec(t)) {
			/* replace DATASEC with STRUCT */
			const struct btf_var_secinfo *v = btf_var_secinfos(t);
			struct btf_member *m = btf_members(t);
			struct btf_type *vt;
			char *name;

			name = (char *)btf__name_by_offset(btf, t->name_off);
			while (*name) {
				if (*name == '.' || *name == '?')
					*name = '_';
				name++;
			}

			vlen = btf_vlen(t);
			t->info = BTF_INFO_ENC(BTF_KIND_STRUCT, 0, vlen);
			for (j = 0; j < vlen; j++, v++, m++) {
				/* order of field assignments is important */
				m->offset = v->offset * 8;
				m->type = v->type;
				/* preserve variable name as member name */
				vt = (void *)btf__type_by_id(btf, v->type);
				m->name_off = vt->name_off;
			}
		} else if (!has_qmark_datasec && btf_is_datasec(t) &&
			   starts_with_qmark(btf__name_by_offset(btf, t->name_off))) {
			/* replace '?' prefix with '_' for DATASEC names */
			char *name;

			name = (char *)btf__name_by_offset(btf, t->name_off);
			name[0] = '_';
		} else if (!has_func && btf_is_func_proto(t)) {
			/* replace FUNC_PROTO with ENUM */
			vlen = btf_vlen(t);
			t->info = BTF_INFO_ENC(BTF_KIND_ENUM, 0, vlen);
			t->size = sizeof(__u32); /* kernel enforced */
		} else if (!has_func && btf_is_func(t)) {
			/* replace FUNC with TYPEDEF */
			t->info = BTF_INFO_ENC(BTF_KIND_TYPEDEF, 0, 0);
		} else if (!has_func_global && btf_is_func(t)) {
			/* replace BTF_FUNC_GLOBAL with BTF_FUNC_STATIC */
			t->info = BTF_INFO_ENC(BTF_KIND_FUNC, 0, 0);
		} else if (!has_float && btf_is_float(t)) {
			/* replace FLOAT with an equally-sized empty STRUCT;
			 * since C compilers do not accept e.g. "float" as a
			 * valid struct name, make it anonymous
			 */
			t->name_off = 0;
			t->info = BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 0);
		} else if (!has_type_tag && btf_is_type_tag(t)) {
			/* replace TYPE_TAG with a CONST */
			t->name_off = 0;
			t->info = BTF_INFO_ENC(BTF_KIND_CONST, 0, 0);
		} else if (!has_enum64 && btf_is_enum(t)) {
			/* clear the kflag */
			t->info = btf_type_info(btf_kind(t), btf_vlen(t), false);
		} else if (!has_enum64 && btf_is_enum64(t)) {
			/* replace ENUM64 with a union */
			struct btf_member *m;

			if (enum64_placeholder_id == 0) {
				enum64_placeholder_id = btf__add_int(btf, "enum64_placeholder", 1, 0);
				if (enum64_placeholder_id < 0)
					return enum64_placeholder_id;

				t = (struct btf_type *)btf__type_by_id(btf, i);
			}

			m = btf_members(t);
			vlen = btf_vlen(t);
			t->info = BTF_INFO_ENC(BTF_KIND_UNION, 0, vlen);
			for (j = 0; j < vlen; j++, m++) {
				m->type = enum64_placeholder_id;
				m->offset = 0;
			}
		}
	}

	return 0;
}
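/* Illustrative example (not part of libbpf): on a kernel without DATASEC
 * support, a DATASEC ".data" holding one VAR "counter" at byte offset 0 is
 * rewritten in place into a STRUCT "_data" (dots become underscores) with a
 * single member "counter" at bit offset 0, while the VAR itself is turned
 * into a harmless 1-byte INT. Only kind/info/size fields change, never type
 * IDs or record layout, which is what makes in-place sanitization safe.
 */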
static bool libbpf_needs_btf(const struct bpf_object *obj)
{
	return obj->efile.btf_maps_shndx >= 0 ||
	       obj->efile.has_st_ops ||
	       obj->nr_extern > 0;
}

static bool kernel_needs_btf(const struct bpf_object *obj)
{
	return obj->efile.has_st_ops;
}

static int bpf_object__init_btf(struct bpf_object *obj,
				Elf_Data *btf_data,
				Elf_Data *btf_ext_data)
{
	int err = -ENOENT;

	if (btf_data) {
		obj->btf = btf__new(btf_data->d_buf, btf_data->d_size);
		err = libbpf_get_error(obj->btf);
		if (err) {
			obj->btf = NULL;
			pr_warn("Error loading ELF section %s: %d.\n", BTF_ELF_SEC, err);
			goto out;
		}
		/* enforce 8-byte pointers for BPF-targeted BTFs */
		btf__set_pointer_size(obj->btf, 8);
	}
	if (btf_ext_data) {
		struct btf_ext_info *ext_segs[3];
		int seg_num, sec_num;

		if (!obj->btf) {
			pr_debug("Ignore ELF section %s because its depending ELF section %s is not found.\n",
				 BTF_EXT_ELF_SEC, BTF_ELF_SEC);
			goto out;
		}
		obj->btf_ext = btf_ext__new(btf_ext_data->d_buf, btf_ext_data->d_size);
		err = libbpf_get_error(obj->btf_ext);
		if (err) {
			pr_warn("Error loading ELF section %s: %d. Ignored and continue.\n",
				BTF_EXT_ELF_SEC, err);
			obj->btf_ext = NULL;
			goto out;
		}

		/* setup .BTF.ext to ELF section mapping */
		ext_segs[0] = &obj->btf_ext->func_info;
		ext_segs[1] = &obj->btf_ext->line_info;
		ext_segs[2] = &obj->btf_ext->core_relo_info;
		for (seg_num = 0; seg_num < ARRAY_SIZE(ext_segs); seg_num++) {
			struct btf_ext_info *seg = ext_segs[seg_num];
			const struct btf_ext_info_sec *sec;
			const char *sec_name;
			Elf_Scn *scn;

			if (seg->sec_cnt == 0)
				continue;

			seg->sec_idxs = calloc(seg->sec_cnt, sizeof(*seg->sec_idxs));
			if (!seg->sec_idxs) {
				err = -ENOMEM;
				goto out;
			}

			sec_num = 0;
			for_each_btf_ext_sec(seg, sec) {
				/* preventively increment index to avoid doing
				 * this before every continue below
				 */
				sec_num++;

				sec_name = btf__name_by_offset(obj->btf, sec->sec_name_off);
				if (str_is_empty(sec_name))
					continue;
				scn = elf_sec_by_name(obj, sec_name);
				if (!scn)
					continue;

				seg->sec_idxs[sec_num - 1] = elf_ndxscn(scn);
			}
		}
	}
out:
	if (err && libbpf_needs_btf(obj)) {
		pr_warn("BTF is required, but is missing or corrupted.\n");
		return err;
	}
	return 0;
}
static int compare_vsi_off(const void *_a, const void *_b)
{
	const struct btf_var_secinfo *a = _a;
	const struct btf_var_secinfo *b = _b;

	return a->offset - b->offset;
}

static int btf_fixup_datasec(struct bpf_object *obj, struct btf *btf,
			     struct btf_type *t)
{
	__u32 size = 0, i, vars = btf_vlen(t);
	const char *sec_name = btf__name_by_offset(btf, t->name_off);
	struct btf_var_secinfo *vsi;
	bool fixup_offsets = false;
	int err;

	if (!sec_name) {
		pr_debug("No name found in string section for DATASEC kind.\n");
		return -ENOENT;
	}

	/* Extern-backing datasecs (.ksyms, .kconfig) have their size and
	 * variable offsets set at the previous step. Further, not every
	 * extern BTF VAR has corresponding ELF symbol preserved, so we skip
	 * all fixups altogether for such sections and go straight to sorting
	 * VARs within their DATASEC.
	 */
	if (strcmp(sec_name, KCONFIG_SEC) == 0 || strcmp(sec_name, KSYMS_SEC) == 0)
		goto sort_vars;

	/* Clang leaves DATASEC size and VAR offsets as zeroes, so we need to
	 * fix this up. But BPF static linker already fixes this up and fills
	 * all the sizes and offsets during static linking. So this step has
	 * to be optional. But the STV_HIDDEN handling is non-optional for any
	 * non-extern DATASEC, so the variable fixup loop below handles both
	 * functions at the same time, paying the cost of BTF VAR <-> ELF
	 * symbol matching just once.
	 */
	if (t->size == 0) {
		err = find_elf_sec_sz(obj, sec_name, &size);
		if (err || !size) {
			pr_debug("sec '%s': failed to determine size from ELF: size %u, err %d\n",
				 sec_name, size, err);
			return -ENOENT;
		}

		t->size = size;
		fixup_offsets = true;
	}

	for (i = 0, vsi = btf_var_secinfos(t); i < vars; i++, vsi++) {
		const struct btf_type *t_var;
		struct btf_var *var;
		const char *var_name;
		Elf64_Sym *sym;

		t_var = btf__type_by_id(btf, vsi->type);
		if (!t_var || !btf_is_var(t_var)) {
			pr_debug("sec '%s': unexpected non-VAR type found\n", sec_name);
			return -EINVAL;
		}

		var = btf_var(t_var);
		if (var->linkage == BTF_VAR_STATIC || var->linkage == BTF_VAR_GLOBAL_EXTERN)
			continue;

		var_name = btf__name_by_offset(btf, t_var->name_off);
		if (!var_name) {
			pr_debug("sec '%s': failed to find name of DATASEC's member #%d\n",
				 sec_name, i);
			return -ENOENT;
		}

		sym = find_elf_var_sym(obj, var_name);
		if (IS_ERR(sym)) {
			pr_debug("sec '%s': failed to find ELF symbol for VAR '%s'\n",
				 sec_name, var_name);
			return -ENOENT;
		}

		if (fixup_offsets)
			vsi->offset = sym->st_value;

		/* if variable is a global/weak symbol, but has restricted
		 * (STV_HIDDEN or STV_INTERNAL) visibility, mark its BTF VAR
		 * as static. This follows similar logic for functions (BPF
		 * subprogs) and influences libbpf's further decisions about
		 * whether to make global data BPF array maps as
		 * BPF_F_MMAPABLE or not.
		 */
		if (ELF64_ST_VISIBILITY(sym->st_other) == STV_HIDDEN
		    || ELF64_ST_VISIBILITY(sym->st_other) == STV_INTERNAL)
			var->linkage = BTF_VAR_STATIC;
	}

sort_vars:
	qsort(btf_var_secinfos(t), vars, sizeof(*vsi), compare_vsi_off);
	return 0;
}
static int bpf_object_fixup_btf(struct bpf_object *obj)
{
	int i, n, err = 0;

	if (!obj->btf)
		return 0;

	n = btf__type_cnt(obj->btf);
	for (i = 1; i < n; i++) {
		struct btf_type *t = btf_type_by_id(obj->btf, i);

		/* Loader needs to fix up some of the things compiler
		 * couldn't get its hands on while emitting BTF. This
		 * is section size and global variable offset. We use
		 * the info from the ELF itself for this purpose.
		 */
		if (btf_is_datasec(t)) {
			err = btf_fixup_datasec(obj, obj->btf, t);
			if (err)
				return err;
		}
	}

	return 0;
}

static bool prog_needs_vmlinux_btf(struct bpf_program *prog)
{
	if (prog->type == BPF_PROG_TYPE_STRUCT_OPS ||
	    prog->type == BPF_PROG_TYPE_LSM)
		return true;

	/* BPF_PROG_TYPE_TRACING programs which do not attach to other programs
	 * also need vmlinux BTF
	 */
	if (prog->type == BPF_PROG_TYPE_TRACING && !prog->attach_prog_fd)
		return true;

	return false;
}

static bool map_needs_vmlinux_btf(struct bpf_map *map)
{
	return bpf_map__is_struct_ops(map);
}

static bool obj_needs_vmlinux_btf(const struct bpf_object *obj)
{
	struct bpf_program *prog;
	struct bpf_map *map;
	int i;

	/* CO-RE relocations need kernel BTF, only when btf_custom_path
	 * is not specified
	 */
	if (obj->btf_ext && obj->btf_ext->core_relo_info.len && !obj->btf_custom_path)
		return true;

	/* Support for typed ksyms needs kernel BTF */
	for (i = 0; i < obj->nr_extern; i++) {
		const struct extern_desc *ext;

		ext = &obj->externs[i];
		if (ext->type == EXT_KSYM && ext->ksym.type_id)
			return true;
	}

	bpf_object__for_each_program(prog, obj) {
		if (!prog->autoload)
			continue;
		if (prog_needs_vmlinux_btf(prog))
			return true;
	}

	bpf_object__for_each_map(map, obj) {
		if (map_needs_vmlinux_btf(map))
			return true;
	}

	return false;
}
static int bpf_object__load_vmlinux_btf(struct bpf_object *obj, bool force)
{
	int err;

	/* btf_vmlinux could be loaded earlier */
	if (obj->btf_vmlinux || obj->gen_loader)
		return 0;

	if (!force && !obj_needs_vmlinux_btf(obj))
		return 0;

	obj->btf_vmlinux = btf__load_vmlinux_btf();
	err = libbpf_get_error(obj->btf_vmlinux);
	if (err) {
		pr_warn("Error loading vmlinux BTF: %d\n", err);
		obj->btf_vmlinux = NULL;
		return err;
	}
	return 0;
}

static int bpf_object__sanitize_and_load_btf(struct bpf_object *obj)
{
	struct btf *kern_btf = obj->btf;
	bool btf_mandatory, sanitize;
	int i, err = 0;

	if (!obj->btf)
		return 0;

	if (!kernel_supports(obj, FEAT_BTF)) {
		if (kernel_needs_btf(obj)) {
			err = -EOPNOTSUPP;
			goto report;
		}
		pr_debug("Kernel doesn't support BTF, skipping uploading it.\n");
		return 0;
	}

	/* Even though some subprogs are global/weak, user might prefer more
	 * permissive BPF verification process that BPF verifier performs for
	 * static functions, taking into account more context from the caller
	 * functions. In such case, they need to mark such subprogs with
	 * __attribute__((visibility("hidden"))) and libbpf will adjust
	 * corresponding FUNC BTF type to be marked as static and trigger more
	 * involved BPF verification process.
	 */
	for (i = 0; i < obj->nr_programs; i++) {
		struct bpf_program *prog = &obj->programs[i];
		struct btf_type *t;
		const char *name;
		int j, n;

		if (!prog->mark_btf_static || !prog_is_subprog(obj, prog))
			continue;

		n = btf__type_cnt(obj->btf);
		for (j = 1; j < n; j++) {
			t = btf_type_by_id(obj->btf, j);
			if (!btf_is_func(t) || btf_func_linkage(t) != BTF_FUNC_GLOBAL)
				continue;

			name = btf__str_by_offset(obj->btf, t->name_off);
			if (strcmp(name, prog->name) != 0)
				continue;

			t->info = btf_type_info(BTF_KIND_FUNC, BTF_FUNC_STATIC, 0);
			break;
		}
	}

	sanitize = btf_needs_sanitization(obj);
	if (sanitize) {
		const void *raw_data;
		__u32 sz;

		/* clone BTF to sanitize a copy and leave the original intact */
		raw_data = btf__raw_data(obj->btf, &sz);
		kern_btf = btf__new(raw_data, sz);
		err = libbpf_get_error(kern_btf);
		if (err)
			return err;

		/* enforce 8-byte pointers for BPF-targeted BTFs */
		btf__set_pointer_size(obj->btf, 8);
		err = bpf_object__sanitize_btf(obj, kern_btf);
		if (err)
			return err;
	}

	if (obj->gen_loader) {
		__u32 raw_size = 0;
		const void *raw_data = btf__raw_data(kern_btf, &raw_size);

		if (!raw_data)
			return -ENOMEM;
		bpf_gen__load_btf(obj->gen_loader, raw_data, raw_size);
		/* Pretend to have valid FD to pass various fd >= 0 checks.
		 * This fd == 0 will not be used with any syscall and will be reset to -1 eventually.
		 */
		btf__set_fd(kern_btf, 0);
	} else {
		/* currently BPF_BTF_LOAD only supports log_level 1 */
		err = btf_load_into_kernel(kern_btf, obj->log_buf, obj->log_size,
					   obj->log_level ? 1 : 0, obj->token_fd);
	}
	if (sanitize) {
		if (!err) {
			/* move fd to libbpf's BTF */
			btf__set_fd(obj->btf, btf__fd(kern_btf));
			btf__set_fd(kern_btf, -1);
		}
		btf__free(kern_btf);
	}
report:
	if (err) {
		btf_mandatory = kernel_needs_btf(obj);
		pr_warn("Error loading .BTF into kernel: %d. %s\n", err,
			btf_mandatory ? "BTF is mandatory, can't proceed."
				      : "BTF is optional, ignoring.");
		if (!btf_mandatory)
			err = 0;
	}
	return err;
}
static const char *elf_sym_str(const struct bpf_object *obj, size_t off)
{
	const char *name;

	name = elf_strptr(obj->efile.elf, obj->efile.strtabidx, off);
	if (!name) {
		pr_warn("elf: failed to get section name string at offset %zu from %s: %s\n",
			off, obj->path, elf_errmsg(-1));
		return NULL;
	}

	return name;
}

static const char *elf_sec_str(const struct bpf_object *obj, size_t off)
{
	const char *name;

	name = elf_strptr(obj->efile.elf, obj->efile.shstrndx, off);
	if (!name) {
		pr_warn("elf: failed to get section name string at offset %zu from %s: %s\n",
			off, obj->path, elf_errmsg(-1));
		return NULL;
	}

	return name;
}

static Elf_Scn *elf_sec_by_idx(const struct bpf_object *obj, size_t idx)
{
	Elf_Scn *scn;

	scn = elf_getscn(obj->efile.elf, idx);
	if (!scn) {
		pr_warn("elf: failed to get section(%zu) from %s: %s\n",
			idx, obj->path, elf_errmsg(-1));
		return NULL;
	}
	return scn;
}

static Elf_Scn *elf_sec_by_name(const struct bpf_object *obj, const char *name)
{
	Elf_Scn *scn = NULL;
	Elf *elf = obj->efile.elf;
	const char *sec_name;

	while ((scn = elf_nextscn(elf, scn)) != NULL) {
		sec_name = elf_sec_name(obj, scn);
		if (!sec_name)
			return NULL;

		if (strcmp(sec_name, name) != 0)
			continue;

		return scn;
	}
	return NULL;
}

static Elf64_Shdr *elf_sec_hdr(const struct bpf_object *obj, Elf_Scn *scn)
{
	Elf64_Shdr *shdr;

	if (!scn)
		return NULL;

	shdr = elf64_getshdr(scn);
	if (!shdr) {
		pr_warn("elf: failed to get section(%zu) header from %s: %s\n",
			elf_ndxscn(scn), obj->path, elf_errmsg(-1));
		return NULL;
	}

	return shdr;
}

static const char *elf_sec_name(const struct bpf_object *obj, Elf_Scn *scn)
{
	const char *name;
	Elf64_Shdr *sh;

	if (!scn)
		return NULL;

	sh = elf_sec_hdr(obj, scn);
	if (!sh)
		return NULL;

	name = elf_sec_str(obj, sh->sh_name);
	if (!name) {
		pr_warn("elf: failed to get section(%zu) name from %s: %s\n",
			elf_ndxscn(scn), obj->path, elf_errmsg(-1));
		return NULL;
	}

	return name;
}

static Elf_Data *elf_sec_data(const struct bpf_object *obj, Elf_Scn *scn)
{
	Elf_Data *data;

	if (!scn)
		return NULL;

	data = elf_getdata(scn, 0);
	if (!data) {
		pr_warn("elf: failed to get section(%zu) %s data from %s: %s\n",
			elf_ndxscn(scn), elf_sec_name(obj, scn) ?: "<?>",
			obj->path, elf_errmsg(-1));
		return NULL;
	}

	return data;
}

static Elf64_Sym *elf_sym_by_idx(const struct bpf_object *obj, size_t idx)
{
	if (idx >= obj->efile.symbols->d_size / sizeof(Elf64_Sym))
		return NULL;

	return (Elf64_Sym *)obj->efile.symbols->d_buf + idx;
}

static Elf64_Rel *elf_rel_by_idx(Elf_Data *data, size_t idx)
{
	if (idx >= data->d_size / sizeof(Elf64_Rel))
		return NULL;

	return (Elf64_Rel *)data->d_buf + idx;
}

static bool is_sec_name_dwarf(const char *name)
{
	/* approximation, but the actual list is too long */
	return str_has_pfx(name, ".debug_");
}

static bool ignore_elf_section(Elf64_Shdr *hdr, const char *name)
{
	/* no special handling of .strtab */
	if (hdr->sh_type == SHT_STRTAB)
		return true;

	/* ignore .llvm_addrsig section as well */
	if (hdr->sh_type == SHT_LLVM_ADDRSIG)
		return true;

	/* no subprograms will lead to an empty .text section, ignore it */
	if (hdr->sh_type == SHT_PROGBITS && hdr->sh_size == 0 &&
	    strcmp(name, ".text") == 0)
		return true;

	/* DWARF sections */
	if (is_sec_name_dwarf(name))
		return true;

	if (str_has_pfx(name, ".rel")) {
		name += sizeof(".rel") - 1;
		/* DWARF section relocations */
		if (is_sec_name_dwarf(name))
			return true;

		/* .BTF and .BTF.ext don't need relocations */
		if (strcmp(name, BTF_ELF_SEC) == 0 ||
		    strcmp(name, BTF_EXT_ELF_SEC) == 0)
			return true;
	}

	return false;
}
static int cmp_progs(const void *_a, const void *_b)
{
	const struct bpf_program *a = _a;
	const struct bpf_program *b = _b;

	if (a->sec_idx != b->sec_idx)
		return a->sec_idx < b->sec_idx ? -1 : 1;

	/* sec_insn_off can't be the same within the section */
	return a->sec_insn_off < b->sec_insn_off ? -1 : 1;
}
static int bpf_object__elf_collect(struct bpf_object *obj)
{
	struct elf_sec_desc *sec_desc;
	Elf *elf = obj->efile.elf;
	Elf_Data *btf_ext_data = NULL;
	Elf_Data *btf_data = NULL;
	int idx = 0, err = 0;
	const char *name;
	Elf_Data *data;
	Elf_Scn *scn;
	Elf64_Shdr *sh;

	/* ELF section indices are 0-based, but sec #0 is special "invalid"
	 * section. Since section count retrieved by elf_getshdrnum() does
	 * include sec #0, it is already the necessary size of an array to keep
	 * all the sections.
	 */
	if (elf_getshdrnum(obj->efile.elf, &obj->efile.sec_cnt)) {
		pr_warn("elf: failed to get the number of sections for %s: %s\n",
			obj->path, elf_errmsg(-1));
		return -LIBBPF_ERRNO__FORMAT;
	}
	obj->efile.secs = calloc(obj->efile.sec_cnt, sizeof(*obj->efile.secs));
	if (!obj->efile.secs)
		return -ENOMEM;

	/* a bunch of ELF parsing functionality depends on processing symbols,
	 * so do the first pass and find the symbol table
	 */
	scn = NULL;
	while ((scn = elf_nextscn(elf, scn)) != NULL) {
		sh = elf_sec_hdr(obj, scn);
		if (!sh)
			return -LIBBPF_ERRNO__FORMAT;

		if (sh->sh_type == SHT_SYMTAB) {
			if (obj->efile.symbols) {
				pr_warn("elf: multiple symbol tables in %s\n", obj->path);
				return -LIBBPF_ERRNO__FORMAT;
			}

			data = elf_sec_data(obj, scn);
			if (!data)
				return -LIBBPF_ERRNO__FORMAT;

			idx = elf_ndxscn(scn);

			obj->efile.symbols = data;
			obj->efile.symbols_shndx = idx;
			obj->efile.strtabidx = sh->sh_link;
		}
	}

	if (!obj->efile.symbols) {
		pr_warn("elf: couldn't find symbol table in %s, stripped object file?\n",
			obj->path);
		return -ENOENT;
	}

	scn = NULL;
	while ((scn = elf_nextscn(elf, scn)) != NULL) {
		idx = elf_ndxscn(scn);
		sec_desc = &obj->efile.secs[idx];

		sh = elf_sec_hdr(obj, scn);
		if (!sh)
			return -LIBBPF_ERRNO__FORMAT;

		name = elf_sec_str(obj, sh->sh_name);
		if (!name)
			return -LIBBPF_ERRNO__FORMAT;

		if (ignore_elf_section(sh, name))
			continue;

		data = elf_sec_data(obj, scn);
		if (!data)
			return -LIBBPF_ERRNO__FORMAT;

		pr_debug("elf: section(%d) %s, size %ld, link %d, flags %lx, type=%d\n",
			 idx, name, (unsigned long)data->d_size,
			 (int)sh->sh_link, (unsigned long)sh->sh_flags,
			 (int)sh->sh_type);

		if (strcmp(name, "license") == 0) {
			err = bpf_object__init_license(obj, data->d_buf, data->d_size);
			if (err)
				return err;
		} else if (strcmp(name, "version") == 0) {
			err = bpf_object__init_kversion(obj, data->d_buf, data->d_size);
			if (err)
				return err;
		} else if (strcmp(name, "maps") == 0) {
			pr_warn("elf: legacy map definitions in 'maps' section are not supported by libbpf v1.0+\n");
			return -ENOTSUP;
		} else if (strcmp(name, MAPS_ELF_SEC) == 0) {
			obj->efile.btf_maps_shndx = idx;
		} else if (strcmp(name, BTF_ELF_SEC) == 0) {
			if (sh->sh_type != SHT_PROGBITS)
				return -LIBBPF_ERRNO__FORMAT;
			btf_data = data;
		} else if (strcmp(name, BTF_EXT_ELF_SEC) == 0) {
			if (sh->sh_type != SHT_PROGBITS)
				return -LIBBPF_ERRNO__FORMAT;
			btf_ext_data = data;
		} else if (sh->sh_type == SHT_SYMTAB) {
			/* already processed during the first pass above */
		} else if (sh->sh_type == SHT_PROGBITS && data->d_size > 0) {
			if (sh->sh_flags & SHF_EXECINSTR) {
				if (strcmp(name, ".text") == 0)
					obj->efile.text_shndx = idx;
				err = bpf_object__add_programs(obj, data, name, idx);
				if (err)
					return err;
			} else if (strcmp(name, DATA_SEC) == 0 ||
				   str_has_pfx(name, DATA_SEC ".")) {
				sec_desc->sec_type = SEC_DATA;
				sec_desc->shdr = sh;
				sec_desc->data = data;
			} else if (strcmp(name, RODATA_SEC) == 0 ||
				   str_has_pfx(name, RODATA_SEC ".")) {
				sec_desc->sec_type = SEC_RODATA;
				sec_desc->shdr = sh;
				sec_desc->data = data;
			} else if (strcmp(name, STRUCT_OPS_SEC) == 0 ||
				   strcmp(name, STRUCT_OPS_LINK_SEC) == 0 ||
				   strcmp(name, "?" STRUCT_OPS_SEC) == 0 ||
				   strcmp(name, "?" STRUCT_OPS_LINK_SEC) == 0) {
				sec_desc->sec_type = SEC_ST_OPS;
				sec_desc->shdr = sh;
				sec_desc->data = data;
				obj->efile.has_st_ops = true;
			} else {
				pr_info("elf: skipping unrecognized data section(%d) %s\n",
					idx, name);
			}
		} else if (sh->sh_type == SHT_REL) {
			int targ_sec_idx = sh->sh_info; /* points to other section */

			if (sh->sh_entsize != sizeof(Elf64_Rel) ||
			    targ_sec_idx >= obj->efile.sec_cnt)
				return -LIBBPF_ERRNO__FORMAT;

			/* Only do relo for section with exec instructions */
			if (!section_have_execinstr(obj, targ_sec_idx) &&
			    strcmp(name, ".rel" STRUCT_OPS_SEC) &&
			    strcmp(name, ".rel" STRUCT_OPS_LINK_SEC) &&
			    strcmp(name, ".rel?" STRUCT_OPS_SEC) &&
			    strcmp(name, ".rel?" STRUCT_OPS_LINK_SEC) &&
			    strcmp(name, ".rel" MAPS_ELF_SEC)) {
				pr_info("elf: skipping relo section(%d) %s for section(%d) %s\n",
					idx, name, targ_sec_idx,
					elf_sec_name(obj, elf_sec_by_idx(obj, targ_sec_idx)) ?: "<?>");
				continue;
			}

			sec_desc->sec_type = SEC_RELO;
			sec_desc->shdr = sh;
			sec_desc->data = data;
		} else if (sh->sh_type == SHT_NOBITS && (strcmp(name, BSS_SEC) == 0 ||
							 str_has_pfx(name, BSS_SEC "."))) {
			sec_desc->sec_type = SEC_BSS;
			sec_desc->shdr = sh;
			sec_desc->data = data;
		} else {
			pr_info("elf: skipping section(%d) %s (size %zu)\n", idx, name,
				(size_t)sh->sh_size);
		}
	}

	if (!obj->efile.strtabidx || obj->efile.strtabidx > idx) {
		pr_warn("elf: symbol strings section missing or invalid in %s\n", obj->path);
		return -LIBBPF_ERRNO__FORMAT;
	}

	/* sort BPF programs by section name and in-section instruction offset
	 * for faster search
	 */
	if (obj->nr_programs)
		qsort(obj->programs, obj->nr_programs, sizeof(*obj->programs), cmp_progs);

	return bpf_object__init_btf(obj, btf_data, btf_ext_data);
}
static bool sym_is_extern(const Elf64_Sym *sym)
{
	int bind = ELF64_ST_BIND(sym->st_info);
	/* externs are symbols w/ type=NOTYPE, bind=GLOBAL|WEAK, section=UND */
	return sym->st_shndx == SHN_UNDEF &&
	       (bind == STB_GLOBAL || bind == STB_WEAK) &&
	       ELF64_ST_TYPE(sym->st_info) == STT_NOTYPE;
}

static bool sym_is_subprog(const Elf64_Sym *sym, int text_shndx)
{
	int bind = ELF64_ST_BIND(sym->st_info);
	int type = ELF64_ST_TYPE(sym->st_info);

	/* in .text section */
	if (sym->st_shndx != text_shndx)
		return false;

	/* local function */
	if (bind == STB_LOCAL && type == STT_SECTION)
		return true;

	/* global function */
	return bind == STB_GLOBAL && type == STT_FUNC;
}
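/* Illustrative example (not part of libbpf): in BPF program C, declarations
 * like
 *
 *   extern unsigned int CONFIG_HZ __kconfig;
 *   extern void bpf_rcu_read_lock(void) __ksym;
 *
 * compile to undefined (SHN_UNDEF) NOTYPE symbols with GLOBAL or WEAK
 * binding, which is exactly the shape sym_is_extern() tests for.
 */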
static int find_extern_btf_id(const struct btf *btf, const char *ext_name)
{
	const struct btf_type *t;
	const char *tname;
	int i, n;

	if (!btf)
		return -ESRCH;

	n = btf__type_cnt(btf);
	for (i = 1; i < n; i++) {
		t = btf__type_by_id(btf, i);

		if (!btf_is_var(t) && !btf_is_func(t))
			continue;

		tname = btf__name_by_offset(btf, t->name_off);
		if (strcmp(tname, ext_name))
			continue;

		if (btf_is_var(t) &&
		    btf_var(t)->linkage != BTF_VAR_GLOBAL_EXTERN)
			return -EINVAL;

		if (btf_is_func(t) && btf_func_linkage(t) != BTF_FUNC_EXTERN)
			return -EINVAL;

		return i;
	}

	return -ENOENT;
}

static int find_extern_sec_btf_id(struct btf *btf, int ext_btf_id) {
	const struct btf_var_secinfo *vs;
	const struct btf_type *t;
	int i, j, n;

	if (!btf)
		return -ESRCH;

	n = btf__type_cnt(btf);
	for (i = 1; i < n; i++) {
		t = btf__type_by_id(btf, i);

		if (!btf_is_datasec(t))
			continue;

		vs = btf_var_secinfos(t);
		for (j = 0; j < btf_vlen(t); j++, vs++) {
			if (vs->type == ext_btf_id)
				return i;
		}
	}

	return -ENOENT;
}
static enum kcfg_type find_kcfg_type(const struct btf *btf, int id,
				     bool *is_signed)
{
	const struct btf_type *t;
	const char *name;

	t = skip_mods_and_typedefs(btf, id, NULL);
	name = btf__name_by_offset(btf, t->name_off);

	if (is_signed)
		*is_signed = false;
	switch (btf_kind(t)) {
	case BTF_KIND_INT: {
		int enc = btf_int_encoding(t);

		if (enc & BTF_INT_BOOL)
			return t->size == 1 ? KCFG_BOOL : KCFG_UNKNOWN;
		if (is_signed)
			*is_signed = enc & BTF_INT_SIGNED;
		if (t->size == 1)
			return KCFG_CHAR;
		if (t->size < 1 || t->size > 8 || (t->size & (t->size - 1)))
			return KCFG_UNKNOWN;
		return KCFG_INT;
	}
	case BTF_KIND_ENUM:
		if (t->size != 4)
			return KCFG_UNKNOWN;
		if (strcmp(name, "libbpf_tristate"))
			return KCFG_UNKNOWN;
		return KCFG_TRISTATE;
	case BTF_KIND_ENUM64:
		if (strcmp(name, "libbpf_tristate"))
			return KCFG_UNKNOWN;
		return KCFG_TRISTATE;
	case BTF_KIND_ARRAY:
		if (btf_array(t)->nelems == 0)
			return KCFG_UNKNOWN;
		if (find_kcfg_type(btf, btf_array(t)->type, NULL) != KCFG_CHAR)
			return KCFG_UNKNOWN;
		return KCFG_CHAR_ARR;
	default:
		return KCFG_UNKNOWN;
	}
}
static int cmp_externs(const void *_a, const void *_b)
{
	const struct extern_desc *a = _a;
	const struct extern_desc *b = _b;

	if (a->type != b->type)
		return a->type < b->type ? -1 : 1;

	if (a->type == EXT_KCFG) {
		/* descending order by alignment requirements */
		if (a->kcfg.align != b->kcfg.align)
			return a->kcfg.align > b->kcfg.align ? -1 : 1;
		/* ascending order by size, within same alignment class */
		if (a->kcfg.sz != b->kcfg.sz)
			return a->kcfg.sz < b->kcfg.sz ? -1 : 1;
	}

	/* resolve ties by name */
	return strcmp(a->name, b->name);
}
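/* Illustrative example (not part of libbpf): given kcfg externs
 * "CONFIG_HZ" (align=4, sz=4), "CONFIG_X" (align=8, sz=8), and
 * "CONFIG_Y" (align=1, sz=1), this comparator orders them
 * CONFIG_X, CONFIG_HZ, CONFIG_Y -- largest alignment first -- so that
 * packing them back-to-back into the .kconfig blob wastes no padding.
 */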
static int find_int_btf_id(const struct btf *btf)
{
	const struct btf_type *t;
	int i, n;

	n = btf__type_cnt(btf);
	for (i = 1; i < n; i++) {
		t = btf__type_by_id(btf, i);

		if (btf_is_int(t) && btf_int_bits(t) == 32)
			return i;
	}

	return 0;
}
static int add_dummy_ksym_var(struct btf *btf)
{
	int i, int_btf_id, sec_btf_id, dummy_var_btf_id;
	const struct btf_var_secinfo *vs;
	const struct btf_type *sec;

	if (!btf)
		return 0;

	sec_btf_id = btf__find_by_name_kind(btf, KSYMS_SEC,
					    BTF_KIND_DATASEC);
	if (sec_btf_id < 0)
		return 0;

	sec = btf__type_by_id(btf, sec_btf_id);
	vs = btf_var_secinfos(sec);
	for (i = 0; i < btf_vlen(sec); i++, vs++) {
		const struct btf_type *vt;

		vt = btf__type_by_id(btf, vs->type);
		if (btf_is_func(vt))
			break;
	}

	/* No func in ksyms sec. No need to add dummy var. */
	if (i == btf_vlen(sec))
		return 0;

	int_btf_id = find_int_btf_id(btf);
	dummy_var_btf_id = btf__add_var(btf,
					"dummy_ksym",
					BTF_VAR_GLOBAL_ALLOCATED,
					int_btf_id);
	if (dummy_var_btf_id < 0)
		pr_warn("cannot create a dummy_ksym var\n");

	return dummy_var_btf_id;
}
static int bpf_object__collect_externs(struct bpf_object *obj)
{
	struct btf_type *sec, *kcfg_sec = NULL, *ksym_sec = NULL;
	const struct btf_type *t;
	struct extern_desc *ext;
	int i, n, off, dummy_var_btf_id;
	const char *ext_name, *sec_name;
	size_t ext_essent_len;
	Elf_Scn *scn;
	Elf64_Shdr *sh;

	if (!obj->efile.symbols)
		return 0;

	scn = elf_sec_by_idx(obj, obj->efile.symbols_shndx);
	sh = elf_sec_hdr(obj, scn);
	if (!sh || sh->sh_entsize != sizeof(Elf64_Sym))
		return -LIBBPF_ERRNO__FORMAT;

	dummy_var_btf_id = add_dummy_ksym_var(obj->btf);
	if (dummy_var_btf_id < 0)
		return dummy_var_btf_id;

	n = sh->sh_size / sh->sh_entsize;
	pr_debug("looking for externs among %d symbols...\n", n);

	for (i = 0; i < n; i++) {
		Elf64_Sym *sym = elf_sym_by_idx(obj, i);

		if (!sym)
			return -LIBBPF_ERRNO__FORMAT;
		if (!sym_is_extern(sym))
			continue;
		ext_name = elf_sym_str(obj, sym->st_name);
		if (!ext_name || !ext_name[0])
			continue;

		ext = obj->externs;
		ext = libbpf_reallocarray(ext, obj->nr_extern + 1, sizeof(*ext));
		if (!ext)
			return -ENOMEM;
		obj->externs = ext;
		ext = &ext[obj->nr_extern];
		memset(ext, 0, sizeof(*ext));
		obj->nr_extern++;

		ext->btf_id = find_extern_btf_id(obj->btf, ext_name);
		if (ext->btf_id <= 0) {
			pr_warn("failed to find BTF for extern '%s': %d\n",
				ext_name, ext->btf_id);
			return ext->btf_id;
		}
		t = btf__type_by_id(obj->btf, ext->btf_id);
		ext->name = btf__name_by_offset(obj->btf, t->name_off);
		ext->sym_idx = i;
		ext->is_weak = ELF64_ST_BIND(sym->st_info) == STB_WEAK;

		ext_essent_len = bpf_core_essential_name_len(ext->name);
		ext->essent_name = NULL;
		if (ext_essent_len != strlen(ext->name)) {
			ext->essent_name = strndup(ext->name, ext_essent_len);
			if (!ext->essent_name)
				return -ENOMEM;
		}

		ext->sec_btf_id = find_extern_sec_btf_id(obj->btf, ext->btf_id);
		if (ext->sec_btf_id <= 0) {
			pr_warn("failed to find BTF for extern '%s' [%d] section: %d\n",
				ext_name, ext->btf_id, ext->sec_btf_id);
			return ext->sec_btf_id;
		}
		sec = (void *)btf__type_by_id(obj->btf, ext->sec_btf_id);
		sec_name = btf__name_by_offset(obj->btf, sec->name_off);

		if (strcmp(sec_name, KCONFIG_SEC) == 0) {
			if (btf_is_func(t)) {
				pr_warn("extern function %s is unsupported under %s section\n",
					ext->name, KCONFIG_SEC);
				return -ENOTSUP;
			}
			kcfg_sec = sec;
			ext->type = EXT_KCFG;
			ext->kcfg.sz = btf__resolve_size(obj->btf, t->type);
			if (ext->kcfg.sz <= 0) {
				pr_warn("failed to resolve size of extern (kcfg) '%s': %d\n",
					ext_name, ext->kcfg.sz);
				return ext->kcfg.sz;
			}
			ext->kcfg.align = btf__align_of(obj->btf, t->type);
			if (ext->kcfg.align <= 0) {
				pr_warn("failed to determine alignment of extern (kcfg) '%s': %d\n",
					ext_name, ext->kcfg.align);
				return -EINVAL;
			}
			ext->kcfg.type = find_kcfg_type(obj->btf, t->type,
							&ext->kcfg.is_signed);
			if (ext->kcfg.type == KCFG_UNKNOWN) {
				pr_warn("extern (kcfg) '%s': type is unsupported\n", ext_name);
				return -ENOTSUP;
			}
		} else if (strcmp(sec_name, KSYMS_SEC) == 0) {
			ksym_sec = sec;
			ext->type = EXT_KSYM;
			skip_mods_and_typedefs(obj->btf, t->type,
					       &ext->ksym.type_id);
		} else {
			pr_warn("unrecognized extern section '%s'\n", sec_name);
			return -ENOTSUP;
		}
	}
	pr_debug("collected %d externs total\n", obj->nr_extern);

	if (!obj->nr_extern)
		return 0;

	/* sort externs by type, for kcfg ones also by (align, size, name) */
	qsort(obj->externs, obj->nr_extern, sizeof(*ext), cmp_externs);

	/* for .ksyms section, we need to turn all externs into allocated
	 * variables in BTF to pass kernel verification; we do this by
	 * pretending that each extern is a 8-byte variable
	 */
	if (ksym_sec) {
		/* find existing 4-byte integer type in BTF to use for fake
		 * extern variables in DATASEC
		 */
		int int_btf_id = find_int_btf_id(obj->btf);
		/* For extern function, a dummy_var added earlier
		 * will be used to replace the vs->type and
		 * its name string will be used to refill
		 * the missing param's name.
		 */
		const struct btf_type *dummy_var;

		dummy_var = btf__type_by_id(obj->btf, dummy_var_btf_id);
		for (i = 0; i < obj->nr_extern; i++) {
			ext = &obj->externs[i];
			if (ext->type != EXT_KSYM)
				continue;
			pr_debug("extern (ksym) #%d: symbol %d, name %s\n",
				 i, ext->sym_idx, ext->name);
		}

		sec = ksym_sec;
		n = btf_vlen(sec);
		for (i = 0, off = 0; i < n; i++, off += sizeof(int)) {
			struct btf_var_secinfo *vs = btf_var_secinfos(sec) + i;
			struct btf_type *vt;

			vt = (void *)btf__type_by_id(obj->btf, vs->type);
			ext_name = btf__name_by_offset(obj->btf, vt->name_off);
			ext = find_extern_by_name(obj, ext_name);
			if (!ext) {
				pr_warn("failed to find extern definition for BTF %s '%s'\n",
					btf_kind_str(vt), ext_name);
				return -ESRCH;
			}
			if (btf_is_func(vt)) {
				const struct btf_type *func_proto;
				struct btf_param *param;
				int j;

				func_proto = btf__type_by_id(obj->btf,
							     vt->type);
				param = btf_params(func_proto);
				/* Reuse the dummy_var string if the
				 * func proto does not have param name.
				 */
				for (j = 0; j < btf_vlen(func_proto); j++)
					if (param[j].type && !param[j].name_off)
						param[j].name_off =
							dummy_var->name_off;
				vs->type = dummy_var_btf_id;
				vt->info &= ~0xffff;
				vt->info |= BTF_FUNC_GLOBAL;
			} else {
				btf_var(vt)->linkage = BTF_VAR_GLOBAL_ALLOCATED;
				vt->type = int_btf_id;
			}
			vs->offset = off;
			vs->size = sizeof(int);
		}
		sec->size = off;
	}

	if (kcfg_sec) {
		sec = kcfg_sec;
		/* for kcfg externs calculate their offsets within a .kconfig map */
		off = 0;
		for (i = 0; i < obj->nr_extern; i++) {
			ext = &obj->externs[i];
			if (ext->type != EXT_KCFG)
				continue;

			ext->kcfg.data_off = roundup(off, ext->kcfg.align);
			off = ext->kcfg.data_off + ext->kcfg.sz;
			pr_debug("extern (kcfg) #%d: symbol %d, off %u, name %s\n",
				 i, ext->sym_idx, ext->kcfg.data_off, ext->name);
		}
		sec->size = off;
		n = btf_vlen(sec);
		for (i = 0; i < n; i++) {
			struct btf_var_secinfo *vs = btf_var_secinfos(sec) + i;

			t = btf__type_by_id(obj->btf, vs->type);
			ext_name = btf__name_by_offset(obj->btf, t->name_off);
			ext = find_extern_by_name(obj, ext_name);
			if (!ext) {
				pr_warn("failed to find extern definition for BTF var '%s'\n",
					ext_name);
				return -ESRCH;
			}
			btf_var(t)->linkage = BTF_VAR_GLOBAL_ALLOCATED;
			vs->offset = ext->kcfg.data_off;
		}
	}
	return 0;
}
static bool prog_is_subprog(const struct bpf_object *obj, const struct bpf_program *prog)
{
	return prog->sec_idx == obj->efile.text_shndx && obj->nr_programs > 1;
}

struct bpf_program *
bpf_object__find_program_by_name(const struct bpf_object *obj,
				 const char *name)
{
	struct bpf_program *prog;

	bpf_object__for_each_program(prog, obj) {
		if (prog_is_subprog(obj, prog))
			continue;
		if (!strcmp(prog->name, name))
			return prog;
	}
	return errno = ENOENT, NULL;
}

static bool bpf_object__shndx_is_data(const struct bpf_object *obj,
				      int shndx)
{
	switch (obj->efile.secs[shndx].sec_type) {
	case SEC_BSS:
	case SEC_DATA:
	case SEC_RODATA:
		return true;
	default:
		return false;
	}
}

static bool bpf_object__shndx_is_maps(const struct bpf_object *obj,
				      int shndx)
{
	return shndx == obj->efile.btf_maps_shndx;
}

static enum libbpf_map_type
bpf_object__section_to_libbpf_map_type(const struct bpf_object *obj, int shndx)
{
	if (shndx == obj->efile.symbols_shndx)
		return LIBBPF_MAP_KCONFIG;

	switch (obj->efile.secs[shndx].sec_type) {
	case SEC_BSS:
		return LIBBPF_MAP_BSS;
	case SEC_DATA:
		return LIBBPF_MAP_DATA;
	case SEC_RODATA:
		return LIBBPF_MAP_RODATA;
	default:
		return LIBBPF_MAP_UNSPEC;
	}
}
static int bpf_program__record_reloc(struct bpf_program *prog,
				     struct reloc_desc *reloc_desc,
				     __u32 insn_idx, const char *sym_name,
				     const Elf64_Sym *sym, const Elf64_Rel *rel)
{
	struct bpf_insn *insn = &prog->insns[insn_idx];
	size_t map_idx, nr_maps = prog->obj->nr_maps;
	struct bpf_object *obj = prog->obj;
	__u32 shdr_idx = sym->st_shndx;
	enum libbpf_map_type type;
	const char *sym_sec_name;
	struct bpf_map *map;

	if (!is_call_insn(insn) && !is_ldimm64_insn(insn)) {
		pr_warn("prog '%s': invalid relo against '%s' for insns[%d].code 0x%x\n",
			prog->name, sym_name, insn_idx, insn->code);
		return -LIBBPF_ERRNO__RELOC;
	}

	if (sym_is_extern(sym)) {
		int sym_idx = ELF64_R_SYM(rel->r_info);
		int i, n = obj->nr_extern;
		struct extern_desc *ext;

		for (i = 0; i < n; i++) {
			ext = &obj->externs[i];
			if (ext->sym_idx == sym_idx)
				break;
		}
		if (i >= n) {
			pr_warn("prog '%s': extern relo failed to find extern for '%s' (%d)\n",
				prog->name, sym_name, sym_idx);
			return -LIBBPF_ERRNO__RELOC;
		}
		pr_debug("prog '%s': found extern #%d '%s' (sym %d) for insn #%u\n",
			 prog->name, i, ext->name, ext->sym_idx, insn_idx);
		if (insn->code == (BPF_JMP | BPF_CALL))
			reloc_desc->type = RELO_EXTERN_CALL;
		else
			reloc_desc->type = RELO_EXTERN_LD64;
		reloc_desc->insn_idx = insn_idx;
		reloc_desc->ext_idx = i;
		return 0;
	}

	/* sub-program call relocation */
	if (is_call_insn(insn)) {
		if (insn->src_reg != BPF_PSEUDO_CALL) {
			pr_warn("prog '%s': incorrect bpf_call opcode\n", prog->name);
			return -LIBBPF_ERRNO__RELOC;
		}
		/* text_shndx can be 0, if no default "main" program exists */
		if (!shdr_idx || shdr_idx != obj->efile.text_shndx) {
			sym_sec_name = elf_sec_name(obj, elf_sec_by_idx(obj, shdr_idx));
			pr_warn("prog '%s': bad call relo against '%s' in section '%s'\n",
				prog->name, sym_name, sym_sec_name);
			return -LIBBPF_ERRNO__RELOC;
		}
		if (sym->st_value % BPF_INSN_SZ) {
			pr_warn("prog '%s': bad call relo against '%s' at offset %zu\n",
				prog->name, sym_name, (size_t)sym->st_value);
			return -LIBBPF_ERRNO__RELOC;
		}
		reloc_desc->type = RELO_CALL;
		reloc_desc->insn_idx = insn_idx;
		reloc_desc->sym_off = sym->st_value;
		return 0;
	}

	if (!shdr_idx || shdr_idx >= SHN_LORESERVE) {
		pr_warn("prog '%s': invalid relo against '%s' in special section 0x%x; forgot to initialize global var?..\n",
			prog->name, sym_name, shdr_idx);
		return -LIBBPF_ERRNO__RELOC;
	}

	/* loading subprog addresses */
	if (sym_is_subprog(sym, obj->efile.text_shndx)) {
		/* global_func: sym->st_value = offset in the section, insn->imm = 0.
		 * local_func: sym->st_value = 0, insn->imm = offset in the section.
		 */
		if ((sym->st_value % BPF_INSN_SZ) || (insn->imm % BPF_INSN_SZ)) {
			pr_warn("prog '%s': bad subprog addr relo against '%s' at offset %zu+%d\n",
				prog->name, sym_name, (size_t)sym->st_value, insn->imm);
			return -LIBBPF_ERRNO__RELOC;
		}

		reloc_desc->type = RELO_SUBPROG_ADDR;
		reloc_desc->insn_idx = insn_idx;
		reloc_desc->sym_off = sym->st_value;
		return 0;
	}

	type = bpf_object__section_to_libbpf_map_type(obj, shdr_idx);
	sym_sec_name = elf_sec_name(obj, elf_sec_by_idx(obj, shdr_idx));

	/* generic map reference relocation */
	if (type == LIBBPF_MAP_UNSPEC) {
		if (!bpf_object__shndx_is_maps(obj, shdr_idx)) {
			pr_warn("prog '%s': bad map relo against '%s' in section '%s'\n",
				prog->name, sym_name, sym_sec_name);
			return -LIBBPF_ERRNO__RELOC;
		}
		for (map_idx = 0; map_idx < nr_maps; map_idx++) {
			map = &obj->maps[map_idx];
			if (map->libbpf_type != type ||
			    map->sec_idx != sym->st_shndx ||
			    map->sec_offset != sym->st_value)
				continue;
			pr_debug("prog '%s': found map %zd (%s, sec %d, off %zu) for insn #%u\n",
				 prog->name, map_idx, map->name, map->sec_idx,
				 map->sec_offset, insn_idx);
			break;
		}
		if (map_idx >= nr_maps) {
			pr_warn("prog '%s': map relo failed to find map for section '%s', off %zu\n",
				prog->name, sym_sec_name, (size_t)sym->st_value);
			return -LIBBPF_ERRNO__RELOC;
		}
		reloc_desc->type = RELO_LD64;
		reloc_desc->insn_idx = insn_idx;
		reloc_desc->map_idx = map_idx;
		reloc_desc->sym_off = 0; /* sym->st_value determines map_idx */
		return 0;
	}

	/* global data map relocation */
	if (!bpf_object__shndx_is_data(obj, shdr_idx)) {
		pr_warn("prog '%s': bad data relo against section '%s'\n",
			prog->name, sym_sec_name);
		return -LIBBPF_ERRNO__RELOC;
	}
	for (map_idx = 0; map_idx < nr_maps; map_idx++) {
		map = &obj->maps[map_idx];
		if (map->libbpf_type != type || map->sec_idx != sym->st_shndx)
			continue;
		pr_debug("prog '%s': found data map %zd (%s, sec %d, off %zu) for insn %u\n",
			 prog->name, map_idx, map->name, map->sec_idx,
			 map->sec_offset, insn_idx);
		break;
	}
	if (map_idx >= nr_maps) {
		pr_warn("prog '%s': data relo failed to find map for section '%s'\n",
			prog->name, sym_sec_name);
		return -LIBBPF_ERRNO__RELOC;
	}

	reloc_desc->type = RELO_DATA;
	reloc_desc->insn_idx = insn_idx;
	reloc_desc->map_idx = map_idx;
	reloc_desc->sym_off = sym->st_value;
	return 0;
}
static bool prog_contains_insn(const struct bpf_program *prog, size_t insn_idx)
{
	return insn_idx >= prog->sec_insn_off &&
	       insn_idx < prog->sec_insn_off + prog->sec_insn_cnt;
}

static struct bpf_program *find_prog_by_sec_insn(const struct bpf_object *obj,
						 size_t sec_idx, size_t insn_idx)
{
	int l = 0, r = obj->nr_programs - 1, m;
	struct bpf_program *prog;

	if (!obj->nr_programs)
		return NULL;

	while (l < r) {
		m = l + (r - l + 1) / 2;
		prog = &obj->programs[m];

		if (prog->sec_idx < sec_idx ||
		    (prog->sec_idx == sec_idx && prog->sec_insn_off <= insn_idx))
			l = m;
		else
			r = m - 1;
	}
	/* matching program could be at index l, but it still might be the
	 * wrong one, so we need to double check conditions for the last time
	 */
	prog = &obj->programs[l];
	if (prog->sec_idx == sec_idx && prog_contains_insn(prog, insn_idx))
		return prog;
	return NULL;
}
static int
bpf_object__collect_prog_relos(struct bpf_object *obj, Elf64_Shdr *shdr, Elf_Data *data)
{
	const char *relo_sec_name, *sec_name;
	size_t sec_idx = shdr->sh_info, sym_idx;
	struct bpf_program *prog;
	struct reloc_desc *relos;
	int err, i, nrels;
	const char *sym_name;
	__u32 insn_idx;
	Elf_Scn *scn;
	Elf_Data *scn_data;
	Elf64_Sym *sym;
	Elf64_Rel *rel;

	if (sec_idx >= obj->efile.sec_cnt)
		return -EINVAL;

	scn = elf_sec_by_idx(obj, sec_idx);
	scn_data = elf_sec_data(obj, scn);
	if (!scn_data)
		return -LIBBPF_ERRNO__FORMAT;

	relo_sec_name = elf_sec_str(obj, shdr->sh_name);
	sec_name = elf_sec_name(obj, scn);
	if (!relo_sec_name || !sec_name)
		return -EINVAL;

	pr_debug("sec '%s': collecting relocation for section(%zu) '%s'\n",
		 relo_sec_name, sec_idx, sec_name);
	nrels = shdr->sh_size / shdr->sh_entsize;

	for (i = 0; i < nrels; i++) {
		rel = elf_rel_by_idx(data, i);
		if (!rel) {
			pr_warn("sec '%s': failed to get relo #%d\n", relo_sec_name, i);
			return -LIBBPF_ERRNO__FORMAT;
		}

		sym_idx = ELF64_R_SYM(rel->r_info);
		sym = elf_sym_by_idx(obj, sym_idx);
		if (!sym) {
			pr_warn("sec '%s': symbol #%zu not found for relo #%d\n",
				relo_sec_name, sym_idx, i);
			return -LIBBPF_ERRNO__FORMAT;
		}

		if (sym->st_shndx >= obj->efile.sec_cnt) {
			pr_warn("sec '%s': corrupted symbol #%zu pointing to invalid section #%zu for relo #%d\n",
				relo_sec_name, sym_idx, (size_t)sym->st_shndx, i);
			return -LIBBPF_ERRNO__FORMAT;
		}

		if (rel->r_offset % BPF_INSN_SZ || rel->r_offset >= scn_data->d_size) {
			pr_warn("sec '%s': invalid offset 0x%zx for relo #%d\n",
				relo_sec_name, (size_t)rel->r_offset, i);
			return -LIBBPF_ERRNO__FORMAT;
		}

		insn_idx = rel->r_offset / BPF_INSN_SZ;
		/* relocations against static functions are recorded as
		 * relocations against the section that contains a function;
		 * in such case, symbol will be STT_SECTION and sym.st_name
		 * will point to empty string (0), so fetch section name
		 * instead
		 */
		if (ELF64_ST_TYPE(sym->st_info) == STT_SECTION && sym->st_name == 0)
			sym_name = elf_sec_name(obj, elf_sec_by_idx(obj, sym->st_shndx));
		else
			sym_name = elf_sym_str(obj, sym->st_name);
		sym_name = sym_name ?: "<?";

		pr_debug("sec '%s': relo #%d: insn #%u against '%s'\n",
			 relo_sec_name, i, insn_idx, sym_name);

		prog = find_prog_by_sec_insn(obj, sec_idx, insn_idx);
		if (!prog) {
			pr_debug("sec '%s': relo #%d: couldn't find program in section '%s' for insn #%u, probably overridden weak function, skipping...\n",
				 relo_sec_name, i, sec_name, insn_idx);
			continue;
		}

		relos = libbpf_reallocarray(prog->reloc_desc,
					    prog->nr_reloc + 1, sizeof(*relos));
		if (!relos)
			return -ENOMEM;
		prog->reloc_desc = relos;

		/* adjust insn_idx to local BPF program frame of reference */
		insn_idx -= prog->sec_insn_off;
		err = bpf_program__record_reloc(prog, &relos[prog->nr_reloc],
						insn_idx, sym_name, sym, rel);
		if (err)
			return err;

		prog->nr_reloc++;
	}
	return 0;
}
static int map_fill_btf_type_info(struct bpf_object *obj, struct bpf_map *map)
{
	int id;

	if (!obj->btf)
		return -ENOENT;

	/* if it's BTF-defined map, we don't need to search for type IDs.
	 * For struct_ops map, it does not need btf_key_type_id and
	 * btf_value_type_id.
	 */
	if (map->sec_idx == obj->efile.btf_maps_shndx || bpf_map__is_struct_ops(map))
		return 0;

	/*
	 * LLVM annotates global data differently in BTF, that is,
	 * only as '.data', '.bss' or '.rodata'.
	 */
	if (!bpf_map__is_internal(map))
		return -ENOENT;

	id = btf__find_by_name(obj->btf, map->real_name);
	if (id < 0)
		return id;

	map->btf_key_type_id = 0;
	map->btf_value_type_id = id;
	return 0;
}
static int bpf_get_map_info_from_fdinfo(int fd, struct bpf_map_info *info)
{
	char file[PATH_MAX], buff[4096];
	FILE *fp;
	__u32 val;
	int err;

	snprintf(file, sizeof(file), "/proc/%d/fdinfo/%d", getpid(), fd);
	memset(info, 0, sizeof(*info));

	fp = fopen(file, "re");
	if (!fp) {
		err = -errno;
		pr_warn("failed to open %s: %d. No procfs support?\n", file,
			err);
		return err;
	}

	while (fgets(buff, sizeof(buff), fp)) {
		if (sscanf(buff, "map_type:\t%u", &val) == 1)
			info->type = val;
		else if (sscanf(buff, "key_size:\t%u", &val) == 1)
			info->key_size = val;
		else if (sscanf(buff, "value_size:\t%u", &val) == 1)
			info->value_size = val;
		else if (sscanf(buff, "max_entries:\t%u", &val) == 1)
			info->max_entries = val;
		else if (sscanf(buff, "map_flags:\t%i", &val) == 1)
			info->map_flags = val;
	}

	fclose(fp);

	return 0;
}
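/* For reference, a minimal sketch of the fdinfo lines this parser matches;
 * the exact set of fields (and their values) printed by the kernel varies
 * by version, so treat these as illustrative only:
 *
 *	map_type:	1
 *	key_size:	4
 *	value_size:	8
 *	max_entries:	128
 *	map_flags:	0x0
 *
 * Each line is "<field>:\t<value>", which is why the sscanf() format strings
 * above embed a literal tab; map_flags is printed in hex, hence "%i".
 */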
bool bpf_map__autocreate(const struct bpf_map *map)
{
	return map->autocreate;
}

int bpf_map__set_autocreate(struct bpf_map *map, bool autocreate)
{
	if (map->obj->loaded)
		return libbpf_err(-EBUSY);

	map->autocreate = autocreate;
	return 0;
}
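/* Usage sketch (illustrative, not part of libbpf itself): disable creation
 * of a known map before load; "my_obj.bpf.o" and "optional_map" are
 * hypothetical names:
 *
 *	struct bpf_object *obj = bpf_object__open("my_obj.bpf.o");
 *	struct bpf_map *m = bpf_object__find_map_by_name(obj, "optional_map");
 *
 *	if (m)
 *		bpf_map__set_autocreate(m, false);
 *	bpf_object__load(obj);
 *
 * After bpf_object__load() the object is marked loaded, so any further
 * bpf_map__set_autocreate() call returns -EBUSY.
 */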
int bpf_map__reuse_fd(struct bpf_map *map, int fd)
{
	struct bpf_map_info info;
	__u32 len = sizeof(info), name_len;
	int new_fd, err;
	char *new_name;

	memset(&info, 0, len);
	err = bpf_map_get_info_by_fd(fd, &info, &len);
	if (err && errno == EINVAL)
		err = bpf_get_map_info_from_fdinfo(fd, &info);
	if (err)
		return libbpf_err(err);

	name_len = strlen(info.name);
	if (name_len == BPF_OBJ_NAME_LEN - 1 && strncmp(map->name, info.name, name_len) == 0)
		new_name = strdup(map->name);
	else
		new_name = strdup(info.name);

	if (!new_name)
		return libbpf_err(-errno);

	/*
	 * Like dup(), but make sure new FD is >= 3 and has O_CLOEXEC set.
	 * This is similar to what we do in ensure_good_fd(), but without
	 * closing original FD.
	 */
	new_fd = fcntl(fd, F_DUPFD_CLOEXEC, 3);
	if (new_fd < 0) {
		err = -errno;
		goto err_free_new_name;
	}

	err = reuse_fd(map->fd, new_fd);
	if (err)
		goto err_free_new_name;

	free(map->name);

	map->name = new_name;
	map->def.type = info.type;
	map->def.key_size = info.key_size;
	map->def.value_size = info.value_size;
	map->def.max_entries = info.max_entries;
	map->def.map_flags = info.map_flags;
	map->btf_key_type_id = info.btf_key_type_id;
	map->btf_value_type_id = info.btf_value_type_id;
	map->reused = true;
	map->map_extra = info.map_extra;

	return 0;

err_free_new_name:
	free(new_name);
	return libbpf_err(err);
}
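/* Usage sketch (illustrative): adopt an already pinned map instead of
 * creating a fresh one; "/sys/fs/bpf/my_map" and "my_map" are hypothetical:
 *
 *	int fd = bpf_obj_get("/sys/fs/bpf/my_map");
 *	struct bpf_map *m = bpf_object__find_map_by_name(obj, "my_map");
 *
 *	if (fd >= 0 && m)
 *		bpf_map__reuse_fd(m, fd);
 *
 * Note that bpf_map__reuse_fd() dups the FD internally, so the caller still
 * owns (and should eventually close) the original fd.
 */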
__u32 bpf_map__max_entries(const struct bpf_map *map)
{
	return map->def.max_entries;
}

struct bpf_map *bpf_map__inner_map(struct bpf_map *map)
{
	if (!bpf_map_type__is_map_in_map(map->def.type))
		return errno = EINVAL, NULL;

	return map->inner_map;
}

int bpf_map__set_max_entries(struct bpf_map *map, __u32 max_entries)
{
	if (map->obj->loaded)
		return libbpf_err(-EBUSY);

	map->def.max_entries = max_entries;

	/* auto-adjust BPF ringbuf map max_entries to be a multiple of page size */
	if (map_is_ringbuf(map))
		map->def.max_entries = adjust_ringbuf_sz(map->def.max_entries);

	return 0;
}
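/* Usage sketch (illustrative): for BPF_MAP_TYPE_RINGBUF, the requested size
 * is rounded up by adjust_ringbuf_sz() to a page-aligned value the kernel
 * will accept, so on a system with 4096-byte pages the two calls below end
 * up equivalent:
 *
 *	bpf_map__set_max_entries(ringbuf_map, 4000);
 *	bpf_map__set_max_entries(ringbuf_map, 4096);
 */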
static int bpf_object_prepare_token(struct bpf_object *obj)
{
	const char *bpffs_path;
	int bpffs_fd = -1, token_fd, err;
	bool mandatory;
	enum libbpf_print_level level;

	/* token is explicitly prevented */
	if (obj->token_path && obj->token_path[0] == '\0') {
		pr_debug("object '%s': token is prevented, skipping...\n", obj->name);
		return 0;
	}

	mandatory = obj->token_path != NULL;
	level = mandatory ? LIBBPF_WARN : LIBBPF_DEBUG;

	bpffs_path = obj->token_path ?: BPF_FS_DEFAULT_PATH;
	bpffs_fd = open(bpffs_path, O_DIRECTORY, O_RDWR);
	if (bpffs_fd < 0) {
		err = -errno;
		__pr(level, "object '%s': failed (%d) to open BPF FS mount at '%s'%s\n",
		     obj->name, err, bpffs_path,
		     mandatory ? "" : ", skipping optional step...");
		return mandatory ? err : 0;
	}

	token_fd = bpf_token_create(bpffs_fd, 0);
	close(bpffs_fd);
	if (token_fd < 0) {
		if (!mandatory && token_fd == -ENOENT) {
			pr_debug("object '%s': BPF FS at '%s' doesn't have BPF token delegation set up, skipping...\n",
				 obj->name, bpffs_path);
			return 0;
		}
		__pr(level, "object '%s': failed (%d) to create BPF token from '%s'%s\n",
		     obj->name, token_fd, bpffs_path,
		     mandatory ? "" : ", skipping optional step...");
		return mandatory ? token_fd : 0;
	}

	obj->feat_cache = calloc(1, sizeof(*obj->feat_cache));
	if (!obj->feat_cache) {
		close(token_fd);
		return -ENOMEM;
	}

	obj->token_fd = token_fd;
	obj->feat_cache->token_fd = token_fd;

	return 0;
}
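/* Usage note (illustrative, assuming the bpf_token_path open option): a
 * caller opts into a mandatory token by setting it at open time, e.g.:
 *
 *	LIBBPF_OPTS(bpf_object_open_opts, opts,
 *		.bpf_token_path = "/sys/fs/bpf");
 *	struct bpf_object *obj = bpf_object__open_file("my_obj.bpf.o", &opts);
 *
 * An empty string disables token creation entirely, while leaving the path
 * NULL makes the BPF_FS_DEFAULT_PATH probe above best-effort.
 */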
static int bpf_object__probe_loading(struct bpf_object *obj)
{
	char *cp, errmsg[STRERR_BUFSIZE];
	struct bpf_insn insns[] = {
		BPF_MOV64_IMM(BPF_REG_0, 0),
		BPF_EXIT_INSN(),
	};
	int ret, insn_cnt = ARRAY_SIZE(insns);
	LIBBPF_OPTS(bpf_prog_load_opts, opts,
		.token_fd = obj->token_fd,
		.prog_flags = obj->token_fd ? BPF_F_TOKEN_FD : 0,
	);

	if (obj->gen_loader)
		return 0;

	ret = bump_rlimit_memlock();
	if (ret)
		pr_warn("Failed to bump RLIMIT_MEMLOCK (err = %d), you might need to do it explicitly!\n", ret);

	/* make sure basic loading works */
	ret = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, NULL, "GPL", insns, insn_cnt, &opts);
	if (ret < 0)
		ret = bpf_prog_load(BPF_PROG_TYPE_TRACEPOINT, NULL, "GPL", insns, insn_cnt, &opts);
	if (ret < 0) {
		ret = errno;
		cp = libbpf_strerror_r(ret, errmsg, sizeof(errmsg));
		pr_warn("Error in %s():%s(%d). Couldn't load trivial BPF "
			"program. Make sure your kernel supports BPF "
			"(CONFIG_BPF_SYSCALL=y) and/or that RLIMIT_MEMLOCK is "
			"set to big enough value.\n", __func__, cp, ret);
		return -ret;
	}
	close(ret);

	return 0;
}
bool kernel_supports(const struct bpf_object *obj, enum kern_feature_id feat_id)
{
	if (obj->gen_loader)
		/* To generate loader program assume the latest kernel
		 * to avoid doing extra prog_load, map_create syscalls.
		 */
		return true;

	if (obj->token_fd)
		return feat_supported(obj->feat_cache, feat_id);

	return feat_supported(NULL, feat_id);
}
static bool map_is_reuse_compat(const struct bpf_map *map, int map_fd)
{
	struct bpf_map_info map_info;
	char msg[STRERR_BUFSIZE];
	__u32 map_info_len = sizeof(map_info);
	int err;

	memset(&map_info, 0, map_info_len);
	err = bpf_map_get_info_by_fd(map_fd, &map_info, &map_info_len);
	if (err && errno == EINVAL)
		err = bpf_get_map_info_from_fdinfo(map_fd, &map_info);
	if (err) {
		pr_warn("failed to get map info for map FD %d: %s\n", map_fd,
			libbpf_strerror_r(errno, msg, sizeof(msg)));
		return false;
	}

	return (map_info.type == map->def.type &&
		map_info.key_size == map->def.key_size &&
		map_info.value_size == map->def.value_size &&
		map_info.max_entries == map->def.max_entries &&
		map_info.map_flags == map->def.map_flags &&
		map_info.map_extra == map->map_extra);
}
static int
bpf_object__reuse_map(struct bpf_map *map)
{
	char *cp, errmsg[STRERR_BUFSIZE];
	int err, pin_fd;

	pin_fd = bpf_obj_get(map->pin_path);
	if (pin_fd < 0) {
		err = -errno;
		if (err == -ENOENT) {
			pr_debug("found no pinned map to reuse at '%s'\n",
				 map->pin_path);
			return 0;
		}

		cp = libbpf_strerror_r(-err, errmsg, sizeof(errmsg));
		pr_warn("couldn't retrieve pinned map '%s': %s\n",
			map->pin_path, cp);
		return err;
	}

	if (!map_is_reuse_compat(map, pin_fd)) {
		pr_warn("couldn't reuse pinned map at '%s': parameter mismatch\n",
			map->pin_path);
		close(pin_fd);
		return -EINVAL;
	}

	err = bpf_map__reuse_fd(map, pin_fd);
	close(pin_fd);
	if (err)
		return err;
	map->pinned = true;
	pr_debug("reused pinned map at '%s'\n", map->pin_path);

	return 0;
}
static int
bpf_object__populate_internal_map(struct bpf_object *obj, struct bpf_map *map)
{
	enum libbpf_map_type map_type = map->libbpf_type;
	char *cp, errmsg[STRERR_BUFSIZE];
	int err, zero = 0;

	if (obj->gen_loader) {
		bpf_gen__map_update_elem(obj->gen_loader, map - obj->maps,
					 map->mmaped, map->def.value_size);
		if (map_type == LIBBPF_MAP_RODATA || map_type == LIBBPF_MAP_KCONFIG)
			bpf_gen__map_freeze(obj->gen_loader, map - obj->maps);
		return 0;
	}

	err = bpf_map_update_elem(map->fd, &zero, map->mmaped, 0);
	if (err) {
		err = -errno;
		cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg));
		pr_warn("Error setting initial map(%s) contents: %s\n",
			map->name, cp);
		return err;
	}

	/* Freeze .rodata and .kconfig map as read-only from syscall side. */
	if (map_type == LIBBPF_MAP_RODATA || map_type == LIBBPF_MAP_KCONFIG) {
		err = bpf_map_freeze(map->fd);
		if (err) {
			err = -errno;
			cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg));
			pr_warn("Error freezing map(%s) as read-only: %s\n",
				map->name, cp);
			return err;
		}
	}
	return 0;
}
static void bpf_map__destroy(struct bpf_map *map);

static bool map_is_created(const struct bpf_map *map)
{
	return map->obj->loaded || map->reused;
}
static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map, bool is_inner)
{
	LIBBPF_OPTS(bpf_map_create_opts, create_attr);
	struct bpf_map_def *def = &map->def;
	const char *map_name = NULL;
	int err = 0, map_fd;

	if (kernel_supports(obj, FEAT_PROG_NAME))
		map_name = map->name;
	create_attr.map_ifindex = map->map_ifindex;
	create_attr.map_flags = def->map_flags;
	create_attr.numa_node = map->numa_node;
	create_attr.map_extra = map->map_extra;
	create_attr.token_fd = obj->token_fd;
	if (obj->token_fd)
		create_attr.map_flags |= BPF_F_TOKEN_FD;

	if (bpf_map__is_struct_ops(map)) {
		create_attr.btf_vmlinux_value_type_id = map->btf_vmlinux_value_type_id;
		if (map->mod_btf_fd >= 0) {
			create_attr.value_type_btf_obj_fd = map->mod_btf_fd;
			create_attr.map_flags |= BPF_F_VTYPE_BTF_OBJ_FD;
		}
	}

	if (obj->btf && btf__fd(obj->btf) >= 0) {
		create_attr.btf_fd = btf__fd(obj->btf);
		create_attr.btf_key_type_id = map->btf_key_type_id;
		create_attr.btf_value_type_id = map->btf_value_type_id;
	}

	if (bpf_map_type__is_map_in_map(def->type)) {
		if (map->inner_map) {
			err = map_set_def_max_entries(map->inner_map);
			if (err)
				return err;
			err = bpf_object__create_map(obj, map->inner_map, true);
			if (err) {
				pr_warn("map '%s': failed to create inner map: %d\n",
					map->name, err);
				return err;
			}
			map->inner_map_fd = map->inner_map->fd;
		}
		if (map->inner_map_fd >= 0)
			create_attr.inner_map_fd = map->inner_map_fd;
	}

	switch (def->type) {
	case BPF_MAP_TYPE_PERF_EVENT_ARRAY:
	case BPF_MAP_TYPE_CGROUP_ARRAY:
	case BPF_MAP_TYPE_STACK_TRACE:
	case BPF_MAP_TYPE_ARRAY_OF_MAPS:
	case BPF_MAP_TYPE_HASH_OF_MAPS:
	case BPF_MAP_TYPE_DEVMAP:
	case BPF_MAP_TYPE_DEVMAP_HASH:
	case BPF_MAP_TYPE_CPUMAP:
	case BPF_MAP_TYPE_XSKMAP:
	case BPF_MAP_TYPE_SOCKMAP:
	case BPF_MAP_TYPE_SOCKHASH:
	case BPF_MAP_TYPE_QUEUE:
	case BPF_MAP_TYPE_STACK:
		create_attr.btf_fd = 0;
		create_attr.btf_key_type_id = 0;
		create_attr.btf_value_type_id = 0;
		map->btf_key_type_id = 0;
		map->btf_value_type_id = 0;
		break;
	case BPF_MAP_TYPE_STRUCT_OPS:
		create_attr.btf_value_type_id = 0;
		break;
	default:
		break;
	}

	if (obj->gen_loader) {
		bpf_gen__map_create(obj->gen_loader, def->type, map_name,
				    def->key_size, def->value_size, def->max_entries,
				    &create_attr, is_inner ? -1 : map - obj->maps);
		/* We keep pretending we have a valid FD to pass various fd >= 0
		 * checks by just keeping original placeholder FDs in place.
		 * See bpf_object__add_map() comment.
		 * This placeholder fd will not be used with any syscall and
		 * will be reset to -1 eventually.
		 */
		map_fd = map->fd;
	} else {
		map_fd = bpf_map_create(def->type, map_name,
					def->key_size, def->value_size,
					def->max_entries, &create_attr);
	}
	if (map_fd < 0 && (create_attr.btf_key_type_id || create_attr.btf_value_type_id)) {
		char *cp, errmsg[STRERR_BUFSIZE];

		err = -errno;
		cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg));
		pr_warn("Error in bpf_create_map_xattr(%s):%s(%d). Retrying without BTF.\n",
			map->name, cp, err);
		create_attr.btf_fd = 0;
		create_attr.btf_key_type_id = 0;
		create_attr.btf_value_type_id = 0;
		map->btf_key_type_id = 0;
		map->btf_value_type_id = 0;
		map_fd = bpf_map_create(def->type, map_name,
					def->key_size, def->value_size,
					def->max_entries, &create_attr);
	}

	if (bpf_map_type__is_map_in_map(def->type) && map->inner_map) {
		if (obj->gen_loader)
			map->inner_map->fd = -1;
		bpf_map__destroy(map->inner_map);
		zfree(&map->inner_map);
	}

	if (map_fd < 0)
		return map_fd;

	/* obj->gen_loader case, prevent reuse_fd() from closing map_fd */
	if (map->fd == map_fd)
		return 0;

	/* Keep placeholder FD value but now point it to the BPF map object.
	 * This way everything that relied on this map's FD (e.g., relocated
	 * ldimm64 instructions) will stay valid and won't need adjustments.
	 * map->fd stays valid but now points to what map_fd points to.
	 */
	return reuse_fd(map->fd, map_fd);
}
static int init_map_in_map_slots(struct bpf_object *obj, struct bpf_map *map)
{
	const struct bpf_map *targ_map;
	unsigned int i;
	int fd, err = 0;

	for (i = 0; i < map->init_slots_sz; i++) {
		if (!map->init_slots[i])
			continue;

		targ_map = map->init_slots[i];
		fd = targ_map->fd;

		if (obj->gen_loader) {
			bpf_gen__populate_outer_map(obj->gen_loader,
						    map - obj->maps, i,
						    targ_map - obj->maps);
		} else {
			err = bpf_map_update_elem(map->fd, &i, &fd, 0);
		}
		if (err) {
			err = -errno;
			pr_warn("map '%s': failed to initialize slot [%d] to map '%s' fd=%d: %d\n",
				map->name, i, targ_map->name, fd, err);
			return err;
		}
		pr_debug("map '%s': slot [%d] set to map '%s' fd=%d\n",
			 map->name, i, targ_map->name, fd);
	}

	zfree(&map->init_slots);
	map->init_slots_sz = 0;

	return 0;
}
static int init_prog_array_slots(struct bpf_object *obj, struct bpf_map *map)
{
	const struct bpf_program *targ_prog;
	unsigned int i;
	int fd, err;

	if (obj->gen_loader)
		return -ENOTSUP;

	for (i = 0; i < map->init_slots_sz; i++) {
		if (!map->init_slots[i])
			continue;

		targ_prog = map->init_slots[i];
		fd = bpf_program__fd(targ_prog);

		err = bpf_map_update_elem(map->fd, &i, &fd, 0);
		if (err) {
			err = -errno;
			pr_warn("map '%s': failed to initialize slot [%d] to prog '%s' fd=%d: %d\n",
				map->name, i, targ_prog->name, fd, err);
			return err;
		}
		pr_debug("map '%s': slot [%d] set to prog '%s' fd=%d\n",
			 map->name, i, targ_prog->name, fd);
	}

	zfree(&map->init_slots);
	map->init_slots_sz = 0;

	return 0;
}
static int bpf_object_init_prog_arrays(struct bpf_object *obj)
{
	struct bpf_map *map;
	int i, err;

	for (i = 0; i < obj->nr_maps; i++) {
		map = &obj->maps[i];

		if (!map->init_slots_sz || map->def.type != BPF_MAP_TYPE_PROG_ARRAY)
			continue;

		err = init_prog_array_slots(obj, map);
		if (err)
			return err;
	}
	return 0;
}
static int map_set_def_max_entries(struct bpf_map *map)
{
	if (map->def.type == BPF_MAP_TYPE_PERF_EVENT_ARRAY && !map->def.max_entries) {
		int nr_cpus;

		nr_cpus = libbpf_num_possible_cpus();
		if (nr_cpus < 0) {
			pr_warn("map '%s': failed to determine number of system CPUs: %d\n",
				map->name, nr_cpus);
			return nr_cpus;
		}
		pr_debug("map '%s': setting size to %d\n", map->name, nr_cpus);
		map->def.max_entries = nr_cpus;
	}

	return 0;
}
static int
bpf_object__create_maps(struct bpf_object *obj)
{
	struct bpf_map *map;
	char *cp, errmsg[STRERR_BUFSIZE];
	unsigned int i, j;
	int err;
	bool retried;

	for (i = 0; i < obj->nr_maps; i++) {
		map = &obj->maps[i];

		/* To support old kernels, we skip creating global data maps
		 * (.rodata, .data, .kconfig, etc); later on, during program
		 * loading, if we detect that at least one of the to-be-loaded
		 * programs is referencing any global data map, we'll error
		 * out with program name and relocation index logged.
		 * This approach lets us accommodate Clang emitting
		 * unnecessary .rodata.str1.1 sections for string literals,
		 * and also allows CO-RE applications that use global
		 * variables in some of their BPF programs, but not others.
		 * If those global variable-using programs are not loaded at
		 * runtime due to bpf_program__set_autoload(prog, false),
		 * bpf_object loading will succeed just fine even on old
		 * kernels.
		 */
		if (bpf_map__is_internal(map) && !kernel_supports(obj, FEAT_GLOBAL_DATA))
			map->autocreate = false;

		if (!map->autocreate) {
			pr_debug("map '%s': skipped auto-creating...\n", map->name);
			continue;
		}

		err = map_set_def_max_entries(map);
		if (err)
			goto err_out;

		retried = false;
retry:
		if (map->pin_path) {
			err = bpf_object__reuse_map(map);
			if (err) {
				pr_warn("map '%s': error reusing pinned map\n",
					map->name);
				goto err_out;
			}
			if (retried && map->fd < 0) {
				pr_warn("map '%s': cannot find pinned map\n",
					map->name);
				err = -ENOENT;
				goto err_out;
			}
		}

		if (map->reused) {
			pr_debug("map '%s': skipping creation (preset fd=%d)\n",
				 map->name, map->fd);
		} else {
			err = bpf_object__create_map(obj, map, false);
			if (err)
				goto err_out;

			pr_debug("map '%s': created successfully, fd=%d\n",
				 map->name, map->fd);

			if (bpf_map__is_internal(map)) {
				err = bpf_object__populate_internal_map(obj, map);
				if (err < 0)
					goto err_out;
			}

			if (map->init_slots_sz && map->def.type != BPF_MAP_TYPE_PROG_ARRAY) {
				err = init_map_in_map_slots(obj, map);
				if (err < 0)
					goto err_out;
			}
		}

		if (map->pin_path && !map->pinned) {
			err = bpf_map__pin(map, NULL);
			if (err) {
				if (!retried && err == -EEXIST) {
					retried = true;
					goto retry;
				}
				pr_warn("map '%s': failed to auto-pin at '%s': %d\n",
					map->name, map->pin_path, err);
				goto err_out;
			}
		}
	}

	return 0;

err_out:
	cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg));
	pr_warn("map '%s': failed to create: %s(%d)\n", map->name, cp, err);
	for (j = 0; j < i; j++)
		zclose(obj->maps[j].fd);
	return err;
}
static bool bpf_core_is_flavor_sep(const char *s)
{
	/* check X___Y name pattern, where X and Y are not underscores */
	return s[0] != '_' &&				      /* X */
	       s[1] == '_' && s[2] == '_' && s[3] == '_' &&   /* ___ */
	       s[4] != '_';				      /* Y */
}

/* Given 'some_struct_name___with_flavor' return the length of a name prefix
 * before last triple underscore. Struct name part after last triple
 * underscore is ignored by BPF CO-RE relocation during relocation matching.
 */
size_t bpf_core_essential_name_len(const char *name)
{
	size_t n = strlen(name);
	int i;

	for (i = n - 5; i >= 0; i--) {
		if (bpf_core_is_flavor_sep(name + i))
			return i + 1;
	}
	return n;
}
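/* Example (illustrative): for a CO-RE "flavor" definition like
 *
 *	struct task_struct___old { ... };
 *
 * bpf_core_essential_name_len("task_struct___old") returns
 * strlen("task_struct") == 11, so candidate matching compares only the
 * "task_struct" prefix against target BTF type names.
 */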
void bpf_core_free_cands(struct bpf_core_cand_list *cands)
{
	if (!cands)
		return;

	free(cands->cands);
	free(cands);
}
int bpf_core_add_cands(struct bpf_core_cand *local_cand,
		       size_t local_essent_len,
		       const struct btf *targ_btf,
		       const char *targ_btf_name,
		       int targ_start_id,
		       struct bpf_core_cand_list *cands)
{
	struct bpf_core_cand *new_cands, *cand;
	const struct btf_type *t, *local_t;
	const char *targ_name, *local_name;
	size_t targ_essent_len;
	int n, i;

	local_t = btf__type_by_id(local_cand->btf, local_cand->id);
	local_name = btf__str_by_offset(local_cand->btf, local_t->name_off);

	n = btf__type_cnt(targ_btf);
	for (i = targ_start_id; i < n; i++) {
		t = btf__type_by_id(targ_btf, i);
		if (!btf_kind_core_compat(t, local_t))
			continue;

		targ_name = btf__name_by_offset(targ_btf, t->name_off);
		if (str_is_empty(targ_name))
			continue;

		targ_essent_len = bpf_core_essential_name_len(targ_name);
		if (targ_essent_len != local_essent_len)
			continue;

		if (strncmp(local_name, targ_name, local_essent_len) != 0)
			continue;

		pr_debug("CO-RE relocating [%d] %s %s: found target candidate [%d] %s %s in [%s]\n",
			 local_cand->id, btf_kind_str(local_t),
			 local_name, i, btf_kind_str(t), targ_name,
			 targ_btf_name);
		new_cands = libbpf_reallocarray(cands->cands, cands->len + 1,
						sizeof(*cands->cands));
		if (!new_cands)
			return -ENOMEM;

		cand = &new_cands[cands->len];
		cand->btf = targ_btf;
		cand->id = i;

		cands->cands = new_cands;
		cands->len++;
	}
	return 0;
}
static int load_module_btfs(struct bpf_object *obj)
{
	struct bpf_btf_info info;
	struct module_btf *mod_btf;
	struct btf *btf;
	char name[64];
	__u32 id = 0, len;
	int err, fd;

	if (obj->btf_modules_loaded)
		return 0;

	if (obj->gen_loader)
		return 0;

	/* don't do this again, even if we find no module BTFs */
	obj->btf_modules_loaded = true;

	/* kernel too old to support module BTFs */
	if (!kernel_supports(obj, FEAT_MODULE_BTF))
		return 0;

	while (true) {
		err = bpf_btf_get_next_id(id, &id);
		if (err && errno == ENOENT)
			return 0;
		if (err && errno == EPERM) {
			pr_debug("skipping module BTFs loading, missing privileges\n");
			return 0;
		}
		if (err) {
			err = -errno;
			pr_warn("failed to iterate BTF objects: %d\n", err);
			return err;
		}

		fd = bpf_btf_get_fd_by_id(id);
		if (fd < 0) {
			if (errno == ENOENT)
				continue; /* expected race: BTF was unloaded */
			err = -errno;
			pr_warn("failed to get BTF object #%d FD: %d\n", id, err);
			return err;
		}

		len = sizeof(info);
		memset(&info, 0, sizeof(info));
		info.name = ptr_to_u64(name);
		info.name_len = sizeof(name);

		err = bpf_btf_get_info_by_fd(fd, &info, &len);
		if (err) {
			err = -errno;
			pr_warn("failed to get BTF object #%d info: %d\n", id, err);
			goto err_out;
		}

		/* ignore non-module BTFs */
		if (!info.kernel_btf || strcmp(name, "vmlinux") == 0) {
			close(fd);
			continue;
		}

		btf = btf_get_from_fd(fd, obj->btf_vmlinux);
		err = libbpf_get_error(btf);
		if (err) {
			pr_warn("failed to load module [%s]'s BTF object #%d: %d\n",
				name, id, err);
			goto err_out;
		}

		err = libbpf_ensure_mem((void **)&obj->btf_modules, &obj->btf_module_cap,
					sizeof(*obj->btf_modules), obj->btf_module_cnt + 1);
		if (err)
			goto err_out;

		mod_btf = &obj->btf_modules[obj->btf_module_cnt++];

		mod_btf->btf = btf;
		mod_btf->id = id;
		mod_btf->fd = fd;
		mod_btf->name = strdup(name);
		if (!mod_btf->name) {
			err = -ENOMEM;
			goto err_out;
		}
		continue;

err_out:
		close(fd);
		return err;
	}

	return 0;
}
static struct bpf_core_cand_list *
bpf_core_find_cands(struct bpf_object *obj, const struct btf *local_btf, __u32 local_type_id)
{
	struct bpf_core_cand local_cand = {};
	struct bpf_core_cand_list *cands;
	const struct btf *main_btf;
	const struct btf_type *local_t;
	const char *local_name;
	size_t local_essent_len;
	int err, i;

	local_cand.btf = local_btf;
	local_cand.id = local_type_id;
	local_t = btf__type_by_id(local_btf, local_type_id);
	if (!local_t)
		return ERR_PTR(-EINVAL);

	local_name = btf__name_by_offset(local_btf, local_t->name_off);
	if (str_is_empty(local_name))
		return ERR_PTR(-EINVAL);
	local_essent_len = bpf_core_essential_name_len(local_name);

	cands = calloc(1, sizeof(*cands));
	if (!cands)
		return ERR_PTR(-ENOMEM);

	/* Attempt to find target candidates in vmlinux BTF first */
	main_btf = obj->btf_vmlinux_override ?: obj->btf_vmlinux;
	err = bpf_core_add_cands(&local_cand, local_essent_len, main_btf, "vmlinux", 1, cands);
	if (err)
		goto err_out;

	/* if vmlinux BTF has any candidate, don't go for module BTFs */
	if (cands->len)
		return cands;

	/* if vmlinux BTF was overridden, don't attempt to load module BTFs */
	if (obj->btf_vmlinux_override)
		return cands;

	/* now look through module BTFs, trying to still find candidates */
	err = load_module_btfs(obj);
	if (err)
		goto err_out;

	for (i = 0; i < obj->btf_module_cnt; i++) {
		err = bpf_core_add_cands(&local_cand, local_essent_len,
					 obj->btf_modules[i].btf,
					 obj->btf_modules[i].name,
					 btf__type_cnt(obj->btf_vmlinux),
					 cands);
		if (err)
			goto err_out;
	}

	return cands;
err_out:
	bpf_core_free_cands(cands);
	return ERR_PTR(err);
}
/* Check local and target types for compatibility. This check is used for
 * type-based CO-RE relocations and follows slightly different rules than
 * field-based relocations. This function assumes that root types were already
 * checked for name match. Beyond that initial root-level name check, names
 * are completely ignored. Compatibility rules are as follows:
 * - any two STRUCTs/UNIONs/FWDs/ENUMs/INTs are considered compatible, but
 *   kind should match for local and target types (i.e., STRUCT is not
 *   compatible with UNION);
 * - for ENUMs, the size is ignored;
 * - for INT, size and signedness are ignored;
 * - for ARRAY, dimensionality is ignored, element types are checked for
 *   compatibility recursively;
 * - CONST/VOLATILE/RESTRICT modifiers are ignored;
 * - TYPEDEFs/PTRs are compatible if the types they point to are compatible;
 * - FUNC_PROTOs are compatible if they have compatible signature: same
 *   number of input args and compatible return and argument types.
 * These rules are not set in stone and probably will be adjusted as we get
 * more experience with using BPF CO-RE relocations.
 */
int bpf_core_types_are_compat(const struct btf *local_btf, __u32 local_id,
			      const struct btf *targ_btf, __u32 targ_id)
{
	return __bpf_core_types_are_compat(local_btf, local_id, targ_btf, targ_id, 32);
}

int bpf_core_types_match(const struct btf *local_btf, __u32 local_id,
			 const struct btf *targ_btf, __u32 targ_id)
{
	return __bpf_core_types_match(local_btf, local_id, targ_btf, targ_id, false, 32);
}
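/* Example (illustrative): under the rules above, a local flavor such as
 *
 *	struct sk_buff___local { int len; };
 *
 * is type-compatible with a target 'struct sk_buff' regardless of member
 * lists, because root names were already matched (modulo flavor suffix) and
 * the STRUCT kind matches; it would not be compatible with a hypothetical
 * 'union sk_buff' of the same name, since kinds differ.
 */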
static size_t bpf_core_hash_fn(const long key, void *ctx)
{
	return key;
}

static bool bpf_core_equal_fn(const long k1, const long k2, void *ctx)
{
	return k1 == k2;
}

static int record_relo_core(struct bpf_program *prog,
			    const struct bpf_core_relo *core_relo, int insn_idx)
{
	struct reloc_desc *relos, *relo;

	relos = libbpf_reallocarray(prog->reloc_desc,
				    prog->nr_reloc + 1, sizeof(*relos));
	if (!relos)
		return -ENOMEM;
	relo = &relos[prog->nr_reloc];
	relo->type = RELO_CORE;
	relo->insn_idx = insn_idx;
	relo->core_relo = core_relo;
	prog->reloc_desc = relos;
	prog->nr_reloc++;
	return 0;
}

static const struct bpf_core_relo *find_relo_core(struct bpf_program *prog, int insn_idx)
{
	struct reloc_desc *relo;
	int i;

	for (i = 0; i < prog->nr_reloc; i++) {
		relo = &prog->reloc_desc[i];
		if (relo->type != RELO_CORE || relo->insn_idx != insn_idx)
			continue;

		return relo->core_relo;
	}
	return NULL;
}
static int bpf_core_resolve_relo(struct bpf_program *prog,
				 const struct bpf_core_relo *relo,
				 int relo_idx,
				 const struct btf *local_btf,
				 struct hashmap *cand_cache,
				 struct bpf_core_relo_res *targ_res)
{
	struct bpf_core_spec specs_scratch[3] = {};
	struct bpf_core_cand_list *cands = NULL;
	const char *prog_name = prog->name;
	const struct btf_type *local_type;
	const char *local_name;
	__u32 local_id = relo->type_id;
	int err;

	local_type = btf__type_by_id(local_btf, local_id);
	if (!local_type)
		return -EINVAL;

	local_name = btf__name_by_offset(local_btf, local_type->name_off);
	if (!local_name)
		return -EINVAL;

	if (relo->kind != BPF_CORE_TYPE_ID_LOCAL &&
	    !hashmap__find(cand_cache, local_id, &cands)) {
		cands = bpf_core_find_cands(prog->obj, local_btf, local_id);
		if (IS_ERR(cands)) {
			pr_warn("prog '%s': relo #%d: target candidate search failed for [%d] %s %s: %ld\n",
				prog_name, relo_idx, local_id, btf_kind_str(local_type),
				local_name, PTR_ERR(cands));
			return PTR_ERR(cands);
		}
		err = hashmap__set(cand_cache, local_id, cands, NULL, NULL);
		if (err) {
			bpf_core_free_cands(cands);
			return err;
		}
	}

	return bpf_core_calc_relo_insn(prog_name, relo, relo_idx, local_btf, cands, specs_scratch,
				       targ_res);
}
static int
bpf_object__relocate_core(struct bpf_object *obj, const char *targ_btf_path)
{
	const struct btf_ext_info_sec *sec;
	struct bpf_core_relo_res targ_res;
	const struct bpf_core_relo *rec;
	const struct btf_ext_info *seg;
	struct hashmap_entry *entry;
	struct hashmap *cand_cache = NULL;
	struct bpf_program *prog;
	struct bpf_insn *insn;
	const char *sec_name;
	int i, err = 0, insn_idx, sec_idx, sec_num;

	if (obj->btf_ext->core_relo_info.len == 0)
		return 0;

	if (targ_btf_path) {
		obj->btf_vmlinux_override = btf__parse(targ_btf_path, NULL);
		err = libbpf_get_error(obj->btf_vmlinux_override);
		if (err) {
			pr_warn("failed to parse target BTF: %d\n", err);
			return err;
		}
	}

	cand_cache = hashmap__new(bpf_core_hash_fn, bpf_core_equal_fn, NULL);
	if (IS_ERR(cand_cache)) {
		err = PTR_ERR(cand_cache);
		goto out;
	}

	seg = &obj->btf_ext->core_relo_info;
	sec_num = 0;
	for_each_btf_ext_sec(seg, sec) {
		sec_idx = seg->sec_idxs[sec_num];
		sec_num++;

		sec_name = btf__name_by_offset(obj->btf, sec->sec_name_off);
		if (str_is_empty(sec_name)) {
			err = -EINVAL;
			goto out;
		}

		pr_debug("sec '%s': found %d CO-RE relocations\n", sec_name, sec->num_info);

		for_each_btf_ext_rec(seg, sec, i, rec) {
			if (rec->insn_off % BPF_INSN_SZ)
				return -EINVAL;
			insn_idx = rec->insn_off / BPF_INSN_SZ;
			prog = find_prog_by_sec_insn(obj, sec_idx, insn_idx);
			if (!prog) {
				/* When __weak subprog is "overridden" by another instance
				 * of the subprog from a different object file, linker still
				 * appends all the .BTF.ext info that used to belong to that
				 * eliminated subprogram.
				 * This is similar to what x86-64 linker does for relocations.
				 * So just ignore such relocations just like we ignore
				 * subprog instructions when discovering subprograms.
				 */
				pr_debug("sec '%s': skipping CO-RE relocation #%d for insn #%d belonging to eliminated weak subprogram\n",
					 sec_name, i, insn_idx);
				continue;
			}
			/* no need to apply CO-RE relocation if the program is
			 * not going to be loaded
			 */
			if (!prog->autoload)
				continue;

			/* adjust insn_idx from section frame of reference to the local
			 * program's frame of reference; (sub-)program code is not yet
			 * relocated, so it's enough to just subtract in-section offset
			 */
			insn_idx = insn_idx - prog->sec_insn_off;
			if (insn_idx >= prog->insns_cnt)
				return -EINVAL;
			insn = &prog->insns[insn_idx];

			err = record_relo_core(prog, rec, insn_idx);
			if (err) {
				pr_warn("prog '%s': relo #%d: failed to record relocation: %d\n",
					prog->name, i, err);
				goto out;
			}

			if (prog->obj->gen_loader)
				continue;

			err = bpf_core_resolve_relo(prog, rec, i, obj->btf, cand_cache, &targ_res);
			if (err) {
				pr_warn("prog '%s': relo #%d: failed to relocate: %d\n",
					prog->name, i, err);
				goto out;
			}

			err = bpf_core_patch_insn(prog->name, insn, insn_idx, rec, i, &targ_res);
			if (err) {
				pr_warn("prog '%s': relo #%d: failed to patch insn #%u: %d\n",
					prog->name, i, insn_idx, err);
				goto out;
			}
		}
	}

out:
	/* obj->btf_vmlinux and module BTFs are freed after object load */
	btf__free(obj->btf_vmlinux_override);
	obj->btf_vmlinux_override = NULL;

	if (!IS_ERR_OR_NULL(cand_cache)) {
		hashmap__for_each_entry(cand_cache, entry, i) {
			bpf_core_free_cands(entry->pvalue);
		}
		hashmap__free(cand_cache);
	}
	return err;
}
/* base map load ldimm64 special constant, used also for log fixup logic */
#define POISON_LDIMM64_MAP_BASE 2001000000
#define POISON_LDIMM64_MAP_PFX "200100"

static void poison_map_ldimm64(struct bpf_program *prog, int relo_idx,
			       int insn_idx, struct bpf_insn *insn,
			       int map_idx, const struct bpf_map *map)
{
	int i;

	pr_debug("prog '%s': relo #%d: poisoning insn #%d that loads map #%d '%s'\n",
		 prog->name, relo_idx, insn_idx, map_idx, map->name);

	/* we turn single ldimm64 into two identical invalid calls */
	for (i = 0; i < 2; i++) {
		insn->code = BPF_JMP | BPF_CALL;
		insn->dst_reg = 0;
		insn->src_reg = 0;
		insn->off = 0;
		/* if this instruction is reachable (not dead code),
		 * verifier will complain with something like:
		 * invalid func unknown#2001000123
		 * where lower 123 is map index into obj->maps[] array
		 */
		insn->imm = POISON_LDIMM64_MAP_BASE + map_idx;

		insn++;
	}
}
/* unresolved kfunc call special constant, used also for log fixup logic */
#define POISON_CALL_KFUNC_BASE 2002000000
#define POISON_CALL_KFUNC_PFX "2002"

static void poison_kfunc_call(struct bpf_program *prog, int relo_idx,
			      int insn_idx, struct bpf_insn *insn,
			      int ext_idx, const struct extern_desc *ext)
{
	pr_debug("prog '%s': relo #%d: poisoning insn #%d that calls kfunc '%s'\n",
		 prog->name, relo_idx, insn_idx, ext->name);

	/* we turn kfunc call into invalid helper call with identifiable constant */
	insn->code = BPF_JMP | BPF_CALL;
	insn->dst_reg = 0;
	insn->src_reg = 0;
	insn->off = 0;
	/* if this instruction is reachable (not dead code),
	 * verifier will complain with something like:
	 * invalid func unknown#2002000123
	 * where lower 123 is extern index into obj->externs[] array
	 */
	insn->imm = POISON_CALL_KFUNC_BASE + ext_idx;
}
/* Relocate data references within program code:
 *  - map references;
 *  - global variable references;
 *  - extern references.
 */
static int
bpf_object__relocate_data(struct bpf_object *obj, struct bpf_program *prog)
{
	int i;

	for (i = 0; i < prog->nr_reloc; i++) {
		struct reloc_desc *relo = &prog->reloc_desc[i];
		struct bpf_insn *insn = &prog->insns[relo->insn_idx];
		const struct bpf_map *map;
		struct extern_desc *ext;

		switch (relo->type) {
		case RELO_LD64:
			map = &obj->maps[relo->map_idx];
			if (obj->gen_loader) {
				insn[0].src_reg = BPF_PSEUDO_MAP_IDX;
				insn[0].imm = relo->map_idx;
			} else if (map->autocreate) {
				insn[0].src_reg = BPF_PSEUDO_MAP_FD;
				insn[0].imm = map->fd;
			} else {
				poison_map_ldimm64(prog, i, relo->insn_idx, insn,
						   relo->map_idx, map);
			}
			break;
		case RELO_DATA:
			map = &obj->maps[relo->map_idx];
			insn[1].imm = insn[0].imm + relo->sym_off;
			if (obj->gen_loader) {
				insn[0].src_reg = BPF_PSEUDO_MAP_IDX_VALUE;
				insn[0].imm = relo->map_idx;
			} else if (map->autocreate) {
				insn[0].src_reg = BPF_PSEUDO_MAP_VALUE;
				insn[0].imm = map->fd;
			} else {
				poison_map_ldimm64(prog, i, relo->insn_idx, insn,
						   relo->map_idx, map);
			}
			break;
		case RELO_EXTERN_LD64:
			ext = &obj->externs[relo->ext_idx];
			if (ext->type == EXT_KCFG) {
				if (obj->gen_loader) {
					insn[0].src_reg = BPF_PSEUDO_MAP_IDX_VALUE;
					insn[0].imm = obj->kconfig_map_idx;
				} else {
					insn[0].src_reg = BPF_PSEUDO_MAP_VALUE;
					insn[0].imm = obj->maps[obj->kconfig_map_idx].fd;
				}
				insn[1].imm = ext->kcfg.data_off;
			} else /* EXT_KSYM */ {
				if (ext->ksym.type_id && ext->is_set) { /* typed ksyms */
					insn[0].src_reg = BPF_PSEUDO_BTF_ID;
					insn[0].imm = ext->ksym.kernel_btf_id;
					insn[1].imm = ext->ksym.kernel_btf_obj_fd;
				} else { /* typeless ksyms or unresolved typed ksyms */
					insn[0].imm = (__u32)ext->ksym.addr;
					insn[1].imm = ext->ksym.addr >> 32;
				}
			}
			break;
		case RELO_EXTERN_CALL:
			ext = &obj->externs[relo->ext_idx];
			insn[0].src_reg = BPF_PSEUDO_KFUNC_CALL;
			if (ext->is_set) {
				insn[0].imm = ext->ksym.kernel_btf_id;
				insn[0].off = ext->ksym.btf_fd_idx;
			} else { /* unresolved weak kfunc call */
				poison_kfunc_call(prog, i, relo->insn_idx, insn,
						  relo->ext_idx, ext);
			}
			break;
		case RELO_SUBPROG_ADDR:
			if (insn[0].src_reg != BPF_PSEUDO_FUNC) {
				pr_warn("prog '%s': relo #%d: bad insn\n",
					prog->name, i);
				return -EINVAL;
			}
			/* handled already */
			break;
		case RELO_CALL:
			/* handled already */
			break;
		case RELO_CORE:
			/* will be handled by bpf_program_record_relos() */
			break;
		default:
			pr_warn("prog '%s': relo #%d: bad relo type %d\n",
				prog->name, i, relo->type);
			return -EINVAL;
		}
	}

	return 0;
}
static int adjust_prog_btf_ext_info(const struct bpf_object *obj,
				    const struct bpf_program *prog,
				    const struct btf_ext_info *ext_info,
				    void **prog_info, __u32 *prog_rec_cnt,
				    __u32 *prog_rec_sz)
{
	void *copy_start = NULL, *copy_end = NULL;
	void *rec, *rec_end, *new_prog_info;
	const struct btf_ext_info_sec *sec;
	size_t old_sz, new_sz;
	int i, sec_num, sec_idx, off_adj;

	sec_num = 0;
	for_each_btf_ext_sec(ext_info, sec) {
		sec_idx = ext_info->sec_idxs[sec_num];
		sec_num++;
		if (prog->sec_idx != sec_idx)
			continue;

		for_each_btf_ext_rec(ext_info, sec, i, rec) {
			__u32 insn_off = *(__u32 *)rec / BPF_INSN_SZ;

			if (insn_off < prog->sec_insn_off)
				continue;
			if (insn_off >= prog->sec_insn_off + prog->sec_insn_cnt)
				break;

			if (!copy_start)
				copy_start = rec;
			copy_end = rec + ext_info->rec_size;
		}

		if (!copy_start)
			return -ENOENT;

		/* append func/line info of a given (sub-)program to the main
		 * program func/line info
		 */
		old_sz = (size_t)(*prog_rec_cnt) * ext_info->rec_size;
		new_sz = old_sz + (copy_end - copy_start);
		new_prog_info = realloc(*prog_info, new_sz);
		if (!new_prog_info)
			return -ENOMEM;
		*prog_info = new_prog_info;
		*prog_rec_cnt = new_sz / ext_info->rec_size;
		memcpy(new_prog_info + old_sz, copy_start, copy_end - copy_start);

		/* Kernel instruction offsets are in units of 8-byte
		 * instructions, while .BTF.ext instruction offsets generated
		 * by Clang are in units of bytes. So convert Clang offsets
		 * into kernel offsets and adjust offset according to program
		 * relocated position.
		 */
		off_adj = prog->sub_insn_off - prog->sec_insn_off;
		rec = new_prog_info + old_sz;
		rec_end = new_prog_info + new_sz;
		for (; rec < rec_end; rec += ext_info->rec_size) {
			__u32 *insn_off = rec;

			*insn_off = *insn_off / BPF_INSN_SZ + off_adj;
		}
		*prog_rec_sz = ext_info->rec_size;
		return 0;
	}

	return -ENOENT;
}
static int
reloc_prog_func_and_line_info(const struct bpf_object *obj,
			      struct bpf_program *main_prog,
			      const struct bpf_program *prog)
{
	int err;

	/* no .BTF.ext relocation if .BTF.ext is missing or kernel doesn't
	 * support func/line info
	 */
	if (!obj->btf_ext || !kernel_supports(obj, FEAT_BTF_FUNC))
		return 0;

	/* only attempt func info relocation if main program's func_info
	 * relocation was successful
	 */
	if (main_prog != prog && !main_prog->func_info)
		goto line_info;

	err = adjust_prog_btf_ext_info(obj, prog, &obj->btf_ext->func_info,
				       &main_prog->func_info,
				       &main_prog->func_info_cnt,
				       &main_prog->func_info_rec_size);
	if (err) {
		if (err != -ENOENT) {
			pr_warn("prog '%s': error relocating .BTF.ext function info: %d\n",
				prog->name, err);
			return err;
		}
		if (main_prog->func_info) {
			/*
			 * Some info has already been found, but the last
			 * btf_ext reloc failed, so we have to error out.
			 */
			pr_warn("prog '%s': missing .BTF.ext function info.\n", prog->name);
			return err;
		}
		/* Have problem loading the very first info. Ignore the rest. */
		pr_warn("prog '%s': missing .BTF.ext function info for the main program, skipping all of .BTF.ext func info.\n",
			prog->name);
	}

line_info:
	/* don't relocate line info if main program's relocation failed */
	if (main_prog != prog && !main_prog->line_info)
		return 0;

	err = adjust_prog_btf_ext_info(obj, prog, &obj->btf_ext->line_info,
				       &main_prog->line_info,
				       &main_prog->line_info_cnt,
				       &main_prog->line_info_rec_size);
	if (err) {
		if (err != -ENOENT) {
			pr_warn("prog '%s': error relocating .BTF.ext line info: %d\n",
				prog->name, err);
			return err;
		}
		if (main_prog->line_info) {
			/*
			 * Some info has already been found, but the last
			 * btf_ext reloc failed, so we have to error out.
			 */
			pr_warn("prog '%s': missing .BTF.ext line info.\n", prog->name);
			return err;
		}
		/* Have problem loading the very first info. Ignore the rest. */
		pr_warn("prog '%s': missing .BTF.ext line info for the main program, skipping all of .BTF.ext line info.\n",
			prog->name);
	}
	return 0;
}
static int cmp_relo_by_insn_idx(const void *key, const void *elem)
{
	size_t insn_idx = *(const size_t *)key;
	const struct reloc_desc *relo = elem;

	if (insn_idx == relo->insn_idx)
		return 0;
	return insn_idx < relo->insn_idx ? -1 : 1;
}

static struct reloc_desc *find_prog_insn_relo(const struct bpf_program *prog, size_t insn_idx)
{
	if (!prog->nr_reloc)
		return NULL;
	return bsearch(&insn_idx, prog->reloc_desc, prog->nr_reloc,
		       sizeof(*prog->reloc_desc), cmp_relo_by_insn_idx);
}
static int append_subprog_relos(struct bpf_program *main_prog, struct bpf_program *subprog)
{
	int new_cnt = main_prog->nr_reloc + subprog->nr_reloc;
	struct reloc_desc *relos;
	int i;

	if (main_prog == subprog)
		return 0;
	relos = libbpf_reallocarray(main_prog->reloc_desc, new_cnt, sizeof(*relos));
	/* if new count is zero, reallocarray can return a valid NULL result;
	 * in this case the previous pointer will be freed, so we *have to*
	 * reassign old pointer to the new value (even if it's NULL)
	 */
	if (!relos && new_cnt)
		return -ENOMEM;
	if (subprog->nr_reloc)
		memcpy(relos + main_prog->nr_reloc, subprog->reloc_desc,
		       sizeof(*relos) * subprog->nr_reloc);

	for (i = main_prog->nr_reloc; i < new_cnt; i++)
		relos[i].insn_idx += subprog->sub_insn_off;
	/* After insn_idx adjustment the 'relos' array is still sorted
	 * by insn_idx and doesn't break bsearch.
	 */
	main_prog->reloc_desc = relos;
	main_prog->nr_reloc = new_cnt;
	return 0;
}
static int
bpf_object__append_subprog_code(struct bpf_object *obj, struct bpf_program *main_prog,
				struct bpf_program *subprog)
{
	struct bpf_insn *insns;
	size_t new_cnt;
	int err;

	subprog->sub_insn_off = main_prog->insns_cnt;

	new_cnt = main_prog->insns_cnt + subprog->insns_cnt;
	insns = libbpf_reallocarray(main_prog->insns, new_cnt, sizeof(*insns));
	if (!insns) {
		pr_warn("prog '%s': failed to realloc prog code\n", main_prog->name);
		return -ENOMEM;
	}
	main_prog->insns = insns;
	main_prog->insns_cnt = new_cnt;

	memcpy(main_prog->insns + subprog->sub_insn_off, subprog->insns,
	       subprog->insns_cnt * sizeof(*insns));

	pr_debug("prog '%s': added %zu insns from sub-prog '%s'\n",
		 main_prog->name, subprog->insns_cnt, subprog->name);

	/* The subprog insns are now appended. Append its relos too. */
	err = append_subprog_relos(main_prog, subprog);
	if (err)
		return err;
	return 0;
}
static int
bpf_object__reloc_code(struct bpf_object *obj, struct bpf_program *main_prog,
		       struct bpf_program *prog)
{
	size_t sub_insn_idx, insn_idx;
	struct bpf_program *subprog;
	struct reloc_desc *relo;
	struct bpf_insn *insn;
	int err;

	err = reloc_prog_func_and_line_info(obj, main_prog, prog);
	if (err)
		return err;

	for (insn_idx = 0; insn_idx < prog->sec_insn_cnt; insn_idx++) {
		insn = &main_prog->insns[prog->sub_insn_off + insn_idx];
		if (!insn_is_subprog_call(insn) && !insn_is_pseudo_func(insn))
			continue;

		relo = find_prog_insn_relo(prog, insn_idx);
		if (relo && relo->type == RELO_EXTERN_CALL)
			/* kfunc relocations will be handled later
			 * in bpf_object__relocate_data()
			 */
			continue;
		if (relo && relo->type != RELO_CALL && relo->type != RELO_SUBPROG_ADDR) {
			pr_warn("prog '%s': unexpected relo for insn #%zu, type %d\n",
				prog->name, insn_idx, relo->type);
			return -LIBBPF_ERRNO__RELOC;
		}
		if (relo) {
			/* sub-program instruction index is a combination of
			 * an offset of a symbol pointed to by relocation and
			 * call instruction's imm field; for global functions,
			 * call always has imm = -1, but for static functions
			 * relocation is against STT_SECTION and insn->imm
			 * points to a start of a static function
			 *
			 * for subprog addr relocation, the relo->sym_off + insn->imm is
			 * the byte offset in the corresponding section.
			 */
			if (relo->type == RELO_CALL)
				sub_insn_idx = relo->sym_off / BPF_INSN_SZ + insn->imm + 1;
			else
				sub_insn_idx = (relo->sym_off + insn->imm) / BPF_INSN_SZ;
		} else if (insn_is_pseudo_func(insn)) {
			/*
			 * RELO_SUBPROG_ADDR relo is always emitted even if both
			 * functions are in the same section, so it shouldn't reach here.
			 */
			pr_warn("prog '%s': missing subprog addr relo for insn #%zu\n",
				prog->name, insn_idx);
			return -LIBBPF_ERRNO__RELOC;
		} else {
			/* if subprogram call is to a static function within
			 * the same ELF section, there won't be any relocation
			 * emitted, but it also means there is no additional
			 * offset necessary, insns->imm is relative to
			 * instruction's original position within the section
			 */
			sub_insn_idx = prog->sec_insn_off + insn_idx + insn->imm + 1;
		}

		/* we enforce that sub-programs should be in .text section */
		subprog = find_prog_by_sec_insn(obj, obj->efile.text_shndx, sub_insn_idx);
		if (!subprog) {
			pr_warn("prog '%s': no .text section found yet sub-program call exists\n",
				prog->name);
			return -LIBBPF_ERRNO__RELOC;
		}

		/* if it's the first call instruction calling into this
		 * subprogram (meaning this subprog hasn't been processed
		 * yet) within the context of current main program:
		 *   - append it at the end of main program's instructions block;
		 *   - process it recursively, while current program is put on hold;
		 *   - if that subprogram calls some other not yet processed
		 *   subprogram, same thing will happen recursively until
		 *   there are no more unprocessed subprograms left to append
		 *   and relocate.
		 */
		if (subprog->sub_insn_off == 0) {
			err = bpf_object__append_subprog_code(obj, main_prog, subprog);
			if (err)
				return err;
			err = bpf_object__reloc_code(obj, main_prog, subprog);
			if (err)
				return err;
		}

		/* main_prog->insns memory could have been re-allocated, so
		 * calculate pointer again
		 */
		insn = &main_prog->insns[prog->sub_insn_off + insn_idx];
		/* calculate correct instruction position within current main
		 * prog; each main prog can have a different set of
		 * subprograms appended (potentially in different order as
		 * well), so position of any subprog can be different for
		 * different main programs
		 */
		insn->imm = subprog->sub_insn_off - (prog->sub_insn_off + insn_idx) - 1;

		pr_debug("prog '%s': insn #%zu relocated, imm %d points to subprog '%s' (now at %zu offset)\n",
			 prog->name, insn_idx, insn->imm, subprog->name, subprog->sub_insn_off);
	}

	return 0;
}
/*
 * Relocate sub-program calls.
 *
 * Algorithm operates as follows. Each entry-point BPF program (referred to as
 * main prog) is processed separately. Each subprog (non-entry functions,
 * that can be called from either entry progs or other subprogs) gets its
 * sub_insn_off reset to zero. This serves as indicator that this subprogram
 * hasn't been yet appended and relocated within current main prog. Once it's
 * relocated, sub_insn_off will point at the position within current main prog
 * where given subprog was appended. This will further be used to relocate all
 * the call instructions jumping into this subprog.
 *
 * We start with main program and process all call instructions. If the call
 * is into a subprog that hasn't been processed (i.e., subprog->sub_insn_off
 * is zero), subprog instructions are appended at the end of main program's
 * instruction array. Then main program is "put on hold" while we recursively
 * process newly appended subprogram. If that subprogram calls into another
 * subprogram that hasn't been appended, new subprogram is appended again to
 * the *main* prog's instructions (subprog's instructions are always left
 * untouched, as they need to be in unmodified state for subsequent main progs
 * and subprog instructions are always sent only as part of a main prog) and
 * the process continues recursively. Once all the subprogs called from a main
 * prog or any of its subprogs are appended (and relocated), all their
 * positions within finalized instructions array are known, so it's easy to
 * rewrite call instructions with correct relative offsets, corresponding to
 * desired target subprog.
 *
 * It's important to realize that some subprogs might not be called from some
 * main prog and any of its called/used subprogs. Those will keep their
 * subprog->sub_insn_off as zero at all times and won't be appended to current
 * main prog and won't be relocated within the context of current main prog.
 * They might still be used from other main progs later.
 *
 * Visually this process can be shown as below. Suppose we have two main
 * programs mainA and mainB and BPF object contains three subprogs: subA,
 * subB, and subC. mainA calls only subA, mainB calls only subC, but subA and
 * subC both call subB:
 *
 *        +--------+ +-------+
 *        |        v v       |
 *     +--+---+ +--+-+-+ +---+--+
 *     | subA | | subB | | subC |
 *     +--+---+ +------+ +---+--+
 *        ^                  ^
 *        |                  |
 *    +---+-------+   +------+----+
 *    |   mainA   |   |   mainB   |
 *    +-----------+   +-----------+
 *
 * We'll start relocating mainA, will find subA, append it and start
 * processing subA recursively:
 *
 *     +-----------+------+
 *     |   mainA   | subA |
 *     +-----------+------+
 *
 * At this point we notice that subB is used from subA, so we append it and
 * relocate (there are no further subcalls from subB):
 *
 *     +-----------+------+------+
 *     |   mainA   | subA | subB |
 *     +-----------+------+------+
 *
 * At this point, we relocate subA calls, then go one level up and finish with
 * relocating mainA calls. mainA is done.
 *
 * For mainB process is similar but results in different order. We start with
 * mainB and skip subA and subB, as mainB never calls them (at least
 * directly), but we see subC is needed, so we append and start processing it:
 *
 *     +-----------+------+
 *     |   mainB   | subC |
 *     +-----------+------+
 * Now we see subC needs subB, so we go back to it, append and relocate it:
 *
 *     +-----------+------+------+
 *     |   mainB   | subC | subB |
 *     +-----------+------+------+
 *
 * At this point we unwind recursion, relocate calls in subC, then in mainB.
 */
static int
bpf_object__relocate_calls(struct bpf_object *obj, struct bpf_program *prog)
{
	struct bpf_program *subprog;
	int i, err;

	/* mark all subprogs as not relocated (yet) within the context of
	 * current main program
	 */
	for (i = 0; i < obj->nr_programs; i++) {
		subprog = &obj->programs[i];
		if (!prog_is_subprog(obj, subprog))
			continue;

		subprog->sub_insn_off = 0;
	}

	err = bpf_object__reloc_code(obj, prog, prog);
	if (err)
		return err;

	return 0;
}
static void
bpf_object__free_relocs(struct bpf_object *obj)
{
	struct bpf_program *prog;
	int i;

	/* free up relocation descriptors */
	for (i = 0; i < obj->nr_programs; i++) {
		prog = &obj->programs[i];
		zfree(&prog->reloc_desc);
		prog->nr_reloc = 0;
	}
}

static int cmp_relocs(const void *_a, const void *_b)
{
	const struct reloc_desc *a = _a;
	const struct reloc_desc *b = _b;

	if (a->insn_idx != b->insn_idx)
		return a->insn_idx < b->insn_idx ? -1 : 1;

	/* no two relocations should have the same insn_idx, but ... */
	if (a->type != b->type)
		return a->type < b->type ? -1 : 1;

	return 0;
}

static void bpf_object__sort_relos(struct bpf_object *obj)
{
	int i;

	for (i = 0; i < obj->nr_programs; i++) {
		struct bpf_program *p = &obj->programs[i];

		if (!p->nr_reloc)
			continue;

		qsort(p->reloc_desc, p->nr_reloc, sizeof(*p->reloc_desc), cmp_relocs);
	}
}
static int bpf_prog_assign_exc_cb(struct bpf_object *obj, struct bpf_program *prog)
{
	const char *str = "exception_callback:";
	size_t pfx_len = strlen(str);
	int i, j, n;

	if (!obj->btf || !kernel_supports(obj, FEAT_BTF_DECL_TAG))
		return 0;

	n = btf__type_cnt(obj->btf);
	for (i = 1; i < n; i++) {
		const char *name;
		struct btf_type *t;

		t = btf_type_by_id(obj->btf, i);
		if (!btf_is_decl_tag(t) || btf_decl_tag(t)->component_idx != -1)
			continue;

		name = btf__str_by_offset(obj->btf, t->name_off);
		if (strncmp(name, str, pfx_len) != 0)
			continue;

		t = btf_type_by_id(obj->btf, t->type);
		if (!btf_is_func(t) || btf_func_linkage(t) != BTF_FUNC_GLOBAL) {
			pr_warn("prog '%s': exception_callback:<value> decl tag not applied to the main program\n",
				prog->name);
			return -EINVAL;
		}
		if (strcmp(prog->name, btf__str_by_offset(obj->btf, t->name_off)) != 0)
			continue;
		/* Multiple callbacks are specified for the same prog,
		 * the verifier will eventually return an error for this
		 * case, hence simply skip appending a subprog.
		 */
		if (prog->exception_cb_idx >= 0) {
			prog->exception_cb_idx = -1;
			break;
		}

		name += pfx_len;
		if (str_is_empty(name)) {
			pr_warn("prog '%s': exception_callback:<value> decl tag contains empty value\n",
				prog->name);
			return -EINVAL;
		}

		for (j = 0; j < obj->nr_programs; j++) {
			struct bpf_program *subprog = &obj->programs[j];

			if (!prog_is_subprog(obj, subprog))
				continue;
			if (strcmp(name, subprog->name) != 0)
				continue;
			/* Enforce non-hidden, as from verifier point of
			 * view it expects global functions, whereas the
			 * mark_btf_static fixes up linkage as static.
			 */
			if (!subprog->sym_global || subprog->mark_btf_static) {
				pr_warn("prog '%s': exception callback %s must be a global non-hidden function\n",
					prog->name, subprog->name);
				return -EINVAL;
			}
			/* Let's see if we already saw a static exception callback with the same name */
			if (prog->exception_cb_idx >= 0) {
				pr_warn("prog '%s': multiple subprogs with same name as exception callback '%s'\n",
					prog->name, subprog->name);
				return -EINVAL;
			}
			prog->exception_cb_idx = j;
			break;
		}

		if (prog->exception_cb_idx >= 0)
			continue;

		pr_warn("prog '%s': cannot find exception callback '%s'\n", prog->name, name);
		return -ENOENT;
	}

	return 0;
}
static struct {
	enum bpf_prog_type prog_type;
	const char *ctx_name;
} global_ctx_map[] = {
	{ BPF_PROG_TYPE_CGROUP_DEVICE,           "bpf_cgroup_dev_ctx" },
	{ BPF_PROG_TYPE_CGROUP_SKB,              "__sk_buff" },
	{ BPF_PROG_TYPE_CGROUP_SOCK,             "bpf_sock" },
	{ BPF_PROG_TYPE_CGROUP_SOCK_ADDR,        "bpf_sock_addr" },
	{ BPF_PROG_TYPE_CGROUP_SOCKOPT,          "bpf_sockopt" },
	{ BPF_PROG_TYPE_CGROUP_SYSCTL,           "bpf_sysctl" },
	{ BPF_PROG_TYPE_FLOW_DISSECTOR,          "__sk_buff" },
	{ BPF_PROG_TYPE_KPROBE,                  "bpf_user_pt_regs_t" },
	{ BPF_PROG_TYPE_LWT_IN,                  "__sk_buff" },
	{ BPF_PROG_TYPE_LWT_OUT,                 "__sk_buff" },
	{ BPF_PROG_TYPE_LWT_SEG6LOCAL,           "__sk_buff" },
	{ BPF_PROG_TYPE_LWT_XMIT,                "__sk_buff" },
	{ BPF_PROG_TYPE_NETFILTER,               "bpf_nf_ctx" },
	{ BPF_PROG_TYPE_PERF_EVENT,              "bpf_perf_event_data" },
	{ BPF_PROG_TYPE_RAW_TRACEPOINT,          "bpf_raw_tracepoint_args" },
	{ BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE, "bpf_raw_tracepoint_args" },
	{ BPF_PROG_TYPE_SCHED_ACT,               "__sk_buff" },
	{ BPF_PROG_TYPE_SCHED_CLS,               "__sk_buff" },
	{ BPF_PROG_TYPE_SK_LOOKUP,               "bpf_sk_lookup" },
	{ BPF_PROG_TYPE_SK_MSG,                  "sk_msg_md" },
	{ BPF_PROG_TYPE_SK_REUSEPORT,            "sk_reuseport_md" },
	{ BPF_PROG_TYPE_SK_SKB,                  "__sk_buff" },
	{ BPF_PROG_TYPE_SOCK_OPS,                "bpf_sock_ops" },
	{ BPF_PROG_TYPE_SOCKET_FILTER,           "__sk_buff" },
	{ BPF_PROG_TYPE_XDP,                     "xdp_md" },
	/* all other program types don't have "named" context structs */
};
6481 /* forward declarations for arch-specific underlying types of bpf_user_pt_regs_t typedef,
6482 * for below __builtin_types_compatible_p() checks;
6483 * with this approach we don't need any extra arch-specific #ifdef guards
6486 struct user_pt_regs
;
6487 struct user_regs_struct
;
static bool need_func_arg_type_fixup(const struct btf *btf, const struct bpf_program *prog,
				     const char *subprog_name, int arg_idx,
				     int arg_type_id, const char *ctx_name)
{
	const struct btf_type *t;
	const char *tname;

	/* check if existing parameter already matches verifier expectations */
	t = skip_mods_and_typedefs(btf, arg_type_id, NULL);
	if (!btf_is_ptr(t))
		goto out_warn;

	/* typedef bpf_user_pt_regs_t is a special PITA case, valid for kprobe
	 * and perf_event programs, so check this case early on and forget
	 * about it for subsequent checks
	 */
	while (btf_is_mod(t))
		t = btf__type_by_id(btf, t->type);
	if (btf_is_typedef(t) &&
	    (prog->type == BPF_PROG_TYPE_KPROBE || prog->type == BPF_PROG_TYPE_PERF_EVENT)) {
		tname = btf__str_by_offset(btf, t->name_off) ?: "<anon>";
		if (strcmp(tname, "bpf_user_pt_regs_t") == 0)
			return false; /* canonical type for kprobe/perf_event */
	}

	/* now we can ignore typedefs moving forward */
	t = skip_mods_and_typedefs(btf, t->type, NULL);

	/* if it's `void *`, definitely fix up BTF info */
	if (btf_is_void(t))
		return true;

	/* if it's already proper canonical type, no need to fix up */
	tname = btf__str_by_offset(btf, t->name_off) ?: "<anon>";
	if (btf_is_struct(t) && strcmp(tname, ctx_name) == 0)
		return false;

	/* special cases */
	switch (prog->type) {
	case BPF_PROG_TYPE_KPROBE:
		/* `struct pt_regs *` is expected, but we need to fix up */
		if (btf_is_struct(t) && strcmp(tname, "pt_regs") == 0)
			return true;
		break;
	case BPF_PROG_TYPE_PERF_EVENT:
		if (__builtin_types_compatible_p(bpf_user_pt_regs_t, struct pt_regs) &&
		    btf_is_struct(t) && strcmp(tname, "pt_regs") == 0)
			return true;
		if (__builtin_types_compatible_p(bpf_user_pt_regs_t, struct user_pt_regs) &&
		    btf_is_struct(t) && strcmp(tname, "user_pt_regs") == 0)
			return true;
		if (__builtin_types_compatible_p(bpf_user_pt_regs_t, struct user_regs_struct) &&
		    btf_is_struct(t) && strcmp(tname, "user_regs_struct") == 0)
			return true;
		break;
	case BPF_PROG_TYPE_RAW_TRACEPOINT:
	case BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE:
		/* allow u64* as ctx */
		if (btf_is_int(t) && t->size == 8)
			return true;
		break;
	default:
		break;
	}

out_warn:
	pr_warn("prog '%s': subprog '%s' arg#%d is expected to be of `struct %s *` type\n",
		prog->name, subprog_name, arg_idx, ctx_name);
	return false;
}
static int clone_func_btf_info(struct btf *btf, int orig_fn_id, struct bpf_program *prog)
{
	int fn_id, fn_proto_id, ret_type_id, orig_proto_id;
	int i, err, arg_cnt, fn_name_off, linkage;
	struct btf_type *fn_t, *fn_proto_t, *t;
	struct btf_param *p;

	/* caller already validated FUNC -> FUNC_PROTO validity */
	fn_t = btf_type_by_id(btf, orig_fn_id);
	fn_proto_t = btf_type_by_id(btf, fn_t->type);

	/* Note that each btf__add_xxx() operation invalidates
	 * all btf_type and string pointers, so we need to be
	 * very careful when cloning BTF types. BTF type
	 * pointers have to be always refetched. And to avoid
	 * problems with invalidated string pointers, we
	 * add empty strings initially, then just fix up
	 * name_off offsets in place. Offsets are stable for
	 * existing strings, so that works out.
	 */
	fn_name_off = fn_t->name_off; /* we are about to invalidate fn_t */
	linkage = btf_func_linkage(fn_t);
	orig_proto_id = fn_t->type; /* original FUNC_PROTO ID */
	ret_type_id = fn_proto_t->type; /* fn_proto_t will be invalidated */
	arg_cnt = btf_vlen(fn_proto_t);

	/* clone FUNC_PROTO and its params */
	fn_proto_id = btf__add_func_proto(btf, ret_type_id);
	if (fn_proto_id < 0)
		return -EINVAL;

	for (i = 0; i < arg_cnt; i++) {
		int name_off;

		/* copy original parameter data */
		t = btf_type_by_id(btf, orig_proto_id);
		p = &btf_params(t)[i];
		name_off = p->name_off;

		err = btf__add_func_param(btf, "", p->type);
		if (err)
			return err;

		fn_proto_t = btf_type_by_id(btf, fn_proto_id);
		p = &btf_params(fn_proto_t)[i];
		p->name_off = name_off; /* use remembered str offset */
	}

	/* clone FUNC now, btf__add_func() enforces non-empty name, so use
	 * entry program's name as a placeholder, which we replace immediately
	 * with original name_off
	 */
	fn_id = btf__add_func(btf, prog->name, linkage, fn_proto_id);
	if (fn_id < 0)
		return -EINVAL;

	fn_t = btf_type_by_id(btf, fn_id);
	fn_t->name_off = fn_name_off; /* reuse original string */

	return fn_id;
}
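
/* Illustrative sketch of the invalidation hazard handled above (not in the
 * original source): any btf__add_*() call may reallocate BTF's internal
 * buffers, so previously fetched type pointers can dangle:
 *
 *	struct btf_type *t = btf_type_by_id(btf, id);
 *	btf__add_ptr(btf, id);		// may realloc type storage
 *	t = btf_type_by_id(btf, id);	// must refetch before reusing 't'
 *
 * String *offsets*, unlike pointers, stay stable, which is why name_off
 * values are remembered and patched in place.
 */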
/* Check if main program or global subprog's function prototype has `arg:ctx`
 * argument tags, and, if necessary, substitute correct type to match what BPF
 * verifier would expect, taking into account specific program type. This
 * allows to support __arg_ctx tag transparently on old kernels that don't yet
 * have a native support for it in the verifier, making user's life much
 * easier.
 */
static int bpf_program_fixup_func_info(struct bpf_object *obj, struct bpf_program *prog)
{
	const char *ctx_name = NULL, *ctx_tag = "arg:ctx", *fn_name;
	struct bpf_func_info_min *func_rec;
	struct btf_type *fn_t, *fn_proto_t;
	struct btf *btf = obj->btf;
	const struct btf_type *t;
	struct btf_param *p;
	int ptr_id = 0, struct_id, tag_id, orig_fn_id;
	int i, n, arg_idx, arg_cnt, err, rec_idx;
	int *orig_ids;

	/* no .BTF.ext, no problem */
	if (!obj->btf_ext || !prog->func_info)
		return 0;

	/* don't do any fix ups if kernel natively supports __arg_ctx */
	if (kernel_supports(obj, FEAT_ARG_CTX_TAG))
		return 0;

	/* some BPF program types just don't have named context structs, so
	 * this fallback mechanism doesn't work for them
	 */
	for (i = 0; i < ARRAY_SIZE(global_ctx_map); i++) {
		if (global_ctx_map[i].prog_type != prog->type)
			continue;
		ctx_name = global_ctx_map[i].ctx_name;
		break;
	}
	if (!ctx_name)
		return 0;

	/* remember original func BTF IDs to detect if we already cloned them */
	orig_ids = calloc(prog->func_info_cnt, sizeof(*orig_ids));
	if (!orig_ids)
		return -ENOMEM;
	for (i = 0; i < prog->func_info_cnt; i++) {
		func_rec = prog->func_info + prog->func_info_rec_size * i;
		orig_ids[i] = func_rec->type_id;
	}

	/* go through each DECL_TAG with "arg:ctx" and see if it points to one
	 * of our subprogs; if yes and subprog is global and needs adjustment,
	 * clone and adjust FUNC -> FUNC_PROTO combo
	 */
	for (i = 1, n = btf__type_cnt(btf); i < n; i++) {
		/* only DECL_TAG with "arg:ctx" value are interesting */
		t = btf__type_by_id(btf, i);
		if (!btf_is_decl_tag(t))
			continue;
		if (strcmp(btf__str_by_offset(btf, t->name_off), ctx_tag) != 0)
			continue;

		/* only global funcs need adjustment, if at all */
		orig_fn_id = t->type;
		fn_t = btf_type_by_id(btf, orig_fn_id);
		if (!btf_is_func(fn_t) || btf_func_linkage(fn_t) != BTF_FUNC_GLOBAL)
			continue;

		/* sanity check FUNC -> FUNC_PROTO chain, just in case */
		fn_proto_t = btf_type_by_id(btf, fn_t->type);
		if (!fn_proto_t || !btf_is_func_proto(fn_proto_t))
			continue;

		/* find corresponding func_info record */
		func_rec = NULL;
		for (rec_idx = 0; rec_idx < prog->func_info_cnt; rec_idx++) {
			if (orig_ids[rec_idx] == t->type) {
				func_rec = prog->func_info + prog->func_info_rec_size * rec_idx;
				break;
			}
		}
		/* current main program doesn't call into this subprog */
		if (!func_rec)
			continue;

		/* some more sanity checking of DECL_TAG */
		arg_cnt = btf_vlen(fn_proto_t);
		arg_idx = btf_decl_tag(t)->component_idx;
		if (arg_idx < 0 || arg_idx >= arg_cnt) {
			err = -EINVAL;
			goto err_out;
		}

		/* check if we should fix up argument type */
		p = &btf_params(fn_proto_t)[arg_idx];
		fn_name = btf__str_by_offset(btf, fn_t->name_off) ?: "<anon>";
		if (!need_func_arg_type_fixup(btf, prog, fn_name, arg_idx, p->type, ctx_name))
			continue;

		/* clone fn/fn_proto, unless we already did it for another arg */
		if (func_rec->type_id == orig_fn_id) {
			int fn_id;

			fn_id = clone_func_btf_info(btf, orig_fn_id, prog);
			if (fn_id < 0) {
				err = fn_id;
				goto err_out;
			}

			/* point func_info record to a cloned FUNC type */
			func_rec->type_id = fn_id;
		}

		/* create PTR -> STRUCT type chain to mark PTR_TO_CTX argument;
		 * we do it just once per main BPF program, as all global
		 * funcs share the same program type, so need only PTR ->
		 * STRUCT type chain
		 */
		if (ptr_id == 0) {
			struct_id = btf__add_struct(btf, ctx_name, 0);
			ptr_id = btf__add_ptr(btf, struct_id);
			if (ptr_id < 0 || struct_id < 0) {
				err = -EINVAL;
				goto err_out;
			}
		}

		/* for completeness, clone DECL_TAG and point it to cloned param */
		tag_id = btf__add_decl_tag(btf, ctx_tag, func_rec->type_id, arg_idx);
		if (tag_id < 0) {
			err = -EINVAL;
			goto err_out;
		}

		/* all the BTF manipulations invalidated pointers, refetch them */
		fn_t = btf_type_by_id(btf, func_rec->type_id);
		fn_proto_t = btf_type_by_id(btf, fn_t->type);

		/* fix up type ID pointed to by param */
		p = &btf_params(fn_proto_t)[arg_idx];
		p->type = ptr_id;
	}

	free(orig_ids);
	return 0;
err_out:
	free(orig_ids);
	return err;
}
static int bpf_object__relocate(struct bpf_object *obj, const char *targ_btf_path)
{
	struct bpf_program *prog;
	size_t i, j;
	int err;

	if (obj->btf_ext) {
		err = bpf_object__relocate_core(obj, targ_btf_path);
		if (err) {
			pr_warn("failed to perform CO-RE relocations: %d\n", err);
			return err;
		}
		bpf_object__sort_relos(obj);
	}

	/* Before relocating calls pre-process relocations and mark
	 * few ld_imm64 instructions that points to subprogs.
	 * Otherwise bpf_object__reloc_code() later would have to consider
	 * all ld_imm64 insns as relocation candidates. That would
	 * reduce relocation speed, since amount of find_prog_insn_relo()
	 * would increase and most of them will fail to find a relo.
	 */
	for (i = 0; i < obj->nr_programs; i++) {
		prog = &obj->programs[i];
		for (j = 0; j < prog->nr_reloc; j++) {
			struct reloc_desc *relo = &prog->reloc_desc[j];
			struct bpf_insn *insn = &prog->insns[relo->insn_idx];

			/* mark the insn, so it's recognized by insn_is_pseudo_func() */
			if (relo->type == RELO_SUBPROG_ADDR)
				insn[0].src_reg = BPF_PSEUDO_FUNC;
		}
	}

	/* relocate subprogram calls and append used subprograms to main
	 * programs; each copy of subprogram code needs to be relocated
	 * differently for each main program, because its code location might
	 * have changed.
	 * Append subprog relos to main programs to allow data relos to be
	 * processed after text is completely relocated.
	 */
	for (i = 0; i < obj->nr_programs; i++) {
		prog = &obj->programs[i];
		/* sub-program's sub-calls are relocated within the context of
		 * its main program only
		 */
		if (prog_is_subprog(obj, prog))
			continue;
		if (!prog->autoload)
			continue;

		err = bpf_object__relocate_calls(obj, prog);
		if (err) {
			pr_warn("prog '%s': failed to relocate calls: %d\n",
				prog->name, err);
			return err;
		}

		err = bpf_prog_assign_exc_cb(obj, prog);
		if (err)
			return err;
		/* Now, also append exception callback if it has not been done already. */
		if (prog->exception_cb_idx >= 0) {
			struct bpf_program *subprog = &obj->programs[prog->exception_cb_idx];

			/* Calling exception callback directly is disallowed, which the
			 * verifier will reject later. In case it was processed already,
			 * we can skip this step, otherwise for all other valid cases we
			 * have to append exception callback now.
			 */
			if (subprog->sub_insn_off == 0) {
				err = bpf_object__append_subprog_code(obj, prog, subprog);
				if (err)
					return err;
				err = bpf_object__reloc_code(obj, prog, subprog);
				if (err)
					return err;
			}
		}
	}
	for (i = 0; i < obj->nr_programs; i++) {
		prog = &obj->programs[i];
		if (prog_is_subprog(obj, prog))
			continue;
		if (!prog->autoload)
			continue;

		/* Process data relos for main programs */
		err = bpf_object__relocate_data(obj, prog);
		if (err) {
			pr_warn("prog '%s': failed to relocate data references: %d\n",
				prog->name, err);
			return err;
		}

		/* Fix up .BTF.ext information, if necessary */
		err = bpf_program_fixup_func_info(obj, prog);
		if (err) {
			pr_warn("prog '%s': failed to perform .BTF.ext fix ups: %d\n",
				prog->name, err);
			return err;
		}
	}

	return 0;
}
static int bpf_object__collect_st_ops_relos(struct bpf_object *obj,
					    Elf64_Shdr *shdr, Elf_Data *data);
static int bpf_object__collect_map_relos(struct bpf_object *obj,
					 Elf64_Shdr *shdr, Elf_Data *data)
{
	const int bpf_ptr_sz = 8, host_ptr_sz = sizeof(void *);
	int i, j, nrels, new_sz;
	const struct btf_var_secinfo *vi = NULL;
	const struct btf_type *sec, *var, *def;
	struct bpf_map *map = NULL, *targ_map = NULL;
	struct bpf_program *targ_prog = NULL;
	bool is_prog_array, is_map_in_map;
	const struct btf_member *member;
	const char *name, *mname, *type;
	unsigned int moff;
	Elf64_Sym *sym;
	Elf64_Rel *rel;
	void *tmp;

	if (!obj->efile.btf_maps_sec_btf_id || !obj->btf)
		return -EINVAL;
	sec = btf__type_by_id(obj->btf, obj->efile.btf_maps_sec_btf_id);
	if (!sec)
		return -EINVAL;

	nrels = shdr->sh_size / shdr->sh_entsize;
	for (i = 0; i < nrels; i++) {
		rel = elf_rel_by_idx(data, i);
		if (!rel) {
			pr_warn(".maps relo #%d: failed to get ELF relo\n", i);
			return -LIBBPF_ERRNO__FORMAT;
		}

		sym = elf_sym_by_idx(obj, ELF64_R_SYM(rel->r_info));
		if (!sym) {
			pr_warn(".maps relo #%d: symbol %zx not found\n",
				i, (size_t)ELF64_R_SYM(rel->r_info));
			return -LIBBPF_ERRNO__FORMAT;
		}
		name = elf_sym_str(obj, sym->st_name) ?: "<?>";

		pr_debug(".maps relo #%d: for %zd value %zd rel->r_offset %zu name %d ('%s')\n",
			 i, (ssize_t)(rel->r_info >> 32), (size_t)sym->st_value,
			 (size_t)rel->r_offset, sym->st_name, name);

		for (j = 0; j < obj->nr_maps; j++) {
			map = &obj->maps[j];
			if (map->sec_idx != obj->efile.btf_maps_shndx)
				continue;

			vi = btf_var_secinfos(sec) + map->btf_var_idx;
			if (vi->offset <= rel->r_offset &&
			    rel->r_offset + bpf_ptr_sz <= vi->offset + vi->size)
				break;
		}
		if (j == obj->nr_maps) {
			pr_warn(".maps relo #%d: cannot find map '%s' at rel->r_offset %zu\n",
				i, name, (size_t)rel->r_offset);
			return -EINVAL;
		}

		is_map_in_map = bpf_map_type__is_map_in_map(map->def.type);
		is_prog_array = map->def.type == BPF_MAP_TYPE_PROG_ARRAY;
		type = is_map_in_map ? "map" : "prog";
		if (is_map_in_map) {
			if (sym->st_shndx != obj->efile.btf_maps_shndx) {
				pr_warn(".maps relo #%d: '%s' isn't a BTF-defined map\n",
					i, name);
				return -LIBBPF_ERRNO__RELOC;
			}
			if (map->def.type == BPF_MAP_TYPE_HASH_OF_MAPS &&
			    map->def.key_size != sizeof(int)) {
				pr_warn(".maps relo #%d: hash-of-maps '%s' should have key size %zu.\n",
					i, map->name, sizeof(int));
				return -EINVAL;
			}
			targ_map = bpf_object__find_map_by_name(obj, name);
			if (!targ_map) {
				pr_warn(".maps relo #%d: '%s' isn't a valid map reference\n",
					i, name);
				return -ESRCH;
			}
		} else if (is_prog_array) {
			targ_prog = bpf_object__find_program_by_name(obj, name);
			if (!targ_prog) {
				pr_warn(".maps relo #%d: '%s' isn't a valid program reference\n",
					i, name);
				return -ESRCH;
			}
			if (targ_prog->sec_idx != sym->st_shndx ||
			    targ_prog->sec_insn_off * 8 != sym->st_value ||
			    prog_is_subprog(obj, targ_prog)) {
				pr_warn(".maps relo #%d: '%s' isn't an entry-point program\n",
					i, name);
				return -LIBBPF_ERRNO__RELOC;
			}
		} else {
			return -EINVAL;
		}

		var = btf__type_by_id(obj->btf, vi->type);
		def = skip_mods_and_typedefs(obj->btf, var->type, NULL);
		if (btf_vlen(def) == 0)
			return -EINVAL;
		member = btf_members(def) + btf_vlen(def) - 1;
		mname = btf__name_by_offset(obj->btf, member->name_off);
		if (strcmp(mname, "values"))
			return -EINVAL;

		moff = btf_member_bit_offset(def, btf_vlen(def) - 1) / 8;
		if (rel->r_offset - vi->offset < moff)
			return -EINVAL;

		moff = rel->r_offset - vi->offset - moff;
		/* here we use BPF pointer size, which is always 64 bit, as we
		 * are parsing ELF that was built for BPF target
		 */
		if (moff % bpf_ptr_sz)
			return -EINVAL;
		moff /= bpf_ptr_sz;
		if (moff >= map->init_slots_sz) {
			new_sz = moff + 1;
			tmp = libbpf_reallocarray(map->init_slots, new_sz, host_ptr_sz);
			if (!tmp)
				return -ENOMEM;
			map->init_slots = tmp;
			memset(map->init_slots + map->init_slots_sz, 0,
			       (new_sz - map->init_slots_sz) * host_ptr_sz);
			map->init_slots_sz = new_sz;
		}
		map->init_slots[moff] = is_map_in_map ? (void *)targ_map : (void *)targ_prog;

		pr_debug(".maps relo #%d: map '%s' slot [%d] points to %s '%s'\n",
			 i, map->name, moff, type, name);
	}

	return 0;
}
static int bpf_object__collect_relos(struct bpf_object *obj)
{
	int i, err;

	for (i = 0; i < obj->efile.sec_cnt; i++) {
		struct elf_sec_desc *sec_desc = &obj->efile.secs[i];
		Elf64_Shdr *shdr;
		Elf_Data *data;
		int idx;

		if (sec_desc->sec_type != SEC_RELO)
			continue;

		shdr = sec_desc->shdr;
		data = sec_desc->data;
		idx = shdr->sh_info;

		if (shdr->sh_type != SHT_REL || idx < 0 || idx >= obj->efile.sec_cnt) {
			pr_warn("internal error at %d\n", __LINE__);
			return -LIBBPF_ERRNO__INTERNAL;
		}

		if (obj->efile.secs[idx].sec_type == SEC_ST_OPS)
			err = bpf_object__collect_st_ops_relos(obj, shdr, data);
		else if (idx == obj->efile.btf_maps_shndx)
			err = bpf_object__collect_map_relos(obj, shdr, data);
		else
			err = bpf_object__collect_prog_relos(obj, shdr, data);
		if (err)
			return err;
	}

	bpf_object__sort_relos(obj);
	return 0;
}
static bool insn_is_helper_call(struct bpf_insn *insn, enum bpf_func_id *func_id)
{
	if (BPF_CLASS(insn->code) == BPF_JMP &&
	    BPF_OP(insn->code) == BPF_CALL &&
	    BPF_SRC(insn->code) == BPF_K &&
	    insn->src_reg == 0 &&
	    insn->dst_reg == 0) {
		*func_id = insn->imm;
		return true;
	}
	return false;
}
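
/* Illustrative example (not in the original source): a helper call such as
 * bpf_probe_read_kernel() is a single instruction of exactly the shape
 * matched above:
 *
 *	struct bpf_insn insn = {
 *		.code = BPF_JMP | BPF_CALL,	// src_reg == 0, dst_reg == 0
 *		.imm = BPF_FUNC_probe_read_kernel,
 *	};
 *
 * kfunc calls differ by src_reg == BPF_PSEUDO_KFUNC_CALL, so they don't
 * match and are left alone.
 */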
static int bpf_object__sanitize_prog(struct bpf_object *obj, struct bpf_program *prog)
{
	struct bpf_insn *insn = prog->insns;
	enum bpf_func_id func_id;
	int i;

	if (obj->gen_loader)
		return 0;

	for (i = 0; i < prog->insns_cnt; i++, insn++) {
		if (!insn_is_helper_call(insn, &func_id))
			continue;

		/* on kernels that don't yet support
		 * bpf_probe_read_{kernel,user}[_str] helpers, fall back
		 * to bpf_probe_read() which works well for old kernels
		 */
		switch (func_id) {
		case BPF_FUNC_probe_read_kernel:
		case BPF_FUNC_probe_read_user:
			if (!kernel_supports(obj, FEAT_PROBE_READ_KERN))
				insn->imm = BPF_FUNC_probe_read;
			break;
		case BPF_FUNC_probe_read_kernel_str:
		case BPF_FUNC_probe_read_user_str:
			if (!kernel_supports(obj, FEAT_PROBE_READ_KERN))
				insn->imm = BPF_FUNC_probe_read_str;
			break;
		default:
			break;
		}
	}

	return 0;
}
static int libbpf_find_attach_btf_id(struct bpf_program *prog, const char *attach_name,
				     int *btf_obj_fd, int *btf_type_id);
/* this is called as prog->sec_def->prog_prepare_load_fn for libbpf-supported sec_defs */
static int libbpf_prepare_prog_load(struct bpf_program *prog,
				    struct bpf_prog_load_opts *opts, long cookie)
{
	enum sec_def_flags def = cookie;

	/* old kernels might not support specifying expected_attach_type */
	if ((def & SEC_EXP_ATTACH_OPT) && !kernel_supports(prog->obj, FEAT_EXP_ATTACH_TYPE))
		opts->expected_attach_type = 0;

	if (def & SEC_SLEEPABLE)
		opts->prog_flags |= BPF_F_SLEEPABLE;

	if (prog->type == BPF_PROG_TYPE_XDP && (def & SEC_XDP_FRAGS))
		opts->prog_flags |= BPF_F_XDP_HAS_FRAGS;

	/* special check for usdt to use uprobe_multi link */
	if ((def & SEC_USDT) && kernel_supports(prog->obj, FEAT_UPROBE_MULTI_LINK))
		prog->expected_attach_type = BPF_TRACE_UPROBE_MULTI;

	if ((def & SEC_ATTACH_BTF) && !prog->attach_btf_id) {
		int btf_obj_fd = 0, btf_type_id = 0, err;
		const char *attach_name;

		attach_name = strchr(prog->sec_name, '/');
		if (!attach_name) {
			/* if BPF program is annotated with just SEC("fentry")
			 * (or similar) without declaratively specifying
			 * target, then it is expected that target will be
			 * specified with bpf_program__set_attach_target() at
			 * runtime before BPF object load step. If not, then
			 * there is nothing to load into the kernel as BPF
			 * verifier won't be able to validate BPF program
			 * correctness anyways.
			 */
			pr_warn("prog '%s': no BTF-based attach target is specified, use bpf_program__set_attach_target()\n",
				prog->name);
			return -EINVAL;
		}
		attach_name++; /* skip over / */

		err = libbpf_find_attach_btf_id(prog, attach_name, &btf_obj_fd, &btf_type_id);
		if (err)
			return err;

		/* cache resolved BTF FD and BTF type ID in the prog */
		prog->attach_btf_obj_fd = btf_obj_fd;
		prog->attach_btf_id = btf_type_id;

		/* but by now libbpf common logic is not utilizing
		 * prog->attach_btf_obj_fd/prog->attach_btf_id anymore because
		 * this callback is called after opts were populated by
		 * libbpf, so this callback has to update opts explicitly here
		 */
		opts->attach_btf_obj_fd = btf_obj_fd;
		opts->attach_btf_id = btf_type_id;
	}
	return 0;
}
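
/* Usage sketch (illustrative, not from this file): with a bare SEC("fentry")
 * annotation the attach target must be supplied programmatically before
 * load, e.g.:
 *
 *	// 0 == attach to a kernel function rather than another BPF program
 *	bpf_program__set_attach_target(prog, 0, "tcp_connect");
 *	bpf_object__load(obj);
 *
 * otherwise the pr_warn() above fires and the load is rejected.
 */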
static void fixup_verifier_log(struct bpf_program *prog, char *buf, size_t buf_sz);
static int bpf_object_load_prog(struct bpf_object *obj, struct bpf_program *prog,
				struct bpf_insn *insns, int insns_cnt,
				const char *license, __u32 kern_version, int *prog_fd)
{
	LIBBPF_OPTS(bpf_prog_load_opts, load_attr);
	const char *prog_name = NULL;
	char *cp, errmsg[STRERR_BUFSIZE];
	size_t log_buf_size = 0;
	char *log_buf = NULL, *tmp;
	bool own_log_buf = true;
	__u32 log_level = prog->log_level;
	int btf_fd, ret, err, i;

	if (prog->type == BPF_PROG_TYPE_UNSPEC) {
		/*
		 * The program type must be set. Most likely we couldn't find a proper
		 * section definition at load time, and thus we didn't infer the type.
		 */
		pr_warn("prog '%s': missing BPF prog type, check ELF section name '%s'\n",
			prog->name, prog->sec_name);
		return -EINVAL;
	}

	if (!insns || !insns_cnt)
		return -EINVAL;

	if (kernel_supports(obj, FEAT_PROG_NAME))
		prog_name = prog->name;
	load_attr.attach_prog_fd = prog->attach_prog_fd;
	load_attr.attach_btf_obj_fd = prog->attach_btf_obj_fd;
	load_attr.attach_btf_id = prog->attach_btf_id;
	load_attr.kern_version = kern_version;
	load_attr.prog_ifindex = prog->prog_ifindex;

	/* specify func_info/line_info only if kernel supports them */
	btf_fd = btf__fd(obj->btf);
	if (btf_fd >= 0 && kernel_supports(obj, FEAT_BTF_FUNC)) {
		load_attr.prog_btf_fd = btf_fd;
		load_attr.func_info = prog->func_info;
		load_attr.func_info_rec_size = prog->func_info_rec_size;
		load_attr.func_info_cnt = prog->func_info_cnt;
		load_attr.line_info = prog->line_info;
		load_attr.line_info_rec_size = prog->line_info_rec_size;
		load_attr.line_info_cnt = prog->line_info_cnt;
	}
	load_attr.log_level = log_level;
	load_attr.prog_flags = prog->prog_flags;
	load_attr.fd_array = obj->fd_array;

	load_attr.token_fd = obj->token_fd;
	if (obj->token_fd)
		load_attr.prog_flags |= BPF_F_TOKEN_FD;

	/* adjust load_attr if sec_def provides custom preload callback */
	if (prog->sec_def && prog->sec_def->prog_prepare_load_fn) {
		err = prog->sec_def->prog_prepare_load_fn(prog, &load_attr, prog->sec_def->cookie);
		if (err < 0) {
			pr_warn("prog '%s': failed to prepare load attributes: %d\n",
				prog->name, err);
			return err;
		}
		insns = prog->insns;
		insns_cnt = prog->insns_cnt;
	}

	/* allow prog_prepare_load_fn to change expected_attach_type */
	load_attr.expected_attach_type = prog->expected_attach_type;

	if (obj->gen_loader) {
		bpf_gen__prog_load(obj->gen_loader, prog->type, prog->name,
				   license, insns, insns_cnt, &load_attr,
				   prog - obj->programs);
		*prog_fd = -1;
		return 0;
	}

retry_load:
	/* if log_level is zero, we don't request logs initially even if
	 * custom log_buf is specified; if the program load fails, then we'll
	 * bump log_level to 1 and use either custom log_buf or we'll allocate
	 * our own and retry the load to get details on what failed
	 */
	if (log_level) {
		if (prog->log_buf) {
			log_buf = prog->log_buf;
			log_buf_size = prog->log_size;
			own_log_buf = false;
		} else if (obj->log_buf) {
			log_buf = obj->log_buf;
			log_buf_size = obj->log_size;
			own_log_buf = false;
		} else {
			log_buf_size = max((size_t)BPF_LOG_BUF_SIZE, log_buf_size * 2);
			tmp = realloc(log_buf, log_buf_size);
			if (!tmp) {
				ret = -ENOMEM;
				goto out;
			}
			log_buf = tmp;
			log_buf[0] = '\0';
			own_log_buf = true;
		}
	}

	load_attr.log_buf = log_buf;
	load_attr.log_size = log_buf_size;
	load_attr.log_level = log_level;

	ret = bpf_prog_load(prog->type, prog_name, license, insns, insns_cnt, &load_attr);
	if (ret >= 0) {
		if (log_level && own_log_buf) {
			pr_debug("prog '%s': -- BEGIN PROG LOAD LOG --\n%s-- END PROG LOAD LOG --\n",
				 prog->name, log_buf);
		}

		if (obj->has_rodata && kernel_supports(obj, FEAT_PROG_BIND_MAP)) {
			struct bpf_map *map;

			for (i = 0; i < obj->nr_maps; i++) {
				map = &prog->obj->maps[i];
				if (map->libbpf_type != LIBBPF_MAP_RODATA)
					continue;

				if (bpf_prog_bind_map(ret, map->fd, NULL)) {
					cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg));
					pr_warn("prog '%s': failed to bind map '%s': %s\n",
						prog->name, map->real_name, cp);
					/* Don't fail hard if can't bind rodata. */
				}
			}
		}

		*prog_fd = ret;
		ret = 0;
		goto out;
	}

	if (log_level == 0) {
		log_level = 1;
		goto retry_load;
	}
	/* On ENOSPC, increase log buffer size and retry, unless custom
	 * log_buf is specified.
	 * Be careful to not overflow u32, though. Kernel's log buf size limit
	 * isn't part of UAPI so it can always be bumped to full 4GB. So don't
	 * multiply by 2 unless we are sure we'll fit within 32 bits.
	 * Currently, we'll get -EINVAL when we reach (UINT_MAX >> 2).
	 */
	if (own_log_buf && errno == ENOSPC && log_buf_size <= UINT_MAX / 2)
		goto retry_load;

	ret = -errno;

	/* post-process verifier log to improve error descriptions */
	fixup_verifier_log(prog, log_buf, log_buf_size);

	cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg));
	pr_warn("prog '%s': BPF program load failed: %s\n", prog->name, cp);

	if (own_log_buf && log_buf && log_buf[0] != '\0') {
		pr_warn("prog '%s': -- BEGIN PROG LOAD LOG --\n%s-- END PROG LOAD LOG --\n",
			prog->name, log_buf);
	}

out:
	if (own_log_buf)
		free(log_buf);
	return ret;
}
static char *find_prev_line(char *buf, char *cur)
{
	char *p;

	if (cur == buf) /* end of a log buf */
		return NULL;

	p = cur - 1;
	while (p - 1 >= buf && *(p - 1) != '\n')
		p--;

	return p;
}
static void patch_log(char *buf, size_t buf_sz, size_t log_sz,
		      char *orig, size_t orig_sz, const char *patch)
{
	/* size of the remaining log content to the right from the to-be-replaced part */
	size_t rem_sz = (buf + log_sz) - (orig + orig_sz);
	size_t patch_sz = strlen(patch);

	if (patch_sz != orig_sz) {
		/* If patch line(s) are longer than original piece of verifier log,
		 * shift log contents by (patch_sz - orig_sz) bytes to the right
		 * starting from after to-be-replaced part of the log.
		 *
		 * If patch line(s) are shorter than original piece of verifier log,
		 * shift log contents by (orig_sz - patch_sz) bytes to the left
		 * starting from after to-be-replaced part of the log.
		 *
		 * We need to be careful about not overflowing available
		 * buf_sz capacity. If that's the case, we'll truncate the end
		 * of the original log, as necessary.
		 */
		if (patch_sz > orig_sz) {
			if (orig + patch_sz >= buf + buf_sz) {
				/* patch is big enough to cover remaining space completely */
				patch_sz -= (orig + patch_sz) - (buf + buf_sz) + 1;
				rem_sz = 0;
			} else if (patch_sz - orig_sz > buf_sz - log_sz) {
				/* patch causes part of remaining log to be truncated */
				rem_sz -= (patch_sz - orig_sz) - (buf_sz - log_sz);
			}
		}
		/* shift remaining log to the right by calculated amount */
		memmove(orig + patch_sz, orig + orig_sz, rem_sz);
	}

	memcpy(orig, patch, patch_sz);
}
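
/* Worked example (illustrative): with buf = "AAABBBCC" (log_sz = 9 counting
 * the NUL), buf_sz = 16, orig pointing at "BBB" (orig_sz = 3) and
 * patch = "XXXXX": patch_sz(5) > orig_sz(3), nothing overflows buf_sz, so
 * rem_sz = 3 ("CC" plus NUL), memmove() shifts "CC\0" right by two bytes,
 * and memcpy() writes the patch, yielding "AAAXXXXXCC".
 */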
static void fixup_log_failed_core_relo(struct bpf_program *prog,
				       char *buf, size_t buf_sz, size_t log_sz,
				       char *line1, char *line2, char *line3)
{
	/* Expected log for failed and not properly guarded CO-RE relocation:
	 * line1 -> 123: (85) call unknown#195896080
	 * line2 -> invalid func unknown#195896080
	 * line3 -> <anything else or end of buffer>
	 *
	 * "123" is the index of the instruction that was poisoned. We extract
	 * instruction index to find corresponding CO-RE relocation and
	 * replace this part of the log with more relevant information about
	 * failed CO-RE relocation.
	 */
	const struct bpf_core_relo *relo;
	struct bpf_core_spec spec;
	char patch[512], spec_buf[256];
	int insn_idx, err, spec_len;

	if (sscanf(line1, "%d: (%*d) call unknown#195896080\n", &insn_idx) != 1)
		return;

	relo = find_relo_core(prog, insn_idx);
	if (!relo)
		return;

	err = bpf_core_parse_spec(prog->name, prog->obj->btf, relo, &spec);
	if (err)
		return;

	spec_len = bpf_core_format_spec(spec_buf, sizeof(spec_buf), &spec);
	snprintf(patch, sizeof(patch),
		 "%d: <invalid CO-RE relocation>\n"
		 "failed to resolve CO-RE relocation %s%s\n",
		 insn_idx, spec_buf, spec_len >= sizeof(spec_buf) ? "..." : "");

	patch_log(buf, buf_sz, log_sz, line1, line3 - line1, patch);
}
static void fixup_log_missing_map_load(struct bpf_program *prog,
				       char *buf, size_t buf_sz, size_t log_sz,
				       char *line1, char *line2, char *line3)
{
	/* Expected log for failed and not properly guarded map reference:
	 * line1 -> 123: (85) call unknown#2001000345
	 * line2 -> invalid func unknown#2001000345
	 * line3 -> <anything else or end of buffer>
	 *
	 * "123" is the index of the instruction that was poisoned.
	 * "345" in "2001000345" is a map index in obj->maps to fetch map name.
	 */
	struct bpf_object *obj = prog->obj;
	const struct bpf_map *map;
	int insn_idx, map_idx;
	char patch[128];

	if (sscanf(line1, "%d: (%*d) call unknown#%d\n", &insn_idx, &map_idx) != 2)
		return;

	map_idx -= POISON_LDIMM64_MAP_BASE;
	if (map_idx < 0 || map_idx >= obj->nr_maps)
		return;
	map = &obj->maps[map_idx];

	snprintf(patch, sizeof(patch),
		 "%d: <invalid BPF map reference>\n"
		 "BPF map '%s' is referenced but wasn't created\n",
		 insn_idx, map->name);

	patch_log(buf, buf_sz, log_sz, line1, line3 - line1, patch);
}
static void fixup_log_missing_kfunc_call(struct bpf_program *prog,
					 char *buf, size_t buf_sz, size_t log_sz,
					 char *line1, char *line2, char *line3)
{
	/* Expected log for failed and not properly guarded kfunc call:
	 * line1 -> 123: (85) call unknown#2002000345
	 * line2 -> invalid func unknown#2002000345
	 * line3 -> <anything else or end of buffer>
	 *
	 * "123" is the index of the instruction that was poisoned.
	 * "345" in "2002000345" is an extern index in obj->externs to fetch kfunc name.
	 */
	struct bpf_object *obj = prog->obj;
	const struct extern_desc *ext;
	int insn_idx, ext_idx;
	char patch[128];

	if (sscanf(line1, "%d: (%*d) call unknown#%d\n", &insn_idx, &ext_idx) != 2)
		return;

	ext_idx -= POISON_CALL_KFUNC_BASE;
	if (ext_idx < 0 || ext_idx >= obj->nr_extern)
		return;
	ext = &obj->externs[ext_idx];

	snprintf(patch, sizeof(patch),
		 "%d: <invalid kfunc call>\n"
		 "kfunc '%s' is referenced but wasn't resolved\n",
		 insn_idx, ext->name);

	patch_log(buf, buf_sz, log_sz, line1, line3 - line1, patch);
}
static void fixup_verifier_log(struct bpf_program *prog, char *buf, size_t buf_sz)
{
	/* look for familiar error patterns in last N lines of the log */
	const size_t max_last_line_cnt = 10;
	char *prev_line, *cur_line, *next_line;
	size_t log_sz;
	int i;

	if (!buf)
		return;

	log_sz = strlen(buf) + 1;
	next_line = buf + log_sz - 1;

	for (i = 0; i < max_last_line_cnt; i++, next_line = cur_line) {
		cur_line = find_prev_line(buf, next_line);
		if (!cur_line)
			return;

		if (str_has_pfx(cur_line, "invalid func unknown#195896080\n")) {
			prev_line = find_prev_line(buf, cur_line);
			if (!prev_line)
				continue;

			/* failed CO-RE relocation case */
			fixup_log_failed_core_relo(prog, buf, buf_sz, log_sz,
						   prev_line, cur_line, next_line);
			return;
		} else if (str_has_pfx(cur_line, "invalid func unknown#"POISON_LDIMM64_MAP_PFX)) {
			prev_line = find_prev_line(buf, cur_line);
			if (!prev_line)
				continue;

			/* reference to uncreated BPF map */
			fixup_log_missing_map_load(prog, buf, buf_sz, log_sz,
						   prev_line, cur_line, next_line);
			return;
		} else if (str_has_pfx(cur_line, "invalid func unknown#"POISON_CALL_KFUNC_PFX)) {
			prev_line = find_prev_line(buf, cur_line);
			if (!prev_line)
				continue;

			/* reference to unresolved kfunc */
			fixup_log_missing_kfunc_call(prog, buf, buf_sz, log_sz,
						     prev_line, cur_line, next_line);
			return;
		}
	}
}
static int bpf_program_record_relos(struct bpf_program *prog)
{
	struct bpf_object *obj = prog->obj;
	int i;

	for (i = 0; i < prog->nr_reloc; i++) {
		struct reloc_desc *relo = &prog->reloc_desc[i];
		struct extern_desc *ext = &obj->externs[relo->ext_idx];
		int kind;

		switch (relo->type) {
		case RELO_EXTERN_LD64:
			if (ext->type != EXT_KSYM)
				continue;
			kind = btf_is_var(btf__type_by_id(obj->btf, ext->btf_id)) ?
				BTF_KIND_VAR : BTF_KIND_FUNC;
			bpf_gen__record_extern(obj->gen_loader, ext->name,
					       ext->is_weak, !ext->ksym.type_id,
					       true, kind, relo->insn_idx);
			break;
		case RELO_EXTERN_CALL:
			bpf_gen__record_extern(obj->gen_loader, ext->name,
					       ext->is_weak, false, false, BTF_KIND_FUNC,
					       relo->insn_idx);
			break;
		case RELO_CORE: {
			struct bpf_core_relo cr = {
				.insn_off = relo->insn_idx * 8,
				.type_id = relo->core_relo->type_id,
				.access_str_off = relo->core_relo->access_str_off,
				.kind = relo->core_relo->kind,
			};

			bpf_gen__record_relo_core(obj->gen_loader, &cr);
			break;
		}
		default:
			break;
		}
	}
	return 0;
}
static int
bpf_object__load_progs(struct bpf_object *obj, int log_level)
{
	struct bpf_program *prog;
	size_t i;
	int err;

	for (i = 0; i < obj->nr_programs; i++) {
		prog = &obj->programs[i];
		err = bpf_object__sanitize_prog(obj, prog);
		if (err)
			return err;
	}

	for (i = 0; i < obj->nr_programs; i++) {
		prog = &obj->programs[i];
		if (prog_is_subprog(obj, prog))
			continue;
		if (!prog->autoload) {
			pr_debug("prog '%s': skipped loading\n", prog->name);
			continue;
		}
		prog->log_level |= log_level;

		if (obj->gen_loader)
			bpf_program_record_relos(prog);

		err = bpf_object_load_prog(obj, prog, prog->insns, prog->insns_cnt,
					   obj->license, obj->kern_version, &prog->fd);
		if (err) {
			pr_warn("prog '%s': failed to load: %d\n", prog->name, err);
			return err;
		}
	}

	bpf_object__free_relocs(obj);
	return 0;
}
*find_sec_def(const char *sec_name
);
7627 static int bpf_object_init_progs(struct bpf_object
*obj
, const struct bpf_object_open_opts
*opts
)
7629 struct bpf_program
*prog
;
7632 bpf_object__for_each_program(prog
, obj
) {
7633 prog
->sec_def
= find_sec_def(prog
->sec_name
);
7634 if (!prog
->sec_def
) {
7635 /* couldn't guess, but user might manually specify */
7636 pr_debug("prog '%s': unrecognized ELF section name '%s'\n",
7637 prog
->name
, prog
->sec_name
);
7641 prog
->type
= prog
->sec_def
->prog_type
;
7642 prog
->expected_attach_type
= prog
->sec_def
->expected_attach_type
;
7644 /* sec_def can have custom callback which should be called
7645 * after bpf_program is initialized to adjust its properties
7647 if (prog
->sec_def
->prog_setup_fn
) {
7648 err
= prog
->sec_def
->prog_setup_fn(prog
, prog
->sec_def
->cookie
);
7650 pr_warn("prog '%s': failed to initialize: %d\n",
static struct bpf_object *bpf_object_open(const char *path, const void *obj_buf, size_t obj_buf_sz,
					  const struct bpf_object_open_opts *opts)
{
	const char *obj_name, *kconfig, *btf_tmp_path, *token_path;
	struct bpf_object *obj;
	char tmp_name[64];
	int err;
	char *log_buf;
	size_t log_size;
	__u32 log_level;

	if (elf_version(EV_CURRENT) == EV_NONE) {
		pr_warn("failed to init libelf for %s\n",
			path ? : "(mem buf)");
		return ERR_PTR(-LIBBPF_ERRNO__LIBELF);
	}

	if (!OPTS_VALID(opts, bpf_object_open_opts))
		return ERR_PTR(-EINVAL);

	obj_name = OPTS_GET(opts, object_name, NULL);
	if (obj_buf) {
		if (!obj_name) {
			snprintf(tmp_name, sizeof(tmp_name), "%lx-%lx",
				 (unsigned long)obj_buf,
				 (unsigned long)obj_buf_sz);
			obj_name = tmp_name;
		}
		path = obj_name;
		pr_debug("loading object '%s' from buffer\n", obj_name);
	}

	log_buf = OPTS_GET(opts, kernel_log_buf, NULL);
	log_size = OPTS_GET(opts, kernel_log_size, 0);
	log_level = OPTS_GET(opts, kernel_log_level, 0);
	if (log_size > UINT_MAX)
		return ERR_PTR(-EINVAL);
	if (log_size && !log_buf)
		return ERR_PTR(-EINVAL);

	token_path = OPTS_GET(opts, bpf_token_path, NULL);
	/* if user didn't specify bpf_token_path explicitly, check if
	 * LIBBPF_BPF_TOKEN_PATH envvar was set and treat it as bpf_token_path
	 * option
	 */
	if (!token_path)
		token_path = getenv("LIBBPF_BPF_TOKEN_PATH");
	if (token_path && strlen(token_path) >= PATH_MAX)
		return ERR_PTR(-ENAMETOOLONG);

	obj = bpf_object__new(path, obj_buf, obj_buf_sz, obj_name);
	if (IS_ERR(obj))
		return obj;

	obj->log_buf = log_buf;
	obj->log_size = log_size;
	obj->log_level = log_level;

	if (token_path) {
		obj->token_path = strdup(token_path);
		if (!obj->token_path) {
			err = -ENOMEM;
			goto out;
		}
	}

	btf_tmp_path = OPTS_GET(opts, btf_custom_path, NULL);
	if (btf_tmp_path) {
		if (strlen(btf_tmp_path) >= PATH_MAX) {
			err = -ENAMETOOLONG;
			goto out;
		}
		obj->btf_custom_path = strdup(btf_tmp_path);
		if (!obj->btf_custom_path) {
			err = -ENOMEM;
			goto out;
		}
	}

	kconfig = OPTS_GET(opts, kconfig, NULL);
	if (kconfig) {
		obj->kconfig = strdup(kconfig);
		if (!obj->kconfig) {
			err = -ENOMEM;
			goto out;
		}
	}

	err = bpf_object__elf_init(obj);
	err = err ? : bpf_object__check_endianness(obj);
	err = err ? : bpf_object__elf_collect(obj);
	err = err ? : bpf_object__collect_externs(obj);
	err = err ? : bpf_object_fixup_btf(obj);
	err = err ? : bpf_object__init_maps(obj, opts);
	err = err ? : bpf_object_init_progs(obj, opts);
	err = err ? : bpf_object__collect_relos(obj);
	if (err)
		goto out;

	bpf_object__elf_finish(obj);

	return obj;
out:
	bpf_object__close(obj);
	return ERR_PTR(err);
}
struct bpf_object *
bpf_object__open_file(const char *path, const struct bpf_object_open_opts *opts)
{
	if (!path)
		return libbpf_err_ptr(-EINVAL);

	pr_debug("loading %s\n", path);

	return libbpf_ptr(bpf_object_open(path, NULL, 0, opts));
}
struct bpf_object *bpf_object__open(const char *path)
{
	return bpf_object__open_file(path, NULL);
}
struct bpf_object *
bpf_object__open_mem(const void *obj_buf, size_t obj_buf_sz,
		     const struct bpf_object_open_opts *opts)
{
	if (!obj_buf || obj_buf_sz == 0)
		return libbpf_err_ptr(-EINVAL);

	return libbpf_ptr(bpf_object_open(NULL, obj_buf, obj_buf_sz, opts));
}
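
/* Usage sketch (illustrative, not from this file): the canonical
 * open -> load -> use -> close flow built from these entry points:
 *
 *	struct bpf_object *obj = bpf_object__open_file("prog.bpf.o", NULL);
 *	if (!obj)
 *		return -errno;
 *	if (bpf_object__load(obj)) {
 *		bpf_object__close(obj);
 *		return -1;
 *	}
 *	// ... find programs/maps, attach, etc. ...
 *	bpf_object__close(obj);
 *
 * "prog.bpf.o" is a placeholder object file name.
 */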
static int bpf_object_unload(struct bpf_object *obj)
{
	size_t i;

	if (!obj)
		return libbpf_err(-EINVAL);

	for (i = 0; i < obj->nr_maps; i++) {
		zclose(obj->maps[i].fd);
		if (obj->maps[i].st_ops)
			zfree(&obj->maps[i].st_ops->kern_vdata);
	}

	for (i = 0; i < obj->nr_programs; i++)
		bpf_program__unload(&obj->programs[i]);

	return 0;
}
static int bpf_object__sanitize_maps(struct bpf_object *obj)
{
	struct bpf_map *m;

	bpf_object__for_each_map(m, obj) {
		if (!bpf_map__is_internal(m))
			continue;
		if (!kernel_supports(obj, FEAT_ARRAY_MMAP))
			m->def.map_flags &= ~BPF_F_MMAPABLE;
	}

	return 0;
}
int libbpf_kallsyms_parse(kallsyms_cb_t cb, void *ctx)
{
	char sym_type, sym_name[500];
	unsigned long long sym_addr;
	int ret, err = 0;
	FILE *f;

	f = fopen("/proc/kallsyms", "re");
	if (!f) {
		err = -errno;
		pr_warn("failed to open /proc/kallsyms: %d\n", err);
		return err;
	}

	while (true) {
		ret = fscanf(f, "%llx %c %499s%*[^\n]\n",
			     &sym_addr, &sym_type, sym_name);
		if (ret == EOF && feof(f))
			break;
		if (ret != 3) {
			pr_warn("failed to read kallsyms entry: %d\n", ret);
			err = -EINVAL;
			break;
		}

		err = cb(sym_addr, sym_type, sym_name, ctx);
		if (err)
			break;
	}

	fclose(f);
	return err;
}
static int kallsyms_cb(unsigned long long sym_addr, char sym_type,
		       const char *sym_name, void *ctx)
{
	struct bpf_object *obj = ctx;
	const struct btf_type *t;
	struct extern_desc *ext;

	ext = find_extern_by_name(obj, sym_name);
	if (!ext || ext->type != EXT_KSYM)
		return 0;

	t = btf__type_by_id(obj->btf, ext->btf_id);
	if (!btf_is_var(t))
		return 0;

	if (ext->is_set && ext->ksym.addr != sym_addr) {
		pr_warn("extern (ksym) '%s': resolution is ambiguous: 0x%llx or 0x%llx\n",
			sym_name, ext->ksym.addr, sym_addr);
		return -EINVAL;
	}
	if (!ext->is_set) {
		ext->is_set = true;
		ext->ksym.addr = sym_addr;
		pr_debug("extern (ksym) '%s': set to 0x%llx\n", sym_name, sym_addr);
	}
	return 0;
}
static int bpf_object__read_kallsyms_file(struct bpf_object *obj)
{
	return libbpf_kallsyms_parse(kallsyms_cb, obj);
}
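
/* Example (illustrative): /proc/kallsyms lines parsed by the fscanf() format
 * above look like
 *
 *	ffffffff81000000 T _stext
 *	ffffffffc0a01000 t helper_fn	[some_module]
 *
 * i.e. "<addr> <type> <name>[ <module>]"; the trailing module annotation is
 * skipped by the %*[^\n] assignment suppression.
 */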
static int find_ksym_btf_id(struct bpf_object *obj, const char *ksym_name,
			    __u16 kind, struct btf **res_btf,
			    struct module_btf **res_mod_btf)
{
	struct module_btf *mod_btf;
	struct btf *btf;
	int i, id, err;

	btf = obj->btf_vmlinux;
	mod_btf = NULL;
	id = btf__find_by_name_kind(btf, ksym_name, kind);

	if (id == -ENOENT) {
		err = load_module_btfs(obj);
		if (err)
			return err;

		for (i = 0; i < obj->btf_module_cnt; i++) {
			/* we assume module_btf's BTF FD is always >0 */
			mod_btf = &obj->btf_modules[i];
			btf = mod_btf->btf;
			id = btf__find_by_name_kind_own(btf, ksym_name, kind);
			if (id != -ENOENT)
				break;
		}
	}
	if (id <= 0)
		return -ESRCH;

	*res_btf = btf;
	*res_mod_btf = mod_btf;
	return id;
}
static int bpf_object__resolve_ksym_var_btf_id(struct bpf_object *obj,
					       struct extern_desc *ext)
{
	const struct btf_type *targ_var, *targ_type;
	__u32 targ_type_id, local_type_id;
	struct module_btf *mod_btf = NULL;
	const char *targ_var_name;
	struct btf *btf = NULL;
	int id, err;

	id = find_ksym_btf_id(obj, ext->name, BTF_KIND_VAR, &btf, &mod_btf);
	if (id < 0) {
		if (id == -ESRCH && ext->is_weak)
			return 0;
		pr_warn("extern (var ksym) '%s': not found in kernel BTF\n",
			ext->name);
		return id;
	}

	/* find local type_id */
	local_type_id = ext->ksym.type_id;

	/* find target type_id */
	targ_var = btf__type_by_id(btf, id);
	targ_var_name = btf__name_by_offset(btf, targ_var->name_off);
	targ_type = skip_mods_and_typedefs(btf, targ_var->type, &targ_type_id);

	err = bpf_core_types_are_compat(obj->btf, local_type_id,
					btf, targ_type_id);
	if (err <= 0) {
		const struct btf_type *local_type;
		const char *targ_name, *local_name;

		local_type = btf__type_by_id(obj->btf, local_type_id);
		local_name = btf__name_by_offset(obj->btf, local_type->name_off);
		targ_name = btf__name_by_offset(btf, targ_type->name_off);

		pr_warn("extern (var ksym) '%s': incompatible types, expected [%d] %s %s, but kernel has [%d] %s %s\n",
			ext->name, local_type_id,
			btf_kind_str(local_type), local_name, targ_type_id,
			btf_kind_str(targ_type), targ_name);
		return -EINVAL;
	}

	ext->is_set = true;
	ext->ksym.kernel_btf_obj_fd = mod_btf ? mod_btf->fd : 0;
	ext->ksym.kernel_btf_id = id;
	pr_debug("extern (var ksym) '%s': resolved to [%d] %s %s\n",
		 ext->name, id, btf_kind_str(targ_var), targ_var_name);

	return 0;
}
static int bpf_object__resolve_ksym_func_btf_id(struct bpf_object *obj,
						struct extern_desc *ext)
{
	int local_func_proto_id, kfunc_proto_id, kfunc_id;
	struct module_btf *mod_btf = NULL;
	const struct btf_type *kern_func;
	struct btf *kern_btf = NULL;
	int ret;

	local_func_proto_id = ext->ksym.type_id;

	kfunc_id = find_ksym_btf_id(obj, ext->essent_name ?: ext->name, BTF_KIND_FUNC, &kern_btf,
				    &mod_btf);
	if (kfunc_id < 0) {
		if (kfunc_id == -ESRCH && ext->is_weak)
			return 0;
		pr_warn("extern (func ksym) '%s': not found in kernel or module BTFs\n",
			ext->name);
		return kfunc_id;
	}

	kern_func = btf__type_by_id(kern_btf, kfunc_id);
	kfunc_proto_id = kern_func->type;

	ret = bpf_core_types_are_compat(obj->btf, local_func_proto_id,
					kern_btf, kfunc_proto_id);
	if (ret <= 0) {
		if (ext->is_weak)
			return 0;

		pr_warn("extern (func ksym) '%s': func_proto [%d] incompatible with %s [%d]\n",
			ext->name, local_func_proto_id,
			mod_btf ? mod_btf->name : "vmlinux", kfunc_proto_id);
		return -EINVAL;
	}

	/* set index for module BTF fd in fd_array, if unset */
	if (mod_btf && !mod_btf->fd_array_idx) {
		/* insn->off is s16 */
		if (obj->fd_array_cnt == INT16_MAX) {
			pr_warn("extern (func ksym) '%s': module BTF fd index %d too big to fit in bpf_insn offset\n",
				ext->name, mod_btf->fd_array_idx);
			return -E2BIG;
		}
		/* Cannot use index 0 for module BTF fd */
		if (!obj->fd_array_cnt)
			obj->fd_array_cnt = 1;

		ret = libbpf_ensure_mem((void **)&obj->fd_array, &obj->fd_array_cap, sizeof(int),
					obj->fd_array_cnt + 1);
		if (ret)
			return ret;
		mod_btf->fd_array_idx = obj->fd_array_cnt;
		/* we assume module BTF FD is always >0 */
		obj->fd_array[obj->fd_array_cnt++] = mod_btf->fd;
	}

	ext->is_set = true;
	ext->ksym.kernel_btf_id = kfunc_id;
	ext->ksym.btf_fd_idx = mod_btf ? mod_btf->fd_array_idx : 0;
	/* Also set kernel_btf_obj_fd to make sure that bpf_object__relocate_data()
	 * populates FD into ld_imm64 insn when it's used to point to kfunc.
	 * {kernel_btf_id, btf_fd_idx} -> fixup bpf_call.
	 * {kernel_btf_id, kernel_btf_obj_fd} -> fixup ld_imm64.
	 */
	ext->ksym.kernel_btf_obj_fd = mod_btf ? mod_btf->fd : 0;
	pr_debug("extern (func ksym) '%s': resolved to %s [%d]\n",
		 ext->name, mod_btf ? mod_btf->name : "vmlinux", kfunc_id);

	return 0;
}
static int bpf_object__resolve_ksyms_btf_id(struct bpf_object *obj)
{
	const struct btf_type *t;
	struct extern_desc *ext;
	int i, err;

	for (i = 0; i < obj->nr_extern; i++) {
		ext = &obj->externs[i];
		if (ext->type != EXT_KSYM || !ext->ksym.type_id)
			continue;

		if (obj->gen_loader) {
			ext->is_set = true;
			ext->ksym.kernel_btf_obj_fd = 0;
			ext->ksym.kernel_btf_id = 0;
			continue;
		}
		t = btf__type_by_id(obj->btf, ext->btf_id);
		if (btf_is_var(t))
			err = bpf_object__resolve_ksym_var_btf_id(obj, ext);
		else
			err = bpf_object__resolve_ksym_func_btf_id(obj, ext);
		if (err)
			return err;
	}
	return 0;
}
*obj
,
8081 const char *extra_kconfig
)
8083 bool need_config
= false, need_kallsyms
= false;
8084 bool need_vmlinux_btf
= false;
8085 struct extern_desc
*ext
;
8086 void *kcfg_data
= NULL
;
8089 if (obj
->nr_extern
== 0)
8092 if (obj
->kconfig_map_idx
>= 0)
8093 kcfg_data
= obj
->maps
[obj
->kconfig_map_idx
].mmaped
;
8095 for (i
= 0; i
< obj
->nr_extern
; i
++) {
8096 ext
= &obj
->externs
[i
];
8098 if (ext
->type
== EXT_KSYM
) {
8099 if (ext
->ksym
.type_id
)
8100 need_vmlinux_btf
= true;
8102 need_kallsyms
= true;
8104 } else if (ext
->type
== EXT_KCFG
) {
8105 void *ext_ptr
= kcfg_data
+ ext
->kcfg
.data_off
;
8108 /* Kconfig externs need actual /proc/config.gz */
8109 if (str_has_pfx(ext
->name
, "CONFIG_")) {
8114 /* Virtual kcfg externs are customly handled by libbpf */
8115 if (strcmp(ext
->name
, "LINUX_KERNEL_VERSION") == 0) {
8116 value
= get_kernel_version();
8118 pr_warn("extern (kcfg) '%s': failed to get kernel version\n", ext
->name
);
8121 } else if (strcmp(ext
->name
, "LINUX_HAS_BPF_COOKIE") == 0) {
8122 value
= kernel_supports(obj
, FEAT_BPF_COOKIE
);
8123 } else if (strcmp(ext
->name
, "LINUX_HAS_SYSCALL_WRAPPER") == 0) {
8124 value
= kernel_supports(obj
, FEAT_SYSCALL_WRAPPER
);
8125 } else if (!str_has_pfx(ext
->name
, "LINUX_") || !ext
->is_weak
) {
8126 /* Currently libbpf supports only CONFIG_ and LINUX_ prefixed
8127 * __kconfig externs, where LINUX_ ones are virtual and filled out
8128 * customly by libbpf (their values don't come from Kconfig).
8129 * If LINUX_xxx variable is not recognized by libbpf, but is marked
8130 * __weak, it defaults to zero value, just like for CONFIG_xxx
8133 pr_warn("extern (kcfg) '%s': unrecognized virtual extern\n", ext
->name
);
8137 err
= set_kcfg_value_num(ext
, ext_ptr
, value
);
8140 pr_debug("extern (kcfg) '%s': set to 0x%llx\n",
8141 ext
->name
, (long long)value
);
8143 pr_warn("extern '%s': unrecognized extern kind\n", ext
->name
);
8147 if (need_config
&& extra_kconfig
) {
8148 err
= bpf_object__read_kconfig_mem(obj
, extra_kconfig
, kcfg_data
);
8151 need_config
= false;
8152 for (i
= 0; i
< obj
->nr_extern
; i
++) {
8153 ext
= &obj
->externs
[i
];
8154 if (ext
->type
== EXT_KCFG
&& !ext
->is_set
) {
8161 err
= bpf_object__read_kconfig_file(obj
, kcfg_data
);
8165 if (need_kallsyms
) {
8166 err
= bpf_object__read_kallsyms_file(obj
);
8170 if (need_vmlinux_btf
) {
8171 err
= bpf_object__resolve_ksyms_btf_id(obj
);
8175 for (i
= 0; i
< obj
->nr_extern
; i
++) {
8176 ext
= &obj
->externs
[i
];
8178 if (!ext
->is_set
&& !ext
->is_weak
) {
8179 pr_warn("extern '%s' (strong): not resolved\n", ext
->name
);
8181 } else if (!ext
->is_set
) {
8182 pr_debug("extern '%s' (weak): not resolved, defaulting to zero\n",
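
/* Example (illustrative, not from this file): the kcfg externs handled above
 * are declared on the BPF side like
 *
 *	extern unsigned int LINUX_KERNEL_VERSION __kconfig;
 *	extern int CONFIG_HZ __kconfig __weak;
 *
 * LINUX_* values are synthesized by libbpf itself; CONFIG_* values come from
 * /proc/config.gz (or from an extra kconfig string passed at open time).
 */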
static void bpf_map_prepare_vdata(const struct bpf_map *map)
{
	struct bpf_struct_ops *st_ops;
	__u32 i;

	st_ops = map->st_ops;
	for (i = 0; i < btf_vlen(st_ops->type); i++) {
		struct bpf_program *prog = st_ops->progs[i];
		void *kern_data;
		int prog_fd;

		if (!prog)
			continue;

		prog_fd = bpf_program__fd(prog);
		kern_data = st_ops->kern_vdata + st_ops->kern_func_off[i];
		*(unsigned long *)kern_data = prog_fd;
	}
}
static int bpf_object_prepare_struct_ops(struct bpf_object *obj)
{
	struct bpf_map *map;
	int i;

	for (i = 0; i < obj->nr_maps; i++) {
		map = &obj->maps[i];

		if (!bpf_map__is_struct_ops(map))
			continue;

		if (!map->autocreate)
			continue;

		bpf_map_prepare_vdata(map);
	}

	return 0;
}
static int bpf_object_load(struct bpf_object *obj, int extra_log_level, const char *target_btf_path)
{
	int err, i;

	if (!obj)
		return libbpf_err(-EINVAL);

	if (obj->loaded) {
		pr_warn("object '%s': load can't be attempted twice\n", obj->name);
		return libbpf_err(-EINVAL);
	}

	if (obj->gen_loader)
		bpf_gen__init(obj->gen_loader, extra_log_level, obj->nr_programs, obj->nr_maps);

	err = bpf_object_prepare_token(obj);
	err = err ? : bpf_object__probe_loading(obj);
	err = err ? : bpf_object__load_vmlinux_btf(obj, false);
	err = err ? : bpf_object__resolve_externs(obj, obj->kconfig);
	err = err ? : bpf_object__sanitize_maps(obj);
	err = err ? : bpf_object__init_kern_struct_ops_maps(obj);
	err = err ? : bpf_object_adjust_struct_ops_autoload(obj);
	err = err ? : bpf_object__relocate(obj, obj->btf_custom_path ? : target_btf_path);
	err = err ? : bpf_object__sanitize_and_load_btf(obj);
	err = err ? : bpf_object__create_maps(obj);
	err = err ? : bpf_object__load_progs(obj, extra_log_level);
	err = err ? : bpf_object_init_prog_arrays(obj);
	err = err ? : bpf_object_prepare_struct_ops(obj);

	if (obj->gen_loader) {
		/* reset FDs */
		if (obj->btf)
			btf__set_fd(obj->btf, -1);
		if (!err)
			err = bpf_gen__finish(obj->gen_loader, obj->nr_programs, obj->nr_maps);
	}

	/* clean up fd_array */
	zfree(&obj->fd_array);

	/* clean up module BTFs */
	for (i = 0; i < obj->btf_module_cnt; i++) {
		close(obj->btf_modules[i].fd);
		btf__free(obj->btf_modules[i].btf);
		free(obj->btf_modules[i].name);
	}
	free(obj->btf_modules);

	/* clean up vmlinux BTF */
	btf__free(obj->btf_vmlinux);
	obj->btf_vmlinux = NULL;

	obj->loaded = true; /* doesn't matter if successfully or not */

	if (err)
		goto out;

	return 0;
out:
	/* unpin any maps that were auto-pinned during load */
	for (i = 0; i < obj->nr_maps; i++)
		if (obj->maps[i].pinned && !obj->maps[i].reused)
			bpf_map__unpin(&obj->maps[i], NULL);

	bpf_object_unload(obj);
	pr_warn("failed to load object '%s'\n", obj->path);
	return libbpf_err(err);
}
int bpf_object__load(struct bpf_object *obj)
{
	return bpf_object_load(obj, 0, NULL);
}
static int make_parent_dir(const char *path)
{
	char *cp, errmsg[STRERR_BUFSIZE];
	char *dname, *dir;
	int err = 0;

	dname = strdup(path);
	if (dname == NULL)
		return -ENOMEM;

	dir = dirname(dname);
	if (mkdir(dir, 0700) && errno != EEXIST)
		err = -errno;

	free(dname);
	if (err) {
		cp = libbpf_strerror_r(-err, errmsg, sizeof(errmsg));
		pr_warn("failed to mkdir %s: %s\n", path, cp);
	}
	return err;
}
static int check_path(const char *path)
{
	char *cp, errmsg[STRERR_BUFSIZE];
	struct statfs st_fs;
	char *dname, *dir;
	int err = 0;

	if (path == NULL)
		return -EINVAL;

	dname = strdup(path);
	if (dname == NULL)
		return -ENOMEM;

	dir = dirname(dname);
	if (statfs(dir, &st_fs)) {
		cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg));
		pr_warn("failed to statfs %s: %s\n", dir, cp);
		err = -errno;
	}
	free(dname);

	if (!err && st_fs.f_type != BPF_FS_MAGIC) {
		pr_warn("specified path %s is not on BPF FS\n", path);
		err = -EINVAL;
	}

	return err;
}
int bpf_program__pin(struct bpf_program *prog, const char *path)
{
	char *cp, errmsg[STRERR_BUFSIZE];
	int err;

	if (prog->fd < 0) {
		pr_warn("prog '%s': can't pin program that wasn't loaded\n", prog->name);
		return libbpf_err(-EINVAL);
	}

	err = make_parent_dir(path);
	if (err)
		return libbpf_err(err);

	err = check_path(path);
	if (err)
		return libbpf_err(err);

	if (bpf_obj_pin(prog->fd, path)) {
		err = -errno;
		cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg));
		pr_warn("prog '%s': failed to pin at '%s': %s\n", prog->name, path, cp);
		return libbpf_err(err);
	}

	pr_debug("prog '%s': pinned at '%s'\n", prog->name, path);
	return 0;
}
int bpf_program__unpin(struct bpf_program *prog, const char *path)
{
	int err;

	if (prog->fd < 0) {
		pr_warn("prog '%s': can't unpin program that wasn't loaded\n", prog->name);
		return libbpf_err(-EINVAL);
	}

	err = check_path(path);
	if (err)
		return libbpf_err(err);

	err = unlink(path);
	if (err)
		return libbpf_err(-errno);

	pr_debug("prog '%s': unpinned from '%s'\n", prog->name, path);
	return 0;
}
int bpf_map__pin(struct bpf_map *map, const char *path)
{
	char *cp, errmsg[STRERR_BUFSIZE];
	int err;

	if (map == NULL) {
		pr_warn("invalid map pointer\n");
		return libbpf_err(-EINVAL);
	}

	if (map->pin_path) {
		if (path && strcmp(path, map->pin_path)) {
			pr_warn("map '%s' already has pin path '%s' different from '%s'\n",
				bpf_map__name(map), map->pin_path, path);
			return libbpf_err(-EINVAL);
		} else if (map->pinned) {
			pr_debug("map '%s' already pinned at '%s'; not re-pinning\n",
				 bpf_map__name(map), map->pin_path);
			return 0;
		}
	} else {
		if (!path) {
			pr_warn("missing a path to pin map '%s' at\n",
				bpf_map__name(map));
			return libbpf_err(-EINVAL);
		} else if (map->pinned) {
			pr_warn("map '%s' already pinned\n", bpf_map__name(map));
			return libbpf_err(-EEXIST);
		}

		map->pin_path = strdup(path);
		if (!map->pin_path) {
			err = -errno;
			goto out_err;
		}
	}

	err = make_parent_dir(map->pin_path);
	if (err)
		return libbpf_err(err);

	err = check_path(map->pin_path);
	if (err)
		return libbpf_err(err);

	if (bpf_obj_pin(map->fd, map->pin_path)) {
		err = -errno;
		goto out_err;
	}

	map->pinned = true;
	pr_debug("pinned map '%s'\n", map->pin_path);

	return 0;

out_err:
	cp = libbpf_strerror_r(-err, errmsg, sizeof(errmsg));
	pr_warn("failed to pin map: %s\n", cp);
	return libbpf_err(err);
}
*map
, const char *path
)
8472 pr_warn("invalid map pointer\n");
8473 return libbpf_err(-EINVAL
);
8476 if (map
->pin_path
) {
8477 if (path
&& strcmp(path
, map
->pin_path
)) {
8478 pr_warn("map '%s' already has pin path '%s' different from '%s'\n",
8479 bpf_map__name(map
), map
->pin_path
, path
);
8480 return libbpf_err(-EINVAL
);
8482 path
= map
->pin_path
;
8484 pr_warn("no path to unpin map '%s' from\n",
8485 bpf_map__name(map
));
8486 return libbpf_err(-EINVAL
);
8489 err
= check_path(path
);
8491 return libbpf_err(err
);
8495 return libbpf_err(-errno
);
8497 map
->pinned
= false;
8498 pr_debug("unpinned map '%s' from '%s'\n", bpf_map__name(map
), path
);
8503 int bpf_map__set_pin_path(struct bpf_map
*map
, const char *path
)
8510 return libbpf_err(-errno
);
8513 free(map
->pin_path
);
8514 map
->pin_path
= new;
__alias(bpf_map__pin_path)
const char *bpf_map__get_pin_path(const struct bpf_map *map);

const char *bpf_map__pin_path(const struct bpf_map *map)
{
	return map->pin_path;
}
bool bpf_map__is_pinned(const struct bpf_map *map)
{
	return map->pinned;
}

static void sanitize_pin_path(char *s)
{
	/* bpffs disallows periods in path names */
	while (*s) {
		if (*s == '.')
			*s = '_';
		s++;
	}
}
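
/* Example (illustrative): sanitize_pin_path() turns a map name like
 * "my.rodata" into "my_rodata", since bpffs rejects path components
 * containing '.'.
 */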
int bpf_object__pin_maps(struct bpf_object *obj, const char *path)
{
	struct bpf_map *map;
	int err;

	if (!obj)
		return libbpf_err(-ENOENT);

	if (!obj->loaded) {
		pr_warn("object not yet loaded; load it first\n");
		return libbpf_err(-ENOENT);
	}

	bpf_object__for_each_map(map, obj) {
		char *pin_path = NULL;
		char buf[PATH_MAX];

		if (!map->autocreate)
			continue;

		if (path) {
			err = pathname_concat(buf, sizeof(buf), path, bpf_map__name(map));
			if (err)
				goto err_unpin_maps;
			sanitize_pin_path(buf);
			pin_path = buf;
		} else if (!map->pin_path) {
			continue;
		}

		err = bpf_map__pin(map, pin_path);
		if (err)
			goto err_unpin_maps;
	}

	return 0;

err_unpin_maps:
	while ((map = bpf_object__prev_map(obj, map))) {
		if (!map->pin_path)
			continue;

		bpf_map__unpin(map, NULL);
	}

	return libbpf_err(err);
}
int bpf_object__unpin_maps(struct bpf_object *obj, const char *path)
{
	struct bpf_map *map;
	int err;

	if (!obj)
		return libbpf_err(-ENOENT);

	bpf_object__for_each_map(map, obj) {
		char *pin_path = NULL;
		char buf[PATH_MAX];

		if (path) {
			err = pathname_concat(buf, sizeof(buf), path, bpf_map__name(map));
			if (err)
				return libbpf_err(err);
			sanitize_pin_path(buf);
			pin_path = buf;
		} else if (!map->pin_path) {
			continue;
		}

		err = bpf_map__unpin(map, pin_path);
		if (err)
			return libbpf_err(err);
	}

	return 0;
}
*obj
, const char *path
)
8621 struct bpf_program
*prog
;
8626 return libbpf_err(-ENOENT
);
8629 pr_warn("object not yet loaded; load it first\n");
8630 return libbpf_err(-ENOENT
);
8633 bpf_object__for_each_program(prog
, obj
) {
8634 err
= pathname_concat(buf
, sizeof(buf
), path
, prog
->name
);
8636 goto err_unpin_programs
;
8638 err
= bpf_program__pin(prog
, buf
);
8640 goto err_unpin_programs
;
8646 while ((prog
= bpf_object__prev_program(obj
, prog
))) {
8647 if (pathname_concat(buf
, sizeof(buf
), path
, prog
->name
))
8650 bpf_program__unpin(prog
, buf
);
8653 return libbpf_err(err
);
8656 int bpf_object__unpin_programs(struct bpf_object
*obj
, const char *path
)
8658 struct bpf_program
*prog
;
8662 return libbpf_err(-ENOENT
);
8664 bpf_object__for_each_program(prog
, obj
) {
8667 err
= pathname_concat(buf
, sizeof(buf
), path
, prog
->name
);
8669 return libbpf_err(err
);
8671 err
= bpf_program__unpin(prog
, buf
);
8673 return libbpf_err(err
);
8679 int bpf_object__pin(struct bpf_object
*obj
, const char *path
)
8683 err
= bpf_object__pin_maps(obj
, path
);
8685 return libbpf_err(err
);
8687 err
= bpf_object__pin_programs(obj
, path
);
8689 bpf_object__unpin_maps(obj
, path
);
8690 return libbpf_err(err
);
8696 int bpf_object__unpin(struct bpf_object
*obj
, const char *path
)
8700 err
= bpf_object__unpin_programs(obj
, path
);
8702 return libbpf_err(err
);
8704 err
= bpf_object__unpin_maps(obj
, path
);
8706 return libbpf_err(err
);
8711 static void bpf_map__destroy(struct bpf_map
*map
)
8713 if (map
->inner_map
) {
8714 bpf_map__destroy(map
->inner_map
);
8715 zfree(&map
->inner_map
);
8718 zfree(&map
->init_slots
);
8719 map
->init_slots_sz
= 0;
8724 mmap_sz
= bpf_map_mmap_sz(map
->def
.value_size
, map
->def
.max_entries
);
8725 munmap(map
->mmaped
, mmap_sz
);
8730 zfree(&map
->st_ops
->data
);
8731 zfree(&map
->st_ops
->progs
);
8732 zfree(&map
->st_ops
->kern_func_off
);
8733 zfree(&map
->st_ops
);
8737 zfree(&map
->real_name
);
8738 zfree(&map
->pin_path
);
void bpf_object__close(struct bpf_object *obj)
{
	size_t i;

	if (IS_ERR_OR_NULL(obj))
		return;

	usdt_manager_free(obj->usdt_man);
	obj->usdt_man = NULL;

	bpf_gen__free(obj->gen_loader);
	bpf_object__elf_finish(obj);
	bpf_object_unload(obj);
	btf__free(obj->btf);
	btf__free(obj->btf_vmlinux);
	btf_ext__free(obj->btf_ext);

	for (i = 0; i < obj->nr_maps; i++)
		bpf_map__destroy(&obj->maps[i]);

	zfree(&obj->btf_custom_path);
	zfree(&obj->kconfig);

	for (i = 0; i < obj->nr_extern; i++)
		zfree(&obj->externs[i].essent_name);

	zfree(&obj->externs);
	obj->nr_extern = 0;

	zfree(&obj->maps);
	obj->nr_maps = 0;

	if (obj->programs && obj->nr_programs) {
		for (i = 0; i < obj->nr_programs; i++)
			bpf_program__exit(&obj->programs[i]);
	}
	zfree(&obj->programs);

	zfree(&obj->feat_cache);
	zfree(&obj->token_path);
	if (obj->token_fd > 0)
		close(obj->token_fd);

	free(obj);
}
*obj
)
8792 return obj
? obj
->name
: libbpf_err_ptr(-EINVAL
);
8795 unsigned int bpf_object__kversion(const struct bpf_object
*obj
)
8797 return obj
? obj
->kern_version
: 0;
8800 struct btf
*bpf_object__btf(const struct bpf_object
*obj
)
8802 return obj
? obj
->btf
: NULL
;
8805 int bpf_object__btf_fd(const struct bpf_object
*obj
)
8807 return obj
->btf
? btf__fd(obj
->btf
) : -1;
8810 int bpf_object__set_kversion(struct bpf_object
*obj
, __u32 kern_version
)
8813 return libbpf_err(-EINVAL
);
8815 obj
->kern_version
= kern_version
;
8820 int bpf_object__gen_loader(struct bpf_object
*obj
, struct gen_loader_opts
*opts
)
8822 struct bpf_gen
*gen
;
8826 if (!OPTS_VALID(opts
, gen_loader_opts
))
8828 gen
= calloc(sizeof(*gen
), 1);
8832 obj
->gen_loader
= gen
;
static struct bpf_program *
__bpf_program__iter(const struct bpf_program *p, const struct bpf_object *obj,
		    bool forward)
{
	size_t nr_programs = obj->nr_programs;
	ssize_t idx;

	if (!nr_programs)
		return NULL;

	if (!p)
		/* Iter from the beginning */
		return forward ? &obj->programs[0] :
			&obj->programs[nr_programs - 1];

	if (p->obj != obj) {
		pr_warn("error: program handler doesn't match object\n");
		return errno = EINVAL, NULL;
	}

	idx = (p - obj->programs) + (forward ? 1 : -1);
	if (idx >= obj->nr_programs || idx < 0)
		return NULL;
	return &obj->programs[idx];
}

struct bpf_program *
bpf_object__next_program(const struct bpf_object *obj, struct bpf_program *prev)
{
	struct bpf_program *prog = prev;

	do {
		prog = __bpf_program__iter(prog, obj, true);
	} while (prog && prog_is_subprog(obj, prog));

	return prog;
}

struct bpf_program *
bpf_object__prev_program(const struct bpf_object *obj, struct bpf_program *next)
{
	struct bpf_program *prog = next;

	do {
		prog = __bpf_program__iter(prog, obj, false);
	} while (prog && prog_is_subprog(obj, prog));

	return prog;
}

void bpf_program__set_ifindex(struct bpf_program *prog, __u32 ifindex)
{
	prog->prog_ifindex = ifindex;
}

const char *bpf_program__name(const struct bpf_program *prog)
{
	return prog->name;
}

const char *bpf_program__section_name(const struct bpf_program *prog)
{
	return prog->sec_name;
}

bool bpf_program__autoload(const struct bpf_program *prog)
{
	return prog->autoload;
}

int bpf_program__set_autoload(struct bpf_program *prog, bool autoload)
{
	if (prog->obj->loaded)
		return libbpf_err(-EINVAL);

	prog->autoload = autoload;
	return 0;
}

bool bpf_program__autoattach(const struct bpf_program *prog)
{
	return prog->autoattach;
}

void bpf_program__set_autoattach(struct bpf_program *prog, bool autoattach)
{
	prog->autoattach = autoattach;
}

const struct bpf_insn *bpf_program__insns(const struct bpf_program *prog)
{
	return prog->insns;
}

size_t bpf_program__insn_cnt(const struct bpf_program *prog)
{
	return prog->insns_cnt;
}

int bpf_program__set_insns(struct bpf_program *prog,
			   struct bpf_insn *new_insns, size_t new_insn_cnt)
{
	struct bpf_insn *insns;

	if (prog->obj->loaded)
		return libbpf_err(-EBUSY);

	insns = libbpf_reallocarray(prog->insns, new_insn_cnt, sizeof(*insns));
	/* NULL is a valid return from reallocarray if the new count is zero */
	if (!insns && new_insn_cnt) {
		pr_warn("prog '%s': failed to realloc prog code\n", prog->name);
		return libbpf_err(-ENOMEM);
	}
	memcpy(insns, new_insns, new_insn_cnt * sizeof(*insns));

	prog->insns = insns;
	prog->insns_cnt = new_insn_cnt;

	return 0;
}

int bpf_program__fd(const struct bpf_program *prog)
{
	if (!prog)
		return libbpf_err(-EINVAL);

	if (prog->fd < 0)
		return libbpf_err(-ENOENT);

	return prog->fd;
}

__alias(bpf_program__type)
enum bpf_prog_type bpf_program__get_type(const struct bpf_program *prog);

enum bpf_prog_type bpf_program__type(const struct bpf_program *prog)
{
	return prog->type;
}

static size_t custom_sec_def_cnt;
static struct bpf_sec_def *custom_sec_defs;
static struct bpf_sec_def custom_fallback_def;
static bool has_custom_fallback_def;
static int last_custom_sec_def_handler_id;

int bpf_program__set_type(struct bpf_program *prog, enum bpf_prog_type type)
{
	if (prog->obj->loaded)
		return libbpf_err(-EBUSY);

	/* if type is not changed, do nothing */
	if (prog->type == type)
		return 0;

	prog->type = type;

	/* If a program type was changed, we need to reset associated SEC()
	 * handler, as it will be invalid now. The only exception is a generic
	 * fallback handler, which by definition is program type-agnostic and
	 * is a catch-all custom handler, optionally set by the application,
	 * so should be able to handle any type of BPF program.
	 */
	if (prog->sec_def != &custom_fallback_def)
		prog->sec_def = NULL;
	return 0;
}

__alias(bpf_program__expected_attach_type)
enum bpf_attach_type bpf_program__get_expected_attach_type(const struct bpf_program *prog);

enum bpf_attach_type bpf_program__expected_attach_type(const struct bpf_program *prog)
{
	return prog->expected_attach_type;
}

int bpf_program__set_expected_attach_type(struct bpf_program *prog,
					  enum bpf_attach_type type)
{
	if (prog->obj->loaded)
		return libbpf_err(-EBUSY);

	prog->expected_attach_type = type;
	return 0;
}

__u32 bpf_program__flags(const struct bpf_program *prog)
{
	return prog->prog_flags;
}

int bpf_program__set_flags(struct bpf_program *prog, __u32 flags)
{
	if (prog->obj->loaded)
		return libbpf_err(-EBUSY);

	prog->prog_flags = flags;
	return 0;
}

__u32 bpf_program__log_level(const struct bpf_program *prog)
{
	return prog->log_level;
}

int bpf_program__set_log_level(struct bpf_program *prog, __u32 log_level)
{
	if (prog->obj->loaded)
		return libbpf_err(-EBUSY);

	prog->log_level = log_level;
	return 0;
}

const char *bpf_program__log_buf(const struct bpf_program *prog, size_t *log_size)
{
	*log_size = prog->log_size;
	return prog->log_buf;
}

int bpf_program__set_log_buf(struct bpf_program *prog, char *log_buf, size_t log_size)
{
	if (log_size && !log_buf)
		return -EINVAL;
	if (prog->log_size > UINT_MAX)
		return -EINVAL;
	if (prog->obj->loaded)
		return -EBUSY;

	prog->log_buf = log_buf;
	prog->log_size = log_size;
	return 0;
}

#define SEC_DEF(sec_pfx, ptype, atype, flags, ...) {			    \
	.sec = (char *)sec_pfx,						    \
	.prog_type = BPF_PROG_TYPE_##ptype,				    \
	.expected_attach_type = atype,					    \
	.cookie = (long)(flags),					    \
	.prog_prepare_load_fn = libbpf_prepare_prog_load,		    \
	__VA_ARGS__							    \
}

static int attach_kprobe(const struct bpf_program *prog, long cookie, struct bpf_link **link);
static int attach_uprobe(const struct bpf_program *prog, long cookie, struct bpf_link **link);
static int attach_ksyscall(const struct bpf_program *prog, long cookie, struct bpf_link **link);
static int attach_usdt(const struct bpf_program *prog, long cookie, struct bpf_link **link);
static int attach_tp(const struct bpf_program *prog, long cookie, struct bpf_link **link);
static int attach_raw_tp(const struct bpf_program *prog, long cookie, struct bpf_link **link);
static int attach_trace(const struct bpf_program *prog, long cookie, struct bpf_link **link);
static int attach_kprobe_multi(const struct bpf_program *prog, long cookie, struct bpf_link **link);
static int attach_uprobe_multi(const struct bpf_program *prog, long cookie, struct bpf_link **link);
static int attach_lsm(const struct bpf_program *prog, long cookie, struct bpf_link **link);
static int attach_iter(const struct bpf_program *prog, long cookie, struct bpf_link **link);

static const struct bpf_sec_def section_defs[] = {
	SEC_DEF("socket",		SOCKET_FILTER, 0, SEC_NONE),
	SEC_DEF("sk_reuseport/migrate",	SK_REUSEPORT, BPF_SK_REUSEPORT_SELECT_OR_MIGRATE, SEC_ATTACHABLE),
	SEC_DEF("sk_reuseport",		SK_REUSEPORT, BPF_SK_REUSEPORT_SELECT, SEC_ATTACHABLE),
	SEC_DEF("kprobe+",		KPROBE,	0, SEC_NONE, attach_kprobe),
	SEC_DEF("uprobe+",		KPROBE,	0, SEC_NONE, attach_uprobe),
	SEC_DEF("uprobe.s+",		KPROBE,	0, SEC_SLEEPABLE, attach_uprobe),
	SEC_DEF("kretprobe+",		KPROBE, 0, SEC_NONE, attach_kprobe),
	SEC_DEF("uretprobe+",		KPROBE, 0, SEC_NONE, attach_uprobe),
	SEC_DEF("uretprobe.s+",		KPROBE, 0, SEC_SLEEPABLE, attach_uprobe),
	SEC_DEF("kprobe.multi+",	KPROBE,	BPF_TRACE_KPROBE_MULTI, SEC_NONE, attach_kprobe_multi),
	SEC_DEF("kretprobe.multi+",	KPROBE,	BPF_TRACE_KPROBE_MULTI, SEC_NONE, attach_kprobe_multi),
	SEC_DEF("uprobe.multi+",	KPROBE,	BPF_TRACE_UPROBE_MULTI, SEC_NONE, attach_uprobe_multi),
	SEC_DEF("uretprobe.multi+",	KPROBE,	BPF_TRACE_UPROBE_MULTI, SEC_NONE, attach_uprobe_multi),
	SEC_DEF("uprobe.multi.s+",	KPROBE,	BPF_TRACE_UPROBE_MULTI, SEC_SLEEPABLE, attach_uprobe_multi),
	SEC_DEF("uretprobe.multi.s+",	KPROBE,	BPF_TRACE_UPROBE_MULTI, SEC_SLEEPABLE, attach_uprobe_multi),
	SEC_DEF("ksyscall+",		KPROBE,	0, SEC_NONE, attach_ksyscall),
	SEC_DEF("kretsyscall+",		KPROBE, 0, SEC_NONE, attach_ksyscall),
	SEC_DEF("usdt+",		KPROBE,	0, SEC_USDT, attach_usdt),
	SEC_DEF("usdt.s+",		KPROBE,	0, SEC_USDT | SEC_SLEEPABLE, attach_usdt),
	SEC_DEF("tc/ingress",		SCHED_CLS, BPF_TCX_INGRESS, SEC_NONE), /* alias for tcx */
	SEC_DEF("tc/egress",		SCHED_CLS, BPF_TCX_EGRESS, SEC_NONE),  /* alias for tcx */
	SEC_DEF("tcx/ingress",		SCHED_CLS, BPF_TCX_INGRESS, SEC_NONE),
	SEC_DEF("tcx/egress",		SCHED_CLS, BPF_TCX_EGRESS, SEC_NONE),
	SEC_DEF("tc",			SCHED_CLS, 0, SEC_NONE), /* deprecated / legacy, use tcx */
	SEC_DEF("classifier",		SCHED_CLS, 0, SEC_NONE), /* deprecated / legacy, use tcx */
	SEC_DEF("action",		SCHED_ACT, 0, SEC_NONE), /* deprecated / legacy, use tcx */
	SEC_DEF("netkit/primary",	SCHED_CLS, BPF_NETKIT_PRIMARY, SEC_NONE),
	SEC_DEF("netkit/peer",		SCHED_CLS, BPF_NETKIT_PEER, SEC_NONE),
	SEC_DEF("tracepoint+",		TRACEPOINT, 0, SEC_NONE, attach_tp),
	SEC_DEF("tp+",			TRACEPOINT, 0, SEC_NONE, attach_tp),
	SEC_DEF("raw_tracepoint+",	RAW_TRACEPOINT, 0, SEC_NONE, attach_raw_tp),
	SEC_DEF("raw_tp+",		RAW_TRACEPOINT, 0, SEC_NONE, attach_raw_tp),
	SEC_DEF("raw_tracepoint.w+",	RAW_TRACEPOINT_WRITABLE, 0, SEC_NONE, attach_raw_tp),
	SEC_DEF("raw_tp.w+",		RAW_TRACEPOINT_WRITABLE, 0, SEC_NONE, attach_raw_tp),
	SEC_DEF("tp_btf+",		TRACING, BPF_TRACE_RAW_TP, SEC_ATTACH_BTF, attach_trace),
	SEC_DEF("fentry+",		TRACING, BPF_TRACE_FENTRY, SEC_ATTACH_BTF, attach_trace),
	SEC_DEF("fmod_ret+",		TRACING, BPF_MODIFY_RETURN, SEC_ATTACH_BTF, attach_trace),
	SEC_DEF("fexit+",		TRACING, BPF_TRACE_FEXIT, SEC_ATTACH_BTF, attach_trace),
	SEC_DEF("fentry.s+",		TRACING, BPF_TRACE_FENTRY, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_trace),
	SEC_DEF("fmod_ret.s+",		TRACING, BPF_MODIFY_RETURN, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_trace),
	SEC_DEF("fexit.s+",		TRACING, BPF_TRACE_FEXIT, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_trace),
	SEC_DEF("freplace+",		EXT, 0, SEC_ATTACH_BTF, attach_trace),
	SEC_DEF("lsm+",			LSM, BPF_LSM_MAC, SEC_ATTACH_BTF, attach_lsm),
	SEC_DEF("lsm.s+",		LSM, BPF_LSM_MAC, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_lsm),
	SEC_DEF("lsm_cgroup+",		LSM, BPF_LSM_CGROUP, SEC_ATTACH_BTF),
	SEC_DEF("iter+",		TRACING, BPF_TRACE_ITER, SEC_ATTACH_BTF, attach_iter),
	SEC_DEF("iter.s+",		TRACING, BPF_TRACE_ITER, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_iter),
	SEC_DEF("syscall",		SYSCALL, 0, SEC_SLEEPABLE),
	SEC_DEF("xdp.frags/devmap",	XDP, BPF_XDP_DEVMAP, SEC_XDP_FRAGS),
	SEC_DEF("xdp/devmap",		XDP, BPF_XDP_DEVMAP, SEC_ATTACHABLE),
	SEC_DEF("xdp.frags/cpumap",	XDP, BPF_XDP_CPUMAP, SEC_XDP_FRAGS),
	SEC_DEF("xdp/cpumap",		XDP, BPF_XDP_CPUMAP, SEC_ATTACHABLE),
	SEC_DEF("xdp.frags",		XDP, BPF_XDP, SEC_XDP_FRAGS),
	SEC_DEF("xdp",			XDP, BPF_XDP, SEC_ATTACHABLE_OPT),
	SEC_DEF("perf_event",		PERF_EVENT, 0, SEC_NONE),
	SEC_DEF("lwt_in",		LWT_IN, 0, SEC_NONE),
	SEC_DEF("lwt_out",		LWT_OUT, 0, SEC_NONE),
	SEC_DEF("lwt_xmit",		LWT_XMIT, 0, SEC_NONE),
	SEC_DEF("lwt_seg6local",	LWT_SEG6LOCAL, 0, SEC_NONE),
	SEC_DEF("sockops",		SOCK_OPS, BPF_CGROUP_SOCK_OPS, SEC_ATTACHABLE_OPT),
	SEC_DEF("sk_skb/stream_parser",	SK_SKB, BPF_SK_SKB_STREAM_PARSER, SEC_ATTACHABLE_OPT),
	SEC_DEF("sk_skb/stream_verdict",SK_SKB, BPF_SK_SKB_STREAM_VERDICT, SEC_ATTACHABLE_OPT),
	SEC_DEF("sk_skb",		SK_SKB, 0, SEC_NONE),
	SEC_DEF("sk_msg",		SK_MSG, BPF_SK_MSG_VERDICT, SEC_ATTACHABLE_OPT),
	SEC_DEF("lirc_mode2",		LIRC_MODE2, BPF_LIRC_MODE2, SEC_ATTACHABLE_OPT),
	SEC_DEF("flow_dissector",	FLOW_DISSECTOR, BPF_FLOW_DISSECTOR, SEC_ATTACHABLE_OPT),
	SEC_DEF("cgroup_skb/ingress",	CGROUP_SKB, BPF_CGROUP_INET_INGRESS, SEC_ATTACHABLE_OPT),
	SEC_DEF("cgroup_skb/egress",	CGROUP_SKB, BPF_CGROUP_INET_EGRESS, SEC_ATTACHABLE_OPT),
	SEC_DEF("cgroup/skb",		CGROUP_SKB, 0, SEC_NONE),
	SEC_DEF("cgroup/sock_create",	CGROUP_SOCK, BPF_CGROUP_INET_SOCK_CREATE, SEC_ATTACHABLE),
	SEC_DEF("cgroup/sock_release",	CGROUP_SOCK, BPF_CGROUP_INET_SOCK_RELEASE, SEC_ATTACHABLE),
	SEC_DEF("cgroup/sock",		CGROUP_SOCK, BPF_CGROUP_INET_SOCK_CREATE, SEC_ATTACHABLE_OPT),
	SEC_DEF("cgroup/post_bind4",	CGROUP_SOCK, BPF_CGROUP_INET4_POST_BIND, SEC_ATTACHABLE),
	SEC_DEF("cgroup/post_bind6",	CGROUP_SOCK, BPF_CGROUP_INET6_POST_BIND, SEC_ATTACHABLE),
	SEC_DEF("cgroup/bind4",		CGROUP_SOCK_ADDR, BPF_CGROUP_INET4_BIND, SEC_ATTACHABLE),
	SEC_DEF("cgroup/bind6",		CGROUP_SOCK_ADDR, BPF_CGROUP_INET6_BIND, SEC_ATTACHABLE),
	SEC_DEF("cgroup/connect4",	CGROUP_SOCK_ADDR, BPF_CGROUP_INET4_CONNECT, SEC_ATTACHABLE),
	SEC_DEF("cgroup/connect6",	CGROUP_SOCK_ADDR, BPF_CGROUP_INET6_CONNECT, SEC_ATTACHABLE),
	SEC_DEF("cgroup/connect_unix",	CGROUP_SOCK_ADDR, BPF_CGROUP_UNIX_CONNECT, SEC_ATTACHABLE),
	SEC_DEF("cgroup/sendmsg4",	CGROUP_SOCK_ADDR, BPF_CGROUP_UDP4_SENDMSG, SEC_ATTACHABLE),
	SEC_DEF("cgroup/sendmsg6",	CGROUP_SOCK_ADDR, BPF_CGROUP_UDP6_SENDMSG, SEC_ATTACHABLE),
	SEC_DEF("cgroup/sendmsg_unix",	CGROUP_SOCK_ADDR, BPF_CGROUP_UNIX_SENDMSG, SEC_ATTACHABLE),
	SEC_DEF("cgroup/recvmsg4",	CGROUP_SOCK_ADDR, BPF_CGROUP_UDP4_RECVMSG, SEC_ATTACHABLE),
	SEC_DEF("cgroup/recvmsg6",	CGROUP_SOCK_ADDR, BPF_CGROUP_UDP6_RECVMSG, SEC_ATTACHABLE),
	SEC_DEF("cgroup/recvmsg_unix",	CGROUP_SOCK_ADDR, BPF_CGROUP_UNIX_RECVMSG, SEC_ATTACHABLE),
	SEC_DEF("cgroup/getpeername4",	CGROUP_SOCK_ADDR, BPF_CGROUP_INET4_GETPEERNAME, SEC_ATTACHABLE),
	SEC_DEF("cgroup/getpeername6",	CGROUP_SOCK_ADDR, BPF_CGROUP_INET6_GETPEERNAME, SEC_ATTACHABLE),
	SEC_DEF("cgroup/getpeername_unix", CGROUP_SOCK_ADDR, BPF_CGROUP_UNIX_GETPEERNAME, SEC_ATTACHABLE),
	SEC_DEF("cgroup/getsockname4",	CGROUP_SOCK_ADDR, BPF_CGROUP_INET4_GETSOCKNAME, SEC_ATTACHABLE),
	SEC_DEF("cgroup/getsockname6",	CGROUP_SOCK_ADDR, BPF_CGROUP_INET6_GETSOCKNAME, SEC_ATTACHABLE),
	SEC_DEF("cgroup/getsockname_unix", CGROUP_SOCK_ADDR, BPF_CGROUP_UNIX_GETSOCKNAME, SEC_ATTACHABLE),
	SEC_DEF("cgroup/sysctl",	CGROUP_SYSCTL, BPF_CGROUP_SYSCTL, SEC_ATTACHABLE),
	SEC_DEF("cgroup/getsockopt",	CGROUP_SOCKOPT, BPF_CGROUP_GETSOCKOPT, SEC_ATTACHABLE),
	SEC_DEF("cgroup/setsockopt",	CGROUP_SOCKOPT, BPF_CGROUP_SETSOCKOPT, SEC_ATTACHABLE),
	SEC_DEF("cgroup/dev",		CGROUP_DEVICE, BPF_CGROUP_DEVICE, SEC_ATTACHABLE_OPT),
	SEC_DEF("struct_ops+",		STRUCT_OPS, 0, SEC_NONE),
	SEC_DEF("struct_ops.s+",	STRUCT_OPS, 0, SEC_SLEEPABLE),
	SEC_DEF("sk_lookup",		SK_LOOKUP, BPF_SK_LOOKUP, SEC_ATTACHABLE),
	SEC_DEF("netfilter",		NETFILTER, BPF_NETFILTER, SEC_NONE),
};

int libbpf_register_prog_handler(const char *sec,
				 enum bpf_prog_type prog_type,
				 enum bpf_attach_type exp_attach_type,
				 const struct libbpf_prog_handler_opts *opts)
{
	struct bpf_sec_def *sec_def;

	if (!OPTS_VALID(opts, libbpf_prog_handler_opts))
		return libbpf_err(-EINVAL);

	if (last_custom_sec_def_handler_id == INT_MAX) /* prevent overflow */
		return libbpf_err(-E2BIG);

	if (sec) {
		sec_def = libbpf_reallocarray(custom_sec_defs, custom_sec_def_cnt + 1,
					      sizeof(*sec_def));
		if (!sec_def)
			return libbpf_err(-ENOMEM);

		custom_sec_defs = sec_def;
		sec_def = &custom_sec_defs[custom_sec_def_cnt];
	} else {
		if (has_custom_fallback_def)
			return libbpf_err(-EBUSY);

		sec_def = &custom_fallback_def;
	}

	sec_def->sec = sec ? strdup(sec) : NULL;
	if (sec && !sec_def->sec)
		return libbpf_err(-ENOMEM);

	sec_def->prog_type = prog_type;
	sec_def->expected_attach_type = exp_attach_type;
	sec_def->cookie = OPTS_GET(opts, cookie, 0);

	sec_def->prog_setup_fn = OPTS_GET(opts, prog_setup_fn, NULL);
	sec_def->prog_prepare_load_fn = OPTS_GET(opts, prog_prepare_load_fn, NULL);
	sec_def->prog_attach_fn = OPTS_GET(opts, prog_attach_fn, NULL);

	sec_def->handler_id = ++last_custom_sec_def_handler_id;

	if (sec)
		custom_sec_def_cnt++;
	else
		has_custom_fallback_def = true;

	return sec_def->handler_id;
}

int libbpf_unregister_prog_handler(int handler_id)
{
	struct bpf_sec_def *sec_defs;
	int i;

	if (handler_id <= 0)
		return libbpf_err(-EINVAL);

	if (has_custom_fallback_def && custom_fallback_def.handler_id == handler_id) {
		memset(&custom_fallback_def, 0, sizeof(custom_fallback_def));
		has_custom_fallback_def = false;
		return 0;
	}

	for (i = 0; i < custom_sec_def_cnt; i++) {
		if (custom_sec_defs[i].handler_id == handler_id)
			break;
	}

	if (i == custom_sec_def_cnt)
		return libbpf_err(-ENOENT);

	free(custom_sec_defs[i].sec);
	for (i = i + 1; i < custom_sec_def_cnt; i++)
		custom_sec_defs[i - 1] = custom_sec_defs[i];
	custom_sec_def_cnt--;

	/* try to shrink the array, but it's ok if we couldn't */
	sec_defs = libbpf_reallocarray(custom_sec_defs, custom_sec_def_cnt, sizeof(*sec_defs));
	/* if new count is zero, reallocarray can return a valid NULL result;
	 * in this case the previous pointer will be freed, so we *have to*
	 * reassign old pointer to the new value (even if it's NULL)
	 */
	if (sec_defs || custom_sec_def_cnt == 0)
		custom_sec_defs = sec_defs;

	return 0;
}

static bool sec_def_matches(const struct bpf_sec_def *sec_def, const char *sec_name)
{
	size_t len = strlen(sec_def->sec);

	/* "type/" always has to have proper SEC("type/extras") form */
	if (sec_def->sec[len - 1] == '/') {
		if (str_has_pfx(sec_name, sec_def->sec))
			return true;
		return false;
	}

	/* "type+" means it can be either exact SEC("type") or
	 * well-formed SEC("type/extras") with proper '/' separator
	 */
	if (sec_def->sec[len - 1] == '+') {
		len--;
		/* not even a prefix */
		if (strncmp(sec_name, sec_def->sec, len) != 0)
			return false;
		/* exact match or has '/' separator */
		if (sec_name[len] == '\0' || sec_name[len] == '/')
			return true;
		return false;
	}

	return strcmp(sec_name, sec_def->sec) == 0;
}

static const struct bpf_sec_def *find_sec_def(const char *sec_name)
{
	const struct bpf_sec_def *sec_def;
	int i, n;

	n = custom_sec_def_cnt;
	for (i = 0; i < n; i++) {
		sec_def = &custom_sec_defs[i];
		if (sec_def_matches(sec_def, sec_name))
			return sec_def;
	}

	n = ARRAY_SIZE(section_defs);
	for (i = 0; i < n; i++) {
		sec_def = &section_defs[i];
		if (sec_def_matches(sec_def, sec_name))
			return sec_def;
	}

	if (has_custom_fallback_def)
		return &custom_fallback_def;

	return NULL;
}

#define MAX_TYPE_NAME_SIZE 32

static char *libbpf_get_type_names(bool attach_type)
{
	int i, len = ARRAY_SIZE(section_defs) * MAX_TYPE_NAME_SIZE;
	char *buf;

	buf = malloc(len);
	if (!buf)
		return NULL;

	buf[0] = '\0';
	/* Forge string buf with all available names */
	for (i = 0; i < ARRAY_SIZE(section_defs); i++) {
		const struct bpf_sec_def *sec_def = &section_defs[i];

		if (attach_type) {
			if (sec_def->prog_prepare_load_fn != libbpf_prepare_prog_load)
				continue;

			if (!(sec_def->cookie & SEC_ATTACHABLE))
				continue;
		}

		if (strlen(buf) + strlen(section_defs[i].sec) + 2 > len) {
			free(buf);
			return NULL;
		}
		strcat(buf, " ");
		strcat(buf, section_defs[i].sec);
	}

	return buf;
}

int libbpf_prog_type_by_name(const char *name, enum bpf_prog_type *prog_type,
			     enum bpf_attach_type *expected_attach_type)
{
	const struct bpf_sec_def *sec_def;
	char *type_names;

	if (!name)
		return libbpf_err(-EINVAL);

	sec_def = find_sec_def(name);
	if (sec_def) {
		*prog_type = sec_def->prog_type;
		*expected_attach_type = sec_def->expected_attach_type;
		return 0;
	}

	pr_debug("failed to guess program type from ELF section '%s'\n", name);
	type_names = libbpf_get_type_names(false);
	if (type_names != NULL) {
		pr_debug("supported section(type) names are:%s\n", type_names);
		free(type_names);
	}

	return libbpf_err(-ESRCH);
}

const char *libbpf_bpf_attach_type_str(enum bpf_attach_type t)
{
	if (t < 0 || t >= ARRAY_SIZE(attach_type_name))
		return NULL;

	return attach_type_name[t];
}

const char *libbpf_bpf_link_type_str(enum bpf_link_type t)
{
	if (t < 0 || t >= ARRAY_SIZE(link_type_name))
		return NULL;

	return link_type_name[t];
}

const char *libbpf_bpf_map_type_str(enum bpf_map_type t)
{
	if (t < 0 || t >= ARRAY_SIZE(map_type_name))
		return NULL;

	return map_type_name[t];
}

const char *libbpf_bpf_prog_type_str(enum bpf_prog_type t)
{
	if (t < 0 || t >= ARRAY_SIZE(prog_type_name))
		return NULL;

	return prog_type_name[t];
}

static struct bpf_map *find_struct_ops_map_by_offset(struct bpf_object *obj,
						     int sec_idx,
						     size_t offset)
{
	struct bpf_map *map;
	size_t i;

	for (i = 0; i < obj->nr_maps; i++) {
		map = &obj->maps[i];
		if (!bpf_map__is_struct_ops(map))
			continue;
		if (map->sec_idx == sec_idx &&
		    map->sec_offset <= offset &&
		    offset - map->sec_offset < map->def.value_size)
			return map;
	}

	return NULL;
}

/* Collect the reloc from ELF, populate the st_ops->progs[], and update
 * st_ops->data for shadow type.
 */
static int bpf_object__collect_st_ops_relos(struct bpf_object *obj,
					    Elf64_Shdr *shdr, Elf_Data *data)
{
	const struct btf_member *member;
	struct bpf_struct_ops *st_ops;
	struct bpf_program *prog;
	unsigned int shdr_idx;
	const struct btf *btf;
	struct bpf_map *map;
	unsigned int moff, insn_idx;
	const char *name;
	__u32 member_idx;
	Elf64_Sym *sym;
	Elf64_Rel *rel;
	int i, nrels;

	btf = obj->btf;
	nrels = shdr->sh_size / shdr->sh_entsize;
	for (i = 0; i < nrels; i++) {
		rel = elf_rel_by_idx(data, i);
		if (!rel) {
			pr_warn("struct_ops reloc: failed to get %d reloc\n", i);
			return -LIBBPF_ERRNO__FORMAT;
		}

		sym = elf_sym_by_idx(obj, ELF64_R_SYM(rel->r_info));
		if (!sym) {
			pr_warn("struct_ops reloc: symbol %zx not found\n",
				(size_t)ELF64_R_SYM(rel->r_info));
			return -LIBBPF_ERRNO__FORMAT;
		}

		name = elf_sym_str(obj, sym->st_name) ?: "<?>";
		map = find_struct_ops_map_by_offset(obj, shdr->sh_info, rel->r_offset);
		if (!map) {
			pr_warn("struct_ops reloc: cannot find map at rel->r_offset %zu\n",
				(size_t)rel->r_offset);
			return -EINVAL;
		}

		moff = rel->r_offset - map->sec_offset;
		shdr_idx = sym->st_shndx;
		st_ops = map->st_ops;
		pr_debug("struct_ops reloc %s: for %lld value %lld shdr_idx %u rel->r_offset %zu map->sec_offset %zu name %d (\'%s\')\n",
			 map->name,
			 (long long)(rel->r_info >> 32),
			 (long long)sym->st_value,
			 shdr_idx, (size_t)rel->r_offset,
			 map->sec_offset, sym->st_name, name);

		if (shdr_idx >= SHN_LORESERVE) {
			pr_warn("struct_ops reloc %s: rel->r_offset %zu shdr_idx %u unsupported non-static function\n",
				map->name, (size_t)rel->r_offset, shdr_idx);
			return -LIBBPF_ERRNO__RELOC;
		}
		if (sym->st_value % BPF_INSN_SZ) {
			pr_warn("struct_ops reloc %s: invalid target program offset %llu\n",
				map->name, (unsigned long long)sym->st_value);
			return -LIBBPF_ERRNO__FORMAT;
		}
		insn_idx = sym->st_value / BPF_INSN_SZ;

		member = find_member_by_offset(st_ops->type, moff * 8);
		if (!member) {
			pr_warn("struct_ops reloc %s: cannot find member at moff %u\n",
				map->name, moff);
			return -EINVAL;
		}
		member_idx = member - btf_members(st_ops->type);
		name = btf__name_by_offset(btf, member->name_off);

		if (!resolve_func_ptr(btf, member->type, NULL)) {
			pr_warn("struct_ops reloc %s: cannot relocate non func ptr %s\n",
				map->name, name);
			return -EINVAL;
		}

		prog = find_prog_by_sec_insn(obj, shdr_idx, insn_idx);
		if (!prog) {
			pr_warn("struct_ops reloc %s: cannot find prog at shdr_idx %u to relocate func ptr %s\n",
				map->name, shdr_idx, name);
			return -EINVAL;
		}

		/* prevent the use of BPF prog with invalid type */
		if (prog->type != BPF_PROG_TYPE_STRUCT_OPS) {
			pr_warn("struct_ops reloc %s: prog %s is not struct_ops BPF program\n",
				map->name, prog->name);
			return -EINVAL;
		}

		st_ops->progs[member_idx] = prog;

		/* st_ops->data will be exposed to users, being returned by
		 * bpf_map__initial_value() as a pointer to the shadow
		 * type. All function pointers in the original struct type
		 * should be converted to a pointer to struct bpf_program
		 * in the shadow type.
		 */
		*((struct bpf_program **)(st_ops->data + moff)) = prog;
	}

	return 0;
}

#define BTF_TRACE_PREFIX "btf_trace_"
#define BTF_LSM_PREFIX "bpf_lsm_"
#define BTF_ITER_PREFIX "bpf_iter_"
#define BTF_MAX_NAME_SIZE 128

void btf_get_kernel_prefix_kind(enum bpf_attach_type attach_type,
				const char **prefix, int *kind)
{
	switch (attach_type) {
	case BPF_TRACE_RAW_TP:
		*prefix = BTF_TRACE_PREFIX;
		*kind = BTF_KIND_TYPEDEF;
		break;
	case BPF_LSM_MAC:
	case BPF_LSM_CGROUP:
		*prefix = BTF_LSM_PREFIX;
		*kind = BTF_KIND_FUNC;
		break;
	case BPF_TRACE_ITER:
		*prefix = BTF_ITER_PREFIX;
		*kind = BTF_KIND_FUNC;
		break;
	default:
		*prefix = "";
		*kind = BTF_KIND_FUNC;
	}
}

static int find_btf_by_prefix_kind(const struct btf *btf, const char *prefix,
				   const char *name, __u32 kind)
{
	char btf_type_name[BTF_MAX_NAME_SIZE];
	int ret;

	ret = snprintf(btf_type_name, sizeof(btf_type_name),
		       "%s%s", prefix, name);
	/* snprintf returns the number of characters written excluding the
	 * terminating null. So, if >= BTF_MAX_NAME_SIZE are written, it
	 * indicates truncation.
	 */
	if (ret < 0 || ret >= sizeof(btf_type_name))
		return -ENAMETOOLONG;
	return btf__find_by_name_kind(btf, btf_type_name, kind);
}

static inline int find_attach_btf_id(struct btf *btf, const char *name,
				     enum bpf_attach_type attach_type)
{
	const char *prefix;
	int kind;

	btf_get_kernel_prefix_kind(attach_type, &prefix, &kind);
	return find_btf_by_prefix_kind(btf, prefix, name, kind);
}

int libbpf_find_vmlinux_btf_id(const char *name,
			       enum bpf_attach_type attach_type)
{
	struct btf *btf;
	int err;

	btf = btf__load_vmlinux_btf();
	err = libbpf_get_error(btf);
	if (err) {
		pr_warn("vmlinux BTF is not found\n");
		return libbpf_err(err);
	}

	err = find_attach_btf_id(btf, name, attach_type);
	if (err <= 0)
		pr_warn("%s is not found in vmlinux BTF\n", name);

	btf__free(btf);
	return libbpf_err(err);
}

static int libbpf_find_prog_btf_id(const char *name, __u32 attach_prog_fd)
{
	struct bpf_prog_info info;
	__u32 info_len = sizeof(info);
	struct btf *btf;
	int err;

	memset(&info, 0, info_len);
	err = bpf_prog_get_info_by_fd(attach_prog_fd, &info, &info_len);
	if (err) {
		pr_warn("failed bpf_prog_get_info_by_fd for FD %d: %d\n",
			attach_prog_fd, err);
		return err;
	}

	err = -EINVAL;
	if (!info.btf_id) {
		pr_warn("The target program doesn't have BTF\n");
		goto out;
	}
	btf = btf__load_from_kernel_by_id(info.btf_id);
	err = libbpf_get_error(btf);
	if (err) {
		pr_warn("Failed to get BTF %d of the program: %d\n", info.btf_id, err);
		goto out;
	}
	err = btf__find_by_name_kind(btf, name, BTF_KIND_FUNC);
	btf__free(btf);
	if (err <= 0) {
		pr_warn("%s is not found in prog's BTF\n", name);
		goto out;
	}
out:
	return err;
}

static int find_kernel_btf_id(struct bpf_object *obj, const char *attach_name,
			      enum bpf_attach_type attach_type,
			      int *btf_obj_fd, int *btf_type_id)
{
	int ret, i;

	ret = find_attach_btf_id(obj->btf_vmlinux, attach_name, attach_type);
	if (ret > 0) {
		*btf_obj_fd = 0; /* vmlinux BTF */
		*btf_type_id = ret;
		return 0;
	}
	if (ret != -ENOENT)
		return ret;

	ret = load_module_btfs(obj);
	if (ret)
		return ret;

	for (i = 0; i < obj->btf_module_cnt; i++) {
		const struct module_btf *mod = &obj->btf_modules[i];

		ret = find_attach_btf_id(mod->btf, attach_name, attach_type);
		if (ret > 0) {
			*btf_obj_fd = mod->fd;
			*btf_type_id = ret;
			return 0;
		}
		if (ret == -ENOENT)
			continue;

		return ret;
	}

	return -ESRCH;
}

static int libbpf_find_attach_btf_id(struct bpf_program *prog, const char *attach_name,
				     int *btf_obj_fd, int *btf_type_id)
{
	enum bpf_attach_type attach_type = prog->expected_attach_type;
	__u32 attach_prog_fd = prog->attach_prog_fd;
	int err = 0;

	/* BPF program's BTF ID */
	if (prog->type == BPF_PROG_TYPE_EXT || attach_prog_fd) {
		if (!attach_prog_fd) {
			pr_warn("prog '%s': attach program FD is not set\n", prog->name);
			return -EINVAL;
		}
		err = libbpf_find_prog_btf_id(attach_name, attach_prog_fd);
		if (err < 0) {
			pr_warn("prog '%s': failed to find BPF program (FD %d) BTF ID for '%s': %d\n",
				prog->name, attach_prog_fd, attach_name, err);
			return err;
		}
		*btf_obj_fd = 0;
		*btf_type_id = err;
		return 0;
	}

	/* kernel/module BTF ID */
	if (prog->obj->gen_loader) {
		bpf_gen__record_attach_target(prog->obj->gen_loader, attach_name, attach_type);
		*btf_obj_fd = 0;
		*btf_type_id = 1;
	} else {
		err = find_kernel_btf_id(prog->obj, attach_name,
					 attach_type, btf_obj_fd,
					 btf_type_id);
	}
	if (err) {
		pr_warn("prog '%s': failed to find kernel BTF type ID of '%s': %d\n",
			prog->name, attach_name, err);
		return err;
	}
	return 0;
}

int libbpf_attach_type_by_name(const char *name,
			       enum bpf_attach_type *attach_type)
{
	char *type_names;
	const struct bpf_sec_def *sec_def;

	if (!name)
		return libbpf_err(-EINVAL);

	sec_def = find_sec_def(name);
	if (!sec_def) {
		pr_debug("failed to guess attach type based on ELF section name '%s'\n", name);
		type_names = libbpf_get_type_names(true);
		if (type_names != NULL) {
			pr_debug("attachable section(type) names are:%s\n", type_names);
			free(type_names);
		}

		return libbpf_err(-EINVAL);
	}

	if (sec_def->prog_prepare_load_fn != libbpf_prepare_prog_load)
		return libbpf_err(-EINVAL);
	if (!(sec_def->cookie & SEC_ATTACHABLE))
		return libbpf_err(-EINVAL);

	*attach_type = sec_def->expected_attach_type;
	return 0;
}

int bpf_map__fd(const struct bpf_map *map)
{
	if (!map)
		return libbpf_err(-EINVAL);
	if (!map_is_created(map))
		return -1;
	return map->fd;
}

static bool map_uses_real_name(const struct bpf_map *map)
{
	/* Since libbpf started to support custom .data.* and .rodata.* maps,
	 * their user-visible name differs from kernel-visible name. Users see
	 * such map's corresponding ELF section name as a map name.
	 * This check distinguishes .data/.rodata from .data.* and .rodata.*
	 * maps to know which name has to be returned to the user.
	 */
	if (map->libbpf_type == LIBBPF_MAP_DATA && strcmp(map->real_name, DATA_SEC) != 0)
		return true;
	if (map->libbpf_type == LIBBPF_MAP_RODATA && strcmp(map->real_name, RODATA_SEC) != 0)
		return true;
	return false;
}

const char *bpf_map__name(const struct bpf_map *map)
{
	if (!map)
		return NULL;

	if (map_uses_real_name(map))
		return map->real_name;

	return map->name;
}

enum bpf_map_type bpf_map__type(const struct bpf_map *map)
{
	return map->def.type;
}

int bpf_map__set_type(struct bpf_map *map, enum bpf_map_type type)
{
	if (map_is_created(map))
		return libbpf_err(-EBUSY);
	map->def.type = type;
	return 0;
}

__u32 bpf_map__map_flags(const struct bpf_map *map)
{
	return map->def.map_flags;
}

int bpf_map__set_map_flags(struct bpf_map *map, __u32 flags)
{
	if (map_is_created(map))
		return libbpf_err(-EBUSY);
	map->def.map_flags = flags;
	return 0;
}

__u64 bpf_map__map_extra(const struct bpf_map *map)
{
	return map->map_extra;
}

int bpf_map__set_map_extra(struct bpf_map *map, __u64 map_extra)
{
	if (map_is_created(map))
		return libbpf_err(-EBUSY);
	map->map_extra = map_extra;
	return 0;
}

__u32 bpf_map__numa_node(const struct bpf_map *map)
{
	return map->numa_node;
}

int bpf_map__set_numa_node(struct bpf_map *map, __u32 numa_node)
{
	if (map_is_created(map))
		return libbpf_err(-EBUSY);
	map->numa_node = numa_node;
	return 0;
}

__u32 bpf_map__key_size(const struct bpf_map *map)
{
	return map->def.key_size;
}

int bpf_map__set_key_size(struct bpf_map *map, __u32 size)
{
	if (map_is_created(map))
		return libbpf_err(-EBUSY);
	map->def.key_size = size;
	return 0;
}

__u32 bpf_map__value_size(const struct bpf_map *map)
{
	return map->def.value_size;
}

static int map_btf_datasec_resize(struct bpf_map *map, __u32 size)
{
	struct btf *btf;
	struct btf_type *datasec_type, *var_type;
	struct btf_var_secinfo *var;
	const struct btf_type *array_type;
	const struct btf_array *array;
	int vlen, element_sz, new_array_id;
	__u32 nr_elements;

	/* check btf existence */
	btf = bpf_object__btf(map->obj);
	if (!btf)
		return -ENOENT;

	/* verify map is datasec */
	datasec_type = btf_type_by_id(btf, bpf_map__btf_value_type_id(map));
	if (!btf_is_datasec(datasec_type)) {
		pr_warn("map '%s': cannot be resized, map value type is not a datasec\n",
			bpf_map__name(map));
		return -EINVAL;
	}

	/* verify datasec has at least one var */
	vlen = btf_vlen(datasec_type);
	if (vlen == 0) {
		pr_warn("map '%s': cannot be resized, map value datasec is empty\n",
			bpf_map__name(map));
		return -EINVAL;
	}

	/* verify last var in the datasec is an array */
	var = &btf_var_secinfos(datasec_type)[vlen - 1];
	var_type = btf_type_by_id(btf, var->type);
	array_type = skip_mods_and_typedefs(btf, var_type->type, NULL);
	if (!btf_is_array(array_type)) {
		pr_warn("map '%s': cannot be resized, last var must be an array\n",
			bpf_map__name(map));
		return -EINVAL;
	}

	/* verify request size aligns with array */
	array = btf_array(array_type);
	element_sz = btf__resolve_size(btf, array->type);
	if (element_sz <= 0 || (size - var->offset) % element_sz != 0) {
		pr_warn("map '%s': cannot be resized, element size (%d) doesn't align with new total size (%u)\n",
			bpf_map__name(map), element_sz, size);
		return -EINVAL;
	}

	/* create a new array based on the existing array, but with new length */
	nr_elements = (size - var->offset) / element_sz;
	new_array_id = btf__add_array(btf, array->index_type, array->type, nr_elements);
	if (new_array_id < 0)
		return new_array_id;

	/* adding a new btf type invalidates existing pointers to btf objects,
	 * so refresh pointers before proceeding
	 */
	datasec_type = btf_type_by_id(btf, map->btf_value_type_id);
	var = &btf_var_secinfos(datasec_type)[vlen - 1];
	var_type = btf_type_by_id(btf, var->type);

	/* finally update btf info */
	datasec_type->size = size;
	var->size = size - var->offset;
	var_type->type = new_array_id;

	return 0;
}

int bpf_map__set_value_size(struct bpf_map *map, __u32 size)
{
	if (map->obj->loaded || map->reused)
		return libbpf_err(-EBUSY);

	if (map->mmaped) {
		size_t mmap_old_sz, mmap_new_sz;
		int err;

		mmap_old_sz = bpf_map_mmap_sz(map->def.value_size, map->def.max_entries);
		mmap_new_sz = bpf_map_mmap_sz(size, map->def.max_entries);
		err = bpf_map_mmap_resize(map, mmap_old_sz, mmap_new_sz);
		if (err) {
			pr_warn("map '%s': failed to resize memory-mapped region: %d\n",
				bpf_map__name(map), err);
			return err;
		}
		err = map_btf_datasec_resize(map, size);
		if (err && err != -ENOENT) {
			pr_warn("map '%s': failed to adjust resized BTF, clearing BTF key/value info: %d\n",
				bpf_map__name(map), err);
			map->btf_value_type_id = 0;
			map->btf_key_type_id = 0;
		}
	}

	map->def.value_size = size;
	return 0;
}

__u32 bpf_map__btf_key_type_id(const struct bpf_map *map)
{
	return map ? map->btf_key_type_id : 0;
}

__u32 bpf_map__btf_value_type_id(const struct bpf_map *map)
{
	return map ? map->btf_value_type_id : 0;
}

int bpf_map__set_initial_value(struct bpf_map *map,
			       const void *data, size_t size)
{
	if (map->obj->loaded || map->reused)
		return libbpf_err(-EBUSY);

	if (!map->mmaped || map->libbpf_type == LIBBPF_MAP_KCONFIG ||
	    size != map->def.value_size)
		return libbpf_err(-EINVAL);

	memcpy(map->mmaped, data, size);
	return 0;
}

void *bpf_map__initial_value(struct bpf_map *map, size_t *psize)
{
	if (bpf_map__is_struct_ops(map)) {
		if (psize)
			*psize = map->def.value_size;
		return map->st_ops->data;
	}

	if (!map->mmaped)
		return NULL;
	*psize = map->def.value_size;
	return map->mmaped;
}

bool bpf_map__is_internal(const struct bpf_map *map)
{
	return map->libbpf_type != LIBBPF_MAP_UNSPEC;
}

__u32 bpf_map__ifindex(const struct bpf_map *map)
{
	return map->map_ifindex;
}

int bpf_map__set_ifindex(struct bpf_map *map, __u32 ifindex)
{
	if (map_is_created(map))
		return libbpf_err(-EBUSY);
	map->map_ifindex = ifindex;
	return 0;
}

int bpf_map__set_inner_map_fd(struct bpf_map *map, int fd)
{
	if (!bpf_map_type__is_map_in_map(map->def.type)) {
		pr_warn("error: unsupported map type\n");
		return libbpf_err(-EINVAL);
	}
	if (map->inner_map_fd != -1) {
		pr_warn("error: inner_map_fd already specified\n");
		return libbpf_err(-EINVAL);
	}
	if (map->inner_map) {
		bpf_map__destroy(map->inner_map);
		zfree(&map->inner_map);
	}
	map->inner_map_fd = fd;
	return 0;
}

static struct bpf_map *
__bpf_map__iter(const struct bpf_map *m, const struct bpf_object *obj, int i)
{
	ssize_t idx;
	struct bpf_map *s, *e;

	if (!obj || !obj->maps)
		return errno = EINVAL, NULL;

	s = obj->maps;
	e = obj->maps + obj->nr_maps;

	if ((m < s) || (m >= e)) {
		pr_warn("error in %s: map handler doesn't belong to object\n",
			 __func__);
		return errno = EINVAL, NULL;
	}

	idx = (m - obj->maps) + i;
	if (idx >= obj->nr_maps || idx < 0)
		return NULL;
	return &obj->maps[idx];
}

struct bpf_map *
bpf_object__next_map(const struct bpf_object *obj, const struct bpf_map *prev)
{
	if (prev == NULL)
		return obj->maps;

	return __bpf_map__iter(prev, obj, 1);
}

struct bpf_map *
bpf_object__prev_map(const struct bpf_object *obj, const struct bpf_map *next)
{
	if (next == NULL) {
		if (!obj->nr_maps)
			return NULL;
		return obj->maps + obj->nr_maps - 1;
	}

	return __bpf_map__iter(next, obj, -1);
}

struct bpf_map *
bpf_object__find_map_by_name(const struct bpf_object *obj, const char *name)
{
	struct bpf_map *pos;

	bpf_object__for_each_map(pos, obj) {
		/* if it's a special internal map name (which always starts
		 * with dot) then check if that special name matches the
		 * real map name (ELF section name)
		 */
		if (name[0] == '.') {
			if (pos->real_name && strcmp(pos->real_name, name) == 0)
				return pos;
			continue;
		}
		/* otherwise map name has to be an exact match */
		if (map_uses_real_name(pos)) {
			if (strcmp(pos->real_name, name) == 0)
				return pos;
			continue;
		}
		if (strcmp(pos->name, name) == 0)
			return pos;
	}
	return errno = ENOENT, NULL;
}

int
bpf_object__find_map_fd_by_name(const struct bpf_object *obj, const char *name)
{
	return bpf_map__fd(bpf_object__find_map_by_name(obj, name));
}

static int validate_map_op(const struct bpf_map *map, size_t key_sz,
			   size_t value_sz, bool check_value_sz)
{
	if (!map_is_created(map)) /* map is not yet created */
		return -ENOENT;

	if (map->def.key_size != key_sz) {
		pr_warn("map '%s': unexpected key size %zu provided, expected %u\n",
			map->name, key_sz, map->def.key_size);
		return -EINVAL;
	}

	if (!check_value_sz)
		return 0;

	switch (map->def.type) {
	case BPF_MAP_TYPE_PERCPU_ARRAY:
	case BPF_MAP_TYPE_PERCPU_HASH:
	case BPF_MAP_TYPE_LRU_PERCPU_HASH:
	case BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE: {
		int num_cpu = libbpf_num_possible_cpus();
		size_t elem_sz = roundup(map->def.value_size, 8);

		if (value_sz != num_cpu * elem_sz) {
			pr_warn("map '%s': unexpected value size %zu provided for per-CPU map, expected %d * %zu = %zd\n",
				map->name, value_sz, num_cpu, elem_sz, num_cpu * elem_sz);
			return -EINVAL;
		}
		break;
	}
	default:
		if (map->def.value_size != value_sz) {
			pr_warn("map '%s': unexpected value size %zu provided, expected %u\n",
				map->name, value_sz, map->def.value_size);
			return -EINVAL;
		}
		break;
	}
	return 0;
}

int bpf_map__lookup_elem(const struct bpf_map *map,
			 const void *key, size_t key_sz,
			 void *value, size_t value_sz, __u64 flags)
{
	int err;

	err = validate_map_op(map, key_sz, value_sz, true);
	if (err)
		return libbpf_err(err);

	return bpf_map_lookup_elem_flags(map->fd, key, value, flags);
}

int bpf_map__update_elem(const struct bpf_map *map,
			 const void *key, size_t key_sz,
			 const void *value, size_t value_sz, __u64 flags)
{
	int err;

	err = validate_map_op(map, key_sz, value_sz, true);
	if (err)
		return libbpf_err(err);

	return bpf_map_update_elem(map->fd, key, value, flags);
}

int bpf_map__delete_elem(const struct bpf_map *map,
			 const void *key, size_t key_sz, __u64 flags)
{
	int err;

	err = validate_map_op(map, key_sz, 0, false /* check_value_sz */);
	if (err)
		return libbpf_err(err);

	return bpf_map_delete_elem_flags(map->fd, key, flags);
}

int bpf_map__lookup_and_delete_elem(const struct bpf_map *map,
				    const void *key, size_t key_sz,
				    void *value, size_t value_sz, __u64 flags)
{
	int err;

	err = validate_map_op(map, key_sz, value_sz, true);
	if (err)
		return libbpf_err(err);

	return bpf_map_lookup_and_delete_elem_flags(map->fd, key, value, flags);
}

int bpf_map__get_next_key(const struct bpf_map *map,
			  const void *cur_key, void *next_key, size_t key_sz)
{
	int err;

	err = validate_map_op(map, key_sz, 0, false /* check_value_sz */);
	if (err)
		return libbpf_err(err);

	return bpf_map_get_next_key(map->fd, cur_key, next_key);
}

long libbpf_get_error(const void *ptr)
{
	if (!IS_ERR_OR_NULL(ptr))
		return 0;

	if (IS_ERR(ptr))
		errno = -PTR_ERR(ptr);

	/* If ptr == NULL, then errno should be already set by the failing
	 * API, because libbpf never returns NULL on success and it now always
	 * sets errno on error. So no extra errno handling for ptr == NULL
	 * case.
	 */
	return -errno;
}

/* Replace link's underlying BPF program with the new one */
int bpf_link__update_program(struct bpf_link *link, struct bpf_program *prog)
{
	int ret;

	ret = bpf_link_update(bpf_link__fd(link), bpf_program__fd(prog), NULL);
	return libbpf_err_errno(ret);
}

/* Release "ownership" of underlying BPF resource (typically, BPF program
 * attached to some BPF hook, e.g., tracepoint, kprobe, etc). Disconnected
 * link, when destructed through bpf_link__destroy() call won't attempt to
 * detach/unregister that BPF resource. This is useful in situations where,
 * say, attached BPF program has to outlive userspace program that attached it
 * in the system. Depending on type of BPF program, though, there might be
 * additional steps (like pinning BPF program in BPF FS) necessary to ensure
 * exit of userspace program doesn't trigger automatic detachment and clean up
 * inside the kernel.
 */
void bpf_link__disconnect(struct bpf_link *link)
{
	link->disconnected = true;
}

int bpf_link__destroy(struct bpf_link *link)
{
	int err = 0;

	if (IS_ERR_OR_NULL(link))
		return 0;

	if (!link->disconnected && link->detach)
		err = link->detach(link);
	if (link->pin_path)
		free(link->pin_path);
	if (link->dealloc)
		link->dealloc(link);
	else
		free(link);

	return libbpf_err(err);
}

int bpf_link__fd(const struct bpf_link *link)
{
	return link->fd;
}

const char *bpf_link__pin_path(const struct bpf_link *link)
{
	return link->pin_path;
}

static int bpf_link__detach_fd(struct bpf_link *link)
{
	return libbpf_err_errno(close(link->fd));
}

struct bpf_link *bpf_link__open(const char *path)
{
	struct bpf_link *link;
	int fd;

	fd = bpf_obj_get(path);
	if (fd < 0) {
		fd = -errno;
		pr_warn("failed to open link at %s: %d\n", path, fd);
		return libbpf_err_ptr(fd);
	}

	link = calloc(1, sizeof(*link));
	if (!link) {
		close(fd);
		return libbpf_err_ptr(-ENOMEM);
	}
	link->detach = &bpf_link__detach_fd;
	link->fd = fd;

	link->pin_path = strdup(path);
	if (!link->pin_path) {
		bpf_link__destroy(link);
		return libbpf_err_ptr(-ENOMEM);
	}

	return link;
}

int bpf_link__detach(struct bpf_link *link)
{
	return bpf_link_detach(link->fd) ? -errno : 0;
}

int bpf_link__pin(struct bpf_link *link, const char *path)
{
	int err;

	if (link->pin_path)
		return libbpf_err(-EBUSY);
	err = make_parent_dir(path);
	if (err)
		return libbpf_err(err);
	err = check_path(path);
	if (err)
		return libbpf_err(err);

	link->pin_path = strdup(path);
	if (!link->pin_path)
		return libbpf_err(-ENOMEM);

	if (bpf_obj_pin(link->fd, link->pin_path)) {
		err = -errno;
		zfree(&link->pin_path);
		return libbpf_err(err);
	}

	pr_debug("link fd=%d: pinned at %s\n", link->fd, link->pin_path);
	return 0;
}

int bpf_link__unpin(struct bpf_link *link)
{
	int err;

	if (!link->pin_path)
		return libbpf_err(-EINVAL);

	err = unlink(link->pin_path);
	if (err != 0)
		return -errno;

	pr_debug("link fd=%d: unpinned from %s\n", link->fd, link->pin_path);
	zfree(&link->pin_path);
	return 0;
}

struct bpf_link_perf {
	struct bpf_link link;
	int perf_event_fd;
	/* legacy kprobe support: keep track of probe identifier and type */
	char *legacy_probe_name;
	bool legacy_is_kprobe;
	bool legacy_is_retprobe;
};

static int remove_kprobe_event_legacy(const char *probe_name, bool retprobe);
static int remove_uprobe_event_legacy(const char *probe_name, bool retprobe);

static int bpf_link_perf_detach(struct bpf_link *link)
{
	struct bpf_link_perf *perf_link = container_of(link, struct bpf_link_perf, link);
	int err = 0;

	if (ioctl(perf_link->perf_event_fd, PERF_EVENT_IOC_DISABLE, 0) < 0)
		err = -errno;

	if (perf_link->perf_event_fd != link->fd)
		close(perf_link->perf_event_fd);
	close(link->fd);

	/* legacy uprobe/kprobe needs to be removed after perf event fd closure */
	if (perf_link->legacy_probe_name) {
		if (perf_link->legacy_is_kprobe) {
			err = remove_kprobe_event_legacy(perf_link->legacy_probe_name,
							 perf_link->legacy_is_retprobe);
		} else {
			err = remove_uprobe_event_legacy(perf_link->legacy_probe_name,
							 perf_link->legacy_is_retprobe);
		}
	}

	return err;
}

static void bpf_link_perf_dealloc(struct bpf_link *link)
{
	struct bpf_link_perf *perf_link = container_of(link, struct bpf_link_perf, link);

	free(perf_link->legacy_probe_name);
	free(perf_link);
}

struct bpf_link *bpf_program__attach_perf_event_opts(const struct bpf_program *prog, int pfd,
						     const struct bpf_perf_event_opts *opts)
{
	char errmsg[STRERR_BUFSIZE];
	struct bpf_link_perf *link;
	int prog_fd, link_fd = -1, err;
	bool force_ioctl_attach;

	if (!OPTS_VALID(opts, bpf_perf_event_opts))
		return libbpf_err_ptr(-EINVAL);

	if (pfd < 0) {
		pr_warn("prog '%s': invalid perf event FD %d\n",
			prog->name, pfd);
		return libbpf_err_ptr(-EINVAL);
	}
	prog_fd = bpf_program__fd(prog);
	if (prog_fd < 0) {
		pr_warn("prog '%s': can't attach BPF program w/o FD (did you load it?)\n",
			prog->name);
		return libbpf_err_ptr(-EINVAL);
	}

	link = calloc(1, sizeof(*link));
	if (!link)
		return libbpf_err_ptr(-ENOMEM);
	link->link.detach = &bpf_link_perf_detach;
	link->link.dealloc = &bpf_link_perf_dealloc;
	link->perf_event_fd = pfd;

	force_ioctl_attach = OPTS_GET(opts, force_ioctl_attach, false);
	if (kernel_supports(prog->obj, FEAT_PERF_LINK) && !force_ioctl_attach) {
		DECLARE_LIBBPF_OPTS(bpf_link_create_opts, link_opts,
			.perf_event.bpf_cookie = OPTS_GET(opts, bpf_cookie, 0));

		link_fd = bpf_link_create(prog_fd, pfd, BPF_PERF_EVENT, &link_opts);
		if (link_fd < 0) {
			err = -errno;
			pr_warn("prog '%s': failed to create BPF link for perf_event FD %d: %d (%s)\n",
				prog->name, pfd,
				err, libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
			goto err_out;
		}
		link->link.fd = link_fd;
	} else {
		if (OPTS_GET(opts, bpf_cookie, 0)) {
			pr_warn("prog '%s': user context value is not supported\n", prog->name);
			err = -EOPNOTSUPP;
			goto err_out;
		}

		if (ioctl(pfd, PERF_EVENT_IOC_SET_BPF, prog_fd) < 0) {
			err = -errno;
			pr_warn("prog '%s': failed to attach to perf_event FD %d: %s\n",
				prog->name, pfd, libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
			if (err == -EPROTO)
				pr_warn("prog '%s': try add PERF_SAMPLE_CALLCHAIN to or remove exclude_callchain_[kernel|user] from pfd %d\n",
					prog->name, pfd);
			goto err_out;
		}
		link->link.fd = pfd;
	}
	if (ioctl(pfd, PERF_EVENT_IOC_ENABLE, 0) < 0) {
		err = -errno;
		pr_warn("prog '%s': failed to enable perf_event FD %d: %s\n",
			prog->name, pfd, libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
		goto err_out;
	}

	return &link->link;
err_out:
	if (link_fd >= 0)
		close(link_fd);
	free(link);
	return libbpf_err_ptr(err);
}

struct bpf_link *bpf_program__attach_perf_event(const struct bpf_program *prog, int pfd)
{
	return bpf_program__attach_perf_event_opts(prog, pfd, NULL);
}

/*
 * this function is expected to parse integer in the range of [0, 2^31-1] from
 * given file using scanf format string fmt. If actual parsed value is
 * negative, the result might be indistinguishable from error
 */
static int parse_uint_from_file(const char *file, const char *fmt)
{
	char buf[STRERR_BUFSIZE];
	int err, ret;
	FILE *f;

	f = fopen(file, "re");
	if (!f) {
		err = -errno;
		pr_debug("failed to open '%s': %s\n", file,
			 libbpf_strerror_r(err, buf, sizeof(buf)));
		return err;
	}
	err = fscanf(f, fmt, &ret);
	if (err != 1) {
		err = err == EOF ? -EIO : -errno;
		pr_debug("failed to parse '%s': %s\n", file,
			 libbpf_strerror_r(err, buf, sizeof(buf)));
		fclose(f);
		return err;
	}
	fclose(f);
	return ret;
}

static int determine_kprobe_perf_type(void)
{
	const char *file = "/sys/bus/event_source/devices/kprobe/type";

	return parse_uint_from_file(file, "%d\n");
}

static int determine_uprobe_perf_type(void)
{
	const char *file = "/sys/bus/event_source/devices/uprobe/type";

	return parse_uint_from_file(file, "%d\n");
}

static int determine_kprobe_retprobe_bit(void)
{
	const char *file = "/sys/bus/event_source/devices/kprobe/format/retprobe";

	return parse_uint_from_file(file, "config:%d\n");
}

static int determine_uprobe_retprobe_bit(void)
{
	const char *file = "/sys/bus/event_source/devices/uprobe/format/retprobe";

	return parse_uint_from_file(file, "config:%d\n");
}

#define PERF_UPROBE_REF_CTR_OFFSET_BITS 32
#define PERF_UPROBE_REF_CTR_OFFSET_SHIFT 32

static int perf_event_open_probe(bool uprobe, bool retprobe, const char *name,
				 uint64_t offset, int pid, size_t ref_ctr_off)
{
	const size_t attr_sz = sizeof(struct perf_event_attr);
	struct perf_event_attr attr;
	char errmsg[STRERR_BUFSIZE];
	int type, pfd;

	if ((__u64)ref_ctr_off >= (1ULL << PERF_UPROBE_REF_CTR_OFFSET_BITS))
		return -EINVAL;

	memset(&attr, 0, attr_sz);

	type = uprobe ? determine_uprobe_perf_type()
		      : determine_kprobe_perf_type();
	if (type < 0) {
		pr_warn("failed to determine %s perf type: %s\n",
			uprobe ? "uprobe" : "kprobe",
			libbpf_strerror_r(type, errmsg, sizeof(errmsg)));
		return type;
	}
	if (retprobe) {
		int bit = uprobe ? determine_uprobe_retprobe_bit()
				 : determine_kprobe_retprobe_bit();

		if (bit < 0) {
			pr_warn("failed to determine %s retprobe bit: %s\n",
				uprobe ? "uprobe" : "kprobe",
				libbpf_strerror_r(bit, errmsg, sizeof(errmsg)));
			return bit;
		}
		attr.config |= 1 << bit;
	}
	attr.size = attr_sz;
	attr.type = type;
	attr.config |= (__u64)ref_ctr_off << PERF_UPROBE_REF_CTR_OFFSET_SHIFT;
	attr.config1 = ptr_to_u64(name); /* kprobe_func or uprobe_path */
	attr.config2 = offset;		 /* kprobe_addr or probe_offset */

	/* pid filter is meaningful only for uprobes */
	pfd = syscall(__NR_perf_event_open, &attr,
		      pid < 0 ? -1 : pid /* pid */,
		      pid == -1 ? 0 : -1 /* cpu */,
		      -1 /* group_fd */, PERF_FLAG_FD_CLOEXEC);
	return pfd >= 0 ? pfd : -errno;
}

static int append_to_file(const char *file, const char *fmt, ...)
{
	int fd, n, err = 0;
	va_list ap;
	char buf[1024];

	va_start(ap, fmt);
	n = vsnprintf(buf, sizeof(buf), fmt, ap);
	va_end(ap);

	if (n < 0 || n >= sizeof(buf))
		return -EINVAL;

	fd = open(file, O_WRONLY | O_APPEND | O_CLOEXEC, 0);
	if (fd < 0)
		return -errno;

	if (write(fd, buf, n) < 0)
		err = -errno;

	close(fd);
	return err;
}

#define DEBUGFS "/sys/kernel/debug/tracing"
#define TRACEFS "/sys/kernel/tracing"

static bool use_debugfs(void)
{
	static int has_debugfs = -1;

	if (has_debugfs < 0)
		has_debugfs = faccessat(AT_FDCWD, DEBUGFS, F_OK, AT_EACCESS) == 0;

	return has_debugfs == 1;
}

static const char *tracefs_path(void)
{
	return use_debugfs() ? DEBUGFS : TRACEFS;
}

static const char *tracefs_kprobe_events(void)
{
	return use_debugfs() ? DEBUGFS"/kprobe_events" : TRACEFS"/kprobe_events";
}

static const char *tracefs_uprobe_events(void)
{
	return use_debugfs() ? DEBUGFS"/uprobe_events" : TRACEFS"/uprobe_events";
}

static const char *tracefs_available_filter_functions(void)
{
	return use_debugfs() ? DEBUGFS"/available_filter_functions"
			     : TRACEFS"/available_filter_functions";
}

static const char *tracefs_available_filter_functions_addrs(void)
{
	return use_debugfs() ? DEBUGFS"/available_filter_functions_addrs"
			     : TRACEFS"/available_filter_functions_addrs";
}

static void gen_kprobe_legacy_event_name(char *buf, size_t buf_sz,
					 const char *kfunc_name, size_t offset)
{
	static int index = 0;
	int i;

	snprintf(buf, buf_sz, "libbpf_%u_%s_0x%zx_%d", getpid(), kfunc_name, offset,
		 __sync_fetch_and_add(&index, 1));

	/* sanitize binary_path in the probe name */
	for (i = 0; buf[i]; i++) {
		if (!isalnum(buf[i]))
			buf[i] = '_';
	}
}

static int add_kprobe_event_legacy(const char *probe_name, bool retprobe,
				   const char *kfunc_name, size_t offset)
{
	return append_to_file(tracefs_kprobe_events(), "%c:%s/%s %s+0x%zx",
			      retprobe ? 'r' : 'p',
			      retprobe ? "kretprobes" : "kprobes",
			      probe_name, kfunc_name, offset);
}

static int remove_kprobe_event_legacy(const char *probe_name, bool retprobe)
{
	return append_to_file(tracefs_kprobe_events(), "-:%s/%s",
			      retprobe ? "kretprobes" : "kprobes", probe_name);
}

static int determine_kprobe_perf_type_legacy(const char *probe_name, bool retprobe)
{
	char file[256];

	snprintf(file, sizeof(file), "%s/events/%s/%s/id",
		 tracefs_path(), retprobe ? "kretprobes" : "kprobes", probe_name);

	return parse_uint_from_file(file, "%d\n");
}

static int perf_event_kprobe_open_legacy(const char *probe_name, bool retprobe,
					 const char *kfunc_name, size_t offset, int pid)
{
	const size_t attr_sz = sizeof(struct perf_event_attr);
	struct perf_event_attr attr;
	char errmsg[STRERR_BUFSIZE];
	int type, pfd, err;

	err = add_kprobe_event_legacy(probe_name, retprobe, kfunc_name, offset);
	if (err < 0) {
		pr_warn("failed to add legacy kprobe event for '%s+0x%zx': %s\n",
			kfunc_name, offset,
			libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
		return err;
	}
	type = determine_kprobe_perf_type_legacy(probe_name, retprobe);
	if (type < 0) {
		err = type;
		pr_warn("failed to determine legacy kprobe event id for '%s+0x%zx': %s\n",
			kfunc_name, offset,
			libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
		goto err_clean_legacy;
	}

	memset(&attr, 0, attr_sz);
	attr.size = attr_sz;
	attr.config = type;
	attr.type = PERF_TYPE_TRACEPOINT;

	pfd = syscall(__NR_perf_event_open, &attr,
		      pid < 0 ? -1 : pid, /* pid */
		      pid == -1 ? 0 : -1, /* cpu */
		      -1 /* group_fd */, PERF_FLAG_FD_CLOEXEC);
	if (pfd < 0) {
		err = -errno;
		pr_warn("legacy kprobe perf_event_open() failed: %s\n",
			libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
		goto err_clean_legacy;
	}
	return pfd;

err_clean_legacy:
	/* Clear the newly added legacy kprobe_event */
	remove_kprobe_event_legacy(probe_name, retprobe);
	return err;
}

static const char *arch_specific_syscall_pfx(void)
{
#if defined(__x86_64__)
	return "x64";
#elif defined(__i386__)
	return "ia32";
#elif defined(__s390x__)
	return "s390x";
#elif defined(__s390__)
	return "s390";
#elif defined(__arm__)
	return "arm";
#elif defined(__aarch64__)
	return "arm64";
#elif defined(__mips__)
	return "mips";
#elif defined(__riscv)
	return "riscv";
#elif defined(__powerpc__)
	return "powerpc";
#elif defined(__powerpc64__)
	return "powerpc64";
#else
	return NULL;
#endif
}

int probe_kern_syscall_wrapper(int token_fd)
{
	char syscall_name[64];
	const char *ksys_pfx;

	ksys_pfx = arch_specific_syscall_pfx();
	if (!ksys_pfx)
		return 0;

	snprintf(syscall_name, sizeof(syscall_name), "__%s_sys_bpf", ksys_pfx);

	if (determine_kprobe_perf_type() >= 0) {
		int pfd;

		pfd = perf_event_open_probe(false, false, syscall_name, 0, getpid(), 0);
		if (pfd >= 0)
			close(pfd);

		return pfd >= 0 ? 1 : 0;
	} else { /* legacy mode */
		char probe_name[128];

		gen_kprobe_legacy_event_name(probe_name, sizeof(probe_name), syscall_name, 0);
		if (add_kprobe_event_legacy(probe_name, false, syscall_name, 0) < 0)
			return 0;

		(void)remove_kprobe_event_legacy(probe_name, false);
		return 1;
	}
}

struct bpf_link *
bpf_program__attach_kprobe_opts(const struct bpf_program *prog,
				const char *func_name,
				const struct bpf_kprobe_opts *opts)
{
	DECLARE_LIBBPF_OPTS(bpf_perf_event_opts, pe_opts);
	enum probe_attach_mode attach_mode;
	char errmsg[STRERR_BUFSIZE];
	char *legacy_probe = NULL;
	struct bpf_link *link;
	size_t offset;
	bool retprobe, legacy;
	int pfd, err;

	if (!OPTS_VALID(opts, bpf_kprobe_opts))
		return libbpf_err_ptr(-EINVAL);

	attach_mode = OPTS_GET(opts, attach_mode, PROBE_ATTACH_MODE_DEFAULT);
	retprobe = OPTS_GET(opts, retprobe, false);
	offset = OPTS_GET(opts, offset, 0);
	pe_opts.bpf_cookie = OPTS_GET(opts, bpf_cookie, 0);

	legacy = determine_kprobe_perf_type() < 0;
	switch (attach_mode) {
	case PROBE_ATTACH_MODE_LEGACY:
		legacy = true;
		pe_opts.force_ioctl_attach = true;
		break;
	case PROBE_ATTACH_MODE_PERF:
		if (legacy)
			return libbpf_err_ptr(-ENOTSUP);
		pe_opts.force_ioctl_attach = true;
		break;
	case PROBE_ATTACH_MODE_LINK:
		if (legacy || !kernel_supports(prog->obj, FEAT_PERF_LINK))
			return libbpf_err_ptr(-ENOTSUP);
		break;
	case PROBE_ATTACH_MODE_DEFAULT:
		break;
	default:
		return libbpf_err_ptr(-EINVAL);
	}

	if (!legacy) {
		pfd = perf_event_open_probe(false /* uprobe */, retprobe,
					    func_name, offset,
					    -1 /* pid */, 0 /* ref_ctr_off */);
	} else {
		char probe_name[256];

		gen_kprobe_legacy_event_name(probe_name, sizeof(probe_name),
					     func_name, offset);

		legacy_probe = strdup(probe_name);
		if (!legacy_probe)
			return libbpf_err_ptr(-ENOMEM);

		pfd = perf_event_kprobe_open_legacy(legacy_probe, retprobe, func_name,
						    offset, -1 /* pid */);
	}
	if (pfd < 0) {
		err = -errno;
		pr_warn("prog '%s': failed to create %s '%s+0x%zx' perf event: %s\n",
			prog->name, retprobe ? "kretprobe" : "kprobe",
			func_name, offset,
			libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
		goto err_out;
	}
	link = bpf_program__attach_perf_event_opts(prog, pfd, &pe_opts);
	err = libbpf_get_error(link);
	if (err) {
		close(pfd);
		pr_warn("prog '%s': failed to attach to %s '%s+0x%zx': %s\n",
			prog->name, retprobe ? "kretprobe" : "kprobe",
			func_name, offset,
			libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
		goto err_clean_legacy;
	}
	if (legacy) {
		struct bpf_link_perf *perf_link = container_of(link, struct bpf_link_perf, link);

		perf_link->legacy_probe_name = legacy_probe;
		perf_link->legacy_is_kprobe = true;
		perf_link->legacy_is_retprobe = retprobe;
	}

	return link;

err_clean_legacy:
	if (legacy)
		remove_kprobe_event_legacy(legacy_probe, retprobe);
err_out:
	free(legacy_probe);
	return libbpf_err_ptr(err);
}
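/* Usage sketch (illustrative only, not part of libbpf itself): attach an
 * already loaded program "prog" as a kprobe with an instruction offset and a
 * BPF cookie. The target function and values below are hypothetical.
 *
 *	LIBBPF_OPTS(bpf_kprobe_opts, opts,
 *		.offset = 0x10,
 *		.bpf_cookie = 0x1234,
 *	);
 *	struct bpf_link *link;
 *
 *	link = bpf_program__attach_kprobe_opts(prog, "tcp_v4_connect", &opts);
 *	if (libbpf_get_error(link))
 *		link = NULL;	// handle the -errno-style error
 */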
struct bpf_link *bpf_program__attach_kprobe(const struct bpf_program *prog,
					    bool retprobe,
					    const char *func_name)
{
	DECLARE_LIBBPF_OPTS(bpf_kprobe_opts, opts,
		.retprobe = retprobe,
	);

	return bpf_program__attach_kprobe_opts(prog, func_name, &opts);
}

struct bpf_link *bpf_program__attach_ksyscall(const struct bpf_program *prog,
					      const char *syscall_name,
					      const struct bpf_ksyscall_opts *opts)
{
	LIBBPF_OPTS(bpf_kprobe_opts, kprobe_opts);
	char func_name[128];

	if (!OPTS_VALID(opts, bpf_ksyscall_opts))
		return libbpf_err_ptr(-EINVAL);

	if (kernel_supports(prog->obj, FEAT_SYSCALL_WRAPPER)) {
		/* arch_specific_syscall_pfx() should never return NULL here
		 * because it is guarded by kernel_supports(). However, since
		 * the compiler does not know about that guarantee, keep an
		 * explicit fallback for the NULL case as well.
		 */
		snprintf(func_name, sizeof(func_name), "__%s_sys_%s",
			 arch_specific_syscall_pfx() ? : "", syscall_name);
	} else {
		snprintf(func_name, sizeof(func_name), "__se_sys_%s", syscall_name);
	}

	kprobe_opts.retprobe = OPTS_GET(opts, retprobe, false);
	kprobe_opts.bpf_cookie = OPTS_GET(opts, bpf_cookie, 0);

	return bpf_program__attach_kprobe_opts(prog, func_name, &kprobe_opts);
}
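/* Usage sketch (illustrative): syscall_name is the bare syscall name; the
 * arch-specific "__<arch>_sys_" or "__se_sys_" wrapper prefix seen above is
 * prepended internally.
 *
 *	LIBBPF_OPTS(bpf_ksyscall_opts, opts, .retprobe = false);
 *	struct bpf_link *link;
 *
 *	link = bpf_program__attach_ksyscall(prog, "openat", &opts);
 */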
/* Adapted from perf/util/string.c */
bool glob_match(const char *str, const char *pat)
{
	while (*str && *pat && *pat != '*') {
		if (*pat == '?') { /* Matches any single character */
			str++;
			pat++;
			continue;
		}
		if (*str != *pat)
			return false;
		str++;
		pat++;
	}
	/* Check wild card */
	if (*pat == '*') {
		while (*pat == '*')
			pat++;
		if (!*pat) /* Tail wild card matches all */
			return true;
		while (*str)
			if (glob_match(str++, pat))
				return true;
	}
	return !*str && !*pat;
}
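/* Semantics sketch for glob_match(): '*' matches any (possibly empty)
 * substring, '?' matches any single character; there are no character
 * classes.
 *
 *	glob_match("tcp_v4_connect", "tcp_*")    -> true
 *	glob_match("tcp_v4_connect", "tcp_v?_*") -> true
 *	glob_match("udp_sendmsg",    "tcp_*")    -> false
 */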
struct kprobe_multi_resolve {
	const char *pattern;
	unsigned long *addrs;
	size_t cap;
	size_t cnt;
};

struct avail_kallsyms_data {
	char **syms;
	size_t cnt;
	struct kprobe_multi_resolve *res;
};

static int avail_func_cmp(const void *a, const void *b)
{
	return strcmp(*(const char **)a, *(const char **)b);
}

static int avail_kallsyms_cb(unsigned long long sym_addr, char sym_type,
			     const char *sym_name, void *ctx)
{
	struct avail_kallsyms_data *data = ctx;
	struct kprobe_multi_resolve *res = data->res;
	int err;

	if (!bsearch(&sym_name, data->syms, data->cnt, sizeof(*data->syms), avail_func_cmp))
		return 0;

	err = libbpf_ensure_mem((void **)&res->addrs, &res->cap, sizeof(*res->addrs), res->cnt + 1);
	if (err)
		return err;

	res->addrs[res->cnt++] = (unsigned long)sym_addr;
	return 0;
}
static int libbpf_available_kallsyms_parse(struct kprobe_multi_resolve *res)
{
	const char *available_functions_file = tracefs_available_filter_functions();
	struct avail_kallsyms_data data;
	char sym_name[500];
	FILE *f;
	int err = 0, ret, i;
	char **syms = NULL;
	size_t cap = 0, cnt = 0;

	f = fopen(available_functions_file, "re");
	if (!f) {
		err = -errno;
		pr_warn("failed to open %s: %d\n", available_functions_file, err);
		return err;
	}

	while (true) {
		char *name;

		ret = fscanf(f, "%499s%*[^\n]\n", sym_name);
		if (ret == EOF && feof(f))
			break;

		if (ret != 1) {
			pr_warn("failed to parse available_filter_functions entry: %d\n", ret);
			err = -EINVAL;
			goto cleanup;
		}

		if (!glob_match(sym_name, res->pattern))
			continue;

		err = libbpf_ensure_mem((void **)&syms, &cap, sizeof(*syms), cnt + 1);
		if (err)
			goto cleanup;

		name = strdup(sym_name);
		if (!name) {
			err = -errno;
			goto cleanup;
		}

		syms[cnt++] = name;
	}

	/* no entries found, bail out */
	if (!cnt) {
		err = -ENOENT;
		goto cleanup;
	}

	/* sort available functions */
	qsort(syms, cnt, sizeof(*syms), avail_func_cmp);

	data.syms = syms;
	data.cnt = cnt;
	data.res = res;
	err = libbpf_kallsyms_parse(avail_kallsyms_cb, &data);
	if (!err && !res->cnt)
		err = -ENOENT;

cleanup:
	for (i = 0; i < cnt; i++)
		free((char *)syms[i]);
	free(syms);

	fclose(f);
	return err;
}
static bool has_available_filter_functions_addrs(void)
{
	return access(tracefs_available_filter_functions_addrs(), R_OK) != -1;
}

static int libbpf_available_kprobes_parse(struct kprobe_multi_resolve *res)
{
	const char *available_path = tracefs_available_filter_functions_addrs();
	char sym_name[500];
	FILE *f;
	int ret, err = 0;
	unsigned long long sym_addr;

	f = fopen(available_path, "re");
	if (!f) {
		err = -errno;
		pr_warn("failed to open %s: %d\n", available_path, err);
		return err;
	}

	while (true) {
		ret = fscanf(f, "%llx %499s%*[^\n]\n", &sym_addr, sym_name);
		if (ret == EOF && feof(f))
			break;

		if (ret != 2) {
			pr_warn("failed to parse available_filter_functions_addrs entry: %d\n",
				ret);
			err = -EINVAL;
			goto cleanup;
		}

		if (!glob_match(sym_name, res->pattern))
			continue;

		err = libbpf_ensure_mem((void **)&res->addrs, &res->cap,
					sizeof(*res->addrs), res->cnt + 1);
		if (err)
			goto cleanup;

		res->addrs[res->cnt++] = (unsigned long)sym_addr;
	}

	if (!res->cnt)
		err = -ENOENT;

cleanup:
	fclose(f);
	return err;
}
struct bpf_link *
bpf_program__attach_kprobe_multi_opts(const struct bpf_program *prog,
				      const char *pattern,
				      const struct bpf_kprobe_multi_opts *opts)
{
	LIBBPF_OPTS(bpf_link_create_opts, lopts);
	struct kprobe_multi_resolve res = {
		.pattern = pattern,
	};
	struct bpf_link *link = NULL;
	char errmsg[STRERR_BUFSIZE];
	const unsigned long *addrs;
	int err, link_fd, prog_fd;
	const __u64 *cookies;
	const char **syms;
	bool retprobe;
	size_t cnt;

	if (!OPTS_VALID(opts, bpf_kprobe_multi_opts))
		return libbpf_err_ptr(-EINVAL);

	syms    = OPTS_GET(opts, syms, false);
	addrs   = OPTS_GET(opts, addrs, false);
	cnt     = OPTS_GET(opts, cnt, false);
	cookies = OPTS_GET(opts, cookies, false);

	if (!pattern && !addrs && !syms)
		return libbpf_err_ptr(-EINVAL);
	if (pattern && (addrs || syms || cookies || cnt))
		return libbpf_err_ptr(-EINVAL);
	if (!pattern && !cnt)
		return libbpf_err_ptr(-EINVAL);
	if (addrs && syms)
		return libbpf_err_ptr(-EINVAL);

	if (pattern) {
		if (has_available_filter_functions_addrs())
			err = libbpf_available_kprobes_parse(&res);
		else
			err = libbpf_available_kallsyms_parse(&res);
		if (err)
			goto error;
		addrs = res.addrs;
		cnt = res.cnt;
	}

	retprobe = OPTS_GET(opts, retprobe, false);

	lopts.kprobe_multi.syms = syms;
	lopts.kprobe_multi.addrs = addrs;
	lopts.kprobe_multi.cookies = cookies;
	lopts.kprobe_multi.cnt = cnt;
	lopts.kprobe_multi.flags = retprobe ? BPF_F_KPROBE_MULTI_RETURN : 0;

	link = calloc(1, sizeof(*link));
	if (!link) {
		err = -ENOMEM;
		goto error;
	}
	link->detach = &bpf_link__detach_fd;

	prog_fd = bpf_program__fd(prog);
	link_fd = bpf_link_create(prog_fd, 0, BPF_TRACE_KPROBE_MULTI, &lopts);
	if (link_fd < 0) {
		err = -errno;
		pr_warn("prog '%s': failed to attach: %s\n",
			prog->name, libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
		goto error;
	}
	link->fd = link_fd;
	free(res.addrs);
	return link;

error:
	free(link);
	free(res.addrs);
	return libbpf_err_ptr(err);
}
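/* Usage sketch (illustrative): attach one program to every kernel function
 * matching a glob pattern in a single bpf_link_create() call; the pattern
 * below is hypothetical.
 *
 *	LIBBPF_OPTS(bpf_kprobe_multi_opts, opts, .retprobe = false);
 *	struct bpf_link *link;
 *
 *	link = bpf_program__attach_kprobe_multi_opts(prog, "tcp_*", &opts);
 */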
static int attach_kprobe(const struct bpf_program *prog, long cookie, struct bpf_link **link)
{
	DECLARE_LIBBPF_OPTS(bpf_kprobe_opts, opts);
	unsigned long offset = 0;
	const char *func_name;
	char *func;
	int n;

	*link = NULL;

	/* no auto-attach for SEC("kprobe") and SEC("kretprobe") */
	if (strcmp(prog->sec_name, "kprobe") == 0 || strcmp(prog->sec_name, "kretprobe") == 0)
		return 0;

	opts.retprobe = str_has_pfx(prog->sec_name, "kretprobe/");
	if (opts.retprobe)
		func_name = prog->sec_name + sizeof("kretprobe/") - 1;
	else
		func_name = prog->sec_name + sizeof("kprobe/") - 1;

	n = sscanf(func_name, "%m[a-zA-Z0-9_.]+%li", &func, &offset);
	if (n < 1) {
		pr_warn("kprobe name is invalid: %s\n", func_name);
		return -EINVAL;
	}
	if (opts.retprobe && offset != 0) {
		free(func);
		pr_warn("kretprobes do not support offset specification\n");
		return -EINVAL;
	}

	opts.offset = offset;
	*link = bpf_program__attach_kprobe_opts(prog, func, &opts);
	free(func);
	return libbpf_get_error(*link);
}

static int attach_ksyscall(const struct bpf_program *prog, long cookie, struct bpf_link **link)
{
	LIBBPF_OPTS(bpf_ksyscall_opts, opts);
	const char *syscall_name;

	*link = NULL;

	/* no auto-attach for SEC("ksyscall") and SEC("kretsyscall") */
	if (strcmp(prog->sec_name, "ksyscall") == 0 || strcmp(prog->sec_name, "kretsyscall") == 0)
		return 0;

	opts.retprobe = str_has_pfx(prog->sec_name, "kretsyscall/");
	if (opts.retprobe)
		syscall_name = prog->sec_name + sizeof("kretsyscall/") - 1;
	else
		syscall_name = prog->sec_name + sizeof("ksyscall/") - 1;

	*link = bpf_program__attach_ksyscall(prog, syscall_name, &opts);
	return *link ? 0 : -errno;
}

static int attach_kprobe_multi(const struct bpf_program *prog, long cookie, struct bpf_link **link)
{
	LIBBPF_OPTS(bpf_kprobe_multi_opts, opts);
	const char *spec;
	char *pattern;
	int n;

	*link = NULL;

	/* no auto-attach for SEC("kprobe.multi") and SEC("kretprobe.multi") */
	if (strcmp(prog->sec_name, "kprobe.multi") == 0 ||
	    strcmp(prog->sec_name, "kretprobe.multi") == 0)
		return 0;

	opts.retprobe = str_has_pfx(prog->sec_name, "kretprobe.multi/");
	if (opts.retprobe)
		spec = prog->sec_name + sizeof("kretprobe.multi/") - 1;
	else
		spec = prog->sec_name + sizeof("kprobe.multi/") - 1;

	n = sscanf(spec, "%m[a-zA-Z0-9_.*?]", &pattern);
	if (n < 1) {
		pr_warn("kprobe multi pattern is invalid: %s\n", spec);
		return -EINVAL;
	}

	*link = bpf_program__attach_kprobe_multi_opts(prog, pattern, &opts);
	free(pattern);
	return libbpf_get_error(*link);
}

static int attach_uprobe_multi(const struct bpf_program *prog, long cookie, struct bpf_link **link)
{
	char *probe_type = NULL, *binary_path = NULL, *func_name = NULL;
	LIBBPF_OPTS(bpf_uprobe_multi_opts, opts);
	int n, ret = -EINVAL;

	*link = NULL;

	n = sscanf(prog->sec_name, "%m[^/]/%m[^:]:%m[^\n]",
		   &probe_type, &binary_path, &func_name);
	switch (n) {
	case 1:
		/* handle SEC("u[ret]probe") - format is valid, but auto-attach is impossible. */
		ret = 0;
		break;
	case 3:
		opts.retprobe = strcmp(probe_type, "uretprobe.multi") == 0;
		*link = bpf_program__attach_uprobe_multi(prog, -1, binary_path, func_name, &opts);
		ret = libbpf_get_error(*link);
		break;
	default:
		pr_warn("prog '%s': invalid format of section definition '%s'\n", prog->name,
			prog->sec_name);
		break;
	}
	free(probe_type);
	free(binary_path);
	free(func_name);
	return ret;
}
static void gen_uprobe_legacy_event_name(char *buf, size_t buf_sz,
					 const char *binary_path, uint64_t offset)
{
	int i;

	snprintf(buf, buf_sz, "libbpf_%u_%s_0x%zx", getpid(), binary_path, (size_t)offset);

	/* sanitize binary_path in the probe name */
	for (i = 0; buf[i]; i++) {
		if (!isalnum(buf[i]))
			buf[i] = '_';
	}
}

static inline int add_uprobe_event_legacy(const char *probe_name, bool retprobe,
					  const char *binary_path, size_t offset)
{
	return append_to_file(tracefs_uprobe_events(), "%c:%s/%s %s:0x%zx",
			      retprobe ? 'r' : 'p',
			      retprobe ? "uretprobes" : "uprobes",
			      probe_name, binary_path, offset);
}

static inline int remove_uprobe_event_legacy(const char *probe_name, bool retprobe)
{
	return append_to_file(tracefs_uprobe_events(), "-:%s/%s",
			      retprobe ? "uretprobes" : "uprobes", probe_name);
}
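/* For reference, the add/remove helpers above append lines of the following
 * shape to <tracefs>/uprobe_events (the pid, path, and offset are made up):
 *
 *	p:uprobes/libbpf_1234__usr_bin_foo_0x42 /usr/bin/foo:0x42
 *	r:uretprobes/libbpf_1234__usr_bin_foo_0x42 /usr/bin/foo:0x42
 *	-:uprobes/libbpf_1234__usr_bin_foo_0x42
 */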
static int determine_uprobe_perf_type_legacy(const char *probe_name, bool retprobe)
{
	char file[PATH_MAX];

	snprintf(file, sizeof(file), "%s/events/%s/%s/id",
		 tracefs_path(), retprobe ? "uretprobes" : "uprobes", probe_name);

	return parse_uint_from_file(file, "%d\n");
}

static int perf_event_uprobe_open_legacy(const char *probe_name, bool retprobe,
					 const char *binary_path, size_t offset, int pid)
{
	const size_t attr_sz = sizeof(struct perf_event_attr);
	struct perf_event_attr attr;
	int type, pfd, err;

	err = add_uprobe_event_legacy(probe_name, retprobe, binary_path, offset);
	if (err < 0) {
		pr_warn("failed to add legacy uprobe event for %s:0x%zx: %d\n",
			binary_path, (size_t)offset, err);
		return err;
	}
	type = determine_uprobe_perf_type_legacy(probe_name, retprobe);
	if (type < 0) {
		err = type;
		pr_warn("failed to determine legacy uprobe event id for %s:0x%zx: %d\n",
			binary_path, offset, err);
		goto err_clean_legacy;
	}

	memset(&attr, 0, attr_sz);
	attr.size = attr_sz;
	attr.config = type;
	attr.type = PERF_TYPE_TRACEPOINT;

	pfd = syscall(__NR_perf_event_open, &attr,
		      pid < 0 ? -1 : pid, /* pid */
		      pid == -1 ? 0 : -1, /* cpu */
		      -1 /* group_fd */, PERF_FLAG_FD_CLOEXEC);
	if (pfd < 0) {
		err = -errno;
		pr_warn("legacy uprobe perf_event_open() failed: %d\n", err);
		goto err_clean_legacy;
	}
	return pfd;

err_clean_legacy:
	/* Clear the newly added legacy uprobe_event */
	remove_uprobe_event_legacy(probe_name, retprobe);
	return err;
}
/* Find offset of function name in archive specified by path. Currently
 * supported are .zip files that do not compress their contents, as used on
 * Android in the form of APKs, for example. "file_name" is the name of the ELF
 * file inside the archive. "func_name" matches symbol name or name@@LIB for
 * library functions.
 *
 * An overview of the APK format specifically provided here:
 * https://en.wikipedia.org/w/index.php?title=Apk_(file_format)&oldid=1139099120#Package_contents
 */
static long elf_find_func_offset_from_archive(const char *archive_path, const char *file_name,
					      const char *func_name)
{
	struct zip_archive *archive;
	struct zip_entry entry;
	long ret;
	Elf *elf;

	archive = zip_archive_open(archive_path);
	if (IS_ERR(archive)) {
		ret = PTR_ERR(archive);
		pr_warn("zip: failed to open %s: %ld\n", archive_path, ret);
		return ret;
	}

	ret = zip_archive_find_entry(archive, file_name, &entry);
	if (ret) {
		pr_warn("zip: could not find archive member %s in %s: %ld\n", file_name,
			archive_path, ret);
		goto out;
	}
	pr_debug("zip: found entry for %s in %s at 0x%lx\n", file_name, archive_path,
		 (unsigned long)entry.data_offset);

	if (entry.compression) {
		pr_warn("zip: entry %s of %s is compressed and cannot be handled\n", file_name,
			archive_path);
		ret = -LIBBPF_ERRNO__FORMAT;
		goto out;
	}

	elf = elf_memory((void *)entry.data, entry.data_length);
	if (!elf) {
		pr_warn("elf: could not read elf file %s from %s: %s\n", file_name, archive_path,
			elf_errmsg(-1));
		ret = -LIBBPF_ERRNO__LIBELF;
		goto out;
	}

	ret = elf_find_func_offset(elf, file_name, func_name);
	if (ret > 0) {
		pr_debug("elf: symbol address match for %s of %s in %s: 0x%x + 0x%lx = 0x%lx\n",
			 func_name, file_name, archive_path, entry.data_offset, ret,
			 ret + entry.data_offset);
		ret += entry.data_offset;
	}
	elf_end(elf);

out:
	zip_archive_close(archive);
	return ret;
}
static const char *arch_specific_lib_paths(void)
{
	/*
	 * Based on https://packages.debian.org/sid/libc6.
	 *
	 * Assume that the traced program is built for the same architecture
	 * as libbpf, which should cover the vast majority of cases.
	 */
#if defined(__x86_64__)
	return "/lib/x86_64-linux-gnu";
#elif defined(__i386__)
	return "/lib/i386-linux-gnu";
#elif defined(__s390x__)
	return "/lib/s390x-linux-gnu";
#elif defined(__s390__)
	return "/lib/s390-linux-gnu";
#elif defined(__arm__) && defined(__SOFTFP__)
	return "/lib/arm-linux-gnueabi";
#elif defined(__arm__) && !defined(__SOFTFP__)
	return "/lib/arm-linux-gnueabihf";
#elif defined(__aarch64__)
	return "/lib/aarch64-linux-gnu";
#elif defined(__mips__) && defined(__MIPSEL__) && _MIPS_SZLONG == 64
	return "/lib/mips64el-linux-gnuabi64";
#elif defined(__mips__) && defined(__MIPSEL__) && _MIPS_SZLONG == 32
	return "/lib/mipsel-linux-gnu";
#elif defined(__powerpc64__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
	return "/lib/powerpc64le-linux-gnu";
#elif defined(__sparc__) && defined(__arch64__)
	return "/lib/sparc64-linux-gnu";
#elif defined(__riscv) && __riscv_xlen == 64
	return "/lib/riscv64-linux-gnu";
#else
	return NULL;
#endif
}
/* Get full path to program/shared library. */
static int resolve_full_path(const char *file, char *result, size_t result_sz)
{
	const char *search_paths[3] = {};
	int i, perm = R_OK;

	if (str_has_sfx(file, ".so") || strstr(file, ".so.")) {
		search_paths[0] = getenv("LD_LIBRARY_PATH");
		search_paths[1] = "/usr/lib64:/usr/lib";
		search_paths[2] = arch_specific_lib_paths();
	} else {
		search_paths[0] = getenv("PATH");
		search_paths[1] = "/usr/bin:/usr/sbin";
		perm = R_OK | X_OK;
	}

	for (i = 0; i < ARRAY_SIZE(search_paths); i++) {
		const char *s;

		if (!search_paths[i])
			continue;
		for (s = search_paths[i]; s != NULL; s = strchr(s, ':')) {
			char *next_path;
			int seg_len;

			if (s[0] == ':')
				s++;
			next_path = strchr(s, ':');
			seg_len = next_path ? next_path - s : strlen(s);
			if (!seg_len)
				continue;
			snprintf(result, result_sz, "%.*s/%s", seg_len, s, file);
			/* ensure it has required permissions */
			if (faccessat(AT_FDCWD, result, perm, AT_EACCESS) < 0)
				continue;
			pr_debug("resolved '%s' to '%s'\n", file, result);
			return 0;
		}
	}
	return -ENOENT;
}
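/* Example (assuming a default environment): resolve_full_path("libc.so.6",
 * buf, sz) consults LD_LIBRARY_PATH, then "/usr/lib64:/usr/lib", then the
 * Debian-style arch directory, and may yield e.g. "/usr/lib64/libc.so.6".
 * A plain program name ("ls") instead goes through PATH, with both R_OK and
 * X_OK required.
 */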
struct bpf_link *
bpf_program__attach_uprobe_multi(const struct bpf_program *prog,
				 pid_t pid,
				 const char *path,
				 const char *func_pattern,
				 const struct bpf_uprobe_multi_opts *opts)
{
	const unsigned long *ref_ctr_offsets = NULL, *offsets = NULL;
	LIBBPF_OPTS(bpf_link_create_opts, lopts);
	unsigned long *resolved_offsets = NULL;
	int err = 0, link_fd, prog_fd;
	struct bpf_link *link = NULL;
	char errmsg[STRERR_BUFSIZE];
	char full_path[PATH_MAX];
	const __u64 *cookies;
	const char **syms;
	size_t cnt;

	if (!OPTS_VALID(opts, bpf_uprobe_multi_opts))
		return libbpf_err_ptr(-EINVAL);

	syms = OPTS_GET(opts, syms, NULL);
	offsets = OPTS_GET(opts, offsets, NULL);
	ref_ctr_offsets = OPTS_GET(opts, ref_ctr_offsets, NULL);
	cookies = OPTS_GET(opts, cookies, NULL);
	cnt = OPTS_GET(opts, cnt, 0);

	/*
	 * User can specify 2 mutually exclusive set of inputs:
	 *
	 * 1) use only path/func_pattern/pid arguments
	 *
	 * 2) use path/pid with allowed combinations of:
	 *    syms/offsets/ref_ctr_offsets/cookies/cnt
	 *
	 *    - syms and offsets are mutually exclusive
	 *    - ref_ctr_offsets and cookies are optional
	 *
	 * Any other usage results in error.
	 */

	if (!path)
		return libbpf_err_ptr(-EINVAL);
	if (!func_pattern && cnt == 0)
		return libbpf_err_ptr(-EINVAL);

	if (func_pattern) {
		if (syms || offsets || ref_ctr_offsets || cookies || cnt)
			return libbpf_err_ptr(-EINVAL);
	} else {
		if (!!syms == !!offsets)
			return libbpf_err_ptr(-EINVAL);
	}

	if (func_pattern) {
		if (!strchr(path, '/')) {
			err = resolve_full_path(path, full_path, sizeof(full_path));
			if (err) {
				pr_warn("prog '%s': failed to resolve full path for '%s': %d\n",
					prog->name, path, err);
				return libbpf_err_ptr(err);
			}
			path = full_path;
		}

		err = elf_resolve_pattern_offsets(path, func_pattern,
						  &resolved_offsets, &cnt);
		if (err < 0)
			return libbpf_err_ptr(err);
		offsets = resolved_offsets;
	} else if (syms) {
		err = elf_resolve_syms_offsets(path, cnt, syms, &resolved_offsets, STT_FUNC);
		if (err < 0)
			return libbpf_err_ptr(err);
		offsets = resolved_offsets;
	}

	lopts.uprobe_multi.path = path;
	lopts.uprobe_multi.offsets = offsets;
	lopts.uprobe_multi.ref_ctr_offsets = ref_ctr_offsets;
	lopts.uprobe_multi.cookies = cookies;
	lopts.uprobe_multi.cnt = cnt;
	lopts.uprobe_multi.flags = OPTS_GET(opts, retprobe, false) ? BPF_F_UPROBE_MULTI_RETURN : 0;

	if (pid == 0)
		pid = getpid();
	if (pid > 0)
		lopts.uprobe_multi.pid = pid;

	link = calloc(1, sizeof(*link));
	if (!link) {
		err = -ENOMEM;
		goto error;
	}
	link->detach = &bpf_link__detach_fd;

	prog_fd = bpf_program__fd(prog);
	link_fd = bpf_link_create(prog_fd, 0, BPF_TRACE_UPROBE_MULTI, &lopts);
	if (link_fd < 0) {
		err = -errno;
		pr_warn("prog '%s': failed to attach multi-uprobe: %s\n",
			prog->name, libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
		goto error;
	}
	link->fd = link_fd;
	free(resolved_offsets);
	return link;

error:
	free(resolved_offsets);
	free(link);
	return libbpf_err_ptr(err);
}
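/* Usage sketch (illustrative): attach to every function of a binary whose
 * name matches a glob pattern; the path and pattern below are hypothetical.
 *
 *	struct bpf_link *link;
 *
 *	link = bpf_program__attach_uprobe_multi(prog, -1, "/usr/bin/foo",
 *						"process_*", NULL);
 */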
LIBBPF_API struct bpf_link *
bpf_program__attach_uprobe_opts(const struct bpf_program *prog, pid_t pid,
				const char *binary_path, size_t func_offset,
				const struct bpf_uprobe_opts *opts)
{
	const char *archive_path = NULL, *archive_sep = NULL;
	char errmsg[STRERR_BUFSIZE], *legacy_probe = NULL;
	DECLARE_LIBBPF_OPTS(bpf_perf_event_opts, pe_opts);
	enum probe_attach_mode attach_mode;
	char full_path[PATH_MAX];
	struct bpf_link *link;
	size_t ref_ctr_off;
	int pfd, err;
	bool retprobe, legacy;
	const char *func_name;

	if (!OPTS_VALID(opts, bpf_uprobe_opts))
		return libbpf_err_ptr(-EINVAL);

	attach_mode = OPTS_GET(opts, attach_mode, PROBE_ATTACH_MODE_DEFAULT);
	retprobe = OPTS_GET(opts, retprobe, false);
	ref_ctr_off = OPTS_GET(opts, ref_ctr_offset, 0);
	pe_opts.bpf_cookie = OPTS_GET(opts, bpf_cookie, 0);

	if (!binary_path)
		return libbpf_err_ptr(-EINVAL);

	/* Check if "binary_path" refers to an archive. */
	archive_sep = strstr(binary_path, "!/");
	if (archive_sep) {
		full_path[0] = '\0';
		libbpf_strlcpy(full_path, binary_path,
			       min(sizeof(full_path), (size_t)(archive_sep - binary_path + 1)));
		archive_path = full_path;
		binary_path = archive_sep + 2;
	} else if (!strchr(binary_path, '/')) {
		err = resolve_full_path(binary_path, full_path, sizeof(full_path));
		if (err) {
			pr_warn("prog '%s': failed to resolve full path for '%s': %d\n",
				prog->name, binary_path, err);
			return libbpf_err_ptr(err);
		}
		binary_path = full_path;
	}
	func_name = OPTS_GET(opts, func_name, NULL);
	if (func_name) {
		long sym_off;

		if (archive_path) {
			sym_off = elf_find_func_offset_from_archive(archive_path, binary_path,
								    func_name);
			binary_path = archive_path;
		} else {
			sym_off = elf_find_func_offset_from_file(binary_path, func_name);
		}
		if (sym_off < 0)
			return libbpf_err_ptr(sym_off);
		func_offset += sym_off;
	}

	legacy = determine_uprobe_perf_type() < 0;
	switch (attach_mode) {
	case PROBE_ATTACH_MODE_LEGACY:
		legacy = true;
		pe_opts.force_ioctl_attach = true;
		break;
	case PROBE_ATTACH_MODE_PERF:
		if (legacy)
			return libbpf_err_ptr(-ENOTSUP);
		pe_opts.force_ioctl_attach = true;
		break;
	case PROBE_ATTACH_MODE_LINK:
		if (legacy || !kernel_supports(prog->obj, FEAT_PERF_LINK))
			return libbpf_err_ptr(-ENOTSUP);
		break;
	case PROBE_ATTACH_MODE_DEFAULT:
		break;
	default:
		return libbpf_err_ptr(-EINVAL);
	}

	if (!legacy) {
		pfd = perf_event_open_probe(true /* uprobe */, retprobe, binary_path,
					    func_offset, pid, ref_ctr_off);
	} else {
		char probe_name[PATH_MAX + 64];

		if (ref_ctr_off)
			return libbpf_err_ptr(-EINVAL);

		gen_uprobe_legacy_event_name(probe_name, sizeof(probe_name),
					     binary_path, func_offset);

		legacy_probe = strdup(probe_name);
		if (!legacy_probe)
			return libbpf_err_ptr(-ENOMEM);

		pfd = perf_event_uprobe_open_legacy(legacy_probe, retprobe,
						    binary_path, func_offset, pid);
	}
	if (pfd < 0) {
		err = -errno;
		pr_warn("prog '%s': failed to create %s '%s:0x%zx' perf event: %s\n",
			prog->name, retprobe ? "uretprobe" : "uprobe",
			binary_path, func_offset,
			libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
		goto err_out;
	}

	link = bpf_program__attach_perf_event_opts(prog, pfd, &pe_opts);
	err = libbpf_get_error(link);
	if (err) {
		close(pfd);
		pr_warn("prog '%s': failed to attach to %s '%s:0x%zx': %s\n",
			prog->name, retprobe ? "uretprobe" : "uprobe",
			binary_path, func_offset,
			libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
		goto err_clean_legacy;
	}
	if (legacy) {
		struct bpf_link_perf *perf_link = container_of(link, struct bpf_link_perf, link);

		perf_link->legacy_probe_name = legacy_probe;
		perf_link->legacy_is_kprobe = false;
		perf_link->legacy_is_retprobe = retprobe;
	}
	return link;

err_clean_legacy:
	if (legacy)
		remove_uprobe_event_legacy(legacy_probe, retprobe);
err_out:
	free(legacy_probe);
	return libbpf_err_ptr(err);
}
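/* Usage sketch (illustrative): attach by symbol name and let libbpf resolve
 * the offset; the binary and symbol below are hypothetical. An uncompressed
 * APK member can be addressed with the "!/" separator handled above, e.g.
 * "/system/app/foo.apk!/lib/arm64-v8a/libfoo.so".
 *
 *	LIBBPF_OPTS(bpf_uprobe_opts, opts, .func_name = "malloc");
 *	struct bpf_link *link;
 *
 *	link = bpf_program__attach_uprobe_opts(prog, -1, "libc.so.6", 0, &opts);
 */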
/* Format of u[ret]probe section definition supporting auto-attach:
 * u[ret]probe/binary:function[+offset]
 *
 * binary can be an absolute/relative path or a filename; the latter is resolved to a
 * full binary path via bpf_program__attach_uprobe_opts.
 *
 * Specifying uprobe+ ensures we carry out strict matching; either "uprobe" must be
 * specified (and auto-attach is not possible) or the above format is specified for
 * auto-attach.
 */
static int attach_uprobe(const struct bpf_program *prog, long cookie, struct bpf_link **link)
{
	DECLARE_LIBBPF_OPTS(bpf_uprobe_opts, opts);
	char *probe_type = NULL, *binary_path = NULL, *func_name = NULL, *func_off;
	int n, c, ret = -EINVAL;
	long offset = 0;

	*link = NULL;

	n = sscanf(prog->sec_name, "%m[^/]/%m[^:]:%m[^\n]",
		   &probe_type, &binary_path, &func_name);
	switch (n) {
	case 1:
		/* handle SEC("u[ret]probe") - format is valid, but auto-attach is impossible. */
		ret = 0;
		break;
	case 2:
		pr_warn("prog '%s': section '%s' missing ':function[+offset]' specification\n",
			prog->name, prog->sec_name);
		break;
	case 3:
		/* check if user specifies `+offset`, if yes, this should be
		 * the last part of the string, make sure sscanf read to EOL
		 */
		func_off = strrchr(func_name, '+');
		if (func_off) {
			n = sscanf(func_off, "+%li%n", &offset, &c);
			if (n == 1 && *(func_off + c) == '\0')
				func_off[0] = '\0';
			else
				offset = 0;
		}
		opts.retprobe = strcmp(probe_type, "uretprobe") == 0 ||
				strcmp(probe_type, "uretprobe.s") == 0;
		if (opts.retprobe && offset != 0) {
			pr_warn("prog '%s': uretprobes do not support offset specification\n",
				prog->name);
			break;
		}
		opts.func_name = func_name;
		*link = bpf_program__attach_uprobe_opts(prog, -1, binary_path, offset, &opts);
		ret = libbpf_get_error(*link);
		break;
	default:
		pr_warn("prog '%s': invalid format of section definition '%s'\n", prog->name,
			prog->sec_name);
		break;
	}
	free(probe_type);
	free(binary_path);
	free(func_name);
	return ret;
}
struct bpf_link *bpf_program__attach_uprobe(const struct bpf_program *prog,
					    bool retprobe, pid_t pid,
					    const char *binary_path,
					    size_t func_offset)
{
	DECLARE_LIBBPF_OPTS(bpf_uprobe_opts, opts, .retprobe = retprobe);

	return bpf_program__attach_uprobe_opts(prog, pid, binary_path, func_offset, &opts);
}
struct bpf_link *bpf_program__attach_usdt(const struct bpf_program *prog,
					  pid_t pid, const char *binary_path,
					  const char *usdt_provider, const char *usdt_name,
					  const struct bpf_usdt_opts *opts)
{
	char resolved_path[512];
	struct bpf_object *obj = prog->obj;
	struct bpf_link *link;
	__u64 usdt_cookie;
	int err;

	if (!OPTS_VALID(opts, bpf_uprobe_opts))
		return libbpf_err_ptr(-EINVAL);

	if (bpf_program__fd(prog) < 0) {
		pr_warn("prog '%s': can't attach BPF program w/o FD (did you load it?)\n",
			prog->name);
		return libbpf_err_ptr(-EINVAL);
	}

	if (!binary_path)
		return libbpf_err_ptr(-EINVAL);

	if (!strchr(binary_path, '/')) {
		err = resolve_full_path(binary_path, resolved_path, sizeof(resolved_path));
		if (err) {
			pr_warn("prog '%s': failed to resolve full path for '%s': %d\n",
				prog->name, binary_path, err);
			return libbpf_err_ptr(err);
		}
		binary_path = resolved_path;
	}

	/* USDT manager is instantiated lazily on first USDT attach. It will
	 * be destroyed together with BPF object in bpf_object__close().
	 */
	if (IS_ERR(obj->usdt_man))
		return libbpf_ptr(obj->usdt_man);
	if (!obj->usdt_man) {
		obj->usdt_man = usdt_manager_new(obj);
		if (IS_ERR(obj->usdt_man))
			return libbpf_ptr(obj->usdt_man);
	}

	usdt_cookie = OPTS_GET(opts, usdt_cookie, 0);
	link = usdt_manager_attach_usdt(obj->usdt_man, prog, pid, binary_path,
					usdt_provider, usdt_name, usdt_cookie);
	err = libbpf_get_error(link);
	if (err)
		return libbpf_err_ptr(err);
	return link;
}
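/* Usage sketch (illustrative): the provider/name pair identifies the USDT
 * note in the target binary; both names below are hypothetical.
 *
 *	struct bpf_link *link;
 *
 *	link = bpf_program__attach_usdt(prog, -1, "/usr/sbin/mysqld",
 *					"mysql", "query__start", NULL);
 */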
static int attach_usdt(const struct bpf_program *prog, long cookie, struct bpf_link **link)
{
	char *path = NULL, *provider = NULL, *name = NULL;
	const char *sec_name;
	int n, err;

	sec_name = bpf_program__section_name(prog);
	if (strcmp(sec_name, "usdt") == 0) {
		/* no auto-attach for just SEC("usdt") */
		*link = NULL;
		return 0;
	}

	n = sscanf(sec_name, "usdt/%m[^:]:%m[^:]:%m[^:]", &path, &provider, &name);
	if (n != 3) {
		pr_warn("invalid section '%s', expected SEC(\"usdt/<path>:<provider>:<name>\")\n",
			sec_name);
		err = -EINVAL;
	} else {
		*link = bpf_program__attach_usdt(prog, -1 /* any process */, path,
						 provider, name, NULL);
		err = libbpf_get_error(*link);
	}
	free(path);
	free(provider);
	free(name);
	return err;
}
static int determine_tracepoint_id(const char *tp_category,
				   const char *tp_name)
{
	char file[PATH_MAX];
	int ret;

	ret = snprintf(file, sizeof(file), "%s/events/%s/%s/id",
		       tracefs_path(), tp_category, tp_name);
	if (ret < 0)
		return -errno;
	if (ret >= sizeof(file)) {
		pr_debug("tracepoint %s/%s path is too long\n",
			 tp_category, tp_name);
		return -E2BIG;
	}
	return parse_uint_from_file(file, "%d\n");
}

static int perf_event_open_tracepoint(const char *tp_category,
				      const char *tp_name)
{
	const size_t attr_sz = sizeof(struct perf_event_attr);
	struct perf_event_attr attr;
	char errmsg[STRERR_BUFSIZE];
	int tp_id, pfd, err;

	tp_id = determine_tracepoint_id(tp_category, tp_name);
	if (tp_id < 0) {
		pr_warn("failed to determine tracepoint '%s/%s' perf event ID: %s\n",
			tp_category, tp_name,
			libbpf_strerror_r(tp_id, errmsg, sizeof(errmsg)));
		return tp_id;
	}

	memset(&attr, 0, attr_sz);
	attr.type = PERF_TYPE_TRACEPOINT;
	attr.size = attr_sz;
	attr.config = tp_id;

	pfd = syscall(__NR_perf_event_open, &attr, -1 /* pid */, 0 /* cpu */,
		      -1 /* group_fd */, PERF_FLAG_FD_CLOEXEC);
	if (pfd < 0) {
		err = -errno;
		pr_warn("tracepoint '%s/%s' perf_event_open() failed: %s\n",
			tp_category, tp_name,
			libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
		return err;
	}
	return pfd;
}
struct bpf_link *bpf_program__attach_tracepoint_opts(const struct bpf_program *prog,
						     const char *tp_category,
						     const char *tp_name,
						     const struct bpf_tracepoint_opts *opts)
{
	DECLARE_LIBBPF_OPTS(bpf_perf_event_opts, pe_opts);
	char errmsg[STRERR_BUFSIZE];
	struct bpf_link *link;
	int pfd, err;

	if (!OPTS_VALID(opts, bpf_tracepoint_opts))
		return libbpf_err_ptr(-EINVAL);

	pe_opts.bpf_cookie = OPTS_GET(opts, bpf_cookie, 0);

	pfd = perf_event_open_tracepoint(tp_category, tp_name);
	if (pfd < 0) {
		pr_warn("prog '%s': failed to create tracepoint '%s/%s' perf event: %s\n",
			prog->name, tp_category, tp_name,
			libbpf_strerror_r(pfd, errmsg, sizeof(errmsg)));
		return libbpf_err_ptr(pfd);
	}
	link = bpf_program__attach_perf_event_opts(prog, pfd, &pe_opts);
	err = libbpf_get_error(link);
	if (err) {
		close(pfd);
		pr_warn("prog '%s': failed to attach to tracepoint '%s/%s': %s\n",
			prog->name, tp_category, tp_name,
			libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
		return libbpf_err_ptr(err);
	}
	return link;
}
struct bpf_link *bpf_program__attach_tracepoint(const struct bpf_program *prog,
						const char *tp_category,
						const char *tp_name)
{
	return bpf_program__attach_tracepoint_opts(prog, tp_category, tp_name, NULL);
}
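/* Usage sketch (illustrative): category and name map to
 * <tracefs>/events/<category>/<name> as resolved above.
 *
 *	struct bpf_link *link;
 *
 *	link = bpf_program__attach_tracepoint(prog, "sched", "sched_switch");
 */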
static int attach_tp(const struct bpf_program *prog, long cookie, struct bpf_link **link)
{
	char *sec_name, *tp_cat, *tp_name;

	*link = NULL;

	/* no auto-attach for SEC("tp") or SEC("tracepoint") */
	if (strcmp(prog->sec_name, "tp") == 0 || strcmp(prog->sec_name, "tracepoint") == 0)
		return 0;

	sec_name = strdup(prog->sec_name);
	if (!sec_name)
		return -ENOMEM;

	/* extract "tp/<category>/<name>" or "tracepoint/<category>/<name>" */
	if (str_has_pfx(prog->sec_name, "tp/"))
		tp_cat = sec_name + sizeof("tp/") - 1;
	else
		tp_cat = sec_name + sizeof("tracepoint/") - 1;
	tp_name = strchr(tp_cat, '/');
	if (!tp_name) {
		free(sec_name);
		return -EINVAL;
	}
	*tp_name = '\0';
	tp_name++;

	*link = bpf_program__attach_tracepoint(prog, tp_cat, tp_name);
	free(sec_name);
	return libbpf_get_error(*link);
}
struct bpf_link *bpf_program__attach_raw_tracepoint(const struct bpf_program *prog,
						    const char *tp_name)
{
	char errmsg[STRERR_BUFSIZE];
	struct bpf_link *link;
	int prog_fd, pfd;

	prog_fd = bpf_program__fd(prog);
	if (prog_fd < 0) {
		pr_warn("prog '%s': can't attach before loaded\n", prog->name);
		return libbpf_err_ptr(-EINVAL);
	}

	link = calloc(1, sizeof(*link));
	if (!link)
		return libbpf_err_ptr(-ENOMEM);
	link->detach = &bpf_link__detach_fd;

	pfd = bpf_raw_tracepoint_open(tp_name, prog_fd);
	if (pfd < 0) {
		pfd = -errno;
		free(link);
		pr_warn("prog '%s': failed to attach to raw tracepoint '%s': %s\n",
			prog->name, tp_name, libbpf_strerror_r(pfd, errmsg, sizeof(errmsg)));
		return libbpf_err_ptr(pfd);
	}
	link->fd = pfd;
	return link;
}
static int attach_raw_tp(const struct bpf_program *prog, long cookie, struct bpf_link **link)
{
	static const char *const prefixes[] = {
		"raw_tp",
		"raw_tracepoint",
		"raw_tp.w",
		"raw_tracepoint.w",
	};
	size_t i;
	const char *tp_name = NULL;

	*link = NULL;

	for (i = 0; i < ARRAY_SIZE(prefixes); i++) {
		size_t pfx_len;

		if (!str_has_pfx(prog->sec_name, prefixes[i]))
			continue;

		pfx_len = strlen(prefixes[i]);
		/* no auto-attach case of, e.g., SEC("raw_tp") */
		if (prog->sec_name[pfx_len] == '\0')
			return 0;

		if (prog->sec_name[pfx_len] != '/')
			continue;

		tp_name = prog->sec_name + pfx_len + 1;
		break;
	}

	if (!tp_name) {
		pr_warn("prog '%s': invalid section name '%s'\n",
			prog->name, prog->sec_name);
		return -EINVAL;
	}

	*link = bpf_program__attach_raw_tracepoint(prog, tp_name);
	return libbpf_get_error(*link);
}
/* Common logic for all BPF program types that attach to a btf_id */
static struct bpf_link *bpf_program__attach_btf_id(const struct bpf_program *prog,
						   const struct bpf_trace_opts *opts)
{
	LIBBPF_OPTS(bpf_link_create_opts, link_opts);
	char errmsg[STRERR_BUFSIZE];
	struct bpf_link *link;
	int prog_fd, pfd;

	if (!OPTS_VALID(opts, bpf_trace_opts))
		return libbpf_err_ptr(-EINVAL);

	prog_fd = bpf_program__fd(prog);
	if (prog_fd < 0) {
		pr_warn("prog '%s': can't attach before loaded\n", prog->name);
		return libbpf_err_ptr(-EINVAL);
	}

	link = calloc(1, sizeof(*link));
	if (!link)
		return libbpf_err_ptr(-ENOMEM);
	link->detach = &bpf_link__detach_fd;

	/* libbpf is smart enough to redirect to BPF_RAW_TRACEPOINT_OPEN on old kernels */
	link_opts.tracing.cookie = OPTS_GET(opts, cookie, 0);
	pfd = bpf_link_create(prog_fd, 0, bpf_program__expected_attach_type(prog), &link_opts);
	if (pfd < 0) {
		pfd = -errno;
		free(link);
		pr_warn("prog '%s': failed to attach: %s\n",
			prog->name, libbpf_strerror_r(pfd, errmsg, sizeof(errmsg)));
		return libbpf_err_ptr(pfd);
	}
	link->fd = pfd;
	return link;
}

struct bpf_link *bpf_program__attach_trace(const struct bpf_program *prog)
{
	return bpf_program__attach_btf_id(prog, NULL);
}

struct bpf_link *bpf_program__attach_trace_opts(const struct bpf_program *prog,
						const struct bpf_trace_opts *opts)
{
	return bpf_program__attach_btf_id(prog, opts);
}

struct bpf_link *bpf_program__attach_lsm(const struct bpf_program *prog)
{
	return bpf_program__attach_btf_id(prog, NULL);
}

static int attach_trace(const struct bpf_program *prog, long cookie, struct bpf_link **link)
{
	*link = bpf_program__attach_trace(prog);
	return libbpf_get_error(*link);
}

static int attach_lsm(const struct bpf_program *prog, long cookie, struct bpf_link **link)
{
	*link = bpf_program__attach_lsm(prog);
	return libbpf_get_error(*link);
}
static struct bpf_link *
bpf_program_attach_fd(const struct bpf_program *prog,
		      int target_fd, const char *target_name,
		      const struct bpf_link_create_opts *opts)
{
	enum bpf_attach_type attach_type;
	char errmsg[STRERR_BUFSIZE];
	struct bpf_link *link;
	int prog_fd, link_fd;

	prog_fd = bpf_program__fd(prog);
	if (prog_fd < 0) {
		pr_warn("prog '%s': can't attach before loaded\n", prog->name);
		return libbpf_err_ptr(-EINVAL);
	}

	link = calloc(1, sizeof(*link));
	if (!link)
		return libbpf_err_ptr(-ENOMEM);
	link->detach = &bpf_link__detach_fd;

	attach_type = bpf_program__expected_attach_type(prog);
	link_fd = bpf_link_create(prog_fd, target_fd, attach_type, opts);
	if (link_fd < 0) {
		link_fd = -errno;
		free(link);
		pr_warn("prog '%s': failed to attach to %s: %s\n",
			prog->name, target_name,
			libbpf_strerror_r(link_fd, errmsg, sizeof(errmsg)));
		return libbpf_err_ptr(link_fd);
	}
	link->fd = link_fd;
	return link;
}

struct bpf_link *
bpf_program__attach_cgroup(const struct bpf_program *prog, int cgroup_fd)
{
	return bpf_program_attach_fd(prog, cgroup_fd, "cgroup", NULL);
}

struct bpf_link *
bpf_program__attach_netns(const struct bpf_program *prog, int netns_fd)
{
	return bpf_program_attach_fd(prog, netns_fd, "netns", NULL);
}

struct bpf_link *bpf_program__attach_xdp(const struct bpf_program *prog, int ifindex)
{
	/* target_fd/target_ifindex use the same field in LINK_CREATE */
	return bpf_program_attach_fd(prog, ifindex, "xdp", NULL);
}
struct bpf_link *
bpf_program__attach_tcx(const struct bpf_program *prog, int ifindex,
			const struct bpf_tcx_opts *opts)
{
	LIBBPF_OPTS(bpf_link_create_opts, link_create_opts);
	__u32 relative_id;
	int relative_fd;

	if (!OPTS_VALID(opts, bpf_tcx_opts))
		return libbpf_err_ptr(-EINVAL);

	relative_id = OPTS_GET(opts, relative_id, 0);
	relative_fd = OPTS_GET(opts, relative_fd, 0);

	/* validate we don't have unexpected combinations of non-zero fields */
	if (!ifindex) {
		pr_warn("prog '%s': target netdevice ifindex cannot be zero\n",
			prog->name);
		return libbpf_err_ptr(-EINVAL);
	}
	if (relative_fd && relative_id) {
		pr_warn("prog '%s': relative_fd and relative_id cannot be set at the same time\n",
			prog->name);
		return libbpf_err_ptr(-EINVAL);
	}

	link_create_opts.tcx.expected_revision = OPTS_GET(opts, expected_revision, 0);
	link_create_opts.tcx.relative_fd = relative_fd;
	link_create_opts.tcx.relative_id = relative_id;
	link_create_opts.flags = OPTS_GET(opts, flags, 0);

	/* target_fd/target_ifindex use the same field in LINK_CREATE */
	return bpf_program_attach_fd(prog, ifindex, "tcx", &link_create_opts);
}

struct bpf_link *
bpf_program__attach_netkit(const struct bpf_program *prog, int ifindex,
			   const struct bpf_netkit_opts *opts)
{
	LIBBPF_OPTS(bpf_link_create_opts, link_create_opts);
	__u32 relative_id;
	int relative_fd;

	if (!OPTS_VALID(opts, bpf_netkit_opts))
		return libbpf_err_ptr(-EINVAL);

	relative_id = OPTS_GET(opts, relative_id, 0);
	relative_fd = OPTS_GET(opts, relative_fd, 0);

	/* validate we don't have unexpected combinations of non-zero fields */
	if (!ifindex) {
		pr_warn("prog '%s': target netdevice ifindex cannot be zero\n",
			prog->name);
		return libbpf_err_ptr(-EINVAL);
	}
	if (relative_fd && relative_id) {
		pr_warn("prog '%s': relative_fd and relative_id cannot be set at the same time\n",
			prog->name);
		return libbpf_err_ptr(-EINVAL);
	}

	link_create_opts.netkit.expected_revision = OPTS_GET(opts, expected_revision, 0);
	link_create_opts.netkit.relative_fd = relative_fd;
	link_create_opts.netkit.relative_id = relative_id;
	link_create_opts.flags = OPTS_GET(opts, flags, 0);

	return bpf_program_attach_fd(prog, ifindex, "netkit", &link_create_opts);
}
struct bpf_link *bpf_program__attach_freplace(const struct bpf_program *prog,
					      int target_fd,
					      const char *attach_func_name)
{
	if (!!target_fd != !!attach_func_name) {
		pr_warn("prog '%s': supply none or both of target_fd and attach_func_name\n",
			prog->name);
		return libbpf_err_ptr(-EINVAL);
	}

	if (prog->type != BPF_PROG_TYPE_EXT) {
		pr_warn("prog '%s': only BPF_PROG_TYPE_EXT can attach as freplace",
			prog->name);
		return libbpf_err_ptr(-EINVAL);
	}

	if (target_fd) {
		LIBBPF_OPTS(bpf_link_create_opts, target_opts);
		int btf_id;

		btf_id = libbpf_find_prog_btf_id(attach_func_name, target_fd);
		if (btf_id < 0)
			return libbpf_err_ptr(btf_id);

		target_opts.target_btf_id = btf_id;

		return bpf_program_attach_fd(prog, target_fd, "freplace",
					     &target_opts);
	} else {
		/* no target, so use raw_tracepoint_open for compatibility
		 * with old kernels
		 */
		return bpf_program__attach_trace(prog);
	}
}
struct bpf_link *
bpf_program__attach_iter(const struct bpf_program *prog,
			 const struct bpf_iter_attach_opts *opts)
{
	DECLARE_LIBBPF_OPTS(bpf_link_create_opts, link_create_opts);
	char errmsg[STRERR_BUFSIZE];
	struct bpf_link *link;
	int prog_fd, link_fd;
	__u32 target_fd = 0;

	if (!OPTS_VALID(opts, bpf_iter_attach_opts))
		return libbpf_err_ptr(-EINVAL);

	link_create_opts.iter_info = OPTS_GET(opts, link_info, (void *)0);
	link_create_opts.iter_info_len = OPTS_GET(opts, link_info_len, 0);

	prog_fd = bpf_program__fd(prog);
	if (prog_fd < 0) {
		pr_warn("prog '%s': can't attach before loaded\n", prog->name);
		return libbpf_err_ptr(-EINVAL);
	}

	link = calloc(1, sizeof(*link));
	if (!link)
		return libbpf_err_ptr(-ENOMEM);
	link->detach = &bpf_link__detach_fd;

	link_fd = bpf_link_create(prog_fd, target_fd, BPF_TRACE_ITER,
				  &link_create_opts);
	if (link_fd < 0) {
		link_fd = -errno;
		free(link);
		pr_warn("prog '%s': failed to attach to iterator: %s\n",
			prog->name, libbpf_strerror_r(link_fd, errmsg, sizeof(errmsg)));
		return libbpf_err_ptr(link_fd);
	}
	link->fd = link_fd;
	return link;
}

static int attach_iter(const struct bpf_program *prog, long cookie, struct bpf_link **link)
{
	*link = bpf_program__attach_iter(prog, NULL);
	return libbpf_get_error(*link);
}
struct bpf_link *bpf_program__attach_netfilter(const struct bpf_program *prog,
					       const struct bpf_netfilter_opts *opts)
{
	LIBBPF_OPTS(bpf_link_create_opts, lopts);
	struct bpf_link *link;
	int prog_fd, link_fd;

	if (!OPTS_VALID(opts, bpf_netfilter_opts))
		return libbpf_err_ptr(-EINVAL);

	prog_fd = bpf_program__fd(prog);
	if (prog_fd < 0) {
		pr_warn("prog '%s': can't attach before loaded\n", prog->name);
		return libbpf_err_ptr(-EINVAL);
	}

	link = calloc(1, sizeof(*link));
	if (!link)
		return libbpf_err_ptr(-ENOMEM);

	link->detach = &bpf_link__detach_fd;

	lopts.netfilter.pf = OPTS_GET(opts, pf, 0);
	lopts.netfilter.hooknum = OPTS_GET(opts, hooknum, 0);
	lopts.netfilter.priority = OPTS_GET(opts, priority, 0);
	lopts.netfilter.flags = OPTS_GET(opts, flags, 0);

	link_fd = bpf_link_create(prog_fd, 0, BPF_NETFILTER, &lopts);
	if (link_fd < 0) {
		char errmsg[STRERR_BUFSIZE];

		link_fd = -errno;
		free(link);
		pr_warn("prog '%s': failed to attach to netfilter: %s\n",
			prog->name, libbpf_strerror_r(link_fd, errmsg, sizeof(errmsg)));
		return libbpf_err_ptr(link_fd);
	}
	link->fd = link_fd;

	return link;
}

struct bpf_link *bpf_program__attach(const struct bpf_program *prog)
{
	struct bpf_link *link = NULL;
	int err;

	if (!prog->sec_def || !prog->sec_def->prog_attach_fn)
		return libbpf_err_ptr(-EOPNOTSUPP);

	err = prog->sec_def->prog_attach_fn(prog, prog->sec_def->cookie, &link);
	if (err)
		return libbpf_err_ptr(err);

	/* When calling bpf_program__attach() explicitly, auto-attach support
	 * is expected to work, so NULL returned link is considered an error.
	 * This is different for skeleton's attach, see comment in
	 * bpf_object__attach_skeleton().
	 */
	if (!link)
		return libbpf_err_ptr(-EOPNOTSUPP);

	return link;
}
struct bpf_link_struct_ops {
	struct bpf_link link;
	int map_fd;
};

static int bpf_link__detach_struct_ops(struct bpf_link *link)
{
	struct bpf_link_struct_ops *st_link;
	__u32 zero = 0;

	st_link = container_of(link, struct bpf_link_struct_ops, link);

	if (st_link->map_fd < 0)
		/* w/o a real link */
		return bpf_map_delete_elem(link->fd, &zero);

	return close(link->fd);
}

struct bpf_link *bpf_map__attach_struct_ops(const struct bpf_map *map)
{
	struct bpf_link_struct_ops *link;
	__u32 zero = 0;
	int err, fd;

	if (!bpf_map__is_struct_ops(map) || map->fd == -1)
		return libbpf_err_ptr(-EINVAL);

	link = calloc(1, sizeof(*link));
	if (!link)
		return libbpf_err_ptr(-EINVAL);

	/* kern_vdata should be prepared during the loading phase. */
	err = bpf_map_update_elem(map->fd, &zero, map->st_ops->kern_vdata, 0);
	/* It can be EBUSY if the map has been used to create or
	 * update a link before. We don't allow updating the value of
	 * a struct_ops once it is set. That ensures that the value
	 * never changed. So, it is safe to skip EBUSY.
	 */
	if (err && (!(map->def.map_flags & BPF_F_LINK) || err != -EBUSY)) {
		free(link);
		return libbpf_err_ptr(err);
	}

	link->link.detach = bpf_link__detach_struct_ops;

	if (!(map->def.map_flags & BPF_F_LINK)) {
		/* w/o a real link */
		link->link.fd = map->fd;
		link->map_fd = -1;
		return &link->link;
	}

	fd = bpf_link_create(map->fd, 0, BPF_STRUCT_OPS, NULL);
	if (fd < 0) {
		free(link);
		return libbpf_err_ptr(fd);
	}

	link->link.fd = fd;
	link->map_fd = map->fd;

	return &link->link;
}
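/* Usage sketch (illustrative): "my_ops" names a hypothetical struct_ops map
 * declared in the BPF object; the map must already be loaded.
 *
 *	struct bpf_map *map = bpf_object__find_map_by_name(obj, "my_ops");
 *	struct bpf_link *link = bpf_map__attach_struct_ops(map);
 */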
/*
 * Swap the back struct_ops of a link with a new struct_ops map.
 */
int bpf_link__update_map(struct bpf_link *link, const struct bpf_map *map)
{
	struct bpf_link_struct_ops *st_ops_link;
	__u32 zero = 0;
	int err;

	if (!bpf_map__is_struct_ops(map) || !map_is_created(map))
		return -EINVAL;

	st_ops_link = container_of(link, struct bpf_link_struct_ops, link);
	/* Ensure the type of a link is correct */
	if (st_ops_link->map_fd < 0)
		return -EINVAL;

	err = bpf_map_update_elem(map->fd, &zero, map->st_ops->kern_vdata, 0);
	/* It can be EBUSY if the map has been used to create or
	 * update a link before. We don't allow updating the value of
	 * a struct_ops once it is set. That ensures that the value
	 * never changed. So, it is safe to skip EBUSY.
	 */
	if (err && err != -EBUSY)
		return err;

	err = bpf_link_update(link->fd, map->fd, NULL);
	if (err < 0)
		return err;

	st_ops_link->map_fd = map->fd;

	return 0;
}
typedef enum bpf_perf_event_ret (*bpf_perf_event_print_t)(struct perf_event_header *hdr,
							   void *private_data);

static enum bpf_perf_event_ret
perf_event_read_simple(void *mmap_mem, size_t mmap_size, size_t page_size,
		       void **copy_mem, size_t *copy_size,
		       bpf_perf_event_print_t fn, void *private_data)
{
	struct perf_event_mmap_page *header = mmap_mem;
	__u64 data_head = ring_buffer_read_head(header);
	__u64 data_tail = header->data_tail;
	void *base = ((__u8 *)header) + page_size;
	int ret = LIBBPF_PERF_EVENT_CONT;
	struct perf_event_header *ehdr;
	size_t ehdr_size;

	while (data_head != data_tail) {
		ehdr = base + (data_tail & (mmap_size - 1));
		ehdr_size = ehdr->size;

		if (((void *)ehdr) + ehdr_size > base + mmap_size) {
			void *copy_start = ehdr;
			size_t len_first = base + mmap_size - copy_start;
			size_t len_secnd = ehdr_size - len_first;

			if (*copy_size < ehdr_size) {
				free(*copy_mem);
				*copy_mem = malloc(ehdr_size);
				if (!*copy_mem) {
					*copy_size = 0;
					ret = LIBBPF_PERF_EVENT_ERROR;
					break;
				}
				*copy_size = ehdr_size;
			}

			memcpy(*copy_mem, copy_start, len_first);
			memcpy(*copy_mem + len_first, base, len_secnd);
			ehdr = *copy_mem;
		}

		ret = fn(ehdr, private_data);
		data_tail += ehdr_size;
		if (ret != LIBBPF_PERF_EVENT_CONT)
			break;
	}

	ring_buffer_write_tail(header, data_tail);
	return libbpf_err(ret);
}
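/* Note on the masking above: mmap_size is page_size * page_cnt with page_cnt
 * required to be a power of two, so "data_tail & (mmap_size - 1)" is a cheap
 * modulo into the ring. E.g. with mmap_size = 8 pages * 4096 = 32768 bytes,
 * a monotonically growing data_tail of 40000 maps to ring offset
 * 40000 & 32767 = 7232.
 */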
struct perf_buffer;

struct perf_buffer_params {
	struct perf_event_attr *attr;
	/* if event_cb is specified, it takes precedence */
	perf_buffer_event_fn event_cb;
	/* sample_cb and lost_cb are higher-level common-case callbacks */
	perf_buffer_sample_fn sample_cb;
	perf_buffer_lost_fn lost_cb;
	void *ctx;
	int cpu_cnt;
	int *cpus;
	int *map_keys;
};

struct perf_cpu_buf {
	struct perf_buffer *pb;
	void *base; /* mmap()'ed memory */
	void *buf; /* for reconstructing segmented data */
	size_t buf_size;
	int fd;
	int cpu;
	int map_key;
};

struct perf_buffer {
	perf_buffer_event_fn event_cb;
	perf_buffer_sample_fn sample_cb;
	perf_buffer_lost_fn lost_cb;
	void *ctx; /* passed into callbacks */

	size_t page_size;
	size_t mmap_size;
	struct perf_cpu_buf **cpu_bufs;
	struct epoll_event *events;
	int cpu_cnt; /* number of allocated CPU buffers */
	int epoll_fd; /* perf event FD */
	int map_fd; /* BPF_MAP_TYPE_PERF_EVENT_ARRAY BPF map FD */
};

static void perf_buffer__free_cpu_buf(struct perf_buffer *pb,
				      struct perf_cpu_buf *cpu_buf)
{
	if (!cpu_buf)
		return;
	if (cpu_buf->base &&
	    munmap(cpu_buf->base, pb->mmap_size + pb->page_size))
		pr_warn("failed to munmap cpu_buf #%d\n", cpu_buf->cpu);
	if (cpu_buf->fd >= 0) {
		ioctl(cpu_buf->fd, PERF_EVENT_IOC_DISABLE, 0);
		close(cpu_buf->fd);
	}
	free(cpu_buf->buf);
	free(cpu_buf);
}

void perf_buffer__free(struct perf_buffer *pb)
{
	int i;

	if (IS_ERR_OR_NULL(pb))
		return;
	if (pb->cpu_bufs) {
		for (i = 0; i < pb->cpu_cnt; i++) {
			struct perf_cpu_buf *cpu_buf = pb->cpu_bufs[i];

			if (!cpu_buf)
				continue;

			bpf_map_delete_elem(pb->map_fd, &cpu_buf->map_key);
			perf_buffer__free_cpu_buf(pb, cpu_buf);
		}
		free(pb->cpu_bufs);
	}
	if (pb->epoll_fd >= 0)
		close(pb->epoll_fd);
	free(pb->events);
	free(pb);
}

static struct perf_cpu_buf *
perf_buffer__open_cpu_buf(struct perf_buffer *pb, struct perf_event_attr *attr,
			  int cpu, int map_key)
{
	struct perf_cpu_buf *cpu_buf;
	char msg[STRERR_BUFSIZE];
	int err;

	cpu_buf = calloc(1, sizeof(*cpu_buf));
	if (!cpu_buf)
		return ERR_PTR(-ENOMEM);

	cpu_buf->pb = pb;
	cpu_buf->cpu = cpu;
	cpu_buf->map_key = map_key;

	cpu_buf->fd = syscall(__NR_perf_event_open, attr, -1 /* pid */, cpu,
			      -1, PERF_FLAG_FD_CLOEXEC);
	if (cpu_buf->fd < 0) {
		err = -errno;
		pr_warn("failed to open perf buffer event on cpu #%d: %s\n",
			cpu, libbpf_strerror_r(err, msg, sizeof(msg)));
		goto error;
	}

	cpu_buf->base = mmap(NULL, pb->mmap_size + pb->page_size,
			     PROT_READ | PROT_WRITE, MAP_SHARED,
			     cpu_buf->fd, 0);
	if (cpu_buf->base == MAP_FAILED) {
		cpu_buf->base = NULL;
		err = -errno;
		pr_warn("failed to mmap perf buffer on cpu #%d: %s\n",
			cpu, libbpf_strerror_r(err, msg, sizeof(msg)));
		goto error;
	}

	if (ioctl(cpu_buf->fd, PERF_EVENT_IOC_ENABLE, 0) < 0) {
		err = -errno;
		pr_warn("failed to enable perf buffer event on cpu #%d: %s\n",
			cpu, libbpf_strerror_r(err, msg, sizeof(msg)));
		goto error;
	}

	return cpu_buf;

error:
	perf_buffer__free_cpu_buf(pb, cpu_buf);
	return (struct perf_cpu_buf *)ERR_PTR(err);
}
static struct perf_buffer *__perf_buffer__new(int map_fd, size_t page_cnt,
					      struct perf_buffer_params *p);

struct perf_buffer *perf_buffer__new(int map_fd, size_t page_cnt,
				     perf_buffer_sample_fn sample_cb,
				     perf_buffer_lost_fn lost_cb,
				     void *ctx,
				     const struct perf_buffer_opts *opts)
{
	const size_t attr_sz = sizeof(struct perf_event_attr);
	struct perf_buffer_params p = {};
	struct perf_event_attr attr;
	__u32 sample_period;

	if (!OPTS_VALID(opts, perf_buffer_opts))
		return libbpf_err_ptr(-EINVAL);

	sample_period = OPTS_GET(opts, sample_period, 1);
	if (!sample_period)
		sample_period = 1;

	memset(&attr, 0, attr_sz);
	attr.size = attr_sz;
	attr.config = PERF_COUNT_SW_BPF_OUTPUT;
	attr.type = PERF_TYPE_SOFTWARE;
	attr.sample_type = PERF_SAMPLE_RAW;
	attr.sample_period = sample_period;
	attr.wakeup_events = sample_period;

	p.attr = &attr;
	p.sample_cb = sample_cb;
	p.lost_cb = lost_cb;
	p.ctx = ctx;

	return libbpf_ptr(__perf_buffer__new(map_fd, page_cnt, &p));
}
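/* Usage sketch (illustrative): "events" is a hypothetical
 * BPF_MAP_TYPE_PERF_EVENT_ARRAY map; page_cnt must be a power of two.
 *
 *	static void on_sample(void *ctx, int cpu, void *data, __u32 size)
 *	{
 *		// consume one BPF-emitted record
 *	}
 *
 *	struct perf_buffer *pb;
 *
 *	pb = perf_buffer__new(bpf_map__fd(events), 8, on_sample, NULL, NULL, NULL);
 *	while (perf_buffer__poll(pb, 100) >= 0)
 *		;
 *	perf_buffer__free(pb);
 */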
struct perf_buffer *perf_buffer__new_raw(int map_fd, size_t page_cnt,
					 struct perf_event_attr *attr,
					 perf_buffer_event_fn event_cb, void *ctx,
					 const struct perf_buffer_raw_opts *opts)
{
	struct perf_buffer_params p = {};

	if (!attr)
		return libbpf_err_ptr(-EINVAL);

	if (!OPTS_VALID(opts, perf_buffer_raw_opts))
		return libbpf_err_ptr(-EINVAL);

	p.attr = attr;
	p.event_cb = event_cb;
	p.ctx = ctx;
	p.cpu_cnt = OPTS_GET(opts, cpu_cnt, 0);
	p.cpus = OPTS_GET(opts, cpus, NULL);
	p.map_keys = OPTS_GET(opts, map_keys, NULL);

	return libbpf_ptr(__perf_buffer__new(map_fd, page_cnt, &p));
}
static struct perf_buffer *__perf_buffer__new(int map_fd, size_t page_cnt,
					      struct perf_buffer_params *p)
{
	const char *online_cpus_file = "/sys/devices/system/cpu/online";
	struct bpf_map_info map;
	char msg[STRERR_BUFSIZE];
	struct perf_buffer *pb;
	bool *online = NULL;
	__u32 map_info_len;
	int err, i, j, n;

	if (page_cnt == 0 || (page_cnt & (page_cnt - 1))) {
		pr_warn("page count should be power of two, but is %zu\n",
			page_cnt);
		return ERR_PTR(-EINVAL);
	}

	/* best-effort sanity checks */
	memset(&map, 0, sizeof(map));
	map_info_len = sizeof(map);
	err = bpf_map_get_info_by_fd(map_fd, &map, &map_info_len);
	if (err) {
		err = -errno;
		/* if BPF_OBJ_GET_INFO_BY_FD is supported, will return
		 * -EBADFD, -EFAULT, or -E2BIG on real error
		 */
		if (err != -EINVAL) {
			pr_warn("failed to get map info for map FD %d: %s\n",
				map_fd, libbpf_strerror_r(err, msg, sizeof(msg)));
			return ERR_PTR(err);
		}
		pr_debug("failed to get map info for FD %d; API not supported? Ignoring...\n",
			 map_fd);
	} else {
		if (map.type != BPF_MAP_TYPE_PERF_EVENT_ARRAY) {
			pr_warn("map '%s' should be BPF_MAP_TYPE_PERF_EVENT_ARRAY\n",
				map.name);
			return ERR_PTR(-EINVAL);
		}
	}

	pb = calloc(1, sizeof(*pb));
	if (!pb)
		return ERR_PTR(-ENOMEM);

	pb->event_cb = p->event_cb;
	pb->sample_cb = p->sample_cb;
	pb->lost_cb = p->lost_cb;
	pb->ctx = p->ctx;

	pb->page_size = getpagesize();
	pb->mmap_size = pb->page_size * page_cnt;
	pb->map_fd = map_fd;

	pb->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
	if (pb->epoll_fd < 0) {
		err = -errno;
		pr_warn("failed to create epoll instance: %s\n",
			libbpf_strerror_r(err, msg, sizeof(msg)));
		goto error;
	}

	if (p->cpu_cnt > 0) {
		pb->cpu_cnt = p->cpu_cnt;
	} else {
		pb->cpu_cnt = libbpf_num_possible_cpus();
		if (pb->cpu_cnt < 0) {
			err = pb->cpu_cnt;
			goto error;
		}
		if (map.max_entries && map.max_entries < pb->cpu_cnt)
			pb->cpu_cnt = map.max_entries;
	}

	pb->events = calloc(pb->cpu_cnt, sizeof(*pb->events));
	if (!pb->events) {
		err = -ENOMEM;
		pr_warn("failed to allocate events: out of memory\n");
		goto error;
	}
	pb->cpu_bufs = calloc(pb->cpu_cnt, sizeof(*pb->cpu_bufs));
	if (!pb->cpu_bufs) {
		err = -ENOMEM;
		pr_warn("failed to allocate buffers: out of memory\n");
		goto error;
	}

	err = parse_cpu_mask_file(online_cpus_file, &online, &n);
	if (err) {
		pr_warn("failed to get online CPU mask: %d\n", err);
		goto error;
	}

	for (i = 0, j = 0; i < pb->cpu_cnt; i++) {
		struct perf_cpu_buf *cpu_buf;
		int cpu, map_key;

		cpu = p->cpu_cnt > 0 ? p->cpus[i] : i;
		map_key = p->cpu_cnt > 0 ? p->map_keys[i] : i;

		/* in case user didn't explicitly requested particular CPUs to
		 * be attached to, skip offline/not present CPUs
		 */
		if (p->cpu_cnt <= 0 && (cpu >= n || !online[cpu]))
			continue;

		cpu_buf = perf_buffer__open_cpu_buf(pb, p->attr, cpu, map_key);
		if (IS_ERR(cpu_buf)) {
			err = PTR_ERR(cpu_buf);
			goto error;
		}

		pb->cpu_bufs[j] = cpu_buf;

		err = bpf_map_update_elem(pb->map_fd, &map_key,
					  &cpu_buf->fd, 0);
		if (err) {
			err = -errno;
			pr_warn("failed to set cpu #%d, key %d -> perf FD %d: %s\n",
				cpu, map_key, cpu_buf->fd,
				libbpf_strerror_r(err, msg, sizeof(msg)));
			goto error;
		}

		pb->events[j].events = EPOLLIN;
		pb->events[j].data.ptr = cpu_buf;
		if (epoll_ctl(pb->epoll_fd, EPOLL_CTL_ADD, cpu_buf->fd,
			      &pb->events[j]) < 0) {
			err = -errno;
			pr_warn("failed to epoll_ctl cpu #%d perf FD %d: %s\n",
				cpu, cpu_buf->fd,
				libbpf_strerror_r(err, msg, sizeof(msg)));
			goto error;
		}
		j++;
	}
	pb->cpu_cnt = j;
	free(online);

	return pb;

error:
	free(online);
	if (pb)
		perf_buffer__free(pb);
	return ERR_PTR(err);
}
struct perf_sample_raw {
	struct perf_event_header header;
	uint32_t size;
	char data[];
};

struct perf_sample_lost {
	struct perf_event_header header;
	uint64_t id;
	uint64_t lost;
	uint64_t sample_id;
};

static enum bpf_perf_event_ret
perf_buffer__process_record(struct perf_event_header *e, void *ctx)
{
	struct perf_cpu_buf *cpu_buf = ctx;
	struct perf_buffer *pb = cpu_buf->pb;
	void *data = e;

	/* user wants full control over parsing perf event */
	if (pb->event_cb)
		return pb->event_cb(pb->ctx, cpu_buf->cpu, e);

	switch (e->type) {
	case PERF_RECORD_SAMPLE: {
		struct perf_sample_raw *s = data;

		if (pb->sample_cb)
			pb->sample_cb(pb->ctx, cpu_buf->cpu, s->data, s->size);
		break;
	}
	case PERF_RECORD_LOST: {
		struct perf_sample_lost *s = data;

		if (pb->lost_cb)
			pb->lost_cb(pb->ctx, cpu_buf->cpu, s->lost);
		break;
	}
	default:
		pr_warn("unknown perf sample type %d\n", e->type);
		return LIBBPF_PERF_EVENT_ERROR;
	}
	return LIBBPF_PERF_EVENT_CONT;
}

static int perf_buffer__process_records(struct perf_buffer *pb,
					struct perf_cpu_buf *cpu_buf)
{
	enum bpf_perf_event_ret ret;

	ret = perf_event_read_simple(cpu_buf->base, pb->mmap_size,
				     pb->page_size, &cpu_buf->buf,
				     &cpu_buf->buf_size,
				     perf_buffer__process_record, cpu_buf);
	if (ret != LIBBPF_PERF_EVENT_CONT)
		return ret;
	return 0;
}

int perf_buffer__epoll_fd(const struct perf_buffer *pb)
{
	return pb->epoll_fd;
}
int perf_buffer__poll(struct perf_buffer *pb, int timeout_ms)
{
	int i, cnt, err;

	cnt = epoll_wait(pb->epoll_fd, pb->events, pb->cpu_cnt, timeout_ms);
	if (cnt < 0)
		return -errno;

	for (i = 0; i < cnt; i++) {
		struct perf_cpu_buf *cpu_buf = pb->events[i].data.ptr;

		err = perf_buffer__process_records(pb, cpu_buf);
		if (err) {
			pr_warn("error while processing records: %d\n", err);
			return libbpf_err(err);
		}
	}
	return cnt;
}
13093 size_t perf_buffer__buffer_cnt(const struct perf_buffer
*pb
)
13095 return pb
->cpu_cnt
;
13099 * Return perf_event FD of a ring buffer in *buf_idx* slot of
13100 * PERF_EVENT_ARRAY BPF map. This FD can be polled for new data using
13101 * select()/poll()/epoll() Linux syscalls.
13103 int perf_buffer__buffer_fd(const struct perf_buffer
*pb
, size_t buf_idx
)
13105 struct perf_cpu_buf
*cpu_buf
;
13107 if (buf_idx
>= pb
->cpu_cnt
)
13108 return libbpf_err(-EINVAL
);
13110 cpu_buf
= pb
->cpu_bufs
[buf_idx
];
13112 return libbpf_err(-ENOENT
);
13114 return cpu_buf
->fd
;
13117 int perf_buffer__buffer(struct perf_buffer
*pb
, int buf_idx
, void **buf
, size_t *buf_size
)
13119 struct perf_cpu_buf
*cpu_buf
;
13121 if (buf_idx
>= pb
->cpu_cnt
)
13122 return libbpf_err(-EINVAL
);
13124 cpu_buf
= pb
->cpu_bufs
[buf_idx
];
13126 return libbpf_err(-ENOENT
);
13128 *buf
= cpu_buf
->base
;
13129 *buf_size
= pb
->mmap_size
;
/*
 * Consume data from perf ring buffer corresponding to slot *buf_idx* in
 * PERF_EVENT_ARRAY BPF map without waiting/polling. If there is no data to
 * consume, do nothing and return success.
 * Returns:
 *   - 0 on success;
 *   - <0 on failure.
 */
int perf_buffer__consume_buffer(struct perf_buffer *pb, size_t buf_idx)
{
	struct perf_cpu_buf *cpu_buf;

	if (buf_idx >= pb->cpu_cnt)
		return libbpf_err(-EINVAL);

	cpu_buf = pb->cpu_bufs[buf_idx];
	if (!cpu_buf)
		return libbpf_err(-ENOENT);

	return perf_buffer__process_records(pb, cpu_buf);
}
int perf_buffer__consume(struct perf_buffer *pb)
{
	int i, err;

	for (i = 0; i < pb->cpu_cnt; i++) {
		struct perf_cpu_buf *cpu_buf = pb->cpu_bufs[i];

		if (!cpu_buf)
			continue;

		err = perf_buffer__process_records(pb, cpu_buf);
		if (err) {
			pr_warn("perf_buffer: failed to process records in buffer #%d: %d\n", i, err);
			return libbpf_err(err);
		}
	}
	return 0;
}
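/* Usage sketch (illustrative): perf_buffer__consume() drains every ring
 * without blocking, e.g. to flush remaining records before teardown:
 *
 *	perf_buffer__consume(pb);
 *	perf_buffer__free(pb);
 */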
int bpf_program__set_attach_target(struct bpf_program *prog,
				   int attach_prog_fd,
				   const char *attach_func_name)
{
	int btf_obj_fd = 0, btf_id = 0, err;

	if (!prog || attach_prog_fd < 0)
		return libbpf_err(-EINVAL);

	if (prog->obj->loaded)
		return libbpf_err(-EINVAL);

	if (attach_prog_fd && !attach_func_name) {
		/* remember attach_prog_fd and let bpf_program__load() find
		 * BTF ID during the program load
		 */
		prog->attach_prog_fd = attach_prog_fd;
		return 0;
	}

	if (attach_prog_fd) {
		btf_id = libbpf_find_prog_btf_id(attach_func_name,
						 attach_prog_fd);
		if (btf_id < 0)
			return libbpf_err(btf_id);
	} else {
		if (!attach_func_name)
			return libbpf_err(-EINVAL);

		/* load btf_vmlinux, if not yet */
		err = bpf_object__load_vmlinux_btf(prog->obj, true);
		if (err)
			return libbpf_err(err);
		err = find_kernel_btf_id(prog->obj, attach_func_name,
					 prog->expected_attach_type,
					 &btf_obj_fd, &btf_id);
		if (err)
			return libbpf_err(err);
	}

	prog->attach_btf_id = btf_id;
	prog->attach_btf_obj_fd = btf_obj_fd;
	prog->attach_prog_fd = attach_prog_fd;
	return 0;
}
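/* Usage sketch (illustrative, hypothetical object and function names): for
 * an freplace program the target must be set between open and load;
 * target_fd is assumed to come from elsewhere, e.g.
 * bpf_prog_get_fd_by_id():
 *
 *	struct bpf_object *obj = bpf_object__open_file("prog.bpf.o", NULL);
 *	struct bpf_program *prog;
 *
 *	prog = bpf_object__find_program_by_name(obj, "my_replacement");
 *	err = bpf_program__set_attach_target(prog, target_fd, "target_func");
 *	if (!err)
 *		err = bpf_object__load(obj);
 */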
int parse_cpu_mask_str(const char *s, bool **mask, int *mask_sz)
{
	int err = 0, n, len, start, end = -1;
	bool *tmp;

	*mask = NULL;
	*mask_sz = 0;

	/* Each sub string separated by ',' has format \d+-\d+ or \d+ */
	while (*s) {
		if (*s == ',' || *s == '\n') {
			s++;
			continue;
		}
		n = sscanf(s, "%d%n-%d%n", &start, &len, &end, &len);
		if (n <= 0 || n > 2) {
			pr_warn("Failed to get CPU range %s: %d\n", s, n);
			err = -EINVAL;
			goto cleanup;
		} else if (n == 1) {
			end = start;
		}
		if (start < 0 || start > end) {
			pr_warn("Invalid CPU range [%d,%d] in %s\n",
				start, end, s);
			err = -EINVAL;
			goto cleanup;
		}
		tmp = realloc(*mask, end + 1);
		if (!tmp) {
			err = -ENOMEM;
			goto cleanup;
		}
		*mask = tmp;
		memset(tmp + *mask_sz, 0, start - *mask_sz);
		memset(tmp + start, 1, end - start + 1);
		*mask_sz = end + 1;
		s += len;
	}

	if (!*mask_sz) {
		pr_warn("Empty CPU range\n");
		return -EINVAL;
	}

	return 0;

cleanup:
	free(*mask);
	*mask = NULL;
	*mask_sz = 0;
	return err;
}
int parse_cpu_mask_file(const char *fcpu, bool **mask, int *mask_sz)
{
	int fd, err = 0, len;
	char buf[128];

	fd = open(fcpu, O_RDONLY | O_CLOEXEC);
	if (fd < 0) {
		err = -errno;
		pr_warn("Failed to open cpu mask file %s: %d\n", fcpu, err);
		return err;
	}
	len = read(fd, buf, sizeof(buf));
	close(fd);
	if (len <= 0) {
		err = len ? -errno : -EINVAL;
		pr_warn("Failed to read cpu mask from %s: %d\n", fcpu, err);
		return err;
	}
	if (len >= sizeof(buf)) {
		pr_warn("CPU mask is too big in file %s\n", fcpu);
		return -E2BIG;
	}
	buf[len] = '\0';

	return parse_cpu_mask_str(buf, mask, mask_sz);
}
int libbpf_num_possible_cpus(void)
{
	static const char *fcpu = "/sys/devices/system/cpu/possible";
	static int cpus;
	int err, n, i, tmp_cpus;
	bool *mask;

	tmp_cpus = READ_ONCE(cpus);
	if (tmp_cpus > 0)
		return tmp_cpus;

	err = parse_cpu_mask_file(fcpu, &mask, &n);
	if (err)
		return libbpf_err(err);

	tmp_cpus = 0;
	for (i = 0; i < n; i++) {
		if (mask[i])
			tmp_cpus++;
	}
	free(mask);

	WRITE_ONCE(cpus, tmp_cpus);
	return tmp_cpus;
}
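/* Usage sketch (illustrative): the typical use is sizing per-CPU value
 * arrays for BPF_MAP_TYPE_PERCPU_* map lookups; percpu_map_fd and key are
 * hypothetical:
 *
 *	int ncpus = libbpf_num_possible_cpus();
 *	if (ncpus < 0)
 *		return ncpus;
 *
 *	long values[ncpus];
 *
 *	err = bpf_map_lookup_elem(percpu_map_fd, &key, values);
 */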
static int populate_skeleton_maps(const struct bpf_object *obj,
				  struct bpf_map_skeleton *maps,
				  size_t map_cnt)
{
	int i;

	for (i = 0; i < map_cnt; i++) {
		struct bpf_map **map = maps[i].map;
		const char *name = maps[i].name;
		void **mmaped = maps[i].mmaped;

		*map = bpf_object__find_map_by_name(obj, name);
		if (!*map) {
			pr_warn("failed to find skeleton map '%s'\n", name);
			return -ESRCH;
		}

		/* externs shouldn't be pre-setup from user code */
		if (mmaped && (*map)->libbpf_type != LIBBPF_MAP_KCONFIG)
			*mmaped = (*map)->mmaped;
	}
	return 0;
}
static int populate_skeleton_progs(const struct bpf_object *obj,
				   struct bpf_prog_skeleton *progs,
				   size_t prog_cnt)
{
	int i;

	for (i = 0; i < prog_cnt; i++) {
		struct bpf_program **prog = progs[i].prog;
		const char *name = progs[i].name;

		*prog = bpf_object__find_program_by_name(obj, name);
		if (!*prog) {
			pr_warn("failed to find skeleton program '%s'\n", name);
			return -ESRCH;
		}
	}
	return 0;
}
int bpf_object__open_skeleton(struct bpf_object_skeleton *s,
			      const struct bpf_object_open_opts *opts)
{
	DECLARE_LIBBPF_OPTS(bpf_object_open_opts, skel_opts,
		.object_name = s->name,
	);
	struct bpf_object *obj;
	int err;

	/* Attempt to preserve opts->object_name, unless overridden by user
	 * explicitly. Overwriting object name for skeletons is discouraged,
	 * as it breaks global data maps, because they contain object name
	 * prefix as their own map name prefix. When skeleton is generated,
	 * bpftool is making an assumption that this name will stay the same.
	 */
	if (opts) {
		memcpy(&skel_opts, opts, sizeof(*opts));
		if (!opts->object_name)
			skel_opts.object_name = s->name;
	}

	obj = bpf_object__open_mem(s->data, s->data_sz, &skel_opts);
	err = libbpf_get_error(obj);
	if (err) {
		pr_warn("failed to initialize skeleton BPF object '%s': %d\n",
			s->name, err);
		return libbpf_err(err);
	}

	*s->obj = obj;
	err = populate_skeleton_maps(obj, s->maps, s->map_cnt);
	if (err) {
		pr_warn("failed to populate skeleton maps for '%s': %d\n", s->name, err);
		return libbpf_err(err);
	}

	err = populate_skeleton_progs(obj, s->progs, s->prog_cnt);
	if (err) {
		pr_warn("failed to populate skeleton progs for '%s': %d\n", s->name, err);
		return libbpf_err(err);
	}

	return 0;
}
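/* Usage sketch (illustrative): these helpers back the bpftool-generated
 * skeleton API (my_prog_bpf__open() calls bpf_object__open_skeleton();
 * __load/__attach/__destroy map the same way). For a hypothetical
 * my_prog.bpf.c, the generated wrappers reduce the lifecycle to:
 *
 *	struct my_prog_bpf *skel;
 *	int err;
 *
 *	skel = my_prog_bpf__open();
 *	if (!skel)
 *		return -errno;
 *	err = my_prog_bpf__load(skel);
 *	if (!err)
 *		err = my_prog_bpf__attach(skel);
 *	my_prog_bpf__destroy(skel);
 *	return err;
 */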
int bpf_object__open_subskeleton(struct bpf_object_subskeleton *s)
{
	int err, len, var_idx, i;
	const char *var_name;
	const struct bpf_map *map;
	struct btf *btf;
	__u32 map_type_id;
	const struct btf_type *map_type, *var_type;
	const struct bpf_var_skeleton *var_skel;
	struct btf_var_secinfo *var;

	if (!s->obj)
		return libbpf_err(-EINVAL);

	btf = bpf_object__btf(s->obj);
	if (!btf) {
		pr_warn("subskeletons require BTF at runtime (object %s)\n",
			bpf_object__name(s->obj));
		return libbpf_err(-errno);
	}

	err = populate_skeleton_maps(s->obj, s->maps, s->map_cnt);
	if (err) {
		pr_warn("failed to populate subskeleton maps: %d\n", err);
		return libbpf_err(err);
	}

	err = populate_skeleton_progs(s->obj, s->progs, s->prog_cnt);
	if (err) {
		pr_warn("failed to populate subskeleton progs: %d\n", err);
		return libbpf_err(err);
	}

	for (var_idx = 0; var_idx < s->var_cnt; var_idx++) {
		var_skel = &s->vars[var_idx];
		map = *var_skel->map;
		map_type_id = bpf_map__btf_value_type_id(map);
		map_type = btf__type_by_id(btf, map_type_id);

		if (!btf_is_datasec(map_type)) {
			pr_warn("type for map '%1$s' is not a datasec: %2$s\n",
				bpf_map__name(map),
				__btf_kind_str(btf_kind(map_type)));
			return libbpf_err(-EINVAL);
		}

		len = btf_vlen(map_type);
		var = btf_var_secinfos(map_type);
		for (i = 0; i < len; i++, var++) {
			var_type = btf__type_by_id(btf, var->type);
			var_name = btf__name_by_offset(btf, var_type->name_off);
			if (strcmp(var_name, var_skel->name) == 0) {
				*var_skel->addr = map->mmaped + var->offset;
				break;
			}
		}
	}
	return 0;
}
void bpf_object__destroy_subskeleton(struct bpf_object_subskeleton *s)
{
	if (!s)
		return;
	free(s->maps);
	free(s->progs);
	free(s->vars);
	free(s);
}
int bpf_object__load_skeleton(struct bpf_object_skeleton *s)
{
	int i, err;

	err = bpf_object__load(*s->obj);
	if (err) {
		pr_warn("failed to load BPF skeleton '%s': %d\n", s->name, err);
		return libbpf_err(err);
	}

	for (i = 0; i < s->map_cnt; i++) {
		struct bpf_map *map = *s->maps[i].map;
		size_t mmap_sz = bpf_map_mmap_sz(map->def.value_size, map->def.max_entries);
		int prot, map_fd = map->fd;
		void **mmaped = s->maps[i].mmaped;

		if (!mmaped)
			continue;

		if (!(map->def.map_flags & BPF_F_MMAPABLE)) {
			*mmaped = NULL;
			continue;
		}

		if (map->def.map_flags & BPF_F_RDONLY_PROG)
			prot = PROT_READ;
		else
			prot = PROT_READ | PROT_WRITE;

		/* Remap anonymous mmap()-ed "map initialization image" as
		 * a BPF map-backed mmap()-ed memory, but preserving the same
		 * memory address. This will cause kernel to change process'
		 * page table to point to a different piece of kernel memory,
		 * but from userspace point of view memory address (and its
		 * contents, being identical at this point) will stay the
		 * same. This mapping will be released by bpf_object__close()
		 * as per normal clean up procedure, so we don't need to worry
		 * about it from skeleton's clean up perspective.
		 */
		*mmaped = mmap(map->mmaped, mmap_sz, prot, MAP_SHARED | MAP_FIXED, map_fd, 0);
		if (*mmaped == MAP_FAILED) {
			err = -errno;
			*mmaped = NULL;
			pr_warn("failed to re-mmap() map '%s': %d\n",
				bpf_map__name(map), err);
			return libbpf_err(err);
		}
	}

	return 0;
}
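/* Usage sketch (illustrative, hypothetical skeleton): because of the
 * MAP_FIXED re-mmap() above, global-variable pointers handed out by the
 * skeleton stay valid across load and transparently switch from the
 * initialization image to live, kernel-backed map memory:
 *
 *	// before load: writes land in the anonymous init image
 *	skel->bss->my_counter = 42;
 *	err = my_prog_bpf__load(skel);
 *	// after load: the same pointer reads live BPF map memory
 *	printf("%ld\n", skel->bss->my_counter);
 */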
int bpf_object__attach_skeleton(struct bpf_object_skeleton *s)
{
	int i, err;

	for (i = 0; i < s->prog_cnt; i++) {
		struct bpf_program *prog = *s->progs[i].prog;
		struct bpf_link **link = s->progs[i].link;

		if (!prog->autoload || !prog->autoattach)
			continue;

		/* auto-attaching not supported for this program */
		if (!prog->sec_def || !prog->sec_def->prog_attach_fn)
			continue;

		/* if user already set the link manually, don't attempt auto-attach */
		if (*link)
			continue;

		err = prog->sec_def->prog_attach_fn(prog, prog->sec_def->cookie, link);
		if (err) {
			pr_warn("prog '%s': failed to auto-attach: %d\n",
				bpf_program__name(prog), err);
			return libbpf_err(err);
		}

		/* It's possible that for some SEC() definitions auto-attach
		 * is supported in some cases (e.g., if definition completely
		 * specifies target information), but is not in other cases.
		 * SEC("uprobe") is one such case. If user specified target
		 * binary and function name, such BPF program can be
		 * auto-attached. But if not, it shouldn't trigger skeleton's
		 * attach to fail. It should just be skipped.
		 * attach_fn signals such case with returning 0 (no error) and
		 * setting link to NULL.
		 */
	}

	return 0;
}
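/* Usage sketch (illustrative, hypothetical skeleton and paths): when
 * auto-attach is skipped and the link is left NULL, e.g. for a
 * SEC("uprobe") program without target info, the user attaches manually:
 *
 *	if (!skel->links.handle_func) {
 *		skel->links.handle_func = bpf_program__attach_uprobe(
 *			skel->progs.handle_func, false, -1,
 *			"/usr/bin/app", 0x1234);
 *		if (!skel->links.handle_func)
 *			err = -errno;
 *	}
 */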
void bpf_object__detach_skeleton(struct bpf_object_skeleton *s)
{
	int i;

	for (i = 0; i < s->prog_cnt; i++) {
		struct bpf_link **link = s->progs[i].link;

		bpf_link__destroy(*link);
		*link = NULL;
	}
}
void bpf_object__destroy_skeleton(struct bpf_object_skeleton *s)
{
	if (!s)
		return;

	if (s->progs)
		bpf_object__detach_skeleton(s);
	if (s->obj)
		bpf_object__close(*s->obj);
	free(s->maps);
	free(s->progs);
	free(s);
}