git.ipfire.org — thirdparty/linux.git: tools/lib/bpf/libbpf.c (blob 672fca94ff534b03baa8ab51dd8dfbc33c529f3b)
1 // SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
2
3 /*
4 * Common eBPF ELF object loading operations.
5 *
6 * Copyright (C) 2013-2015 Alexei Starovoitov <ast@kernel.org>
7 * Copyright (C) 2015 Wang Nan <wangnan0@huawei.com>
8 * Copyright (C) 2015 Huawei Inc.
9 * Copyright (C) 2017 Nicira, Inc.
10 * Copyright (C) 2019 Isovalent, Inc.
11 */
12
13 #ifndef _GNU_SOURCE
14 #define _GNU_SOURCE
15 #endif
16 #include <stdlib.h>
17 #include <stdio.h>
18 #include <stdarg.h>
19 #include <libgen.h>
20 #include <inttypes.h>
21 #include <limits.h>
22 #include <string.h>
23 #include <unistd.h>
24 #include <endian.h>
25 #include <fcntl.h>
26 #include <errno.h>
27 #include <ctype.h>
28 #include <asm/unistd.h>
29 #include <linux/err.h>
30 #include <linux/kernel.h>
31 #include <linux/bpf.h>
32 #include <linux/btf.h>
33 #include <linux/filter.h>
34 #include <linux/limits.h>
35 #include <linux/perf_event.h>
36 #include <linux/bpf_perf_event.h>
37 #include <linux/ring_buffer.h>
38 #include <sys/epoll.h>
39 #include <sys/ioctl.h>
40 #include <sys/mman.h>
41 #include <sys/stat.h>
42 #include <sys/types.h>
43 #include <sys/vfs.h>
44 #include <sys/utsname.h>
45 #include <sys/resource.h>
46 #include <libelf.h>
47 #include <gelf.h>
48 #include <zlib.h>
49
50 #include "libbpf.h"
51 #include "bpf.h"
52 #include "btf.h"
53 #include "str_error.h"
54 #include "libbpf_internal.h"
55 #include "hashmap.h"
56 #include "bpf_gen_internal.h"
57 #include "zip.h"
58
59 #ifndef BPF_FS_MAGIC
60 #define BPF_FS_MAGIC 0xcafe4a11
61 #endif
62
63 #define BPF_FS_DEFAULT_PATH "/sys/fs/bpf"
64
65 #define BPF_INSN_SZ (sizeof(struct bpf_insn))
66
67 /* vfprintf() in __base_pr() uses nonliteral format string. It may break
68 * compilation if user enables corresponding warning. Disable it explicitly.
69 */
70 #pragma GCC diagnostic ignored "-Wformat-nonliteral"
71
72 #define __printf(a, b) __attribute__((format(printf, a, b)))
73
74 static struct bpf_map *bpf_object__add_map(struct bpf_object *obj);
75 static bool prog_is_subprog(const struct bpf_object *obj, const struct bpf_program *prog);
76 static int map_set_def_max_entries(struct bpf_map *map);
77
78 static const char * const attach_type_name[] = {
79 [BPF_CGROUP_INET_INGRESS] = "cgroup_inet_ingress",
80 [BPF_CGROUP_INET_EGRESS] = "cgroup_inet_egress",
81 [BPF_CGROUP_INET_SOCK_CREATE] = "cgroup_inet_sock_create",
82 [BPF_CGROUP_INET_SOCK_RELEASE] = "cgroup_inet_sock_release",
83 [BPF_CGROUP_SOCK_OPS] = "cgroup_sock_ops",
84 [BPF_CGROUP_DEVICE] = "cgroup_device",
85 [BPF_CGROUP_INET4_BIND] = "cgroup_inet4_bind",
86 [BPF_CGROUP_INET6_BIND] = "cgroup_inet6_bind",
87 [BPF_CGROUP_INET4_CONNECT] = "cgroup_inet4_connect",
88 [BPF_CGROUP_INET6_CONNECT] = "cgroup_inet6_connect",
89 [BPF_CGROUP_UNIX_CONNECT] = "cgroup_unix_connect",
90 [BPF_CGROUP_INET4_POST_BIND] = "cgroup_inet4_post_bind",
91 [BPF_CGROUP_INET6_POST_BIND] = "cgroup_inet6_post_bind",
92 [BPF_CGROUP_INET4_GETPEERNAME] = "cgroup_inet4_getpeername",
93 [BPF_CGROUP_INET6_GETPEERNAME] = "cgroup_inet6_getpeername",
94 [BPF_CGROUP_UNIX_GETPEERNAME] = "cgroup_unix_getpeername",
95 [BPF_CGROUP_INET4_GETSOCKNAME] = "cgroup_inet4_getsockname",
96 [BPF_CGROUP_INET6_GETSOCKNAME] = "cgroup_inet6_getsockname",
97 [BPF_CGROUP_UNIX_GETSOCKNAME] = "cgroup_unix_getsockname",
98 [BPF_CGROUP_UDP4_SENDMSG] = "cgroup_udp4_sendmsg",
99 [BPF_CGROUP_UDP6_SENDMSG] = "cgroup_udp6_sendmsg",
100 [BPF_CGROUP_UNIX_SENDMSG] = "cgroup_unix_sendmsg",
101 [BPF_CGROUP_SYSCTL] = "cgroup_sysctl",
102 [BPF_CGROUP_UDP4_RECVMSG] = "cgroup_udp4_recvmsg",
103 [BPF_CGROUP_UDP6_RECVMSG] = "cgroup_udp6_recvmsg",
104 [BPF_CGROUP_UNIX_RECVMSG] = "cgroup_unix_recvmsg",
105 [BPF_CGROUP_GETSOCKOPT] = "cgroup_getsockopt",
106 [BPF_CGROUP_SETSOCKOPT] = "cgroup_setsockopt",
107 [BPF_SK_SKB_STREAM_PARSER] = "sk_skb_stream_parser",
108 [BPF_SK_SKB_STREAM_VERDICT] = "sk_skb_stream_verdict",
109 [BPF_SK_SKB_VERDICT] = "sk_skb_verdict",
110 [BPF_SK_MSG_VERDICT] = "sk_msg_verdict",
111 [BPF_LIRC_MODE2] = "lirc_mode2",
112 [BPF_FLOW_DISSECTOR] = "flow_dissector",
113 [BPF_TRACE_RAW_TP] = "trace_raw_tp",
114 [BPF_TRACE_FENTRY] = "trace_fentry",
115 [BPF_TRACE_FEXIT] = "trace_fexit",
116 [BPF_MODIFY_RETURN] = "modify_return",
117 [BPF_LSM_MAC] = "lsm_mac",
118 [BPF_LSM_CGROUP] = "lsm_cgroup",
119 [BPF_SK_LOOKUP] = "sk_lookup",
120 [BPF_TRACE_ITER] = "trace_iter",
121 [BPF_XDP_DEVMAP] = "xdp_devmap",
122 [BPF_XDP_CPUMAP] = "xdp_cpumap",
123 [BPF_XDP] = "xdp",
124 [BPF_SK_REUSEPORT_SELECT] = "sk_reuseport_select",
125 [BPF_SK_REUSEPORT_SELECT_OR_MIGRATE] = "sk_reuseport_select_or_migrate",
126 [BPF_PERF_EVENT] = "perf_event",
127 [BPF_TRACE_KPROBE_MULTI] = "trace_kprobe_multi",
128 [BPF_STRUCT_OPS] = "struct_ops",
129 [BPF_NETFILTER] = "netfilter",
130 [BPF_TCX_INGRESS] = "tcx_ingress",
131 [BPF_TCX_EGRESS] = "tcx_egress",
132 [BPF_TRACE_UPROBE_MULTI] = "trace_uprobe_multi",
133 [BPF_NETKIT_PRIMARY] = "netkit_primary",
134 [BPF_NETKIT_PEER] = "netkit_peer",
135 };
136
137 static const char * const link_type_name[] = {
138 [BPF_LINK_TYPE_UNSPEC] = "unspec",
139 [BPF_LINK_TYPE_RAW_TRACEPOINT] = "raw_tracepoint",
140 [BPF_LINK_TYPE_TRACING] = "tracing",
141 [BPF_LINK_TYPE_CGROUP] = "cgroup",
142 [BPF_LINK_TYPE_ITER] = "iter",
143 [BPF_LINK_TYPE_NETNS] = "netns",
144 [BPF_LINK_TYPE_XDP] = "xdp",
145 [BPF_LINK_TYPE_PERF_EVENT] = "perf_event",
146 [BPF_LINK_TYPE_KPROBE_MULTI] = "kprobe_multi",
147 [BPF_LINK_TYPE_STRUCT_OPS] = "struct_ops",
148 [BPF_LINK_TYPE_NETFILTER] = "netfilter",
149 [BPF_LINK_TYPE_TCX] = "tcx",
150 [BPF_LINK_TYPE_UPROBE_MULTI] = "uprobe_multi",
151 [BPF_LINK_TYPE_NETKIT] = "netkit",
152 };
153
154 static const char * const map_type_name[] = {
155 [BPF_MAP_TYPE_UNSPEC] = "unspec",
156 [BPF_MAP_TYPE_HASH] = "hash",
157 [BPF_MAP_TYPE_ARRAY] = "array",
158 [BPF_MAP_TYPE_PROG_ARRAY] = "prog_array",
159 [BPF_MAP_TYPE_PERF_EVENT_ARRAY] = "perf_event_array",
160 [BPF_MAP_TYPE_PERCPU_HASH] = "percpu_hash",
161 [BPF_MAP_TYPE_PERCPU_ARRAY] = "percpu_array",
162 [BPF_MAP_TYPE_STACK_TRACE] = "stack_trace",
163 [BPF_MAP_TYPE_CGROUP_ARRAY] = "cgroup_array",
164 [BPF_MAP_TYPE_LRU_HASH] = "lru_hash",
165 [BPF_MAP_TYPE_LRU_PERCPU_HASH] = "lru_percpu_hash",
166 [BPF_MAP_TYPE_LPM_TRIE] = "lpm_trie",
167 [BPF_MAP_TYPE_ARRAY_OF_MAPS] = "array_of_maps",
168 [BPF_MAP_TYPE_HASH_OF_MAPS] = "hash_of_maps",
169 [BPF_MAP_TYPE_DEVMAP] = "devmap",
170 [BPF_MAP_TYPE_DEVMAP_HASH] = "devmap_hash",
171 [BPF_MAP_TYPE_SOCKMAP] = "sockmap",
172 [BPF_MAP_TYPE_CPUMAP] = "cpumap",
173 [BPF_MAP_TYPE_XSKMAP] = "xskmap",
174 [BPF_MAP_TYPE_SOCKHASH] = "sockhash",
175 [BPF_MAP_TYPE_CGROUP_STORAGE] = "cgroup_storage",
176 [BPF_MAP_TYPE_REUSEPORT_SOCKARRAY] = "reuseport_sockarray",
177 [BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE] = "percpu_cgroup_storage",
178 [BPF_MAP_TYPE_QUEUE] = "queue",
179 [BPF_MAP_TYPE_STACK] = "stack",
180 [BPF_MAP_TYPE_SK_STORAGE] = "sk_storage",
181 [BPF_MAP_TYPE_STRUCT_OPS] = "struct_ops",
182 [BPF_MAP_TYPE_RINGBUF] = "ringbuf",
183 [BPF_MAP_TYPE_INODE_STORAGE] = "inode_storage",
184 [BPF_MAP_TYPE_TASK_STORAGE] = "task_storage",
185 [BPF_MAP_TYPE_BLOOM_FILTER] = "bloom_filter",
186 [BPF_MAP_TYPE_USER_RINGBUF] = "user_ringbuf",
187 [BPF_MAP_TYPE_CGRP_STORAGE] = "cgrp_storage",
188 };
189
190 static const char * const prog_type_name[] = {
191 [BPF_PROG_TYPE_UNSPEC] = "unspec",
192 [BPF_PROG_TYPE_SOCKET_FILTER] = "socket_filter",
193 [BPF_PROG_TYPE_KPROBE] = "kprobe",
194 [BPF_PROG_TYPE_SCHED_CLS] = "sched_cls",
195 [BPF_PROG_TYPE_SCHED_ACT] = "sched_act",
196 [BPF_PROG_TYPE_TRACEPOINT] = "tracepoint",
197 [BPF_PROG_TYPE_XDP] = "xdp",
198 [BPF_PROG_TYPE_PERF_EVENT] = "perf_event",
199 [BPF_PROG_TYPE_CGROUP_SKB] = "cgroup_skb",
200 [BPF_PROG_TYPE_CGROUP_SOCK] = "cgroup_sock",
201 [BPF_PROG_TYPE_LWT_IN] = "lwt_in",
202 [BPF_PROG_TYPE_LWT_OUT] = "lwt_out",
203 [BPF_PROG_TYPE_LWT_XMIT] = "lwt_xmit",
204 [BPF_PROG_TYPE_SOCK_OPS] = "sock_ops",
205 [BPF_PROG_TYPE_SK_SKB] = "sk_skb",
206 [BPF_PROG_TYPE_CGROUP_DEVICE] = "cgroup_device",
207 [BPF_PROG_TYPE_SK_MSG] = "sk_msg",
208 [BPF_PROG_TYPE_RAW_TRACEPOINT] = "raw_tracepoint",
209 [BPF_PROG_TYPE_CGROUP_SOCK_ADDR] = "cgroup_sock_addr",
210 [BPF_PROG_TYPE_LWT_SEG6LOCAL] = "lwt_seg6local",
211 [BPF_PROG_TYPE_LIRC_MODE2] = "lirc_mode2",
212 [BPF_PROG_TYPE_SK_REUSEPORT] = "sk_reuseport",
213 [BPF_PROG_TYPE_FLOW_DISSECTOR] = "flow_dissector",
214 [BPF_PROG_TYPE_CGROUP_SYSCTL] = "cgroup_sysctl",
215 [BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE] = "raw_tracepoint_writable",
216 [BPF_PROG_TYPE_CGROUP_SOCKOPT] = "cgroup_sockopt",
217 [BPF_PROG_TYPE_TRACING] = "tracing",
218 [BPF_PROG_TYPE_STRUCT_OPS] = "struct_ops",
219 [BPF_PROG_TYPE_EXT] = "ext",
220 [BPF_PROG_TYPE_LSM] = "lsm",
221 [BPF_PROG_TYPE_SK_LOOKUP] = "sk_lookup",
222 [BPF_PROG_TYPE_SYSCALL] = "syscall",
223 [BPF_PROG_TYPE_NETFILTER] = "netfilter",
224 };
225
226 static int __base_pr(enum libbpf_print_level level, const char *format,
227 va_list args)
228 {
229 if (level == LIBBPF_DEBUG)
230 return 0;
231
232 return vfprintf(stderr, format, args);
233 }
234
235 static libbpf_print_fn_t __libbpf_pr = __base_pr;
236
237 libbpf_print_fn_t libbpf_set_print(libbpf_print_fn_t fn)
238 {
239 libbpf_print_fn_t old_print_fn;
240
241 old_print_fn = __atomic_exchange_n(&__libbpf_pr, fn, __ATOMIC_RELAXED);
242
243 return old_print_fn;
244 }
245
246 __printf(2, 3)
247 void libbpf_print(enum libbpf_print_level level, const char *format, ...)
248 {
249 va_list args;
250 int old_errno;
251 libbpf_print_fn_t print_fn;
252
253 print_fn = __atomic_load_n(&__libbpf_pr, __ATOMIC_RELAXED);
254 if (!print_fn)
255 return;
256
257 old_errno = errno;
258
259 va_start(args, format);
260 print_fn(level, format, args);
261 va_end(args);
262
263 errno = old_errno;
264 }
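/* Illustrative usage sketch (editorial addition, not part of this file):
 * an application can route libbpf's output through its own logger by
 * installing a callback matching libbpf_print_fn_t, e.g.:
 *
 *	static int my_print(enum libbpf_print_level level,
 *			    const char *format, va_list args)
 *	{
 *		if (level == LIBBPF_DEBUG)
 *			return 0;
 *		return vfprintf(stderr, format, args);
 *	}
 *
 *	libbpf_set_print(my_print);
 *
 * libbpf_set_print() returns the previously installed callback; passing NULL
 * silences libbpf output entirely, which the !print_fn check above handles.
 */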
265
266 static void pr_perm_msg(int err)
267 {
268 struct rlimit limit;
269 char buf[100];
270
271 if (err != -EPERM || geteuid() != 0)
272 return;
273
274 err = getrlimit(RLIMIT_MEMLOCK, &limit);
275 if (err)
276 return;
277
278 if (limit.rlim_cur == RLIM_INFINITY)
279 return;
280
281 if (limit.rlim_cur < 1024)
282 snprintf(buf, sizeof(buf), "%zu bytes", (size_t)limit.rlim_cur);
283 else if (limit.rlim_cur < 1024*1024)
284 snprintf(buf, sizeof(buf), "%.1f KiB", (double)limit.rlim_cur / 1024);
285 else
286 snprintf(buf, sizeof(buf), "%.1f MiB", (double)limit.rlim_cur / (1024*1024));
287
288 pr_warn("permission error while running as root; try raising 'ulimit -l'? current value: %s\n",
289 buf);
290 }
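/* Illustrative sketch (editorial addition, not part of this file): this
 * warning matters mostly on pre-5.11 kernels, where BPF memory is charged
 * against RLIMIT_MEMLOCK rather than the memory cgroup. A common workaround
 * is to raise the limit before loading objects:
 *
 *	struct rlimit rl = { RLIM_INFINITY, RLIM_INFINITY };
 *
 *	setrlimit(RLIMIT_MEMLOCK, &rl);
 *
 * or, from a shell, running `ulimit -l unlimited` before starting the loader.
 */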
291
292 #define STRERR_BUFSIZE 128
293
294 /* Copied from tools/perf/util/util.h */
295 #ifndef zfree
296 # define zfree(ptr) ({ free(*ptr); *ptr = NULL; })
297 #endif
298
299 #ifndef zclose
300 # define zclose(fd) ({ \
301 int ___err = 0; \
302 if ((fd) >= 0) \
303 ___err = close((fd)); \
304 fd = -1; \
305 ___err; })
306 #endif
307
308 static inline __u64 ptr_to_u64(const void *ptr)
309 {
310 return (__u64) (unsigned long) ptr;
311 }
312
313 int libbpf_set_strict_mode(enum libbpf_strict_mode mode)
314 {
315 /* as of v1.0 libbpf_set_strict_mode() is a no-op */
316 return 0;
317 }
318
319 __u32 libbpf_major_version(void)
320 {
321 return LIBBPF_MAJOR_VERSION;
322 }
323
324 __u32 libbpf_minor_version(void)
325 {
326 return LIBBPF_MINOR_VERSION;
327 }
328
329 const char *libbpf_version_string(void)
330 {
331 #define __S(X) #X
332 #define _S(X) __S(X)
333 return "v" _S(LIBBPF_MAJOR_VERSION) "." _S(LIBBPF_MINOR_VERSION);
334 #undef _S
335 #undef __S
336 }
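/* Editorial note (not part of this file): the two-level _S()/__S() macros
 * make sure LIBBPF_MAJOR_VERSION/LIBBPF_MINOR_VERSION are macro-expanded
 * before stringification. With LIBBPF_MAJOR_VERSION defined as 1,
 * __S(LIBBPF_MAJOR_VERSION) would produce "LIBBPF_MAJOR_VERSION", while
 * _S(LIBBPF_MAJOR_VERSION) produces "1", so the function returns e.g. "v1.4".
 * A hypothetical runtime check on the caller's side could look like:
 *
 *	if (libbpf_major_version() != LIBBPF_MAJOR_VERSION)
 *		fprintf(stderr, "built against a different libbpf (%s)\n",
 *			libbpf_version_string());
 */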
337
338 enum reloc_type {
339 RELO_LD64,
340 RELO_CALL,
341 RELO_DATA,
342 RELO_EXTERN_LD64,
343 RELO_EXTERN_CALL,
344 RELO_SUBPROG_ADDR,
345 RELO_CORE,
346 };
347
348 struct reloc_desc {
349 enum reloc_type type;
350 int insn_idx;
351 union {
352 const struct bpf_core_relo *core_relo; /* used when type == RELO_CORE */
353 struct {
354 int map_idx;
355 int sym_off;
356 int ext_idx;
357 };
358 };
359 };
360
361 /* stored as sec_def->cookie for all libbpf-supported SEC()s */
362 enum sec_def_flags {
363 SEC_NONE = 0,
364 /* expected_attach_type is optional, if the kernel doesn't support it */
365 SEC_EXP_ATTACH_OPT = 1,
366 /* legacy, only used by libbpf_get_type_names() and
367 * libbpf_attach_type_by_name(), not used by libbpf itself at all.
368 * This used to be associated with cgroup (and a few other) BPF programs
369 * that were attachable through BPF_PROG_ATTACH command. Pretty
370 * meaningless nowadays, though.
371 */
372 SEC_ATTACHABLE = 2,
373 SEC_ATTACHABLE_OPT = SEC_ATTACHABLE | SEC_EXP_ATTACH_OPT,
374 /* attachment target is specified through BTF ID in either kernel or
375 * other BPF program's BTF object
376 */
377 SEC_ATTACH_BTF = 4,
378 /* BPF program type allows sleeping/blocking in kernel */
379 SEC_SLEEPABLE = 8,
380 /* BPF program supports non-linear XDP buffers */
381 SEC_XDP_FRAGS = 16,
382 /* Set up proper attach type for USDT probes. */
383 SEC_USDT = 32,
384 };
385
386 struct bpf_sec_def {
387 char *sec;
388 enum bpf_prog_type prog_type;
389 enum bpf_attach_type expected_attach_type;
390 long cookie;
391 int handler_id;
392
393 libbpf_prog_setup_fn_t prog_setup_fn;
394 libbpf_prog_prepare_load_fn_t prog_prepare_load_fn;
395 libbpf_prog_attach_fn_t prog_attach_fn;
396 };
397
398 /*
399 * bpf_prog should be a better name but it has been used in
400 * linux/filter.h.
401 */
402 struct bpf_program {
403 char *name;
404 char *sec_name;
405 size_t sec_idx;
406 const struct bpf_sec_def *sec_def;
407 /* this program's instruction offset (in number of instructions)
408 * within its containing ELF section
409 */
410 size_t sec_insn_off;
411 /* number of original instructions in ELF section belonging to this
412 * program, not taking into account subprogram instructions possibly
413 * appended later during relocation
414 */
415 size_t sec_insn_cnt;
416 /* Offset (in number of instructions) of the start of instructions
417 * belonging to this BPF program within its containing main BPF
418 * program. For the entry-point (main) BPF program, this is always
419 * zero. For a sub-program, this gets reset before each main BPF
420 * program is processed and relocated, and is used to determine
421 * whether the sub-program was already appended to the main program, and
422 * if yes, at which instruction offset.
423 */
424 size_t sub_insn_off;
425
426 /* instructions that belong to BPF program; insns[0] is located at
427 * sec_insn_off instruction within its ELF section in ELF file, so
428 * when mapping ELF file instruction index to the local instruction,
429 * one needs to subtract sec_insn_off; and vice versa.
430 */
431 struct bpf_insn *insns;
432 /* actual number of instructions in this BPF program's image; for
433 * entry-point BPF programs this includes the size of main program
434 * itself plus all the used sub-programs, appended at the end
435 */
436 size_t insns_cnt;
437
438 struct reloc_desc *reloc_desc;
439 int nr_reloc;
440
441 /* BPF verifier log settings */
442 char *log_buf;
443 size_t log_size;
444 __u32 log_level;
445
446 struct bpf_object *obj;
447
448 int fd;
449 bool autoload;
450 bool autoattach;
451 bool sym_global;
452 bool mark_btf_static;
453 enum bpf_prog_type type;
454 enum bpf_attach_type expected_attach_type;
455 int exception_cb_idx;
456
457 int prog_ifindex;
458 __u32 attach_btf_obj_fd;
459 __u32 attach_btf_id;
460 __u32 attach_prog_fd;
461
462 void *func_info;
463 __u32 func_info_rec_size;
464 __u32 func_info_cnt;
465
466 void *line_info;
467 __u32 line_info_rec_size;
468 __u32 line_info_cnt;
469 __u32 prog_flags;
470 };
471
472 struct bpf_struct_ops {
473 const char *tname;
474 const struct btf_type *type;
475 struct bpf_program **progs;
476 __u32 *kern_func_off;
477 /* e.g. struct tcp_congestion_ops in bpf_prog's btf format */
478 void *data;
479 /* e.g. struct bpf_struct_ops_tcp_congestion_ops in
480 * btf_vmlinux's format.
481 * struct bpf_struct_ops_tcp_congestion_ops {
482 * [... some other kernel fields ...]
483 * struct tcp_congestion_ops data;
484 * }
485 * kern_vdata-size == sizeof(struct bpf_struct_ops_tcp_congestion_ops)
486 * bpf_map__init_kern_struct_ops() will populate the "kern_vdata"
487 * from "data".
488 */
489 void *kern_vdata;
490 __u32 type_id;
491 };
492
493 #define DATA_SEC ".data"
494 #define BSS_SEC ".bss"
495 #define RODATA_SEC ".rodata"
496 #define KCONFIG_SEC ".kconfig"
497 #define KSYMS_SEC ".ksyms"
498 #define STRUCT_OPS_SEC ".struct_ops"
499 #define STRUCT_OPS_LINK_SEC ".struct_ops.link"
500
501 enum libbpf_map_type {
502 LIBBPF_MAP_UNSPEC,
503 LIBBPF_MAP_DATA,
504 LIBBPF_MAP_BSS,
505 LIBBPF_MAP_RODATA,
506 LIBBPF_MAP_KCONFIG,
507 };
508
509 struct bpf_map_def {
510 unsigned int type;
511 unsigned int key_size;
512 unsigned int value_size;
513 unsigned int max_entries;
514 unsigned int map_flags;
515 };
516
517 struct bpf_map {
518 struct bpf_object *obj;
519 char *name;
520 /* real_name is defined for special internal maps (.rodata*,
521 * .data*, .bss, .kconfig) and preserves their original ELF section
522 * name. This is important to be able to find corresponding BTF
523 * DATASEC information.
524 */
525 char *real_name;
526 int fd;
527 int sec_idx;
528 size_t sec_offset;
529 int map_ifindex;
530 int inner_map_fd;
531 struct bpf_map_def def;
532 __u32 numa_node;
533 __u32 btf_var_idx;
534 int mod_btf_fd;
535 __u32 btf_key_type_id;
536 __u32 btf_value_type_id;
537 __u32 btf_vmlinux_value_type_id;
538 enum libbpf_map_type libbpf_type;
539 void *mmaped;
540 struct bpf_struct_ops *st_ops;
541 struct bpf_map *inner_map;
542 void **init_slots;
543 int init_slots_sz;
544 char *pin_path;
545 bool pinned;
546 bool reused;
547 bool autocreate;
548 __u64 map_extra;
549 };
550
551 enum extern_type {
552 EXT_UNKNOWN,
553 EXT_KCFG,
554 EXT_KSYM,
555 };
556
557 enum kcfg_type {
558 KCFG_UNKNOWN,
559 KCFG_CHAR,
560 KCFG_BOOL,
561 KCFG_INT,
562 KCFG_TRISTATE,
563 KCFG_CHAR_ARR,
564 };
565
566 struct extern_desc {
567 enum extern_type type;
568 int sym_idx;
569 int btf_id;
570 int sec_btf_id;
571 const char *name;
572 char *essent_name;
573 bool is_set;
574 bool is_weak;
575 union {
576 struct {
577 enum kcfg_type type;
578 int sz;
579 int align;
580 int data_off;
581 bool is_signed;
582 } kcfg;
583 struct {
584 unsigned long long addr;
585
586 /* target btf_id of the corresponding kernel var. */
587 int kernel_btf_obj_fd;
588 int kernel_btf_id;
589
590 /* local btf_id of the ksym extern's type. */
591 __u32 type_id;
592 /* BTF fd index to be patched in for insn->off, this is
593 * 0 for vmlinux BTF, index in obj->fd_array for module
594 * BTF
595 */
596 __s16 btf_fd_idx;
597 } ksym;
598 };
599 };
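/* Illustrative sketch (editorial addition, not part of this file): on the
 * BPF program side these externs are typically declared with the __kconfig
 * and __ksym macros from bpf_helpers.h, which place symbols into the
 * KCONFIG_SEC/KSYMS_SEC sections defined above, e.g.:
 *
 *	extern unsigned int CONFIG_HZ __kconfig;		(EXT_KCFG)
 *	extern const int bpf_prog_active __ksym;		(EXT_KSYM variable)
 *	extern void bpf_rcu_read_lock(void) __ksym;		(EXT_KSYM kfunc)
 *
 * KCFG externs are backed by the internal .kconfig map, while KSYM externs
 * are resolved against vmlinux or kernel module BTF at load time.
 */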
600
601 struct module_btf {
602 struct btf *btf;
603 char *name;
604 __u32 id;
605 int fd;
606 int fd_array_idx;
607 };
608
609 enum sec_type {
610 SEC_UNUSED = 0,
611 SEC_RELO,
612 SEC_BSS,
613 SEC_DATA,
614 SEC_RODATA,
615 SEC_ST_OPS,
616 };
617
618 struct elf_sec_desc {
619 enum sec_type sec_type;
620 Elf64_Shdr *shdr;
621 Elf_Data *data;
622 };
623
624 struct elf_state {
625 int fd;
626 const void *obj_buf;
627 size_t obj_buf_sz;
628 Elf *elf;
629 Elf64_Ehdr *ehdr;
630 Elf_Data *symbols;
631 size_t shstrndx; /* section index for section name strings */
632 size_t strtabidx;
633 struct elf_sec_desc *secs;
634 size_t sec_cnt;
635 int btf_maps_shndx;
636 __u32 btf_maps_sec_btf_id;
637 int text_shndx;
638 int symbols_shndx;
639 bool has_st_ops;
640 };
641
642 struct usdt_manager;
643
644 struct bpf_object {
645 char name[BPF_OBJ_NAME_LEN];
646 char license[64];
647 __u32 kern_version;
648
649 struct bpf_program *programs;
650 size_t nr_programs;
651 struct bpf_map *maps;
652 size_t nr_maps;
653 size_t maps_cap;
654
655 char *kconfig;
656 struct extern_desc *externs;
657 int nr_extern;
658 int kconfig_map_idx;
659
660 bool loaded;
661 bool has_subcalls;
662 bool has_rodata;
663
664 struct bpf_gen *gen_loader;
665
666 /* Information when doing ELF related work. Only valid if efile.elf is not NULL */
667 struct elf_state efile;
668
669 struct btf *btf;
670 struct btf_ext *btf_ext;
671
672 /* Parse and load BTF vmlinux if any of the programs in the object need
673 * it at load time.
674 */
675 struct btf *btf_vmlinux;
676 /* Path to the custom BTF to be used for BPF CO-RE relocations as an
677 * override for vmlinux BTF.
678 */
679 char *btf_custom_path;
680 /* vmlinux BTF override for CO-RE relocations */
681 struct btf *btf_vmlinux_override;
682 /* Lazily initialized kernel module BTFs */
683 struct module_btf *btf_modules;
684 bool btf_modules_loaded;
685 size_t btf_module_cnt;
686 size_t btf_module_cap;
687
688 /* optional log settings passed to BPF_BTF_LOAD and BPF_PROG_LOAD commands */
689 char *log_buf;
690 size_t log_size;
691 __u32 log_level;
692
693 int *fd_array;
694 size_t fd_array_cap;
695 size_t fd_array_cnt;
696
697 struct usdt_manager *usdt_man;
698
699 struct kern_feature_cache *feat_cache;
700 char *token_path;
701 int token_fd;
702
703 char path[];
704 };
705
706 static const char *elf_sym_str(const struct bpf_object *obj, size_t off);
707 static const char *elf_sec_str(const struct bpf_object *obj, size_t off);
708 static Elf_Scn *elf_sec_by_idx(const struct bpf_object *obj, size_t idx);
709 static Elf_Scn *elf_sec_by_name(const struct bpf_object *obj, const char *name);
710 static Elf64_Shdr *elf_sec_hdr(const struct bpf_object *obj, Elf_Scn *scn);
711 static const char *elf_sec_name(const struct bpf_object *obj, Elf_Scn *scn);
712 static Elf_Data *elf_sec_data(const struct bpf_object *obj, Elf_Scn *scn);
713 static Elf64_Sym *elf_sym_by_idx(const struct bpf_object *obj, size_t idx);
714 static Elf64_Rel *elf_rel_by_idx(Elf_Data *data, size_t idx);
715
716 void bpf_program__unload(struct bpf_program *prog)
717 {
718 if (!prog)
719 return;
720
721 zclose(prog->fd);
722
723 zfree(&prog->func_info);
724 zfree(&prog->line_info);
725 }
726
727 static void bpf_program__exit(struct bpf_program *prog)
728 {
729 if (!prog)
730 return;
731
732 bpf_program__unload(prog);
733 zfree(&prog->name);
734 zfree(&prog->sec_name);
735 zfree(&prog->insns);
736 zfree(&prog->reloc_desc);
737
738 prog->nr_reloc = 0;
739 prog->insns_cnt = 0;
740 prog->sec_idx = -1;
741 }
742
743 static bool insn_is_subprog_call(const struct bpf_insn *insn)
744 {
745 return BPF_CLASS(insn->code) == BPF_JMP &&
746 BPF_OP(insn->code) == BPF_CALL &&
747 BPF_SRC(insn->code) == BPF_K &&
748 insn->src_reg == BPF_PSEUDO_CALL &&
749 insn->dst_reg == 0 &&
750 insn->off == 0;
751 }
752
753 static bool is_call_insn(const struct bpf_insn *insn)
754 {
755 return insn->code == (BPF_JMP | BPF_CALL);
756 }
757
758 static bool insn_is_pseudo_func(struct bpf_insn *insn)
759 {
760 return is_ldimm64_insn(insn) && insn->src_reg == BPF_PSEUDO_FUNC;
761 }
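/* Editorial note (not part of this file): roughly, a compiler-emitted
 * subprog call detected by insn_is_subprog_call() looks like
 *
 *	insn.code    = BPF_JMP | BPF_CALL;
 *	insn.src_reg = BPF_PSEUDO_CALL;
 *	insn.imm     = <relative offset of the callee, in instructions>;
 *
 * whereas taking the address of a subprog (e.g. a callback passed to
 * bpf_for_each_map_elem()) is a ldimm64 instruction with
 * src_reg == BPF_PSEUDO_FUNC, which insn_is_pseudo_func() detects.
 */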
762
763 static int
764 bpf_object__init_prog(struct bpf_object *obj, struct bpf_program *prog,
765 const char *name, size_t sec_idx, const char *sec_name,
766 size_t sec_off, void *insn_data, size_t insn_data_sz)
767 {
768 if (insn_data_sz == 0 || insn_data_sz % BPF_INSN_SZ || sec_off % BPF_INSN_SZ) {
769 pr_warn("sec '%s': corrupted program '%s', offset %zu, size %zu\n",
770 sec_name, name, sec_off, insn_data_sz);
771 return -EINVAL;
772 }
773
774 memset(prog, 0, sizeof(*prog));
775 prog->obj = obj;
776
777 prog->sec_idx = sec_idx;
778 prog->sec_insn_off = sec_off / BPF_INSN_SZ;
779 prog->sec_insn_cnt = insn_data_sz / BPF_INSN_SZ;
780 /* insns_cnt can later be increased by appending used subprograms */
781 prog->insns_cnt = prog->sec_insn_cnt;
782
783 prog->type = BPF_PROG_TYPE_UNSPEC;
784 prog->fd = -1;
785 prog->exception_cb_idx = -1;
786
787 /* libbpf's convention for SEC("?abc...") is that it's just like
788 * SEC("abc...") but the corresponding bpf_program starts out with
789 * autoload set to false.
790 */
791 if (sec_name[0] == '?') {
792 prog->autoload = false;
793 /* from now on forget there was ? in section name */
794 sec_name++;
795 } else {
796 prog->autoload = true;
797 }
798
799 prog->autoattach = true;
800
801 /* inherit object's log_level */
802 prog->log_level = obj->log_level;
803
804 prog->sec_name = strdup(sec_name);
805 if (!prog->sec_name)
806 goto errout;
807
808 prog->name = strdup(name);
809 if (!prog->name)
810 goto errout;
811
812 prog->insns = malloc(insn_data_sz);
813 if (!prog->insns)
814 goto errout;
815 memcpy(prog->insns, insn_data, insn_data_sz);
816
817 return 0;
818 errout:
819 pr_warn("sec '%s': failed to allocate memory for prog '%s'\n", sec_name, name);
820 bpf_program__exit(prog);
821 return -ENOMEM;
822 }
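/* Illustrative sketch (editorial addition, not part of this file): the '?'
 * convention above means that a program defined as
 *
 *	SEC("?kprobe/do_unlinkat")
 *	int handle_unlinkat(struct pt_regs *ctx)
 *	{
 *		return 0;
 *	}
 *
 * is parsed and relocated normally but starts with autoload == false, so it
 * is skipped at load time unless userspace opts it back in, e.g. with
 * bpf_program__set_autoload(prog, true) before bpf_object__load().
 */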
823
824 static int
825 bpf_object__add_programs(struct bpf_object *obj, Elf_Data *sec_data,
826 const char *sec_name, int sec_idx)
827 {
828 Elf_Data *symbols = obj->efile.symbols;
829 struct bpf_program *prog, *progs;
830 void *data = sec_data->d_buf;
831 size_t sec_sz = sec_data->d_size, sec_off, prog_sz, nr_syms;
832 int nr_progs, err, i;
833 const char *name;
834 Elf64_Sym *sym;
835
836 progs = obj->programs;
837 nr_progs = obj->nr_programs;
838 nr_syms = symbols->d_size / sizeof(Elf64_Sym);
839
840 for (i = 0; i < nr_syms; i++) {
841 sym = elf_sym_by_idx(obj, i);
842
843 if (sym->st_shndx != sec_idx)
844 continue;
845 if (ELF64_ST_TYPE(sym->st_info) != STT_FUNC)
846 continue;
847
848 prog_sz = sym->st_size;
849 sec_off = sym->st_value;
850
851 name = elf_sym_str(obj, sym->st_name);
852 if (!name) {
853 pr_warn("sec '%s': failed to get symbol name for offset %zu\n",
854 sec_name, sec_off);
855 return -LIBBPF_ERRNO__FORMAT;
856 }
857
858 if (sec_off + prog_sz > sec_sz) {
859 pr_warn("sec '%s': program at offset %zu crosses section boundary\n",
860 sec_name, sec_off);
861 return -LIBBPF_ERRNO__FORMAT;
862 }
863
864 if (sec_idx != obj->efile.text_shndx && ELF64_ST_BIND(sym->st_info) == STB_LOCAL) {
865 pr_warn("sec '%s': program '%s' is static and not supported\n", sec_name, name);
866 return -ENOTSUP;
867 }
868
869 pr_debug("sec '%s': found program '%s' at insn offset %zu (%zu bytes), code size %zu insns (%zu bytes)\n",
870 sec_name, name, sec_off / BPF_INSN_SZ, sec_off, prog_sz / BPF_INSN_SZ, prog_sz);
871
872 progs = libbpf_reallocarray(progs, nr_progs + 1, sizeof(*progs));
873 if (!progs) {
874 /*
875 * In this case the original obj->programs
876 * is still valid, so we don't need special treatment in
877 * bpf_object__close().
878 */
879 pr_warn("sec '%s': failed to alloc memory for new program '%s'\n",
880 sec_name, name);
881 return -ENOMEM;
882 }
883 obj->programs = progs;
884
885 prog = &progs[nr_progs];
886
887 err = bpf_object__init_prog(obj, prog, name, sec_idx, sec_name,
888 sec_off, data + sec_off, prog_sz);
889 if (err)
890 return err;
891
892 if (ELF64_ST_BIND(sym->st_info) != STB_LOCAL)
893 prog->sym_global = true;
894
895 /* if function is a global/weak symbol, but has restricted
896 * (STV_HIDDEN or STV_INTERNAL) visibility, mark its BTF FUNC
897 * as static to enable more permissive BPF verification mode
898 * with more outside context available to BPF verifier
899 */
900 if (prog->sym_global && (ELF64_ST_VISIBILITY(sym->st_other) == STV_HIDDEN
901 || ELF64_ST_VISIBILITY(sym->st_other) == STV_INTERNAL))
902 prog->mark_btf_static = true;
903
904 nr_progs++;
905 obj->nr_programs = nr_progs;
906 }
907
908 return 0;
909 }
910
911 static const struct btf_member *
912 find_member_by_offset(const struct btf_type *t, __u32 bit_offset)
913 {
914 struct btf_member *m;
915 int i;
916
917 for (i = 0, m = btf_members(t); i < btf_vlen(t); i++, m++) {
918 if (btf_member_bit_offset(t, i) == bit_offset)
919 return m;
920 }
921
922 return NULL;
923 }
924
925 static const struct btf_member *
926 find_member_by_name(const struct btf *btf, const struct btf_type *t,
927 const char *name)
928 {
929 struct btf_member *m;
930 int i;
931
932 for (i = 0, m = btf_members(t); i < btf_vlen(t); i++, m++) {
933 if (!strcmp(btf__name_by_offset(btf, m->name_off), name))
934 return m;
935 }
936
937 return NULL;
938 }
939
940 static int find_ksym_btf_id(struct bpf_object *obj, const char *ksym_name,
941 __u16 kind, struct btf **res_btf,
942 struct module_btf **res_mod_btf);
943
944 #define STRUCT_OPS_VALUE_PREFIX "bpf_struct_ops_"
945 static int find_btf_by_prefix_kind(const struct btf *btf, const char *prefix,
946 const char *name, __u32 kind);
947
948 static int
949 find_struct_ops_kern_types(struct bpf_object *obj, const char *tname_raw,
950 struct module_btf **mod_btf,
951 const struct btf_type **type, __u32 *type_id,
952 const struct btf_type **vtype, __u32 *vtype_id,
953 const struct btf_member **data_member)
954 {
955 const struct btf_type *kern_type, *kern_vtype;
956 const struct btf_member *kern_data_member;
957 struct btf *btf;
958 __s32 kern_vtype_id, kern_type_id;
959 char tname[256];
960 __u32 i;
961
962 snprintf(tname, sizeof(tname), "%.*s",
963 (int)bpf_core_essential_name_len(tname_raw), tname_raw);
964
965 kern_type_id = find_ksym_btf_id(obj, tname, BTF_KIND_STRUCT,
966 &btf, mod_btf);
967 if (kern_type_id < 0) {
968 pr_warn("struct_ops init_kern: struct %s is not found in kernel BTF\n",
969 tname);
970 return kern_type_id;
971 }
972 kern_type = btf__type_by_id(btf, kern_type_id);
973
974 /* Find the corresponding "map_value" type that will be used
975 * in map_update(BPF_MAP_TYPE_STRUCT_OPS). For example,
976 * find "struct bpf_struct_ops_tcp_congestion_ops" from the
977 * btf_vmlinux.
978 */
979 kern_vtype_id = find_btf_by_prefix_kind(btf, STRUCT_OPS_VALUE_PREFIX,
980 tname, BTF_KIND_STRUCT);
981 if (kern_vtype_id < 0) {
982 pr_warn("struct_ops init_kern: struct %s%s is not found in kernel BTF\n",
983 STRUCT_OPS_VALUE_PREFIX, tname);
984 return kern_vtype_id;
985 }
986 kern_vtype = btf__type_by_id(btf, kern_vtype_id);
987
988 /* Find "struct tcp_congestion_ops" from
989 * struct bpf_struct_ops_tcp_congestion_ops {
990 * [ ... ]
991 * struct tcp_congestion_ops data;
992 * }
993 */
994 kern_data_member = btf_members(kern_vtype);
995 for (i = 0; i < btf_vlen(kern_vtype); i++, kern_data_member++) {
996 if (kern_data_member->type == kern_type_id)
997 break;
998 }
999 if (i == btf_vlen(kern_vtype)) {
1000 pr_warn("struct_ops init_kern: struct %s data is not found in struct %s%s\n",
1001 tname, STRUCT_OPS_VALUE_PREFIX, tname);
1002 return -EINVAL;
1003 }
1004
1005 *type = kern_type;
1006 *type_id = kern_type_id;
1007 *vtype = kern_vtype;
1008 *vtype_id = kern_vtype_id;
1009 *data_member = kern_data_member;
1010
1011 return 0;
1012 }
1013
1014 static bool bpf_map__is_struct_ops(const struct bpf_map *map)
1015 {
1016 return map->def.type == BPF_MAP_TYPE_STRUCT_OPS;
1017 }
1018
1019 static bool is_valid_st_ops_program(struct bpf_object *obj,
1020 const struct bpf_program *prog)
1021 {
1022 int i;
1023
1024 for (i = 0; i < obj->nr_programs; i++) {
1025 if (&obj->programs[i] == prog)
1026 return prog->type == BPF_PROG_TYPE_STRUCT_OPS;
1027 }
1028
1029 return false;
1030 }
1031
1032 /* For each struct_ops program P, referenced from some struct_ops map M,
1033 * enable P.autoload if there are Ms for which M.autocreate is true,
1034 * disable P.autoload if for all Ms M.autocreate is false.
1035 * Don't change P.autoload for programs that are not referenced from any maps.
1036 */
1037 static int bpf_object_adjust_struct_ops_autoload(struct bpf_object *obj)
1038 {
1039 struct bpf_program *prog, *slot_prog;
1040 struct bpf_map *map;
1041 int i, j, k, vlen;
1042
1043 for (i = 0; i < obj->nr_programs; ++i) {
1044 int should_load = false;
1045 int use_cnt = 0;
1046
1047 prog = &obj->programs[i];
1048 if (prog->type != BPF_PROG_TYPE_STRUCT_OPS)
1049 continue;
1050
1051 for (j = 0; j < obj->nr_maps; ++j) {
1052 map = &obj->maps[j];
1053 if (!bpf_map__is_struct_ops(map))
1054 continue;
1055
1056 vlen = btf_vlen(map->st_ops->type);
1057 for (k = 0; k < vlen; ++k) {
1058 slot_prog = map->st_ops->progs[k];
1059 if (prog != slot_prog)
1060 continue;
1061
1062 use_cnt++;
1063 if (map->autocreate)
1064 should_load = true;
1065 }
1066 }
1067 if (use_cnt)
1068 prog->autoload = should_load;
1069 }
1070
1071 return 0;
1072 }
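/* Illustrative sketch (editorial addition, not part of this file): the pass
 * above ties program loading to map creation, so with a hypothetical
 * skeleton 'skel' containing a single struct_ops map 'my_ops':
 *
 *	bpf_map__set_autocreate(skel->maps.my_ops, false);
 *	bpf_object__load(skel->obj);
 *
 * will also skip loading every program referenced only from 'my_ops',
 * because its autoload flag is flipped to false here.
 */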
1073
1074 /* Init the map's fields that depend on kern_btf */
1075 static int bpf_map__init_kern_struct_ops(struct bpf_map *map)
1076 {
1077 const struct btf_member *member, *kern_member, *kern_data_member;
1078 const struct btf_type *type, *kern_type, *kern_vtype;
1079 __u32 i, kern_type_id, kern_vtype_id, kern_data_off;
1080 struct bpf_object *obj = map->obj;
1081 const struct btf *btf = obj->btf;
1082 struct bpf_struct_ops *st_ops;
1083 const struct btf *kern_btf;
1084 struct module_btf *mod_btf;
1085 void *data, *kern_data;
1086 const char *tname;
1087 int err;
1088
1089 st_ops = map->st_ops;
1090 type = st_ops->type;
1091 tname = st_ops->tname;
1092 err = find_struct_ops_kern_types(obj, tname, &mod_btf,
1093 &kern_type, &kern_type_id,
1094 &kern_vtype, &kern_vtype_id,
1095 &kern_data_member);
1096 if (err)
1097 return err;
1098
1099 kern_btf = mod_btf ? mod_btf->btf : obj->btf_vmlinux;
1100
1101 pr_debug("struct_ops init_kern %s: type_id:%u kern_type_id:%u kern_vtype_id:%u\n",
1102 map->name, st_ops->type_id, kern_type_id, kern_vtype_id);
1103
1104 map->mod_btf_fd = mod_btf ? mod_btf->fd : -1;
1105 map->def.value_size = kern_vtype->size;
1106 map->btf_vmlinux_value_type_id = kern_vtype_id;
1107
1108 st_ops->kern_vdata = calloc(1, kern_vtype->size);
1109 if (!st_ops->kern_vdata)
1110 return -ENOMEM;
1111
1112 data = st_ops->data;
1113 kern_data_off = kern_data_member->offset / 8;
1114 kern_data = st_ops->kern_vdata + kern_data_off;
1115
1116 member = btf_members(type);
1117 for (i = 0; i < btf_vlen(type); i++, member++) {
1118 const struct btf_type *mtype, *kern_mtype;
1119 __u32 mtype_id, kern_mtype_id;
1120 void *mdata, *kern_mdata;
1121 __s64 msize, kern_msize;
1122 __u32 moff, kern_moff;
1123 __u32 kern_member_idx;
1124 const char *mname;
1125
1126 mname = btf__name_by_offset(btf, member->name_off);
1127 kern_member = find_member_by_name(kern_btf, kern_type, mname);
1128 if (!kern_member) {
1129 pr_warn("struct_ops init_kern %s: Cannot find member %s in kernel BTF\n",
1130 map->name, mname);
1131 return -ENOTSUP;
1132 }
1133
1134 kern_member_idx = kern_member - btf_members(kern_type);
1135 if (btf_member_bitfield_size(type, i) ||
1136 btf_member_bitfield_size(kern_type, kern_member_idx)) {
1137 pr_warn("struct_ops init_kern %s: bitfield %s is not supported\n",
1138 map->name, mname);
1139 return -ENOTSUP;
1140 }
1141
1142 moff = member->offset / 8;
1143 kern_moff = kern_member->offset / 8;
1144
1145 mdata = data + moff;
1146 kern_mdata = kern_data + kern_moff;
1147
1148 mtype = skip_mods_and_typedefs(btf, member->type, &mtype_id);
1149 kern_mtype = skip_mods_and_typedefs(kern_btf, kern_member->type,
1150 &kern_mtype_id);
1151 if (BTF_INFO_KIND(mtype->info) !=
1152 BTF_INFO_KIND(kern_mtype->info)) {
1153 pr_warn("struct_ops init_kern %s: Unmatched member type %s %u != %u(kernel)\n",
1154 map->name, mname, BTF_INFO_KIND(mtype->info),
1155 BTF_INFO_KIND(kern_mtype->info));
1156 return -ENOTSUP;
1157 }
1158
1159 if (btf_is_ptr(mtype)) {
1160 struct bpf_program *prog;
1161
1162 /* Update the value from the shadow type */
1163 prog = *(void **)mdata;
1164 st_ops->progs[i] = prog;
1165 if (!prog)
1166 continue;
1167 if (!is_valid_st_ops_program(obj, prog)) {
1168 pr_warn("struct_ops init_kern %s: member %s is not a struct_ops program\n",
1169 map->name, mname);
1170 return -ENOTSUP;
1171 }
1172
1173 kern_mtype = skip_mods_and_typedefs(kern_btf,
1174 kern_mtype->type,
1175 &kern_mtype_id);
1176
1177 /* mtype->type must be a func_proto which was
1178 * guaranteed in bpf_object__collect_st_ops_relos(),
1179 * so only check kern_mtype for func_proto here.
1180 */
1181 if (!btf_is_func_proto(kern_mtype)) {
1182 pr_warn("struct_ops init_kern %s: kernel member %s is not a func ptr\n",
1183 map->name, mname);
1184 return -ENOTSUP;
1185 }
1186
1187 if (mod_btf)
1188 prog->attach_btf_obj_fd = mod_btf->fd;
1189
1190 /* if we haven't yet processed this BPF program, record proper
1191 * attach_btf_id and member_idx
1192 */
1193 if (!prog->attach_btf_id) {
1194 prog->attach_btf_id = kern_type_id;
1195 prog->expected_attach_type = kern_member_idx;
1196 }
1197
1198 /* struct_ops BPF prog can be re-used between multiple
1199 * .struct_ops & .struct_ops.link as long as it's the
1200 * same struct_ops struct definition and the same
1201 * function pointer field
1202 */
1203 if (prog->attach_btf_id != kern_type_id) {
1204 pr_warn("struct_ops init_kern %s func ptr %s: invalid reuse of prog %s in sec %s with type %u: attach_btf_id %u != kern_type_id %u\n",
1205 map->name, mname, prog->name, prog->sec_name, prog->type,
1206 prog->attach_btf_id, kern_type_id);
1207 return -EINVAL;
1208 }
1209 if (prog->expected_attach_type != kern_member_idx) {
1210 pr_warn("struct_ops init_kern %s func ptr %s: invalid reuse of prog %s in sec %s with type %u: expected_attach_type %u != kern_member_idx %u\n",
1211 map->name, mname, prog->name, prog->sec_name, prog->type,
1212 prog->expected_attach_type, kern_member_idx);
1213 return -EINVAL;
1214 }
1215
1216 st_ops->kern_func_off[i] = kern_data_off + kern_moff;
1217
1218 pr_debug("struct_ops init_kern %s: func ptr %s is set to prog %s from data(+%u) to kern_data(+%u)\n",
1219 map->name, mname, prog->name, moff,
1220 kern_moff);
1221
1222 continue;
1223 }
1224
1225 msize = btf__resolve_size(btf, mtype_id);
1226 kern_msize = btf__resolve_size(kern_btf, kern_mtype_id);
1227 if (msize < 0 || kern_msize < 0 || msize != kern_msize) {
1228 pr_warn("struct_ops init_kern %s: Error in size of member %s: %zd != %zd(kernel)\n",
1229 map->name, mname, (ssize_t)msize,
1230 (ssize_t)kern_msize);
1231 return -ENOTSUP;
1232 }
1233
1234 pr_debug("struct_ops init_kern %s: copy %s %u bytes from data(+%u) to kern_data(+%u)\n",
1235 map->name, mname, (unsigned int)msize,
1236 moff, kern_moff);
1237 memcpy(kern_mdata, mdata, msize);
1238 }
1239
1240 return 0;
1241 }
1242
1243 static int bpf_object__init_kern_struct_ops_maps(struct bpf_object *obj)
1244 {
1245 struct bpf_map *map;
1246 size_t i;
1247 int err;
1248
1249 for (i = 0; i < obj->nr_maps; i++) {
1250 map = &obj->maps[i];
1251
1252 if (!bpf_map__is_struct_ops(map))
1253 continue;
1254
1255 if (!map->autocreate)
1256 continue;
1257
1258 err = bpf_map__init_kern_struct_ops(map);
1259 if (err)
1260 return err;
1261 }
1262
1263 return 0;
1264 }
1265
1266 static int init_struct_ops_maps(struct bpf_object *obj, const char *sec_name,
1267 int shndx, Elf_Data *data)
1268 {
1269 const struct btf_type *type, *datasec;
1270 const struct btf_var_secinfo *vsi;
1271 struct bpf_struct_ops *st_ops;
1272 const char *tname, *var_name;
1273 __s32 type_id, datasec_id;
1274 const struct btf *btf;
1275 struct bpf_map *map;
1276 __u32 i;
1277
1278 if (shndx == -1)
1279 return 0;
1280
1281 btf = obj->btf;
1282 datasec_id = btf__find_by_name_kind(btf, sec_name,
1283 BTF_KIND_DATASEC);
1284 if (datasec_id < 0) {
1285 pr_warn("struct_ops init: DATASEC %s not found\n",
1286 sec_name);
1287 return -EINVAL;
1288 }
1289
1290 datasec = btf__type_by_id(btf, datasec_id);
1291 vsi = btf_var_secinfos(datasec);
1292 for (i = 0; i < btf_vlen(datasec); i++, vsi++) {
1293 type = btf__type_by_id(obj->btf, vsi->type);
1294 var_name = btf__name_by_offset(obj->btf, type->name_off);
1295
1296 type_id = btf__resolve_type(obj->btf, vsi->type);
1297 if (type_id < 0) {
1298 pr_warn("struct_ops init: Cannot resolve var type_id %u in DATASEC %s\n",
1299 vsi->type, sec_name);
1300 return -EINVAL;
1301 }
1302
1303 type = btf__type_by_id(obj->btf, type_id);
1304 tname = btf__name_by_offset(obj->btf, type->name_off);
1305 if (!tname[0]) {
1306 pr_warn("struct_ops init: anonymous type is not supported\n");
1307 return -ENOTSUP;
1308 }
1309 if (!btf_is_struct(type)) {
1310 pr_warn("struct_ops init: %s is not a struct\n", tname);
1311 return -EINVAL;
1312 }
1313
1314 map = bpf_object__add_map(obj);
1315 if (IS_ERR(map))
1316 return PTR_ERR(map);
1317
1318 map->sec_idx = shndx;
1319 map->sec_offset = vsi->offset;
1320 map->name = strdup(var_name);
1321 if (!map->name)
1322 return -ENOMEM;
1323 map->btf_value_type_id = type_id;
1324
1325 /* Follow same convention as for programs autoload:
1326 * SEC("?.struct_ops") means map is not created by default.
1327 */
1328 if (sec_name[0] == '?') {
1329 map->autocreate = false;
1330 /* from now on forget there was ? in section name */
1331 sec_name++;
1332 }
1333
1334 map->def.type = BPF_MAP_TYPE_STRUCT_OPS;
1335 map->def.key_size = sizeof(int);
1336 map->def.value_size = type->size;
1337 map->def.max_entries = 1;
1338 map->def.map_flags = strcmp(sec_name, STRUCT_OPS_LINK_SEC) == 0 ? BPF_F_LINK : 0;
1339
1340 map->st_ops = calloc(1, sizeof(*map->st_ops));
1341 if (!map->st_ops)
1342 return -ENOMEM;
1343 st_ops = map->st_ops;
1344 st_ops->data = malloc(type->size);
1345 st_ops->progs = calloc(btf_vlen(type), sizeof(*st_ops->progs));
1346 st_ops->kern_func_off = malloc(btf_vlen(type) *
1347 sizeof(*st_ops->kern_func_off));
1348 if (!st_ops->data || !st_ops->progs || !st_ops->kern_func_off)
1349 return -ENOMEM;
1350
1351 if (vsi->offset + type->size > data->d_size) {
1352 pr_warn("struct_ops init: var %s is beyond the end of DATASEC %s\n",
1353 var_name, sec_name);
1354 return -EINVAL;
1355 }
1356
1357 memcpy(st_ops->data,
1358 data->d_buf + vsi->offset,
1359 type->size);
1360 st_ops->tname = tname;
1361 st_ops->type = type;
1362 st_ops->type_id = type_id;
1363
1364 pr_debug("struct_ops init: struct %s(type_id=%u) %s found at offset %u\n",
1365 tname, type_id, var_name, vsi->offset);
1366 }
1367
1368 return 0;
1369 }
1370
1371 static int bpf_object_init_struct_ops(struct bpf_object *obj)
1372 {
1373 const char *sec_name;
1374 int sec_idx, err;
1375
1376 for (sec_idx = 0; sec_idx < obj->efile.sec_cnt; ++sec_idx) {
1377 struct elf_sec_desc *desc = &obj->efile.secs[sec_idx];
1378
1379 if (desc->sec_type != SEC_ST_OPS)
1380 continue;
1381
1382 sec_name = elf_sec_name(obj, elf_sec_by_idx(obj, sec_idx));
1383 if (!sec_name)
1384 return -LIBBPF_ERRNO__FORMAT;
1385
1386 err = init_struct_ops_maps(obj, sec_name, sec_idx, desc->data);
1387 if (err)
1388 return err;
1389 }
1390
1391 return 0;
1392 }
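/* Illustrative sketch (editorial addition, not part of this file): on the
 * BPF side, each struct_ops map corresponds to a global variable placed in
 * one of the sections scanned above, e.g. (dctcp_init/dctcp_ssthresh being
 * SEC("struct_ops") programs defined elsewhere):
 *
 *	SEC(".struct_ops.link")
 *	struct tcp_congestion_ops dctcp = {
 *		.init		= (void *)dctcp_init,
 *		.ssthresh	= (void *)dctcp_ssthresh,
 *		.name		= "bpf_dctcp",
 *	};
 *
 * The variable becomes a BPF_MAP_TYPE_STRUCT_OPS map named "dctcp", with
 * BPF_F_LINK set because it lives in STRUCT_OPS_LINK_SEC.
 */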
1393
1394 static struct bpf_object *bpf_object__new(const char *path,
1395 const void *obj_buf,
1396 size_t obj_buf_sz,
1397 const char *obj_name)
1398 {
1399 struct bpf_object *obj;
1400 char *end;
1401
1402 obj = calloc(1, sizeof(struct bpf_object) + strlen(path) + 1);
1403 if (!obj) {
1404 pr_warn("alloc memory failed for %s\n", path);
1405 return ERR_PTR(-ENOMEM);
1406 }
1407
1408 strcpy(obj->path, path);
1409 if (obj_name) {
1410 libbpf_strlcpy(obj->name, obj_name, sizeof(obj->name));
1411 } else {
1412 /* Using basename() GNU version which doesn't modify arg. */
1413 libbpf_strlcpy(obj->name, basename((void *)path), sizeof(obj->name));
1414 end = strchr(obj->name, '.');
1415 if (end)
1416 *end = 0;
1417 }
1418
1419 obj->efile.fd = -1;
1420 /*
1421 * Caller of this function should also call
1422 * bpf_object__elf_finish() after data collection to return
1423 * obj_buf to the user. Otherwise we would have to duplicate the buffer
1424 * to avoid the user freeing it before ELF processing finishes.
1425 */
1426 obj->efile.obj_buf = obj_buf;
1427 obj->efile.obj_buf_sz = obj_buf_sz;
1428 obj->efile.btf_maps_shndx = -1;
1429 obj->kconfig_map_idx = -1;
1430
1431 obj->kern_version = get_kernel_version();
1432 obj->loaded = false;
1433
1434 return obj;
1435 }
1436
1437 static void bpf_object__elf_finish(struct bpf_object *obj)
1438 {
1439 if (!obj->efile.elf)
1440 return;
1441
1442 elf_end(obj->efile.elf);
1443 obj->efile.elf = NULL;
1444 obj->efile.symbols = NULL;
1445
1446 zfree(&obj->efile.secs);
1447 obj->efile.sec_cnt = 0;
1448 zclose(obj->efile.fd);
1449 obj->efile.obj_buf = NULL;
1450 obj->efile.obj_buf_sz = 0;
1451 }
1452
1453 static int bpf_object__elf_init(struct bpf_object *obj)
1454 {
1455 Elf64_Ehdr *ehdr;
1456 int err = 0;
1457 Elf *elf;
1458
1459 if (obj->efile.elf) {
1460 pr_warn("elf: init internal error\n");
1461 return -LIBBPF_ERRNO__LIBELF;
1462 }
1463
1464 if (obj->efile.obj_buf_sz > 0) {
1465 /* obj_buf should have been validated by bpf_object__open_mem(). */
1466 elf = elf_memory((char *)obj->efile.obj_buf, obj->efile.obj_buf_sz);
1467 } else {
1468 obj->efile.fd = open(obj->path, O_RDONLY | O_CLOEXEC);
1469 if (obj->efile.fd < 0) {
1470 char errmsg[STRERR_BUFSIZE], *cp;
1471
1472 err = -errno;
1473 cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg));
1474 pr_warn("elf: failed to open %s: %s\n", obj->path, cp);
1475 return err;
1476 }
1477
1478 elf = elf_begin(obj->efile.fd, ELF_C_READ_MMAP, NULL);
1479 }
1480
1481 if (!elf) {
1482 pr_warn("elf: failed to open %s as ELF file: %s\n", obj->path, elf_errmsg(-1));
1483 err = -LIBBPF_ERRNO__LIBELF;
1484 goto errout;
1485 }
1486
1487 obj->efile.elf = elf;
1488
1489 if (elf_kind(elf) != ELF_K_ELF) {
1490 err = -LIBBPF_ERRNO__FORMAT;
1491 pr_warn("elf: '%s' is not a proper ELF object\n", obj->path);
1492 goto errout;
1493 }
1494
1495 if (gelf_getclass(elf) != ELFCLASS64) {
1496 err = -LIBBPF_ERRNO__FORMAT;
1497 pr_warn("elf: '%s' is not a 64-bit ELF object\n", obj->path);
1498 goto errout;
1499 }
1500
1501 obj->efile.ehdr = ehdr = elf64_getehdr(elf);
1502 if (!obj->efile.ehdr) {
1503 pr_warn("elf: failed to get ELF header from %s: %s\n", obj->path, elf_errmsg(-1));
1504 err = -LIBBPF_ERRNO__FORMAT;
1505 goto errout;
1506 }
1507
1508 if (elf_getshdrstrndx(elf, &obj->efile.shstrndx)) {
1509 pr_warn("elf: failed to get section names section index for %s: %s\n",
1510 obj->path, elf_errmsg(-1));
1511 err = -LIBBPF_ERRNO__FORMAT;
1512 goto errout;
1513 }
1514
1515 /* ELF is corrupted/truncated, avoid calling elf_strptr. */
1516 if (!elf_rawdata(elf_getscn(elf, obj->efile.shstrndx), NULL)) {
1517 pr_warn("elf: failed to get section names strings from %s: %s\n",
1518 obj->path, elf_errmsg(-1));
1519 err = -LIBBPF_ERRNO__FORMAT;
1520 goto errout;
1521 }
1522
1523 /* Old LLVM set e_machine to EM_NONE */
1524 if (ehdr->e_type != ET_REL || (ehdr->e_machine && ehdr->e_machine != EM_BPF)) {
1525 pr_warn("elf: %s is not a valid eBPF object file\n", obj->path);
1526 err = -LIBBPF_ERRNO__FORMAT;
1527 goto errout;
1528 }
1529
1530 return 0;
1531 errout:
1532 bpf_object__elf_finish(obj);
1533 return err;
1534 }
1535
1536 static int bpf_object__check_endianness(struct bpf_object *obj)
1537 {
1538 #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
1539 if (obj->efile.ehdr->e_ident[EI_DATA] == ELFDATA2LSB)
1540 return 0;
1541 #elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
1542 if (obj->efile.ehdr->e_ident[EI_DATA] == ELFDATA2MSB)
1543 return 0;
1544 #else
1545 # error "Unrecognized __BYTE_ORDER__"
1546 #endif
1547 pr_warn("elf: endianness mismatch in %s.\n", obj->path);
1548 return -LIBBPF_ERRNO__ENDIAN;
1549 }
1550
1551 static int
1552 bpf_object__init_license(struct bpf_object *obj, void *data, size_t size)
1553 {
1554 if (!data) {
1555 pr_warn("invalid license section in %s\n", obj->path);
1556 return -LIBBPF_ERRNO__FORMAT;
1557 }
1558 /* libbpf_strlcpy() only copies first N - 1 bytes, so size + 1 won't
1559 * go over allowed ELF data section buffer
1560 */
1561 libbpf_strlcpy(obj->license, data, min(size + 1, sizeof(obj->license)));
1562 pr_debug("license of %s is %s\n", obj->path, obj->license);
1563 return 0;
1564 }
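/* Illustrative sketch (editorial addition, not part of this file): the
 * license section parsed above comes from a declaration like
 *
 *	char LICENSE[] SEC("license") = "Dual BSD/GPL";
 *
 * in the BPF program source; the string is later passed to the kernel in
 * the license field of the BPF_PROG_LOAD command and gates GPL-only helpers.
 */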
1565
1566 static int
1567 bpf_object__init_kversion(struct bpf_object *obj, void *data, size_t size)
1568 {
1569 __u32 kver;
1570
1571 if (!data || size != sizeof(kver)) {
1572 pr_warn("invalid kver section in %s\n", obj->path);
1573 return -LIBBPF_ERRNO__FORMAT;
1574 }
1575 memcpy(&kver, data, sizeof(kver));
1576 obj->kern_version = kver;
1577 pr_debug("kernel version of %s is %x\n", obj->path, obj->kern_version);
1578 return 0;
1579 }
1580
1581 static bool bpf_map_type__is_map_in_map(enum bpf_map_type type)
1582 {
1583 if (type == BPF_MAP_TYPE_ARRAY_OF_MAPS ||
1584 type == BPF_MAP_TYPE_HASH_OF_MAPS)
1585 return true;
1586 return false;
1587 }
1588
1589 static int find_elf_sec_sz(const struct bpf_object *obj, const char *name, __u32 *size)
1590 {
1591 Elf_Data *data;
1592 Elf_Scn *scn;
1593
1594 if (!name)
1595 return -EINVAL;
1596
1597 scn = elf_sec_by_name(obj, name);
1598 data = elf_sec_data(obj, scn);
1599 if (data) {
1600 *size = data->d_size;
1601 return 0; /* found it */
1602 }
1603
1604 return -ENOENT;
1605 }
1606
1607 static Elf64_Sym *find_elf_var_sym(const struct bpf_object *obj, const char *name)
1608 {
1609 Elf_Data *symbols = obj->efile.symbols;
1610 const char *sname;
1611 size_t si;
1612
1613 for (si = 0; si < symbols->d_size / sizeof(Elf64_Sym); si++) {
1614 Elf64_Sym *sym = elf_sym_by_idx(obj, si);
1615
1616 if (ELF64_ST_TYPE(sym->st_info) != STT_OBJECT)
1617 continue;
1618
1619 if (ELF64_ST_BIND(sym->st_info) != STB_GLOBAL &&
1620 ELF64_ST_BIND(sym->st_info) != STB_WEAK)
1621 continue;
1622
1623 sname = elf_sym_str(obj, sym->st_name);
1624 if (!sname) {
1625 pr_warn("failed to get sym name string for var %s\n", name);
1626 return ERR_PTR(-EIO);
1627 }
1628 if (strcmp(name, sname) == 0)
1629 return sym;
1630 }
1631
1632 return ERR_PTR(-ENOENT);
1633 }
1634
1635 /* Some versions of Android don't provide memfd_create() in their libc
1636 * implementation, so avoid complications and just go straight to Linux
1637 * syscall.
1638 */
1639 static int sys_memfd_create(const char *name, unsigned flags)
1640 {
1641 return syscall(__NR_memfd_create, name, flags);
1642 }
1643
1644 static int create_placeholder_fd(void)
1645 {
1646 int fd;
1647
1648 fd = ensure_good_fd(sys_memfd_create("libbpf-placeholder-fd", MFD_CLOEXEC));
1649 if (fd < 0)
1650 return -errno;
1651 return fd;
1652 }
1653
1654 static struct bpf_map *bpf_object__add_map(struct bpf_object *obj)
1655 {
1656 struct bpf_map *map;
1657 int err;
1658
1659 err = libbpf_ensure_mem((void **)&obj->maps, &obj->maps_cap,
1660 sizeof(*obj->maps), obj->nr_maps + 1);
1661 if (err)
1662 return ERR_PTR(err);
1663
1664 map = &obj->maps[obj->nr_maps++];
1665 map->obj = obj;
1666 /* Preallocate map FD without actually creating BPF map just yet.
1667 * These map FD "placeholders" will be reused later without changing
1668 * FD value when map is actually created in the kernel.
1669 *
1670 * This is useful to be able to perform BPF program relocations
1671 * without having to create BPF maps before that step. This allows us
1672 * to finalize and load BTF very late in BPF object's loading phase,
1673 * right before BPF maps have to be created and BPF programs have to
1674 * be loaded. By having these map FD placeholders we can perform all
1675 * the sanitizations, relocations, and any other adjustments before we
1676 * start creating actual BPF kernel objects (BTF, maps, progs).
1677 */
1678 map->fd = create_placeholder_fd();
1679 if (map->fd < 0)
1680 return ERR_PTR(map->fd);
1681 map->inner_map_fd = -1;
1682 map->autocreate = true;
1683
1684 return map;
1685 }
1686
1687 static size_t bpf_map_mmap_sz(unsigned int value_sz, unsigned int max_entries)
1688 {
1689 const long page_sz = sysconf(_SC_PAGE_SIZE);
1690 size_t map_sz;
1691
1692 map_sz = (size_t)roundup(value_sz, 8) * max_entries;
1693 map_sz = roundup(map_sz, page_sz);
1694 return map_sz;
1695 }
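/* Editorial example (not part of this file): with a 4096-byte page,
 * value_sz = 7 and max_entries = 3 gives roundup(7, 8) * 3 = 24 bytes of
 * data, rounded up to one 4096-byte page; value_sz = 4096 and
 * max_entries = 3 maps exactly three pages (12288 bytes).
 */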
1696
1697 static int bpf_map_mmap_resize(struct bpf_map *map, size_t old_sz, size_t new_sz)
1698 {
1699 void *mmaped;
1700
1701 if (!map->mmaped)
1702 return -EINVAL;
1703
1704 if (old_sz == new_sz)
1705 return 0;
1706
1707 mmaped = mmap(NULL, new_sz, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, -1, 0);
1708 if (mmaped == MAP_FAILED)
1709 return -errno;
1710
1711 memcpy(mmaped, map->mmaped, min(old_sz, new_sz));
1712 munmap(map->mmaped, old_sz);
1713 map->mmaped = mmaped;
1714 return 0;
1715 }
1716
1717 static char *internal_map_name(struct bpf_object *obj, const char *real_name)
1718 {
1719 char map_name[BPF_OBJ_NAME_LEN], *p;
1720 int pfx_len, sfx_len = max((size_t)7, strlen(real_name));
1721
1722 /* This is one of the more confusing parts of libbpf for various
1723 * reasons, some of which are historical. The original idea for naming
1724 * internal names was to include as much of BPF object name prefix as
1725 * possible, so that it can be distinguished from similar internal
1726 * maps of a different BPF object.
1727 * As an example, let's say we have bpf_object named 'my_object_name'
1728 * and internal map corresponding to '.rodata' ELF section. The final
1729 * map name advertised to user and to the kernel will be
1730 * 'my_objec.rodata', taking first 8 characters of object name and
1731 * entire 7 characters of '.rodata'.
1732 * Somewhat confusingly, if internal map ELF section name is shorter
1733 * than 7 characters, e.g., '.bss', we still reserve 7 characters
1734 * for the suffix, even though we only have 4 actual characters, and
1735 * resulting map will be called 'my_objec.bss', not even using all 15
1736 * characters allowed by the kernel. Oh well, at least the truncated
1737 * object name is somewhat consistent in this case. But if the map
1738 * name is '.kconfig', we'll still have entirety of '.kconfig' added
1739 * (8 chars) and thus will be left with only first 7 characters of the
1740 * object name ('my_obje'). Happy guessing, user, that the final map
1741 * name will be "my_obje.kconfig".
1742 * Now, with libbpf starting to support arbitrarily named .rodata.*
1743 * and .data.* data sections, it's possible that ELF section name is
1744 * longer than allowed 15 chars, so we now need to be careful to take
1745 * only up to 15 first characters of ELF name, taking no BPF object
1746 * name characters at all. So '.rodata.abracadabra' will result in
1747 * '.rodata.abracad' kernel and user-visible name.
1748 * We need to keep this convoluted logic intact for .data, .bss and
1749 * .rodata maps, but for new custom .data.custom and .rodata.custom
1750 * maps we use their ELF names as is, not prepending bpf_object name
1751 * in front. We still need to truncate them to 15 characters for the
1752 * kernel. Full name can be recovered for such maps by using DATASEC
1753 * BTF type associated with such map's value type, though.
1754 */
1755 if (sfx_len >= BPF_OBJ_NAME_LEN)
1756 sfx_len = BPF_OBJ_NAME_LEN - 1;
1757
1758 /* if there are two or more dots in map name, it's a custom dot map */
1759 if (strchr(real_name + 1, '.') != NULL)
1760 pfx_len = 0;
1761 else
1762 pfx_len = min((size_t)BPF_OBJ_NAME_LEN - sfx_len - 1, strlen(obj->name));
1763
1764 snprintf(map_name, sizeof(map_name), "%.*s%.*s", pfx_len, obj->name,
1765 sfx_len, real_name);
1766
1767 /* sanitise map name to characters allowed by kernel */
1768 for (p = map_name; *p && p < map_name + sizeof(map_name); p++)
1769 if (!isalnum(*p) && *p != '_' && *p != '.')
1770 *p = '_';
1771
1772 return strdup(map_name);
1773 }
1774
1775 static int
1776 map_fill_btf_type_info(struct bpf_object *obj, struct bpf_map *map);
1777
1778 /* Internal BPF map is mmap()'able only if at least one of the corresponding
1779 * DATASEC's VARs is to be exposed through BPF skeleton. I.e., it's a GLOBAL
1780 * variable and it's not marked as __hidden (which turns it into, effectively,
1781 * a STATIC variable).
1782 */
1783 static bool map_is_mmapable(struct bpf_object *obj, struct bpf_map *map)
1784 {
1785 const struct btf_type *t, *vt;
1786 struct btf_var_secinfo *vsi;
1787 int i, n;
1788
1789 if (!map->btf_value_type_id)
1790 return false;
1791
1792 t = btf__type_by_id(obj->btf, map->btf_value_type_id);
1793 if (!btf_is_datasec(t))
1794 return false;
1795
1796 vsi = btf_var_secinfos(t);
1797 for (i = 0, n = btf_vlen(t); i < n; i++, vsi++) {
1798 vt = btf__type_by_id(obj->btf, vsi->type);
1799 if (!btf_is_var(vt))
1800 continue;
1801
1802 if (btf_var(vt)->linkage != BTF_VAR_STATIC)
1803 return true;
1804 }
1805
1806 return false;
1807 }
1808
1809 static int
1810 bpf_object__init_internal_map(struct bpf_object *obj, enum libbpf_map_type type,
1811 const char *real_name, int sec_idx, void *data, size_t data_sz)
1812 {
1813 struct bpf_map_def *def;
1814 struct bpf_map *map;
1815 size_t mmap_sz;
1816 int err;
1817
1818 map = bpf_object__add_map(obj);
1819 if (IS_ERR(map))
1820 return PTR_ERR(map);
1821
1822 map->libbpf_type = type;
1823 map->sec_idx = sec_idx;
1824 map->sec_offset = 0;
1825 map->real_name = strdup(real_name);
1826 map->name = internal_map_name(obj, real_name);
1827 if (!map->real_name || !map->name) {
1828 zfree(&map->real_name);
1829 zfree(&map->name);
1830 return -ENOMEM;
1831 }
1832
1833 def = &map->def;
1834 def->type = BPF_MAP_TYPE_ARRAY;
1835 def->key_size = sizeof(int);
1836 def->value_size = data_sz;
1837 def->max_entries = 1;
1838 def->map_flags = type == LIBBPF_MAP_RODATA || type == LIBBPF_MAP_KCONFIG
1839 ? BPF_F_RDONLY_PROG : 0;
1840
1841 /* failures are fine because of maps like .rodata.str1.1 */
1842 (void) map_fill_btf_type_info(obj, map);
1843
1844 if (map_is_mmapable(obj, map))
1845 def->map_flags |= BPF_F_MMAPABLE;
1846
1847 pr_debug("map '%s' (global data): at sec_idx %d, offset %zu, flags %x.\n",
1848 map->name, map->sec_idx, map->sec_offset, def->map_flags);
1849
1850 mmap_sz = bpf_map_mmap_sz(map->def.value_size, map->def.max_entries);
1851 map->mmaped = mmap(NULL, mmap_sz, PROT_READ | PROT_WRITE,
1852 MAP_SHARED | MAP_ANONYMOUS, -1, 0);
1853 if (map->mmaped == MAP_FAILED) {
1854 err = -errno;
1855 map->mmaped = NULL;
1856 pr_warn("failed to alloc map '%s' content buffer: %d\n",
1857 map->name, err);
1858 zfree(&map->real_name);
1859 zfree(&map->name);
1860 return err;
1861 }
1862
1863 if (data)
1864 memcpy(map->mmaped, data, data_sz);
1865
1866 pr_debug("map %td is \"%s\"\n", map - obj->maps, map->name);
1867 return 0;
1868 }
1869
1870 static int bpf_object__init_global_data_maps(struct bpf_object *obj)
1871 {
1872 struct elf_sec_desc *sec_desc;
1873 const char *sec_name;
1874 int err = 0, sec_idx;
1875
1876 /*
1877 * Populate obj->maps with libbpf internal maps.
1878 */
1879 for (sec_idx = 1; sec_idx < obj->efile.sec_cnt; sec_idx++) {
1880 sec_desc = &obj->efile.secs[sec_idx];
1881
1882 /* Skip recognized sections with size 0. */
1883 if (!sec_desc->data || sec_desc->data->d_size == 0)
1884 continue;
1885
1886 switch (sec_desc->sec_type) {
1887 case SEC_DATA:
1888 sec_name = elf_sec_name(obj, elf_sec_by_idx(obj, sec_idx));
1889 err = bpf_object__init_internal_map(obj, LIBBPF_MAP_DATA,
1890 sec_name, sec_idx,
1891 sec_desc->data->d_buf,
1892 sec_desc->data->d_size);
1893 break;
1894 case SEC_RODATA:
1895 obj->has_rodata = true;
1896 sec_name = elf_sec_name(obj, elf_sec_by_idx(obj, sec_idx));
1897 err = bpf_object__init_internal_map(obj, LIBBPF_MAP_RODATA,
1898 sec_name, sec_idx,
1899 sec_desc->data->d_buf,
1900 sec_desc->data->d_size);
1901 break;
1902 case SEC_BSS:
1903 sec_name = elf_sec_name(obj, elf_sec_by_idx(obj, sec_idx));
1904 err = bpf_object__init_internal_map(obj, LIBBPF_MAP_BSS,
1905 sec_name, sec_idx,
1906 NULL,
1907 sec_desc->data->d_size);
1908 break;
1909 default:
1910 /* skip */
1911 break;
1912 }
1913 if (err)
1914 return err;
1915 }
1916 return 0;
1917 }
1918
1919
1920 static struct extern_desc *find_extern_by_name(const struct bpf_object *obj,
1921 const void *name)
1922 {
1923 int i;
1924
1925 for (i = 0; i < obj->nr_extern; i++) {
1926 if (strcmp(obj->externs[i].name, name) == 0)
1927 return &obj->externs[i];
1928 }
1929 return NULL;
1930 }
1931
1932 static int set_kcfg_value_tri(struct extern_desc *ext, void *ext_val,
1933 char value)
1934 {
1935 switch (ext->kcfg.type) {
1936 case KCFG_BOOL:
1937 if (value == 'm') {
1938 pr_warn("extern (kcfg) '%s': value '%c' implies tristate or char type\n",
1939 ext->name, value);
1940 return -EINVAL;
1941 }
1942 *(bool *)ext_val = value == 'y' ? true : false;
1943 break;
1944 case KCFG_TRISTATE:
1945 if (value == 'y')
1946 *(enum libbpf_tristate *)ext_val = TRI_YES;
1947 else if (value == 'm')
1948 *(enum libbpf_tristate *)ext_val = TRI_MODULE;
1949 else /* value == 'n' */
1950 *(enum libbpf_tristate *)ext_val = TRI_NO;
1951 break;
1952 case KCFG_CHAR:
1953 *(char *)ext_val = value;
1954 break;
1955 case KCFG_UNKNOWN:
1956 case KCFG_INT:
1957 case KCFG_CHAR_ARR:
1958 default:
1959 pr_warn("extern (kcfg) '%s': value '%c' implies bool, tristate, or char type\n",
1960 ext->name, value);
1961 return -EINVAL;
1962 }
1963 ext->is_set = true;
1964 return 0;
1965 }
1966
1967 static int set_kcfg_value_str(struct extern_desc *ext, char *ext_val,
1968 const char *value)
1969 {
1970 size_t len;
1971
1972 if (ext->kcfg.type != KCFG_CHAR_ARR) {
1973 pr_warn("extern (kcfg) '%s': value '%s' implies char array type\n",
1974 ext->name, value);
1975 return -EINVAL;
1976 }
1977
1978 len = strlen(value);
1979 if (value[len - 1] != '"') {
1980 pr_warn("extern (kcfg) '%s': invalid string config '%s'\n",
1981 ext->name, value);
1982 return -EINVAL;
1983 }
1984
1985 /* strip quotes */
1986 len -= 2;
1987 if (len >= ext->kcfg.sz) {
1988 pr_warn("extern (kcfg) '%s': long string '%s' of (%zu bytes) truncated to %d bytes\n",
1989 ext->name, value, len, ext->kcfg.sz - 1);
1990 len = ext->kcfg.sz - 1;
1991 }
1992 memcpy(ext_val, value + 1, len);
1993 ext_val[len] = '\0';
1994 ext->is_set = true;
1995 return 0;
1996 }
1997
1998 static int parse_u64(const char *value, __u64 *res)
1999 {
2000 char *value_end;
2001 int err;
2002
2003 errno = 0;
2004 *res = strtoull(value, &value_end, 0);
2005 if (errno) {
2006 err = -errno;
2007 pr_warn("failed to parse '%s' as integer: %d\n", value, err);
2008 return err;
2009 }
2010 if (*value_end) {
2011 pr_warn("failed to parse '%s' as integer completely\n", value);
2012 return -EINVAL;
2013 }
2014 return 0;
2015 }
2016
2017 static bool is_kcfg_value_in_range(const struct extern_desc *ext, __u64 v)
2018 {
2019 int bit_sz = ext->kcfg.sz * 8;
2020
2021 if (ext->kcfg.sz == 8)
2022 return true;
2023
2024 * Validate that the value stored in u64 fits in an integer of
2025 * `ext->sz` bytes without any loss of information. If the target
2026 * integer is signed, we rely on the following limits of a Y-bit
2027 * integer type and the subsequent transformation:
2028 *
2029 * -2^(Y-1) <= X <= 2^(Y-1) - 1
2030 * 0 <= X + 2^(Y-1) <= 2^Y - 1
2031 * 0 <= X + 2^(Y-1) < 2^Y
2032 *
2033 * For unsigned target integer, check that all the (64 - Y) bits are
2034 * zero.
2035 */
2036 if (ext->kcfg.is_signed)
2037 return v + (1ULL << (bit_sz - 1)) < (1ULL << bit_sz);
2038 else
2039 return (v >> bit_sz) == 0;
2040 }
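/* Editor's worked example for the signed branch above, with a 1-byte
 * target (bit_sz == 8, valid range -128..127):
 *
 *   v = (__u64)-5   -> 0xff..fb + 0x80 wraps to 0x7b (123)  < 256: accepted
 *   v = 127         -> 127 + 128 = 255                      < 256: accepted
 *   v = 200         -> 200 + 128 = 328                     >= 256: rejected
 *   v = (__u64)-129 -> 0xff..7f + 0x80 = 0xff..ff          >= 256: rejected
 *
 * For the unsigned branch, e.g. bit_sz == 8: 255 passes ((255 >> 8) == 0)
 * while 256 is rejected.
 */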
2041
2042 static int set_kcfg_value_num(struct extern_desc *ext, void *ext_val,
2043 __u64 value)
2044 {
2045 if (ext->kcfg.type != KCFG_INT && ext->kcfg.type != KCFG_CHAR &&
2046 ext->kcfg.type != KCFG_BOOL) {
2047 pr_warn("extern (kcfg) '%s': value '%llu' implies integer, char, or boolean type\n",
2048 ext->name, (unsigned long long)value);
2049 return -EINVAL;
2050 }
2051 if (ext->kcfg.type == KCFG_BOOL && value > 1) {
2052 pr_warn("extern (kcfg) '%s': value '%llu' isn't boolean compatible\n",
2053 ext->name, (unsigned long long)value);
2054 return -EINVAL;
2055
2056 }
2057 if (!is_kcfg_value_in_range(ext, value)) {
2058 pr_warn("extern (kcfg) '%s': value '%llu' doesn't fit in %d bytes\n",
2059 ext->name, (unsigned long long)value, ext->kcfg.sz);
2060 return -ERANGE;
2061 }
2062 switch (ext->kcfg.sz) {
2063 case 1:
2064 *(__u8 *)ext_val = value;
2065 break;
2066 case 2:
2067 *(__u16 *)ext_val = value;
2068 break;
2069 case 4:
2070 *(__u32 *)ext_val = value;
2071 break;
2072 case 8:
2073 *(__u64 *)ext_val = value;
2074 break;
2075 default:
2076 return -EINVAL;
2077 }
2078 ext->is_set = true;
2079 return 0;
2080 }
2081
2082 static int bpf_object__process_kconfig_line(struct bpf_object *obj,
2083 char *buf, void *data)
2084 {
2085 struct extern_desc *ext;
2086 char *sep, *value;
2087 int len, err = 0;
2088 void *ext_val;
2089 __u64 num;
2090
2091 if (!str_has_pfx(buf, "CONFIG_"))
2092 return 0;
2093
2094 sep = strchr(buf, '=');
2095 if (!sep) {
2096 pr_warn("failed to parse '%s': no separator\n", buf);
2097 return -EINVAL;
2098 }
2099
2100 /* Trim ending '\n' */
2101 len = strlen(buf);
2102 if (buf[len - 1] == '\n')
2103 buf[len - 1] = '\0';
2104 /* Split on '=' and ensure that a value is present. */
2105 *sep = '\0';
2106 if (!sep[1]) {
2107 *sep = '=';
2108 pr_warn("failed to parse '%s': no value\n", buf);
2109 return -EINVAL;
2110 }
2111
2112 ext = find_extern_by_name(obj, buf);
2113 if (!ext || ext->is_set)
2114 return 0;
2115
2116 ext_val = data + ext->kcfg.data_off;
2117 value = sep + 1;
2118
2119 switch (*value) {
2120 case 'y': case 'n': case 'm':
2121 err = set_kcfg_value_tri(ext, ext_val, *value);
2122 break;
2123 case '"':
2124 err = set_kcfg_value_str(ext, ext_val, value);
2125 break;
2126 default:
2127 /* assume integer */
2128 err = parse_u64(value, &num);
2129 if (err) {
2130 pr_warn("extern (kcfg) '%s': value '%s' isn't a valid integer\n", ext->name, value);
2131 return err;
2132 }
2133 if (ext->kcfg.type != KCFG_INT && ext->kcfg.type != KCFG_CHAR) {
2134 pr_warn("extern (kcfg) '%s': value '%s' implies integer type\n", ext->name, value);
2135 return -EINVAL;
2136 }
2137 err = set_kcfg_value_num(ext, ext_val, num);
2138 break;
2139 }
2140 if (err)
2141 return err;
2142 pr_debug("extern (kcfg) '%s': set to %s\n", ext->name, value);
2143 return 0;
2144 }
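/* Editor's sketch of how Kconfig lines map to kcfg externs. On the BPF
 * side, externs are declared with the __kconfig attribute from
 * bpf_helpers.h (names below are just examples):
 *
 *   Kconfig line                       matching extern declaration
 *   CONFIG_BPF_SYSCALL=y               extern bool CONFIG_BPF_SYSCALL __kconfig;
 *   CONFIG_MODULES=m                   extern enum libbpf_tristate CONFIG_MODULES __kconfig;
 *   CONFIG_HZ=250                      extern int CONFIG_HZ __kconfig;
 *   CONFIG_DEFAULT_HOSTNAME="(none)"   extern char CONFIG_DEFAULT_HOSTNAME[8] __kconfig;
 *
 * Lines not starting with "CONFIG_", or not matching any declared extern,
 * are silently skipped.
 */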
2145
2146 static int bpf_object__read_kconfig_file(struct bpf_object *obj, void *data)
2147 {
2148 char buf[PATH_MAX];
2149 struct utsname uts;
2150 int len, err = 0;
2151 gzFile file;
2152
2153 uname(&uts);
2154 len = snprintf(buf, PATH_MAX, "/boot/config-%s", uts.release);
2155 if (len < 0)
2156 return -EINVAL;
2157 else if (len >= PATH_MAX)
2158 return -ENAMETOOLONG;
2159
2160 /* gzopen also accepts uncompressed files. */
2161 file = gzopen(buf, "re");
2162 if (!file)
2163 file = gzopen("/proc/config.gz", "re");
2164
2165 if (!file) {
2166 pr_warn("failed to open system Kconfig\n");
2167 return -ENOENT;
2168 }
2169
2170 while (gzgets(file, buf, sizeof(buf))) {
2171 err = bpf_object__process_kconfig_line(obj, buf, data);
2172 if (err) {
2173 pr_warn("error parsing system Kconfig line '%s': %d\n",
2174 buf, err);
2175 goto out;
2176 }
2177 }
2178
2179 out:
2180 gzclose(file);
2181 return err;
2182 }
2183
2184 static int bpf_object__read_kconfig_mem(struct bpf_object *obj,
2185 const char *config, void *data)
2186 {
2187 char buf[PATH_MAX];
2188 int err = 0;
2189 FILE *file;
2190
2191 file = fmemopen((void *)config, strlen(config), "r");
2192 if (!file) {
2193 err = -errno;
2194 pr_warn("failed to open in-memory Kconfig: %d\n", err);
2195 return err;
2196 }
2197
2198 while (fgets(buf, sizeof(buf), file)) {
2199 err = bpf_object__process_kconfig_line(obj, buf, data);
2200 if (err) {
2201 pr_warn("error parsing in-memory Kconfig line '%s': %d\n",
2202 buf, err);
2203 break;
2204 }
2205 }
2206
2207 fclose(file);
2208 return err;
2209 }
2210
2211 static int bpf_object__init_kconfig_map(struct bpf_object *obj)
2212 {
2213 struct extern_desc *last_ext = NULL, *ext;
2214 size_t map_sz;
2215 int i, err;
2216
2217 for (i = 0; i < obj->nr_extern; i++) {
2218 ext = &obj->externs[i];
2219 if (ext->type == EXT_KCFG)
2220 last_ext = ext;
2221 }
2222
2223 if (!last_ext)
2224 return 0;
2225
2226 map_sz = last_ext->kcfg.data_off + last_ext->kcfg.sz;
2227 err = bpf_object__init_internal_map(obj, LIBBPF_MAP_KCONFIG,
2228 ".kconfig", obj->efile.symbols_shndx,
2229 NULL, map_sz);
2230 if (err)
2231 return err;
2232
2233 obj->kconfig_map_idx = obj->nr_maps - 1;
2234
2235 return 0;
2236 }
2237
2238 const struct btf_type *
2239 skip_mods_and_typedefs(const struct btf *btf, __u32 id, __u32 *res_id)
2240 {
2241 const struct btf_type *t = btf__type_by_id(btf, id);
2242
2243 if (res_id)
2244 *res_id = id;
2245
2246 while (btf_is_mod(t) || btf_is_typedef(t)) {
2247 if (res_id)
2248 *res_id = t->type;
2249 t = btf__type_by_id(btf, t->type);
2250 }
2251
2252 return t;
2253 }
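/* Editor's illustration: for BPF-side C such as
 *
 *   typedef const volatile struct foo foo_cv_t;
 *   foo_cv_t *p;
 *
 * the BTF chain behind foo_cv_t is TYPEDEF -> CONST -> VOLATILE -> STRUCT
 * (exact modifier order may vary); skip_mods_and_typedefs() walks that
 * chain, returns the STRUCT type, and (if res_id is non-NULL) reports the
 * STRUCT's type ID.
 */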
2254
2255 static const struct btf_type *
2256 resolve_func_ptr(const struct btf *btf, __u32 id, __u32 *res_id)
2257 {
2258 const struct btf_type *t;
2259
2260 t = skip_mods_and_typedefs(btf, id, NULL);
2261 if (!btf_is_ptr(t))
2262 return NULL;
2263
2264 t = skip_mods_and_typedefs(btf, t->type, res_id);
2265
2266 return btf_is_func_proto(t) ? t : NULL;
2267 }
2268
2269 static const char *__btf_kind_str(__u16 kind)
2270 {
2271 switch (kind) {
2272 case BTF_KIND_UNKN: return "void";
2273 case BTF_KIND_INT: return "int";
2274 case BTF_KIND_PTR: return "ptr";
2275 case BTF_KIND_ARRAY: return "array";
2276 case BTF_KIND_STRUCT: return "struct";
2277 case BTF_KIND_UNION: return "union";
2278 case BTF_KIND_ENUM: return "enum";
2279 case BTF_KIND_FWD: return "fwd";
2280 case BTF_KIND_TYPEDEF: return "typedef";
2281 case BTF_KIND_VOLATILE: return "volatile";
2282 case BTF_KIND_CONST: return "const";
2283 case BTF_KIND_RESTRICT: return "restrict";
2284 case BTF_KIND_FUNC: return "func";
2285 case BTF_KIND_FUNC_PROTO: return "func_proto";
2286 case BTF_KIND_VAR: return "var";
2287 case BTF_KIND_DATASEC: return "datasec";
2288 case BTF_KIND_FLOAT: return "float";
2289 case BTF_KIND_DECL_TAG: return "decl_tag";
2290 case BTF_KIND_TYPE_TAG: return "type_tag";
2291 case BTF_KIND_ENUM64: return "enum64";
2292 default: return "unknown";
2293 }
2294 }
2295
2296 const char *btf_kind_str(const struct btf_type *t)
2297 {
2298 return __btf_kind_str(btf_kind(t));
2299 }
2300
2301 /*
2302 * Fetch integer attribute of BTF map definition. Such attributes are
2303 * represented using a pointer to an array, in which dimensionality of array
2304 * encodes specified integer value. E.g., int (*type)[BPF_MAP_TYPE_ARRAY];
2305 * encodes `type => BPF_MAP_TYPE_ARRAY` key/value pair completely using BTF
2306 * type definition, while using only sizeof(void *) space in ELF data section.
2307 */
2308 static bool get_map_field_int(const char *map_name, const struct btf *btf,
2309 const struct btf_member *m, __u32 *res)
2310 {
2311 const struct btf_type *t = skip_mods_and_typedefs(btf, m->type, NULL);
2312 const char *name = btf__name_by_offset(btf, m->name_off);
2313 const struct btf_array *arr_info;
2314 const struct btf_type *arr_t;
2315
2316 if (!btf_is_ptr(t)) {
2317 pr_warn("map '%s': attr '%s': expected PTR, got %s.\n",
2318 map_name, name, btf_kind_str(t));
2319 return false;
2320 }
2321
2322 arr_t = btf__type_by_id(btf, t->type);
2323 if (!arr_t) {
2324 pr_warn("map '%s': attr '%s': type [%u] not found.\n",
2325 map_name, name, t->type);
2326 return false;
2327 }
2328 if (!btf_is_array(arr_t)) {
2329 pr_warn("map '%s': attr '%s': expected ARRAY, got %s.\n",
2330 map_name, name, btf_kind_str(arr_t));
2331 return false;
2332 }
2333 arr_info = btf_array(arr_t);
2334 *res = arr_info->nelems;
2335 return true;
2336 }
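/* Editor's sketch of where this encoding typically comes from: the
 * __uint()/__type() convenience macros in bpf_helpers.h expand to
 * pointer-to-array and pointer members, e.g.
 *
 *   struct {
 *           __uint(type, BPF_MAP_TYPE_HASH);  // int (*type)[BPF_MAP_TYPE_HASH]
 *           __uint(max_entries, 4096);        // int (*max_entries)[4096]
 *           __type(key, __u32);               // __u32 *key
 *           __type(value, __u64);             // __u64 *value
 *   } my_hash SEC(".maps");                   // hypothetical map name
 *
 * get_map_field_int() recovers 'type' and 'max_entries' from the array
 * dimension, while 'key'/'value' carry full types via their pointee.
 */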
2337
2338 static int pathname_concat(char *buf, size_t buf_sz, const char *path, const char *name)
2339 {
2340 int len;
2341
2342 len = snprintf(buf, buf_sz, "%s/%s", path, name);
2343 if (len < 0)
2344 return -EINVAL;
2345 if (len >= buf_sz)
2346 return -ENAMETOOLONG;
2347
2348 return 0;
2349 }
2350
2351 static int build_map_pin_path(struct bpf_map *map, const char *path)
2352 {
2353 char buf[PATH_MAX];
2354 int err;
2355
2356 if (!path)
2357 path = BPF_FS_DEFAULT_PATH;
2358
2359 err = pathname_concat(buf, sizeof(buf), path, bpf_map__name(map));
2360 if (err)
2361 return err;
2362
2363 return bpf_map__set_pin_path(map, buf);
2364 }
2365
2366 /* should match definition in bpf_helpers.h */
2367 enum libbpf_pin_type {
2368 LIBBPF_PIN_NONE,
2369 /* PIN_BY_NAME: pin maps by name (in /sys/fs/bpf by default) */
2370 LIBBPF_PIN_BY_NAME,
2371 };
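/* Editor's sketch of requesting pin-by-name from the BPF side (again using
 * the __uint() macro from bpf_helpers.h; the map name is hypothetical):
 *
 *   struct {
 *           __uint(type, BPF_MAP_TYPE_HASH);
 *           __uint(max_entries, 16);
 *           __type(key, __u32);
 *           __type(value, __u64);
 *           __uint(pinning, LIBBPF_PIN_BY_NAME);
 *   } pinned_map SEC(".maps");
 *
 * Without a pin_root_path override in bpf_object_open_opts, such a map is
 * pinned at BPF_FS_DEFAULT_PATH "/" + map name, i.e. /sys/fs/bpf/pinned_map.
 */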
2372
2373 int parse_btf_map_def(const char *map_name, struct btf *btf,
2374 const struct btf_type *def_t, bool strict,
2375 struct btf_map_def *map_def, struct btf_map_def *inner_def)
2376 {
2377 const struct btf_type *t;
2378 const struct btf_member *m;
2379 bool is_inner = inner_def == NULL;
2380 int vlen, i;
2381
2382 vlen = btf_vlen(def_t);
2383 m = btf_members(def_t);
2384 for (i = 0; i < vlen; i++, m++) {
2385 const char *name = btf__name_by_offset(btf, m->name_off);
2386
2387 if (!name) {
2388 pr_warn("map '%s': invalid field #%d.\n", map_name, i);
2389 return -EINVAL;
2390 }
2391 if (strcmp(name, "type") == 0) {
2392 if (!get_map_field_int(map_name, btf, m, &map_def->map_type))
2393 return -EINVAL;
2394 map_def->parts |= MAP_DEF_MAP_TYPE;
2395 } else if (strcmp(name, "max_entries") == 0) {
2396 if (!get_map_field_int(map_name, btf, m, &map_def->max_entries))
2397 return -EINVAL;
2398 map_def->parts |= MAP_DEF_MAX_ENTRIES;
2399 } else if (strcmp(name, "map_flags") == 0) {
2400 if (!get_map_field_int(map_name, btf, m, &map_def->map_flags))
2401 return -EINVAL;
2402 map_def->parts |= MAP_DEF_MAP_FLAGS;
2403 } else if (strcmp(name, "numa_node") == 0) {
2404 if (!get_map_field_int(map_name, btf, m, &map_def->numa_node))
2405 return -EINVAL;
2406 map_def->parts |= MAP_DEF_NUMA_NODE;
2407 } else if (strcmp(name, "key_size") == 0) {
2408 __u32 sz;
2409
2410 if (!get_map_field_int(map_name, btf, m, &sz))
2411 return -EINVAL;
2412 if (map_def->key_size && map_def->key_size != sz) {
2413 pr_warn("map '%s': conflicting key size %u != %u.\n",
2414 map_name, map_def->key_size, sz);
2415 return -EINVAL;
2416 }
2417 map_def->key_size = sz;
2418 map_def->parts |= MAP_DEF_KEY_SIZE;
2419 } else if (strcmp(name, "key") == 0) {
2420 __s64 sz;
2421
2422 t = btf__type_by_id(btf, m->type);
2423 if (!t) {
2424 pr_warn("map '%s': key type [%d] not found.\n",
2425 map_name, m->type);
2426 return -EINVAL;
2427 }
2428 if (!btf_is_ptr(t)) {
2429 pr_warn("map '%s': key spec is not PTR: %s.\n",
2430 map_name, btf_kind_str(t));
2431 return -EINVAL;
2432 }
2433 sz = btf__resolve_size(btf, t->type);
2434 if (sz < 0) {
2435 pr_warn("map '%s': can't determine key size for type [%u]: %zd.\n",
2436 map_name, t->type, (ssize_t)sz);
2437 return sz;
2438 }
2439 if (map_def->key_size && map_def->key_size != sz) {
2440 pr_warn("map '%s': conflicting key size %u != %zd.\n",
2441 map_name, map_def->key_size, (ssize_t)sz);
2442 return -EINVAL;
2443 }
2444 map_def->key_size = sz;
2445 map_def->key_type_id = t->type;
2446 map_def->parts |= MAP_DEF_KEY_SIZE | MAP_DEF_KEY_TYPE;
2447 } else if (strcmp(name, "value_size") == 0) {
2448 __u32 sz;
2449
2450 if (!get_map_field_int(map_name, btf, m, &sz))
2451 return -EINVAL;
2452 if (map_def->value_size && map_def->value_size != sz) {
2453 pr_warn("map '%s': conflicting value size %u != %u.\n",
2454 map_name, map_def->value_size, sz);
2455 return -EINVAL;
2456 }
2457 map_def->value_size = sz;
2458 map_def->parts |= MAP_DEF_VALUE_SIZE;
2459 } else if (strcmp(name, "value") == 0) {
2460 __s64 sz;
2461
2462 t = btf__type_by_id(btf, m->type);
2463 if (!t) {
2464 pr_warn("map '%s': value type [%d] not found.\n",
2465 map_name, m->type);
2466 return -EINVAL;
2467 }
2468 if (!btf_is_ptr(t)) {
2469 pr_warn("map '%s': value spec is not PTR: %s.\n",
2470 map_name, btf_kind_str(t));
2471 return -EINVAL;
2472 }
2473 sz = btf__resolve_size(btf, t->type);
2474 if (sz < 0) {
2475 pr_warn("map '%s': can't determine value size for type [%u]: %zd.\n",
2476 map_name, t->type, (ssize_t)sz);
2477 return sz;
2478 }
2479 if (map_def->value_size && map_def->value_size != sz) {
2480 pr_warn("map '%s': conflicting value size %u != %zd.\n",
2481 map_name, map_def->value_size, (ssize_t)sz);
2482 return -EINVAL;
2483 }
2484 map_def->value_size = sz;
2485 map_def->value_type_id = t->type;
2486 map_def->parts |= MAP_DEF_VALUE_SIZE | MAP_DEF_VALUE_TYPE;
2487 } else if (strcmp(name, "values") == 0) {
2489 bool is_map_in_map = bpf_map_type__is_map_in_map(map_def->map_type);
2490 bool is_prog_array = map_def->map_type == BPF_MAP_TYPE_PROG_ARRAY;
2491 const char *desc = is_map_in_map ? "map-in-map inner" : "prog-array value";
2492 char inner_map_name[128];
2493 int err;
2494
2495 if (is_inner) {
2496 pr_warn("map '%s': multi-level inner maps not supported.\n",
2497 map_name);
2498 return -ENOTSUP;
2499 }
2500 if (i != vlen - 1) {
2501 pr_warn("map '%s': '%s' member should be last.\n",
2502 map_name, name);
2503 return -EINVAL;
2504 }
2505 if (!is_map_in_map && !is_prog_array) {
2506 pr_warn("map '%s': should be map-in-map or prog-array.\n",
2507 map_name);
2508 return -ENOTSUP;
2509 }
2510 if (map_def->value_size && map_def->value_size != 4) {
2511 pr_warn("map '%s': conflicting value size %u != 4.\n",
2512 map_name, map_def->value_size);
2513 return -EINVAL;
2514 }
2515 map_def->value_size = 4;
2516 t = btf__type_by_id(btf, m->type);
2517 if (!t) {
2518 pr_warn("map '%s': %s type [%d] not found.\n",
2519 map_name, desc, m->type);
2520 return -EINVAL;
2521 }
2522 if (!btf_is_array(t) || btf_array(t)->nelems) {
2523 pr_warn("map '%s': %s spec is not a zero-sized array.\n",
2524 map_name, desc);
2525 return -EINVAL;
2526 }
2527 t = skip_mods_and_typedefs(btf, btf_array(t)->type, NULL);
2528 if (!btf_is_ptr(t)) {
2529 pr_warn("map '%s': %s def is of unexpected kind %s.\n",
2530 map_name, desc, btf_kind_str(t));
2531 return -EINVAL;
2532 }
2533 t = skip_mods_and_typedefs(btf, t->type, NULL);
2534 if (is_prog_array) {
2535 if (!btf_is_func_proto(t)) {
2536 pr_warn("map '%s': prog-array value def is of unexpected kind %s.\n",
2537 map_name, btf_kind_str(t));
2538 return -EINVAL;
2539 }
2540 continue;
2541 }
2542 if (!btf_is_struct(t)) {
2543 pr_warn("map '%s': map-in-map inner def is of unexpected kind %s.\n",
2544 map_name, btf_kind_str(t));
2545 return -EINVAL;
2546 }
2547
2548 snprintf(inner_map_name, sizeof(inner_map_name), "%s.inner", map_name);
2549 err = parse_btf_map_def(inner_map_name, btf, t, strict, inner_def, NULL);
2550 if (err)
2551 return err;
2552
2553 map_def->parts |= MAP_DEF_INNER_MAP;
2554 } else if (strcmp(name, "pinning") == 0) {
2555 __u32 val;
2556
2557 if (is_inner) {
2558 pr_warn("map '%s': inner def can't be pinned.\n", map_name);
2559 return -EINVAL;
2560 }
2561 if (!get_map_field_int(map_name, btf, m, &val))
2562 return -EINVAL;
2563 if (val != LIBBPF_PIN_NONE && val != LIBBPF_PIN_BY_NAME) {
2564 pr_warn("map '%s': invalid pinning value %u.\n",
2565 map_name, val);
2566 return -EINVAL;
2567 }
2568 map_def->pinning = val;
2569 map_def->parts |= MAP_DEF_PINNING;
2570 } else if (strcmp(name, "map_extra") == 0) {
2571 __u32 map_extra;
2572
2573 if (!get_map_field_int(map_name, btf, m, &map_extra))
2574 return -EINVAL;
2575 map_def->map_extra = map_extra;
2576 map_def->parts |= MAP_DEF_MAP_EXTRA;
2577 } else {
2578 if (strict) {
2579 pr_warn("map '%s': unknown field '%s'.\n", map_name, name);
2580 return -ENOTSUP;
2581 }
2582 pr_debug("map '%s': ignoring unknown field '%s'.\n", map_name, name);
2583 }
2584 }
2585
2586 if (map_def->map_type == BPF_MAP_TYPE_UNSPEC) {
2587 pr_warn("map '%s': map type isn't specified.\n", map_name);
2588 return -EINVAL;
2589 }
2590
2591 return 0;
2592 }
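/* Editor's sketch of the 'values' member for the map-in-map case, using the
 * __array() macro from bpf_helpers.h (it expands to a zero-sized array of
 * pointers, matching the checks above; names are hypothetical):
 *
 *   struct {
 *           __uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS);
 *           __uint(max_entries, 8);
 *           __type(key, int);
 *           __array(values, struct {
 *                   __uint(type, BPF_MAP_TYPE_ARRAY);
 *                   __uint(max_entries, 1);
 *                   __type(key, int);
 *                   __type(value, int);
 *           });
 *   } outer_map SEC(".maps");
 *
 * The anonymous inner struct is parsed recursively into *inner_def; for a
 * BPF_MAP_TYPE_PROG_ARRAY, a func_proto pointer is expected there instead.
 */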
2593
2594 static size_t adjust_ringbuf_sz(size_t sz)
2595 {
2596 __u32 page_sz = sysconf(_SC_PAGE_SIZE);
2597 __u32 mul;
2598
2599 /* if user forgot to set any size, make sure they see error */
2600 if (sz == 0)
2601 return 0;
2602 /* Kernel expects BPF_MAP_TYPE_RINGBUF's max_entries to be
2603 * a power-of-2 multiple of kernel's page size. If user diligently
2604 * satisfied these conditions, pass the size through.
2605 */
2606 if ((sz % page_sz) == 0 && is_pow_of_2(sz / page_sz))
2607 return sz;
2608
2609 /* Otherwise find closest (page_sz * power_of_2) product bigger than
2610 * user-set size to satisfy both user size request and kernel
2611 * requirements and substitute correct max_entries for map creation.
2612 */
2613 for (mul = 1; mul <= UINT_MAX / page_sz; mul <<= 1) {
2614 if (mul * page_sz > sz)
2615 return mul * page_sz;
2616 }
2617
2618 /* if it's impossible to satisfy the conditions (i.e., user size is
2619 * very close to UINT_MAX but is not a power-of-2 multiple of
2620 * page_size) then just return original size and let kernel reject it
2621 */
2622 return sz;
2623 }
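/* Editor's worked example, assuming a 4096-byte page size:
 *
 *   sz = 0          -> returned as-is, so map creation fails visibly
 *   sz = 4096 * 8   -> already a power-of-2 multiple, passed through
 *   sz = 4096 * 12  -> 12 isn't a power of 2, rounded up to 4096 * 16
 *   sz = 1000000    -> rounded up to 4096 * 256 = 1048576
 */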
2624
2625 static bool map_is_ringbuf(const struct bpf_map *map)
2626 {
2627 return map->def.type == BPF_MAP_TYPE_RINGBUF ||
2628 map->def.type == BPF_MAP_TYPE_USER_RINGBUF;
2629 }
2630
2631 static void fill_map_from_def(struct bpf_map *map, const struct btf_map_def *def)
2632 {
2633 map->def.type = def->map_type;
2634 map->def.key_size = def->key_size;
2635 map->def.value_size = def->value_size;
2636 map->def.max_entries = def->max_entries;
2637 map->def.map_flags = def->map_flags;
2638 map->map_extra = def->map_extra;
2639
2640 map->numa_node = def->numa_node;
2641 map->btf_key_type_id = def->key_type_id;
2642 map->btf_value_type_id = def->value_type_id;
2643
2644 /* auto-adjust BPF ringbuf map max_entries to be a multiple of page size */
2645 if (map_is_ringbuf(map))
2646 map->def.max_entries = adjust_ringbuf_sz(map->def.max_entries);
2647
2648 if (def->parts & MAP_DEF_MAP_TYPE)
2649 pr_debug("map '%s': found type = %u.\n", map->name, def->map_type);
2650
2651 if (def->parts & MAP_DEF_KEY_TYPE)
2652 pr_debug("map '%s': found key [%u], sz = %u.\n",
2653 map->name, def->key_type_id, def->key_size);
2654 else if (def->parts & MAP_DEF_KEY_SIZE)
2655 pr_debug("map '%s': found key_size = %u.\n", map->name, def->key_size);
2656
2657 if (def->parts & MAP_DEF_VALUE_TYPE)
2658 pr_debug("map '%s': found value [%u], sz = %u.\n",
2659 map->name, def->value_type_id, def->value_size);
2660 else if (def->parts & MAP_DEF_VALUE_SIZE)
2661 pr_debug("map '%s': found value_size = %u.\n", map->name, def->value_size);
2662
2663 if (def->parts & MAP_DEF_MAX_ENTRIES)
2664 pr_debug("map '%s': found max_entries = %u.\n", map->name, def->max_entries);
2665 if (def->parts & MAP_DEF_MAP_FLAGS)
2666 pr_debug("map '%s': found map_flags = 0x%x.\n", map->name, def->map_flags);
2667 if (def->parts & MAP_DEF_MAP_EXTRA)
2668 pr_debug("map '%s': found map_extra = 0x%llx.\n", map->name,
2669 (unsigned long long)def->map_extra);
2670 if (def->parts & MAP_DEF_PINNING)
2671 pr_debug("map '%s': found pinning = %u.\n", map->name, def->pinning);
2672 if (def->parts & MAP_DEF_NUMA_NODE)
2673 pr_debug("map '%s': found numa_node = %u.\n", map->name, def->numa_node);
2674
2675 if (def->parts & MAP_DEF_INNER_MAP)
2676 pr_debug("map '%s': found inner map definition.\n", map->name);
2677 }
2678
2679 static const char *btf_var_linkage_str(__u32 linkage)
2680 {
2681 switch (linkage) {
2682 case BTF_VAR_STATIC: return "static";
2683 case BTF_VAR_GLOBAL_ALLOCATED: return "global";
2684 case BTF_VAR_GLOBAL_EXTERN: return "extern";
2685 default: return "unknown";
2686 }
2687 }
2688
2689 static int bpf_object__init_user_btf_map(struct bpf_object *obj,
2690 const struct btf_type *sec,
2691 int var_idx, int sec_idx,
2692 const Elf_Data *data, bool strict,
2693 const char *pin_root_path)
2694 {
2695 struct btf_map_def map_def = {}, inner_def = {};
2696 const struct btf_type *var, *def;
2697 const struct btf_var_secinfo *vi;
2698 const struct btf_var *var_extra;
2699 const char *map_name;
2700 struct bpf_map *map;
2701 int err;
2702
2703 vi = btf_var_secinfos(sec) + var_idx;
2704 var = btf__type_by_id(obj->btf, vi->type);
2705 var_extra = btf_var(var);
2706 map_name = btf__name_by_offset(obj->btf, var->name_off);
2707
2708 if (map_name == NULL || map_name[0] == '\0') {
2709 pr_warn("map #%d: empty name.\n", var_idx);
2710 return -EINVAL;
2711 }
2712 if ((__u64)vi->offset + vi->size > data->d_size) {
2713 pr_warn("map '%s' BTF data is corrupted.\n", map_name);
2714 return -EINVAL;
2715 }
2716 if (!btf_is_var(var)) {
2717 pr_warn("map '%s': unexpected var kind %s.\n",
2718 map_name, btf_kind_str(var));
2719 return -EINVAL;
2720 }
2721 if (var_extra->linkage != BTF_VAR_GLOBAL_ALLOCATED) {
2722 pr_warn("map '%s': unsupported map linkage %s.\n",
2723 map_name, btf_var_linkage_str(var_extra->linkage));
2724 return -EOPNOTSUPP;
2725 }
2726
2727 def = skip_mods_and_typedefs(obj->btf, var->type, NULL);
2728 if (!btf_is_struct(def)) {
2729 pr_warn("map '%s': unexpected def kind %s.\n",
2730 map_name, btf_kind_str(var));
2731 return -EINVAL;
2732 }
2733 if (def->size > vi->size) {
2734 pr_warn("map '%s': invalid def size.\n", map_name);
2735 return -EINVAL;
2736 }
2737
2738 map = bpf_object__add_map(obj);
2739 if (IS_ERR(map))
2740 return PTR_ERR(map);
2741 map->name = strdup(map_name);
2742 if (!map->name) {
2743 pr_warn("map '%s': failed to alloc map name.\n", map_name);
2744 return -ENOMEM;
2745 }
2746 map->libbpf_type = LIBBPF_MAP_UNSPEC;
2747 map->def.type = BPF_MAP_TYPE_UNSPEC;
2748 map->sec_idx = sec_idx;
2749 map->sec_offset = vi->offset;
2750 map->btf_var_idx = var_idx;
2751 pr_debug("map '%s': at sec_idx %d, offset %zu.\n",
2752 map_name, map->sec_idx, map->sec_offset);
2753
2754 err = parse_btf_map_def(map->name, obj->btf, def, strict, &map_def, &inner_def);
2755 if (err)
2756 return err;
2757
2758 fill_map_from_def(map, &map_def);
2759
2760 if (map_def.pinning == LIBBPF_PIN_BY_NAME) {
2761 err = build_map_pin_path(map, pin_root_path);
2762 if (err) {
2763 pr_warn("map '%s': couldn't build pin path.\n", map->name);
2764 return err;
2765 }
2766 }
2767
2768 if (map_def.parts & MAP_DEF_INNER_MAP) {
2769 map->inner_map = calloc(1, sizeof(*map->inner_map));
2770 if (!map->inner_map)
2771 return -ENOMEM;
2772 map->inner_map->fd = create_placeholder_fd();
2773 if (map->inner_map->fd < 0)
2774 return map->inner_map->fd;
2775 map->inner_map->sec_idx = sec_idx;
2776 map->inner_map->name = malloc(strlen(map_name) + sizeof(".inner") + 1);
2777 if (!map->inner_map->name)
2778 return -ENOMEM;
2779 sprintf(map->inner_map->name, "%s.inner", map_name);
2780
2781 fill_map_from_def(map->inner_map, &inner_def);
2782 }
2783
2784 err = map_fill_btf_type_info(obj, map);
2785 if (err)
2786 return err;
2787
2788 return 0;
2789 }
2790
2791 static int bpf_object__init_user_btf_maps(struct bpf_object *obj, bool strict,
2792 const char *pin_root_path)
2793 {
2794 const struct btf_type *sec = NULL;
2795 int nr_types, i, vlen, err;
2796 const struct btf_type *t;
2797 const char *name;
2798 Elf_Data *data;
2799 Elf_Scn *scn;
2800
2801 if (obj->efile.btf_maps_shndx < 0)
2802 return 0;
2803
2804 scn = elf_sec_by_idx(obj, obj->efile.btf_maps_shndx);
2805 data = elf_sec_data(obj, scn);
2806 if (!scn || !data) {
2807 pr_warn("elf: failed to get %s map definitions for %s\n",
2808 MAPS_ELF_SEC, obj->path);
2809 return -EINVAL;
2810 }
2811
2812 nr_types = btf__type_cnt(obj->btf);
2813 for (i = 1; i < nr_types; i++) {
2814 t = btf__type_by_id(obj->btf, i);
2815 if (!btf_is_datasec(t))
2816 continue;
2817 name = btf__name_by_offset(obj->btf, t->name_off);
2818 if (strcmp(name, MAPS_ELF_SEC) == 0) {
2819 sec = t;
2820 obj->efile.btf_maps_sec_btf_id = i;
2821 break;
2822 }
2823 }
2824
2825 if (!sec) {
2826 pr_warn("DATASEC '%s' not found.\n", MAPS_ELF_SEC);
2827 return -ENOENT;
2828 }
2829
2830 vlen = btf_vlen(sec);
2831 for (i = 0; i < vlen; i++) {
2832 err = bpf_object__init_user_btf_map(obj, sec, i,
2833 obj->efile.btf_maps_shndx,
2834 data, strict,
2835 pin_root_path);
2836 if (err)
2837 return err;
2838 }
2839
2840 return 0;
2841 }
2842
2843 static int bpf_object__init_maps(struct bpf_object *obj,
2844 const struct bpf_object_open_opts *opts)
2845 {
2846 const char *pin_root_path;
2847 bool strict;
2848 int err = 0;
2849
2850 strict = !OPTS_GET(opts, relaxed_maps, false);
2851 pin_root_path = OPTS_GET(opts, pin_root_path, NULL);
2852
2853 err = bpf_object__init_user_btf_maps(obj, strict, pin_root_path);
2854 err = err ?: bpf_object__init_global_data_maps(obj);
2855 err = err ?: bpf_object__init_kconfig_map(obj);
2856 err = err ?: bpf_object_init_struct_ops(obj);
2857
2858 return err;
2859 }
2860
2861 static bool section_have_execinstr(struct bpf_object *obj, int idx)
2862 {
2863 Elf64_Shdr *sh;
2864
2865 sh = elf_sec_hdr(obj, elf_sec_by_idx(obj, idx));
2866 if (!sh)
2867 return false;
2868
2869 return sh->sh_flags & SHF_EXECINSTR;
2870 }
2871
2872 static bool starts_with_qmark(const char *s)
2873 {
2874 return s && s[0] == '?';
2875 }
2876
2877 static bool btf_needs_sanitization(struct bpf_object *obj)
2878 {
2879 bool has_func_global = kernel_supports(obj, FEAT_BTF_GLOBAL_FUNC);
2880 bool has_datasec = kernel_supports(obj, FEAT_BTF_DATASEC);
2881 bool has_float = kernel_supports(obj, FEAT_BTF_FLOAT);
2882 bool has_func = kernel_supports(obj, FEAT_BTF_FUNC);
2883 bool has_decl_tag = kernel_supports(obj, FEAT_BTF_DECL_TAG);
2884 bool has_type_tag = kernel_supports(obj, FEAT_BTF_TYPE_TAG);
2885 bool has_enum64 = kernel_supports(obj, FEAT_BTF_ENUM64);
2886 bool has_qmark_datasec = kernel_supports(obj, FEAT_BTF_QMARK_DATASEC);
2887
2888 return !has_func || !has_datasec || !has_func_global || !has_float ||
2889 !has_decl_tag || !has_type_tag || !has_enum64 || !has_qmark_datasec;
2890 }
2891
2892 static int bpf_object__sanitize_btf(struct bpf_object *obj, struct btf *btf)
2893 {
2894 bool has_func_global = kernel_supports(obj, FEAT_BTF_GLOBAL_FUNC);
2895 bool has_datasec = kernel_supports(obj, FEAT_BTF_DATASEC);
2896 bool has_float = kernel_supports(obj, FEAT_BTF_FLOAT);
2897 bool has_func = kernel_supports(obj, FEAT_BTF_FUNC);
2898 bool has_decl_tag = kernel_supports(obj, FEAT_BTF_DECL_TAG);
2899 bool has_type_tag = kernel_supports(obj, FEAT_BTF_TYPE_TAG);
2900 bool has_enum64 = kernel_supports(obj, FEAT_BTF_ENUM64);
2901 bool has_qmark_datasec = kernel_supports(obj, FEAT_BTF_QMARK_DATASEC);
2902 int enum64_placeholder_id = 0;
2903 struct btf_type *t;
2904 int i, j, vlen;
2905
2906 for (i = 1; i < btf__type_cnt(btf); i++) {
2907 t = (struct btf_type *)btf__type_by_id(btf, i);
2908
2909 if ((!has_datasec && btf_is_var(t)) || (!has_decl_tag && btf_is_decl_tag(t))) {
2910 /* replace VAR/DECL_TAG with INT */
2911 t->info = BTF_INFO_ENC(BTF_KIND_INT, 0, 0);
2912 /*
2913 * using size = 1 is the safest choice, 4 will be too
2914 * big and cause kernel BTF validation failure if
2915 * original variable took less than 4 bytes
2916 */
2917 t->size = 1;
2918 *(int *)(t + 1) = BTF_INT_ENC(0, 0, 8);
2919 } else if (!has_datasec && btf_is_datasec(t)) {
2920 /* replace DATASEC with STRUCT */
2921 const struct btf_var_secinfo *v = btf_var_secinfos(t);
2922 struct btf_member *m = btf_members(t);
2923 struct btf_type *vt;
2924 char *name;
2925
2926 name = (char *)btf__name_by_offset(btf, t->name_off);
2927 while (*name) {
2928 if (*name == '.' || *name == '?')
2929 *name = '_';
2930 name++;
2931 }
2932
2933 vlen = btf_vlen(t);
2934 t->info = BTF_INFO_ENC(BTF_KIND_STRUCT, 0, vlen);
2935 for (j = 0; j < vlen; j++, v++, m++) {
2936 /* order of field assignments is important */
2937 m->offset = v->offset * 8;
2938 m->type = v->type;
2939 /* preserve variable name as member name */
2940 vt = (void *)btf__type_by_id(btf, v->type);
2941 m->name_off = vt->name_off;
2942 }
2943 } else if (!has_qmark_datasec && btf_is_datasec(t) &&
2944 starts_with_qmark(btf__name_by_offset(btf, t->name_off))) {
2945 /* replace '?' prefix with '_' for DATASEC names */
2946 char *name;
2947
2948 name = (char *)btf__name_by_offset(btf, t->name_off);
2949 if (name[0] == '?')
2950 name[0] = '_';
2951 } else if (!has_func && btf_is_func_proto(t)) {
2952 /* replace FUNC_PROTO with ENUM */
2953 vlen = btf_vlen(t);
2954 t->info = BTF_INFO_ENC(BTF_KIND_ENUM, 0, vlen);
2955 t->size = sizeof(__u32); /* kernel enforced */
2956 } else if (!has_func && btf_is_func(t)) {
2957 /* replace FUNC with TYPEDEF */
2958 t->info = BTF_INFO_ENC(BTF_KIND_TYPEDEF, 0, 0);
2959 } else if (!has_func_global && btf_is_func(t)) {
2960 /* replace BTF_FUNC_GLOBAL with BTF_FUNC_STATIC */
2961 t->info = BTF_INFO_ENC(BTF_KIND_FUNC, 0, 0);
2962 } else if (!has_float && btf_is_float(t)) {
2963 /* replace FLOAT with an equally-sized empty STRUCT;
2964 * since C compilers do not accept e.g. "float" as a
2965 * valid struct name, make it anonymous
2966 */
2967 t->name_off = 0;
2968 t->info = BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 0);
2969 } else if (!has_type_tag && btf_is_type_tag(t)) {
2970 /* replace TYPE_TAG with a CONST */
2971 t->name_off = 0;
2972 t->info = BTF_INFO_ENC(BTF_KIND_CONST, 0, 0);
2973 } else if (!has_enum64 && btf_is_enum(t)) {
2974 /* clear the kflag */
2975 t->info = btf_type_info(btf_kind(t), btf_vlen(t), false);
2976 } else if (!has_enum64 && btf_is_enum64(t)) {
2977 /* replace ENUM64 with a union */
2978 struct btf_member *m;
2979
2980 if (enum64_placeholder_id == 0) {
2981 enum64_placeholder_id = btf__add_int(btf, "enum64_placeholder", 1, 0);
2982 if (enum64_placeholder_id < 0)
2983 return enum64_placeholder_id;
2984
2985 t = (struct btf_type *)btf__type_by_id(btf, i);
2986 }
2987
2988 m = btf_members(t);
2989 vlen = btf_vlen(t);
2990 t->info = BTF_INFO_ENC(BTF_KIND_UNION, 0, vlen);
2991 for (j = 0; j < vlen; j++, m++) {
2992 m->type = enum64_placeholder_id;
2993 m->offset = 0;
2994 }
2995 }
2996 }
2997
2998 return 0;
2999 }
3000
3001 static bool libbpf_needs_btf(const struct bpf_object *obj)
3002 {
3003 return obj->efile.btf_maps_shndx >= 0 ||
3004 obj->efile.has_st_ops ||
3005 obj->nr_extern > 0;
3006 }
3007
3008 static bool kernel_needs_btf(const struct bpf_object *obj)
3009 {
3010 return obj->efile.has_st_ops;
3011 }
3012
3013 static int bpf_object__init_btf(struct bpf_object *obj,
3014 Elf_Data *btf_data,
3015 Elf_Data *btf_ext_data)
3016 {
3017 int err = -ENOENT;
3018
3019 if (btf_data) {
3020 obj->btf = btf__new(btf_data->d_buf, btf_data->d_size);
3021 err = libbpf_get_error(obj->btf);
3022 if (err) {
3023 obj->btf = NULL;
3024 pr_warn("Error loading ELF section %s: %d.\n", BTF_ELF_SEC, err);
3025 goto out;
3026 }
3027 /* enforce 8-byte pointers for BPF-targeted BTFs */
3028 btf__set_pointer_size(obj->btf, 8);
3029 }
3030 if (btf_ext_data) {
3031 struct btf_ext_info *ext_segs[3];
3032 int seg_num, sec_num;
3033
3034 if (!obj->btf) {
3035 pr_debug("Ignore ELF section %s because its depending ELF section %s is not found.\n",
3036 BTF_EXT_ELF_SEC, BTF_ELF_SEC);
3037 goto out;
3038 }
3039 obj->btf_ext = btf_ext__new(btf_ext_data->d_buf, btf_ext_data->d_size);
3040 err = libbpf_get_error(obj->btf_ext);
3041 if (err) {
3042 pr_warn("Error loading ELF section %s: %d. Ignored and continue.\n",
3043 BTF_EXT_ELF_SEC, err);
3044 obj->btf_ext = NULL;
3045 goto out;
3046 }
3047
3048 /* setup .BTF.ext to ELF section mapping */
3049 ext_segs[0] = &obj->btf_ext->func_info;
3050 ext_segs[1] = &obj->btf_ext->line_info;
3051 ext_segs[2] = &obj->btf_ext->core_relo_info;
3052 for (seg_num = 0; seg_num < ARRAY_SIZE(ext_segs); seg_num++) {
3053 struct btf_ext_info *seg = ext_segs[seg_num];
3054 const struct btf_ext_info_sec *sec;
3055 const char *sec_name;
3056 Elf_Scn *scn;
3057
3058 if (seg->sec_cnt == 0)
3059 continue;
3060
3061 seg->sec_idxs = calloc(seg->sec_cnt, sizeof(*seg->sec_idxs));
3062 if (!seg->sec_idxs) {
3063 err = -ENOMEM;
3064 goto out;
3065 }
3066
3067 sec_num = 0;
3068 for_each_btf_ext_sec(seg, sec) {
3069 /* preventively increment index to avoid doing
3070 * this before every continue below
3071 */
3072 sec_num++;
3073
3074 sec_name = btf__name_by_offset(obj->btf, sec->sec_name_off);
3075 if (str_is_empty(sec_name))
3076 continue;
3077 scn = elf_sec_by_name(obj, sec_name);
3078 if (!scn)
3079 continue;
3080
3081 seg->sec_idxs[sec_num - 1] = elf_ndxscn(scn);
3082 }
3083 }
3084 }
3085 out:
3086 if (err && libbpf_needs_btf(obj)) {
3087 pr_warn("BTF is required, but is missing or corrupted.\n");
3088 return err;
3089 }
3090 return 0;
3091 }
3092
3093 static int compare_vsi_off(const void *_a, const void *_b)
3094 {
3095 const struct btf_var_secinfo *a = _a;
3096 const struct btf_var_secinfo *b = _b;
3097
3098 return a->offset - b->offset;
3099 }
3100
3101 static int btf_fixup_datasec(struct bpf_object *obj, struct btf *btf,
3102 struct btf_type *t)
3103 {
3104 __u32 size = 0, i, vars = btf_vlen(t);
3105 const char *sec_name = btf__name_by_offset(btf, t->name_off);
3106 struct btf_var_secinfo *vsi;
3107 bool fixup_offsets = false;
3108 int err;
3109
3110 if (!sec_name) {
3111 pr_debug("No name found in string section for DATASEC kind.\n");
3112 return -ENOENT;
3113 }
3114
3115 /* Extern-backing datasecs (.ksyms, .kconfig) have their size and
3116 * variable offsets set at the previous step. Further, not every
3117 * extern BTF VAR has corresponding ELF symbol preserved, so we skip
3118 * all fixups altogether for such sections and go straight to sorting
3119 * VARs within their DATASEC.
3120 */
3121 if (strcmp(sec_name, KCONFIG_SEC) == 0 || strcmp(sec_name, KSYMS_SEC) == 0)
3122 goto sort_vars;
3123
3124 /* Clang leaves DATASEC size and VAR offsets as zeroes, so we need to
3125 * fix this up. But BPF static linker already fixes this up and fills
3126 * all the sizes and offsets during static linking. So this step has
3127 * to be optional. But the STV_HIDDEN handling is non-optional for any
3128 * non-extern DATASEC, so the variable fixup loop below handles both
3129 * fixups at the same time, paying the cost of BTF VAR <-> ELF
3130 * symbol matching just once.
3131 */
3132 if (t->size == 0) {
3133 err = find_elf_sec_sz(obj, sec_name, &size);
3134 if (err || !size) {
3135 pr_debug("sec '%s': failed to determine size from ELF: size %u, err %d\n",
3136 sec_name, size, err);
3137 return -ENOENT;
3138 }
3139
3140 t->size = size;
3141 fixup_offsets = true;
3142 }
3143
3144 for (i = 0, vsi = btf_var_secinfos(t); i < vars; i++, vsi++) {
3145 const struct btf_type *t_var;
3146 struct btf_var *var;
3147 const char *var_name;
3148 Elf64_Sym *sym;
3149
3150 t_var = btf__type_by_id(btf, vsi->type);
3151 if (!t_var || !btf_is_var(t_var)) {
3152 pr_debug("sec '%s': unexpected non-VAR type found\n", sec_name);
3153 return -EINVAL;
3154 }
3155
3156 var = btf_var(t_var);
3157 if (var->linkage == BTF_VAR_STATIC || var->linkage == BTF_VAR_GLOBAL_EXTERN)
3158 continue;
3159
3160 var_name = btf__name_by_offset(btf, t_var->name_off);
3161 if (!var_name) {
3162 pr_debug("sec '%s': failed to find name of DATASEC's member #%d\n",
3163 sec_name, i);
3164 return -ENOENT;
3165 }
3166
3167 sym = find_elf_var_sym(obj, var_name);
3168 if (IS_ERR(sym)) {
3169 pr_debug("sec '%s': failed to find ELF symbol for VAR '%s'\n",
3170 sec_name, var_name);
3171 return -ENOENT;
3172 }
3173
3174 if (fixup_offsets)
3175 vsi->offset = sym->st_value;
3176
3177 /* if variable is a global/weak symbol, but has restricted
3178 * (STV_HIDDEN or STV_INTERNAL) visibility, mark its BTF VAR
3179 * as static. This follows similar logic for functions (BPF
3180 * subprogs) and influences libbpf's further decisions about
3181 * whether to make global data BPF array maps as
3182 * BPF_F_MMAPABLE.
3183 */
3184 if (ELF64_ST_VISIBILITY(sym->st_other) == STV_HIDDEN
3185 || ELF64_ST_VISIBILITY(sym->st_other) == STV_INTERNAL)
3186 var->linkage = BTF_VAR_STATIC;
3187 }
3188
3189 sort_vars:
3190 qsort(btf_var_secinfos(t), vars, sizeof(*vsi), compare_vsi_off);
3191 return 0;
3192 }
3193
3194 static int bpf_object_fixup_btf(struct bpf_object *obj)
3195 {
3196 int i, n, err = 0;
3197
3198 if (!obj->btf)
3199 return 0;
3200
3201 n = btf__type_cnt(obj->btf);
3202 for (i = 1; i < n; i++) {
3203 struct btf_type *t = btf_type_by_id(obj->btf, i);
3204
3205 /* Loader needs to fix up some of the things compiler
3206 * couldn't get its hands on while emitting BTF. This
3207 * is section size and global variable offset. We use
3208 * the info from the ELF itself for this purpose.
3209 */
3210 if (btf_is_datasec(t)) {
3211 err = btf_fixup_datasec(obj, obj->btf, t);
3212 if (err)
3213 return err;
3214 }
3215 }
3216
3217 return 0;
3218 }
3219
3220 static bool prog_needs_vmlinux_btf(struct bpf_program *prog)
3221 {
3222 if (prog->type == BPF_PROG_TYPE_STRUCT_OPS ||
3223 prog->type == BPF_PROG_TYPE_LSM)
3224 return true;
3225
3226 /* BPF_PROG_TYPE_TRACING programs which do not attach to other programs
3227 * also need vmlinux BTF
3228 */
3229 if (prog->type == BPF_PROG_TYPE_TRACING && !prog->attach_prog_fd)
3230 return true;
3231
3232 return false;
3233 }
3234
3235 static bool map_needs_vmlinux_btf(struct bpf_map *map)
3236 {
3237 return bpf_map__is_struct_ops(map);
3238 }
3239
3240 static bool obj_needs_vmlinux_btf(const struct bpf_object *obj)
3241 {
3242 struct bpf_program *prog;
3243 struct bpf_map *map;
3244 int i;
3245
3246 /* CO-RE relocations need kernel BTF, but only when btf_custom_path
3247 * is not specified
3248 */
3249 if (obj->btf_ext && obj->btf_ext->core_relo_info.len && !obj->btf_custom_path)
3250 return true;
3251
3252 /* Support for typed ksyms needs kernel BTF */
3253 for (i = 0; i < obj->nr_extern; i++) {
3254 const struct extern_desc *ext;
3255
3256 ext = &obj->externs[i];
3257 if (ext->type == EXT_KSYM && ext->ksym.type_id)
3258 return true;
3259 }
3260
3261 bpf_object__for_each_program(prog, obj) {
3262 if (!prog->autoload)
3263 continue;
3264 if (prog_needs_vmlinux_btf(prog))
3265 return true;
3266 }
3267
3268 bpf_object__for_each_map(map, obj) {
3269 if (map_needs_vmlinux_btf(map))
3270 return true;
3271 }
3272
3273 return false;
3274 }
3275
3276 static int bpf_object__load_vmlinux_btf(struct bpf_object *obj, bool force)
3277 {
3278 int err;
3279
3280 /* btf_vmlinux could be loaded earlier */
3281 if (obj->btf_vmlinux || obj->gen_loader)
3282 return 0;
3283
3284 if (!force && !obj_needs_vmlinux_btf(obj))
3285 return 0;
3286
3287 obj->btf_vmlinux = btf__load_vmlinux_btf();
3288 err = libbpf_get_error(obj->btf_vmlinux);
3289 if (err) {
3290 pr_warn("Error loading vmlinux BTF: %d\n", err);
3291 obj->btf_vmlinux = NULL;
3292 return err;
3293 }
3294 return 0;
3295 }
3296
3297 static int bpf_object__sanitize_and_load_btf(struct bpf_object *obj)
3298 {
3299 struct btf *kern_btf = obj->btf;
3300 bool btf_mandatory, sanitize;
3301 int i, err = 0;
3302
3303 if (!obj->btf)
3304 return 0;
3305
3306 if (!kernel_supports(obj, FEAT_BTF)) {
3307 if (kernel_needs_btf(obj)) {
3308 err = -EOPNOTSUPP;
3309 goto report;
3310 }
3311 pr_debug("Kernel doesn't support BTF, skipping uploading it.\n");
3312 return 0;
3313 }
3314
3315 /* Even though some subprogs are global/weak, user might prefer more
3316 * permissive BPF verification process that BPF verifier performs for
3317 * static functions, taking into account more context from the caller
3318 * functions. In such case, they need to mark such subprogs with
3319 * __attribute__((visibility("hidden"))) and libbpf will adjust
3320 * corresponding FUNC BTF type to be marked as static and trigger more
3321 * involved BPF verification process.
3322 */
3323 for (i = 0; i < obj->nr_programs; i++) {
3324 struct bpf_program *prog = &obj->programs[i];
3325 struct btf_type *t;
3326 const char *name;
3327 int j, n;
3328
3329 if (!prog->mark_btf_static || !prog_is_subprog(obj, prog))
3330 continue;
3331
3332 n = btf__type_cnt(obj->btf);
3333 for (j = 1; j < n; j++) {
3334 t = btf_type_by_id(obj->btf, j);
3335 if (!btf_is_func(t) || btf_func_linkage(t) != BTF_FUNC_GLOBAL)
3336 continue;
3337
3338 name = btf__str_by_offset(obj->btf, t->name_off);
3339 if (strcmp(name, prog->name) != 0)
3340 continue;
3341
3342 t->info = btf_type_info(BTF_KIND_FUNC, BTF_FUNC_STATIC, 0);
3343 break;
3344 }
3345 }
3346
3347 sanitize = btf_needs_sanitization(obj);
3348 if (sanitize) {
3349 const void *raw_data;
3350 __u32 sz;
3351
3352 /* clone BTF to sanitize a copy and leave the original intact */
3353 raw_data = btf__raw_data(obj->btf, &sz);
3354 kern_btf = btf__new(raw_data, sz);
3355 err = libbpf_get_error(kern_btf);
3356 if (err)
3357 return err;
3358
3359 /* enforce 8-byte pointers for BPF-targeted BTFs */
3360 btf__set_pointer_size(obj->btf, 8);
3361 err = bpf_object__sanitize_btf(obj, kern_btf);
3362 if (err)
3363 return err;
3364 }
3365
3366 if (obj->gen_loader) {
3367 __u32 raw_size = 0;
3368 const void *raw_data = btf__raw_data(kern_btf, &raw_size);
3369
3370 if (!raw_data)
3371 return -ENOMEM;
3372 bpf_gen__load_btf(obj->gen_loader, raw_data, raw_size);
3373 /* Pretend to have valid FD to pass various fd >= 0 checks.
3374 * This fd == 0 will not be used with any syscall and will be reset to -1 eventually.
3375 */
3376 btf__set_fd(kern_btf, 0);
3377 } else {
3378 /* currently BPF_BTF_LOAD only supports log_level 1 */
3379 err = btf_load_into_kernel(kern_btf, obj->log_buf, obj->log_size,
3380 obj->log_level ? 1 : 0, obj->token_fd);
3381 }
3382 if (sanitize) {
3383 if (!err) {
3384 /* move fd to libbpf's BTF */
3385 btf__set_fd(obj->btf, btf__fd(kern_btf));
3386 btf__set_fd(kern_btf, -1);
3387 }
3388 btf__free(kern_btf);
3389 }
3390 report:
3391 if (err) {
3392 btf_mandatory = kernel_needs_btf(obj);
3393 pr_warn("Error loading .BTF into kernel: %d. %s\n", err,
3394 btf_mandatory ? "BTF is mandatory, can't proceed."
3395 : "BTF is optional, ignoring.");
3396 if (!btf_mandatory)
3397 err = 0;
3398 }
3399 return err;
3400 }
3401
3402 static const char *elf_sym_str(const struct bpf_object *obj, size_t off)
3403 {
3404 const char *name;
3405
3406 name = elf_strptr(obj->efile.elf, obj->efile.strtabidx, off);
3407 if (!name) {
3408 pr_warn("elf: failed to get section name string at offset %zu from %s: %s\n",
3409 off, obj->path, elf_errmsg(-1));
3410 return NULL;
3411 }
3412
3413 return name;
3414 }
3415
3416 static const char *elf_sec_str(const struct bpf_object *obj, size_t off)
3417 {
3418 const char *name;
3419
3420 name = elf_strptr(obj->efile.elf, obj->efile.shstrndx, off);
3421 if (!name) {
3422 pr_warn("elf: failed to get section name string at offset %zu from %s: %s\n",
3423 off, obj->path, elf_errmsg(-1));
3424 return NULL;
3425 }
3426
3427 return name;
3428 }
3429
3430 static Elf_Scn *elf_sec_by_idx(const struct bpf_object *obj, size_t idx)
3431 {
3432 Elf_Scn *scn;
3433
3434 scn = elf_getscn(obj->efile.elf, idx);
3435 if (!scn) {
3436 pr_warn("elf: failed to get section(%zu) from %s: %s\n",
3437 idx, obj->path, elf_errmsg(-1));
3438 return NULL;
3439 }
3440 return scn;
3441 }
3442
3443 static Elf_Scn *elf_sec_by_name(const struct bpf_object *obj, const char *name)
3444 {
3445 Elf_Scn *scn = NULL;
3446 Elf *elf = obj->efile.elf;
3447 const char *sec_name;
3448
3449 while ((scn = elf_nextscn(elf, scn)) != NULL) {
3450 sec_name = elf_sec_name(obj, scn);
3451 if (!sec_name)
3452 return NULL;
3453
3454 if (strcmp(sec_name, name) != 0)
3455 continue;
3456
3457 return scn;
3458 }
3459 return NULL;
3460 }
3461
3462 static Elf64_Shdr *elf_sec_hdr(const struct bpf_object *obj, Elf_Scn *scn)
3463 {
3464 Elf64_Shdr *shdr;
3465
3466 if (!scn)
3467 return NULL;
3468
3469 shdr = elf64_getshdr(scn);
3470 if (!shdr) {
3471 pr_warn("elf: failed to get section(%zu) header from %s: %s\n",
3472 elf_ndxscn(scn), obj->path, elf_errmsg(-1));
3473 return NULL;
3474 }
3475
3476 return shdr;
3477 }
3478
3479 static const char *elf_sec_name(const struct bpf_object *obj, Elf_Scn *scn)
3480 {
3481 const char *name;
3482 Elf64_Shdr *sh;
3483
3484 if (!scn)
3485 return NULL;
3486
3487 sh = elf_sec_hdr(obj, scn);
3488 if (!sh)
3489 return NULL;
3490
3491 name = elf_sec_str(obj, sh->sh_name);
3492 if (!name) {
3493 pr_warn("elf: failed to get section(%zu) name from %s: %s\n",
3494 elf_ndxscn(scn), obj->path, elf_errmsg(-1));
3495 return NULL;
3496 }
3497
3498 return name;
3499 }
3500
3501 static Elf_Data *elf_sec_data(const struct bpf_object *obj, Elf_Scn *scn)
3502 {
3503 Elf_Data *data;
3504
3505 if (!scn)
3506 return NULL;
3507
3508 data = elf_getdata(scn, 0);
3509 if (!data) {
3510 pr_warn("elf: failed to get section(%zu) %s data from %s: %s\n",
3511 elf_ndxscn(scn), elf_sec_name(obj, scn) ?: "<?>",
3512 obj->path, elf_errmsg(-1));
3513 return NULL;
3514 }
3515
3516 return data;
3517 }
3518
3519 static Elf64_Sym *elf_sym_by_idx(const struct bpf_object *obj, size_t idx)
3520 {
3521 if (idx >= obj->efile.symbols->d_size / sizeof(Elf64_Sym))
3522 return NULL;
3523
3524 return (Elf64_Sym *)obj->efile.symbols->d_buf + idx;
3525 }
3526
3527 static Elf64_Rel *elf_rel_by_idx(Elf_Data *data, size_t idx)
3528 {
3529 if (idx >= data->d_size / sizeof(Elf64_Rel))
3530 return NULL;
3531
3532 return (Elf64_Rel *)data->d_buf + idx;
3533 }
3534
3535 static bool is_sec_name_dwarf(const char *name)
3536 {
3537 /* approximation, but the actual list is too long */
3538 return str_has_pfx(name, ".debug_");
3539 }
3540
3541 static bool ignore_elf_section(Elf64_Shdr *hdr, const char *name)
3542 {
3543 /* no special handling of .strtab */
3544 if (hdr->sh_type == SHT_STRTAB)
3545 return true;
3546
3547 /* ignore .llvm_addrsig section as well */
3548 if (hdr->sh_type == SHT_LLVM_ADDRSIG)
3549 return true;
3550
3551 /* having no subprograms leads to an empty .text section, ignore it */
3552 if (hdr->sh_type == SHT_PROGBITS && hdr->sh_size == 0 &&
3553 strcmp(name, ".text") == 0)
3554 return true;
3555
3556 /* DWARF sections */
3557 if (is_sec_name_dwarf(name))
3558 return true;
3559
3560 if (str_has_pfx(name, ".rel")) {
3561 name += sizeof(".rel") - 1;
3562 /* DWARF section relocations */
3563 if (is_sec_name_dwarf(name))
3564 return true;
3565
3566 /* .BTF and .BTF.ext don't need relocations */
3567 if (strcmp(name, BTF_ELF_SEC) == 0 ||
3568 strcmp(name, BTF_EXT_ELF_SEC) == 0)
3569 return true;
3570 }
3571
3572 return false;
3573 }
3574
3575 static int cmp_progs(const void *_a, const void *_b)
3576 {
3577 const struct bpf_program *a = _a;
3578 const struct bpf_program *b = _b;
3579
3580 if (a->sec_idx != b->sec_idx)
3581 return a->sec_idx < b->sec_idx ? -1 : 1;
3582
3583 /* sec_insn_off can't be the same within the section */
3584 return a->sec_insn_off < b->sec_insn_off ? -1 : 1;
3585 }
3586
3587 static int bpf_object__elf_collect(struct bpf_object *obj)
3588 {
3589 struct elf_sec_desc *sec_desc;
3590 Elf *elf = obj->efile.elf;
3591 Elf_Data *btf_ext_data = NULL;
3592 Elf_Data *btf_data = NULL;
3593 int idx = 0, err = 0;
3594 const char *name;
3595 Elf_Data *data;
3596 Elf_Scn *scn;
3597 Elf64_Shdr *sh;
3598
3599 /* ELF section indices are 0-based, but sec #0 is special "invalid"
3600 * section. Since section count retrieved by elf_getshdrnum() does
3601 * include sec #0, it is already the necessary size of an array to keep
3602 * all the sections.
3603 */
3604 if (elf_getshdrnum(obj->efile.elf, &obj->efile.sec_cnt)) {
3605 pr_warn("elf: failed to get the number of sections for %s: %s\n",
3606 obj->path, elf_errmsg(-1));
3607 return -LIBBPF_ERRNO__FORMAT;
3608 }
3609 obj->efile.secs = calloc(obj->efile.sec_cnt, sizeof(*obj->efile.secs));
3610 if (!obj->efile.secs)
3611 return -ENOMEM;
3612
3613 /* a bunch of ELF parsing functionality depends on processing symbols,
3614 * so do the first pass and find the symbol table
3615 */
3616 scn = NULL;
3617 while ((scn = elf_nextscn(elf, scn)) != NULL) {
3618 sh = elf_sec_hdr(obj, scn);
3619 if (!sh)
3620 return -LIBBPF_ERRNO__FORMAT;
3621
3622 if (sh->sh_type == SHT_SYMTAB) {
3623 if (obj->efile.symbols) {
3624 pr_warn("elf: multiple symbol tables in %s\n", obj->path);
3625 return -LIBBPF_ERRNO__FORMAT;
3626 }
3627
3628 data = elf_sec_data(obj, scn);
3629 if (!data)
3630 return -LIBBPF_ERRNO__FORMAT;
3631
3632 idx = elf_ndxscn(scn);
3633
3634 obj->efile.symbols = data;
3635 obj->efile.symbols_shndx = idx;
3636 obj->efile.strtabidx = sh->sh_link;
3637 }
3638 }
3639
3640 if (!obj->efile.symbols) {
3641 pr_warn("elf: couldn't find symbol table in %s, stripped object file?\n",
3642 obj->path);
3643 return -ENOENT;
3644 }
3645
3646 scn = NULL;
3647 while ((scn = elf_nextscn(elf, scn)) != NULL) {
3648 idx = elf_ndxscn(scn);
3649 sec_desc = &obj->efile.secs[idx];
3650
3651 sh = elf_sec_hdr(obj, scn);
3652 if (!sh)
3653 return -LIBBPF_ERRNO__FORMAT;
3654
3655 name = elf_sec_str(obj, sh->sh_name);
3656 if (!name)
3657 return -LIBBPF_ERRNO__FORMAT;
3658
3659 if (ignore_elf_section(sh, name))
3660 continue;
3661
3662 data = elf_sec_data(obj, scn);
3663 if (!data)
3664 return -LIBBPF_ERRNO__FORMAT;
3665
3666 pr_debug("elf: section(%d) %s, size %ld, link %d, flags %lx, type=%d\n",
3667 idx, name, (unsigned long)data->d_size,
3668 (int)sh->sh_link, (unsigned long)sh->sh_flags,
3669 (int)sh->sh_type);
3670
3671 if (strcmp(name, "license") == 0) {
3672 err = bpf_object__init_license(obj, data->d_buf, data->d_size);
3673 if (err)
3674 return err;
3675 } else if (strcmp(name, "version") == 0) {
3676 err = bpf_object__init_kversion(obj, data->d_buf, data->d_size);
3677 if (err)
3678 return err;
3679 } else if (strcmp(name, "maps") == 0) {
3680 pr_warn("elf: legacy map definitions in 'maps' section are not supported by libbpf v1.0+\n");
3681 return -ENOTSUP;
3682 } else if (strcmp(name, MAPS_ELF_SEC) == 0) {
3683 obj->efile.btf_maps_shndx = idx;
3684 } else if (strcmp(name, BTF_ELF_SEC) == 0) {
3685 if (sh->sh_type != SHT_PROGBITS)
3686 return -LIBBPF_ERRNO__FORMAT;
3687 btf_data = data;
3688 } else if (strcmp(name, BTF_EXT_ELF_SEC) == 0) {
3689 if (sh->sh_type != SHT_PROGBITS)
3690 return -LIBBPF_ERRNO__FORMAT;
3691 btf_ext_data = data;
3692 } else if (sh->sh_type == SHT_SYMTAB) {
3693 /* already processed during the first pass above */
3694 } else if (sh->sh_type == SHT_PROGBITS && data->d_size > 0) {
3695 if (sh->sh_flags & SHF_EXECINSTR) {
3696 if (strcmp(name, ".text") == 0)
3697 obj->efile.text_shndx = idx;
3698 err = bpf_object__add_programs(obj, data, name, idx);
3699 if (err)
3700 return err;
3701 } else if (strcmp(name, DATA_SEC) == 0 ||
3702 str_has_pfx(name, DATA_SEC ".")) {
3703 sec_desc->sec_type = SEC_DATA;
3704 sec_desc->shdr = sh;
3705 sec_desc->data = data;
3706 } else if (strcmp(name, RODATA_SEC) == 0 ||
3707 str_has_pfx(name, RODATA_SEC ".")) {
3708 sec_desc->sec_type = SEC_RODATA;
3709 sec_desc->shdr = sh;
3710 sec_desc->data = data;
3711 } else if (strcmp(name, STRUCT_OPS_SEC) == 0 ||
3712 strcmp(name, STRUCT_OPS_LINK_SEC) == 0 ||
3713 strcmp(name, "?" STRUCT_OPS_SEC) == 0 ||
3714 strcmp(name, "?" STRUCT_OPS_LINK_SEC) == 0) {
3715 sec_desc->sec_type = SEC_ST_OPS;
3716 sec_desc->shdr = sh;
3717 sec_desc->data = data;
3718 obj->efile.has_st_ops = true;
3719 } else {
3720 pr_info("elf: skipping unrecognized data section(%d) %s\n",
3721 idx, name);
3722 }
3723 } else if (sh->sh_type == SHT_REL) {
3724 int targ_sec_idx = sh->sh_info; /* points to other section */
3725
3726 if (sh->sh_entsize != sizeof(Elf64_Rel) ||
3727 targ_sec_idx >= obj->efile.sec_cnt)
3728 return -LIBBPF_ERRNO__FORMAT;
3729
3730 /* Only do relo for section with exec instructions */
3731 if (!section_have_execinstr(obj, targ_sec_idx) &&
3732 strcmp(name, ".rel" STRUCT_OPS_SEC) &&
3733 strcmp(name, ".rel" STRUCT_OPS_LINK_SEC) &&
3734 strcmp(name, ".rel?" STRUCT_OPS_SEC) &&
3735 strcmp(name, ".rel?" STRUCT_OPS_LINK_SEC) &&
3736 strcmp(name, ".rel" MAPS_ELF_SEC)) {
3737 pr_info("elf: skipping relo section(%d) %s for section(%d) %s\n",
3738 idx, name, targ_sec_idx,
3739 elf_sec_name(obj, elf_sec_by_idx(obj, targ_sec_idx)) ?: "<?>");
3740 continue;
3741 }
3742
3743 sec_desc->sec_type = SEC_RELO;
3744 sec_desc->shdr = sh;
3745 sec_desc->data = data;
3746 } else if (sh->sh_type == SHT_NOBITS && (strcmp(name, BSS_SEC) == 0 ||
3747 str_has_pfx(name, BSS_SEC "."))) {
3748 sec_desc->sec_type = SEC_BSS;
3749 sec_desc->shdr = sh;
3750 sec_desc->data = data;
3751 } else {
3752 pr_info("elf: skipping section(%d) %s (size %zu)\n", idx, name,
3753 (size_t)sh->sh_size);
3754 }
3755 }
3756
3757 if (!obj->efile.strtabidx || obj->efile.strtabidx > idx) {
3758 pr_warn("elf: symbol strings section missing or invalid in %s\n", obj->path);
3759 return -LIBBPF_ERRNO__FORMAT;
3760 }
3761
3762 /* sort BPF programs by section name and in-section instruction offset
3763 * for faster search
3764 */
3765 if (obj->nr_programs)
3766 qsort(obj->programs, obj->nr_programs, sizeof(*obj->programs), cmp_progs);
3767
3768 return bpf_object__init_btf(obj, btf_data, btf_ext_data);
3769 }
3770
3771 static bool sym_is_extern(const Elf64_Sym *sym)
3772 {
3773 int bind = ELF64_ST_BIND(sym->st_info);
3774 /* externs are symbols w/ type=NOTYPE, bind=GLOBAL|WEAK, section=UND */
3775 return sym->st_shndx == SHN_UNDEF &&
3776 (bind == STB_GLOBAL || bind == STB_WEAK) &&
3777 ELF64_ST_TYPE(sym->st_info) == STT_NOTYPE;
3778 }
3779
3780 static bool sym_is_subprog(const Elf64_Sym *sym, int text_shndx)
3781 {
3782 int bind = ELF64_ST_BIND(sym->st_info);
3783 int type = ELF64_ST_TYPE(sym->st_info);
3784
3785 /* in .text section */
3786 if (sym->st_shndx != text_shndx)
3787 return false;
3788
3789 /* local function */
3790 if (bind == STB_LOCAL && type == STT_SECTION)
3791 return true;
3792
3793 /* global function */
3794 return bind == STB_GLOBAL && type == STT_FUNC;
3795 }
3796
3797 static int find_extern_btf_id(const struct btf *btf, const char *ext_name)
3798 {
3799 const struct btf_type *t;
3800 const char *tname;
3801 int i, n;
3802
3803 if (!btf)
3804 return -ESRCH;
3805
3806 n = btf__type_cnt(btf);
3807 for (i = 1; i < n; i++) {
3808 t = btf__type_by_id(btf, i);
3809
3810 if (!btf_is_var(t) && !btf_is_func(t))
3811 continue;
3812
3813 tname = btf__name_by_offset(btf, t->name_off);
3814 if (strcmp(tname, ext_name))
3815 continue;
3816
3817 if (btf_is_var(t) &&
3818 btf_var(t)->linkage != BTF_VAR_GLOBAL_EXTERN)
3819 return -EINVAL;
3820
3821 if (btf_is_func(t) && btf_func_linkage(t) != BTF_FUNC_EXTERN)
3822 return -EINVAL;
3823
3824 return i;
3825 }
3826
3827 return -ENOENT;
3828 }
3829
3830 static int find_extern_sec_btf_id(struct btf *btf, int ext_btf_id) {
3831 const struct btf_var_secinfo *vs;
3832 const struct btf_type *t;
3833 int i, j, n;
3834
3835 if (!btf)
3836 return -ESRCH;
3837
3838 n = btf__type_cnt(btf);
3839 for (i = 1; i < n; i++) {
3840 t = btf__type_by_id(btf, i);
3841
3842 if (!btf_is_datasec(t))
3843 continue;
3844
3845 vs = btf_var_secinfos(t);
3846 for (j = 0; j < btf_vlen(t); j++, vs++) {
3847 if (vs->type == ext_btf_id)
3848 return i;
3849 }
3850 }
3851
3852 return -ENOENT;
3853 }
3854
3855 static enum kcfg_type find_kcfg_type(const struct btf *btf, int id,
3856 bool *is_signed)
3857 {
3858 const struct btf_type *t;
3859 const char *name;
3860
3861 t = skip_mods_and_typedefs(btf, id, NULL);
3862 name = btf__name_by_offset(btf, t->name_off);
3863
3864 if (is_signed)
3865 *is_signed = false;
3866 switch (btf_kind(t)) {
3867 case BTF_KIND_INT: {
3868 int enc = btf_int_encoding(t);
3869
3870 if (enc & BTF_INT_BOOL)
3871 return t->size == 1 ? KCFG_BOOL : KCFG_UNKNOWN;
3872 if (is_signed)
3873 *is_signed = enc & BTF_INT_SIGNED;
3874 if (t->size == 1)
3875 return KCFG_CHAR;
3876 if (t->size < 1 || t->size > 8 || (t->size & (t->size - 1)))
3877 return KCFG_UNKNOWN;
3878 return KCFG_INT;
3879 }
3880 case BTF_KIND_ENUM:
3881 if (t->size != 4)
3882 return KCFG_UNKNOWN;
3883 if (strcmp(name, "libbpf_tristate"))
3884 return KCFG_UNKNOWN;
3885 return KCFG_TRISTATE;
3886 case BTF_KIND_ENUM64:
3887 if (strcmp(name, "libbpf_tristate"))
3888 return KCFG_UNKNOWN;
3889 return KCFG_TRISTATE;
3890 case BTF_KIND_ARRAY:
3891 if (btf_array(t)->nelems == 0)
3892 return KCFG_UNKNOWN;
3893 if (find_kcfg_type(btf, btf_array(t)->type, NULL) != KCFG_CHAR)
3894 return KCFG_UNKNOWN;
3895 return KCFG_CHAR_ARR;
3896 default:
3897 return KCFG_UNKNOWN;
3898 }
3899 }
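
/* A minimal BPF-program-side sketch of the Kconfig externs classified above
 * (CONFIG_* names and sizes are illustrative; __kconfig and libbpf_tristate
 * come from bpf_helpers.h):
 *
 *	extern int CONFIG_HZ __kconfig;					// KCFG_INT
 *	extern bool CONFIG_BPF_SYSCALL __kconfig __weak;		// KCFG_BOOL
 *	extern enum libbpf_tristate CONFIG_BLK_DEV_LOOP __kconfig __weak; // KCFG_TRISTATE
 *	extern char CONFIG_CC_VERSION_TEXT[64] __kconfig __weak;	// KCFG_CHAR_ARR
 */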
3900
3901 static int cmp_externs(const void *_a, const void *_b)
3902 {
3903 const struct extern_desc *a = _a;
3904 const struct extern_desc *b = _b;
3905
3906 if (a->type != b->type)
3907 return a->type < b->type ? -1 : 1;
3908
3909 if (a->type == EXT_KCFG) {
3910 /* descending order by alignment requirements */
3911 if (a->kcfg.align != b->kcfg.align)
3912 return a->kcfg.align > b->kcfg.align ? -1 : 1;
3913 /* ascending order by size, within same alignment class */
3914 if (a->kcfg.sz != b->kcfg.sz)
3915 return a->kcfg.sz < b->kcfg.sz ? -1 : 1;
3916 }
3917
3918 /* resolve ties by name */
3919 return strcmp(a->name, b->name);
3920 }
3921
3922 static int find_int_btf_id(const struct btf *btf)
3923 {
3924 const struct btf_type *t;
3925 int i, n;
3926
3927 n = btf__type_cnt(btf);
3928 for (i = 1; i < n; i++) {
3929 t = btf__type_by_id(btf, i);
3930
3931 if (btf_is_int(t) && btf_int_bits(t) == 32)
3932 return i;
3933 }
3934
3935 return 0;
3936 }
3937
3938 static int add_dummy_ksym_var(struct btf *btf)
3939 {
3940 int i, int_btf_id, sec_btf_id, dummy_var_btf_id;
3941 const struct btf_var_secinfo *vs;
3942 const struct btf_type *sec;
3943
3944 if (!btf)
3945 return 0;
3946
3947 sec_btf_id = btf__find_by_name_kind(btf, KSYMS_SEC,
3948 BTF_KIND_DATASEC);
3949 if (sec_btf_id < 0)
3950 return 0;
3951
3952 sec = btf__type_by_id(btf, sec_btf_id);
3953 vs = btf_var_secinfos(sec);
3954 for (i = 0; i < btf_vlen(sec); i++, vs++) {
3955 const struct btf_type *vt;
3956
3957 vt = btf__type_by_id(btf, vs->type);
3958 if (btf_is_func(vt))
3959 break;
3960 }
3961
3962 /* No func in ksyms sec. No need to add dummy var. */
3963 if (i == btf_vlen(sec))
3964 return 0;
3965
3966 int_btf_id = find_int_btf_id(btf);
3967 dummy_var_btf_id = btf__add_var(btf,
3968 "dummy_ksym",
3969 BTF_VAR_GLOBAL_ALLOCATED,
3970 int_btf_id);
3971 if (dummy_var_btf_id < 0)
3972 pr_warn("cannot create a dummy_ksym var\n");
3973
3974 return dummy_var_btf_id;
3975 }
3976
3977 static int bpf_object__collect_externs(struct bpf_object *obj)
3978 {
3979 struct btf_type *sec, *kcfg_sec = NULL, *ksym_sec = NULL;
3980 const struct btf_type *t;
3981 struct extern_desc *ext;
3982 int i, n, off, dummy_var_btf_id;
3983 const char *ext_name, *sec_name;
3984 size_t ext_essent_len;
3985 Elf_Scn *scn;
3986 Elf64_Shdr *sh;
3987
3988 if (!obj->efile.symbols)
3989 return 0;
3990
3991 scn = elf_sec_by_idx(obj, obj->efile.symbols_shndx);
3992 sh = elf_sec_hdr(obj, scn);
3993 if (!sh || sh->sh_entsize != sizeof(Elf64_Sym))
3994 return -LIBBPF_ERRNO__FORMAT;
3995
3996 dummy_var_btf_id = add_dummy_ksym_var(obj->btf);
3997 if (dummy_var_btf_id < 0)
3998 return dummy_var_btf_id;
3999
4000 n = sh->sh_size / sh->sh_entsize;
4001 pr_debug("looking for externs among %d symbols...\n", n);
4002
4003 for (i = 0; i < n; i++) {
4004 Elf64_Sym *sym = elf_sym_by_idx(obj, i);
4005
4006 if (!sym)
4007 return -LIBBPF_ERRNO__FORMAT;
4008 if (!sym_is_extern(sym))
4009 continue;
4010 ext_name = elf_sym_str(obj, sym->st_name);
4011 if (!ext_name || !ext_name[0])
4012 continue;
4013
4014 ext = obj->externs;
4015 ext = libbpf_reallocarray(ext, obj->nr_extern + 1, sizeof(*ext));
4016 if (!ext)
4017 return -ENOMEM;
4018 obj->externs = ext;
4019 ext = &ext[obj->nr_extern];
4020 memset(ext, 0, sizeof(*ext));
4021 obj->nr_extern++;
4022
4023 ext->btf_id = find_extern_btf_id(obj->btf, ext_name);
4024 if (ext->btf_id <= 0) {
4025 pr_warn("failed to find BTF for extern '%s': %d\n",
4026 ext_name, ext->btf_id);
4027 return ext->btf_id;
4028 }
4029 t = btf__type_by_id(obj->btf, ext->btf_id);
4030 ext->name = btf__name_by_offset(obj->btf, t->name_off);
4031 ext->sym_idx = i;
4032 ext->is_weak = ELF64_ST_BIND(sym->st_info) == STB_WEAK;
4033
4034 ext_essent_len = bpf_core_essential_name_len(ext->name);
4035 ext->essent_name = NULL;
4036 if (ext_essent_len != strlen(ext->name)) {
4037 ext->essent_name = strndup(ext->name, ext_essent_len);
4038 if (!ext->essent_name)
4039 return -ENOMEM;
4040 }
4041
4042 ext->sec_btf_id = find_extern_sec_btf_id(obj->btf, ext->btf_id);
4043 if (ext->sec_btf_id <= 0) {
4044 pr_warn("failed to find BTF for extern '%s' [%d] section: %d\n",
4045 ext_name, ext->btf_id, ext->sec_btf_id);
4046 return ext->sec_btf_id;
4047 }
4048 sec = (void *)btf__type_by_id(obj->btf, ext->sec_btf_id);
4049 sec_name = btf__name_by_offset(obj->btf, sec->name_off);
4050
4051 if (strcmp(sec_name, KCONFIG_SEC) == 0) {
4052 if (btf_is_func(t)) {
4053 pr_warn("extern function %s is unsupported under %s section\n",
4054 ext->name, KCONFIG_SEC);
4055 return -ENOTSUP;
4056 }
4057 kcfg_sec = sec;
4058 ext->type = EXT_KCFG;
4059 ext->kcfg.sz = btf__resolve_size(obj->btf, t->type);
4060 if (ext->kcfg.sz <= 0) {
4061 pr_warn("failed to resolve size of extern (kcfg) '%s': %d\n",
4062 ext_name, ext->kcfg.sz);
4063 return ext->kcfg.sz;
4064 }
4065 ext->kcfg.align = btf__align_of(obj->btf, t->type);
4066 if (ext->kcfg.align <= 0) {
4067 pr_warn("failed to determine alignment of extern (kcfg) '%s': %d\n",
4068 ext_name, ext->kcfg.align);
4069 return -EINVAL;
4070 }
4071 ext->kcfg.type = find_kcfg_type(obj->btf, t->type,
4072 &ext->kcfg.is_signed);
4073 if (ext->kcfg.type == KCFG_UNKNOWN) {
4074 pr_warn("extern (kcfg) '%s': type is unsupported\n", ext_name);
4075 return -ENOTSUP;
4076 }
4077 } else if (strcmp(sec_name, KSYMS_SEC) == 0) {
4078 ksym_sec = sec;
4079 ext->type = EXT_KSYM;
4080 skip_mods_and_typedefs(obj->btf, t->type,
4081 &ext->ksym.type_id);
4082 } else {
4083 pr_warn("unrecognized extern section '%s'\n", sec_name);
4084 return -ENOTSUP;
4085 }
4086 }
4087 pr_debug("collected %d externs total\n", obj->nr_extern);
4088
4089 if (!obj->nr_extern)
4090 return 0;
4091
4092 /* sort externs by type, for kcfg ones also by (align, size, name) */
4093 qsort(obj->externs, obj->nr_extern, sizeof(*ext), cmp_externs);
4094
4095 /* for .ksyms section, we need to turn all externs into allocated
4096 * variables in BTF to pass kernel verification; we do this by
4097 * pretending that each extern is an int-sized (4-byte) variable
4098 */
4099 if (ksym_sec) {
4100 /* find existing 4-byte integer type in BTF to use for fake
4101 * extern variables in DATASEC
4102 */
4103 int int_btf_id = find_int_btf_id(obj->btf);
4104 /* For extern functions, the dummy_var added earlier
4105 * will be used to replace vs->type, and
4106 * its name string will be used to fill in
4107 * any missing param names.
4108 */
4109 const struct btf_type *dummy_var;
4110
4111 dummy_var = btf__type_by_id(obj->btf, dummy_var_btf_id);
4112 for (i = 0; i < obj->nr_extern; i++) {
4113 ext = &obj->externs[i];
4114 if (ext->type != EXT_KSYM)
4115 continue;
4116 pr_debug("extern (ksym) #%d: symbol %d, name %s\n",
4117 i, ext->sym_idx, ext->name);
4118 }
4119
4120 sec = ksym_sec;
4121 n = btf_vlen(sec);
4122 for (i = 0, off = 0; i < n; i++, off += sizeof(int)) {
4123 struct btf_var_secinfo *vs = btf_var_secinfos(sec) + i;
4124 struct btf_type *vt;
4125
4126 vt = (void *)btf__type_by_id(obj->btf, vs->type);
4127 ext_name = btf__name_by_offset(obj->btf, vt->name_off);
4128 ext = find_extern_by_name(obj, ext_name);
4129 if (!ext) {
4130 pr_warn("failed to find extern definition for BTF %s '%s'\n",
4131 btf_kind_str(vt), ext_name);
4132 return -ESRCH;
4133 }
4134 if (btf_is_func(vt)) {
4135 const struct btf_type *func_proto;
4136 struct btf_param *param;
4137 int j;
4138
4139 func_proto = btf__type_by_id(obj->btf,
4140 vt->type);
4141 param = btf_params(func_proto);
4142 /* Reuse the dummy_var string if the
4143 * func proto does not have a param name.
4144 */
4145 for (j = 0; j < btf_vlen(func_proto); j++)
4146 if (param[j].type && !param[j].name_off)
4147 param[j].name_off =
4148 dummy_var->name_off;
4149 vs->type = dummy_var_btf_id;
4150 vt->info &= ~0xffff;
4151 vt->info |= BTF_FUNC_GLOBAL;
4152 } else {
4153 btf_var(vt)->linkage = BTF_VAR_GLOBAL_ALLOCATED;
4154 vt->type = int_btf_id;
4155 }
4156 vs->offset = off;
4157 vs->size = sizeof(int);
4158 }
4159 sec->size = off;
4160 }
4161
4162 if (kcfg_sec) {
4163 sec = kcfg_sec;
4164 /* for kcfg externs calculate their offsets within a .kconfig map */
4165 off = 0;
4166 for (i = 0; i < obj->nr_extern; i++) {
4167 ext = &obj->externs[i];
4168 if (ext->type != EXT_KCFG)
4169 continue;
4170
4171 ext->kcfg.data_off = roundup(off, ext->kcfg.align);
4172 off = ext->kcfg.data_off + ext->kcfg.sz;
4173 pr_debug("extern (kcfg) #%d: symbol %d, off %u, name %s\n",
4174 i, ext->sym_idx, ext->kcfg.data_off, ext->name);
4175 }
4176 sec->size = off;
4177 n = btf_vlen(sec);
4178 for (i = 0; i < n; i++) {
4179 struct btf_var_secinfo *vs = btf_var_secinfos(sec) + i;
4180
4181 t = btf__type_by_id(obj->btf, vs->type);
4182 ext_name = btf__name_by_offset(obj->btf, t->name_off);
4183 ext = find_extern_by_name(obj, ext_name);
4184 if (!ext) {
4185 pr_warn("failed to find extern definition for BTF var '%s'\n",
4186 ext_name);
4187 return -ESRCH;
4188 }
4189 btf_var(t)->linkage = BTF_VAR_GLOBAL_ALLOCATED;
4190 vs->offset = ext->kcfg.data_off;
4191 }
4192 }
4193 return 0;
4194 }
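
/* BPF-program-side sketch of the .ksyms externs collected above (the symbol
 * names are illustrative; __ksym comes from bpf_helpers.h):
 *
 *	extern const void bpf_prog_active __ksym;	// ksym variable
 *	extern void bpf_rcu_read_lock(void) __ksym;	// ksym function (kfunc)
 */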
4195
4196 static bool prog_is_subprog(const struct bpf_object *obj, const struct bpf_program *prog)
4197 {
4198 return prog->sec_idx == obj->efile.text_shndx && obj->nr_programs > 1;
4199 }
4200
4201 struct bpf_program *
4202 bpf_object__find_program_by_name(const struct bpf_object *obj,
4203 const char *name)
4204 {
4205 struct bpf_program *prog;
4206
4207 bpf_object__for_each_program(prog, obj) {
4208 if (prog_is_subprog(obj, prog))
4209 continue;
4210 if (!strcmp(prog->name, name))
4211 return prog;
4212 }
4213 return errno = ENOENT, NULL;
4214 }
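
/* Caller-side sketch (object file and program names are hypothetical):
 *
 *	struct bpf_object *obj = bpf_object__open_file("prog.bpf.o", NULL);
 *	struct bpf_program *prog;
 *
 *	if (!obj)
 *		return -errno;
 *	prog = bpf_object__find_program_by_name(obj, "handle_exec");
 *	if (!prog)
 *		return -errno;	// errno is ENOENT if no such (non-subprog) program
 */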
4215
4216 static bool bpf_object__shndx_is_data(const struct bpf_object *obj,
4217 int shndx)
4218 {
4219 switch (obj->efile.secs[shndx].sec_type) {
4220 case SEC_BSS:
4221 case SEC_DATA:
4222 case SEC_RODATA:
4223 return true;
4224 default:
4225 return false;
4226 }
4227 }
4228
4229 static bool bpf_object__shndx_is_maps(const struct bpf_object *obj,
4230 int shndx)
4231 {
4232 return shndx == obj->efile.btf_maps_shndx;
4233 }
4234
4235 static enum libbpf_map_type
4236 bpf_object__section_to_libbpf_map_type(const struct bpf_object *obj, int shndx)
4237 {
4238 if (shndx == obj->efile.symbols_shndx)
4239 return LIBBPF_MAP_KCONFIG;
4240
4241 switch (obj->efile.secs[shndx].sec_type) {
4242 case SEC_BSS:
4243 return LIBBPF_MAP_BSS;
4244 case SEC_DATA:
4245 return LIBBPF_MAP_DATA;
4246 case SEC_RODATA:
4247 return LIBBPF_MAP_RODATA;
4248 default:
4249 return LIBBPF_MAP_UNSPEC;
4250 }
4251 }
4252
4253 static int bpf_program__record_reloc(struct bpf_program *prog,
4254 struct reloc_desc *reloc_desc,
4255 __u32 insn_idx, const char *sym_name,
4256 const Elf64_Sym *sym, const Elf64_Rel *rel)
4257 {
4258 struct bpf_insn *insn = &prog->insns[insn_idx];
4259 size_t map_idx, nr_maps = prog->obj->nr_maps;
4260 struct bpf_object *obj = prog->obj;
4261 __u32 shdr_idx = sym->st_shndx;
4262 enum libbpf_map_type type;
4263 const char *sym_sec_name;
4264 struct bpf_map *map;
4265
4266 if (!is_call_insn(insn) && !is_ldimm64_insn(insn)) {
4267 pr_warn("prog '%s': invalid relo against '%s' for insns[%d].code 0x%x\n",
4268 prog->name, sym_name, insn_idx, insn->code);
4269 return -LIBBPF_ERRNO__RELOC;
4270 }
4271
4272 if (sym_is_extern(sym)) {
4273 int sym_idx = ELF64_R_SYM(rel->r_info);
4274 int i, n = obj->nr_extern;
4275 struct extern_desc *ext;
4276
4277 for (i = 0; i < n; i++) {
4278 ext = &obj->externs[i];
4279 if (ext->sym_idx == sym_idx)
4280 break;
4281 }
4282 if (i >= n) {
4283 pr_warn("prog '%s': extern relo failed to find extern for '%s' (%d)\n",
4284 prog->name, sym_name, sym_idx);
4285 return -LIBBPF_ERRNO__RELOC;
4286 }
4287 pr_debug("prog '%s': found extern #%d '%s' (sym %d) for insn #%u\n",
4288 prog->name, i, ext->name, ext->sym_idx, insn_idx);
4289 if (insn->code == (BPF_JMP | BPF_CALL))
4290 reloc_desc->type = RELO_EXTERN_CALL;
4291 else
4292 reloc_desc->type = RELO_EXTERN_LD64;
4293 reloc_desc->insn_idx = insn_idx;
4294 reloc_desc->ext_idx = i;
4295 return 0;
4296 }
4297
4298 /* sub-program call relocation */
4299 if (is_call_insn(insn)) {
4300 if (insn->src_reg != BPF_PSEUDO_CALL) {
4301 pr_warn("prog '%s': incorrect bpf_call opcode\n", prog->name);
4302 return -LIBBPF_ERRNO__RELOC;
4303 }
4304 /* text_shndx can be 0, if no default "main" program exists */
4305 if (!shdr_idx || shdr_idx != obj->efile.text_shndx) {
4306 sym_sec_name = elf_sec_name(obj, elf_sec_by_idx(obj, shdr_idx));
4307 pr_warn("prog '%s': bad call relo against '%s' in section '%s'\n",
4308 prog->name, sym_name, sym_sec_name);
4309 return -LIBBPF_ERRNO__RELOC;
4310 }
4311 if (sym->st_value % BPF_INSN_SZ) {
4312 pr_warn("prog '%s': bad call relo against '%s' at offset %zu\n",
4313 prog->name, sym_name, (size_t)sym->st_value);
4314 return -LIBBPF_ERRNO__RELOC;
4315 }
4316 reloc_desc->type = RELO_CALL;
4317 reloc_desc->insn_idx = insn_idx;
4318 reloc_desc->sym_off = sym->st_value;
4319 return 0;
4320 }
4321
4322 if (!shdr_idx || shdr_idx >= SHN_LORESERVE) {
4323 pr_warn("prog '%s': invalid relo against '%s' in special section 0x%x; forgot to initialize global var?..\n",
4324 prog->name, sym_name, shdr_idx);
4325 return -LIBBPF_ERRNO__RELOC;
4326 }
4327
4328 /* loading subprog addresses */
4329 if (sym_is_subprog(sym, obj->efile.text_shndx)) {
4330 /* global_func: sym->st_value = offset in the section, insn->imm = 0.
4331 * local_func: sym->st_value = 0, insn->imm = offset in the section.
4332 */
4333 if ((sym->st_value % BPF_INSN_SZ) || (insn->imm % BPF_INSN_SZ)) {
4334 pr_warn("prog '%s': bad subprog addr relo against '%s' at offset %zu+%d\n",
4335 prog->name, sym_name, (size_t)sym->st_value, insn->imm);
4336 return -LIBBPF_ERRNO__RELOC;
4337 }
4338
4339 reloc_desc->type = RELO_SUBPROG_ADDR;
4340 reloc_desc->insn_idx = insn_idx;
4341 reloc_desc->sym_off = sym->st_value;
4342 return 0;
4343 }
4344
4345 type = bpf_object__section_to_libbpf_map_type(obj, shdr_idx);
4346 sym_sec_name = elf_sec_name(obj, elf_sec_by_idx(obj, shdr_idx));
4347
4348 /* generic map reference relocation */
4349 if (type == LIBBPF_MAP_UNSPEC) {
4350 if (!bpf_object__shndx_is_maps(obj, shdr_idx)) {
4351 pr_warn("prog '%s': bad map relo against '%s' in section '%s'\n",
4352 prog->name, sym_name, sym_sec_name);
4353 return -LIBBPF_ERRNO__RELOC;
4354 }
4355 for (map_idx = 0; map_idx < nr_maps; map_idx++) {
4356 map = &obj->maps[map_idx];
4357 if (map->libbpf_type != type ||
4358 map->sec_idx != sym->st_shndx ||
4359 map->sec_offset != sym->st_value)
4360 continue;
4361 pr_debug("prog '%s': found map %zd (%s, sec %d, off %zu) for insn #%u\n",
4362 prog->name, map_idx, map->name, map->sec_idx,
4363 map->sec_offset, insn_idx);
4364 break;
4365 }
4366 if (map_idx >= nr_maps) {
4367 pr_warn("prog '%s': map relo failed to find map for section '%s', off %zu\n",
4368 prog->name, sym_sec_name, (size_t)sym->st_value);
4369 return -LIBBPF_ERRNO__RELOC;
4370 }
4371 reloc_desc->type = RELO_LD64;
4372 reloc_desc->insn_idx = insn_idx;
4373 reloc_desc->map_idx = map_idx;
4374 reloc_desc->sym_off = 0; /* sym->st_value determines map_idx */
4375 return 0;
4376 }
4377
4378 /* global data map relocation */
4379 if (!bpf_object__shndx_is_data(obj, shdr_idx)) {
4380 pr_warn("prog '%s': bad data relo against section '%s'\n",
4381 prog->name, sym_sec_name);
4382 return -LIBBPF_ERRNO__RELOC;
4383 }
4384 for (map_idx = 0; map_idx < nr_maps; map_idx++) {
4385 map = &obj->maps[map_idx];
4386 if (map->libbpf_type != type || map->sec_idx != sym->st_shndx)
4387 continue;
4388 pr_debug("prog '%s': found data map %zd (%s, sec %d, off %zu) for insn %u\n",
4389 prog->name, map_idx, map->name, map->sec_idx,
4390 map->sec_offset, insn_idx);
4391 break;
4392 }
4393 if (map_idx >= nr_maps) {
4394 pr_warn("prog '%s': data relo failed to find map for section '%s'\n",
4395 prog->name, sym_sec_name);
4396 return -LIBBPF_ERRNO__RELOC;
4397 }
4398
4399 reloc_desc->type = RELO_DATA;
4400 reloc_desc->insn_idx = insn_idx;
4401 reloc_desc->map_idx = map_idx;
4402 reloc_desc->sym_off = sym->st_value;
4403 return 0;
4404 }
4405
4406 static bool prog_contains_insn(const struct bpf_program *prog, size_t insn_idx)
4407 {
4408 return insn_idx >= prog->sec_insn_off &&
4409 insn_idx < prog->sec_insn_off + prog->sec_insn_cnt;
4410 }
4411
4412 static struct bpf_program *find_prog_by_sec_insn(const struct bpf_object *obj,
4413 size_t sec_idx, size_t insn_idx)
4414 {
4415 int l = 0, r = obj->nr_programs - 1, m;
4416 struct bpf_program *prog;
4417
4418 if (!obj->nr_programs)
4419 return NULL;
4420
4421 while (l < r) {
4422 m = l + (r - l + 1) / 2;
4423 prog = &obj->programs[m];
4424
4425 if (prog->sec_idx < sec_idx ||
4426 (prog->sec_idx == sec_idx && prog->sec_insn_off <= insn_idx))
4427 l = m;
4428 else
4429 r = m - 1;
4430 }
4431 /* matching program could be at index l, but it still might be the
4432 * wrong one, so we need to double-check the conditions one last time
4433 */
4434 prog = &obj->programs[l];
4435 if (prog->sec_idx == sec_idx && prog_contains_insn(prog, insn_idx))
4436 return prog;
4437 return NULL;
4438 }
4439
4440 static int
4441 bpf_object__collect_prog_relos(struct bpf_object *obj, Elf64_Shdr *shdr, Elf_Data *data)
4442 {
4443 const char *relo_sec_name, *sec_name;
4444 size_t sec_idx = shdr->sh_info, sym_idx;
4445 struct bpf_program *prog;
4446 struct reloc_desc *relos;
4447 int err, i, nrels;
4448 const char *sym_name;
4449 __u32 insn_idx;
4450 Elf_Scn *scn;
4451 Elf_Data *scn_data;
4452 Elf64_Sym *sym;
4453 Elf64_Rel *rel;
4454
4455 if (sec_idx >= obj->efile.sec_cnt)
4456 return -EINVAL;
4457
4458 scn = elf_sec_by_idx(obj, sec_idx);
4459 scn_data = elf_sec_data(obj, scn);
4460 if (!scn_data)
4461 return -LIBBPF_ERRNO__FORMAT;
4462
4463 relo_sec_name = elf_sec_str(obj, shdr->sh_name);
4464 sec_name = elf_sec_name(obj, scn);
4465 if (!relo_sec_name || !sec_name)
4466 return -EINVAL;
4467
4468 pr_debug("sec '%s': collecting relocation for section(%zu) '%s'\n",
4469 relo_sec_name, sec_idx, sec_name);
4470 nrels = shdr->sh_size / shdr->sh_entsize;
4471
4472 for (i = 0; i < nrels; i++) {
4473 rel = elf_rel_by_idx(data, i);
4474 if (!rel) {
4475 pr_warn("sec '%s': failed to get relo #%d\n", relo_sec_name, i);
4476 return -LIBBPF_ERRNO__FORMAT;
4477 }
4478
4479 sym_idx = ELF64_R_SYM(rel->r_info);
4480 sym = elf_sym_by_idx(obj, sym_idx);
4481 if (!sym) {
4482 pr_warn("sec '%s': symbol #%zu not found for relo #%d\n",
4483 relo_sec_name, sym_idx, i);
4484 return -LIBBPF_ERRNO__FORMAT;
4485 }
4486
4487 if (sym->st_shndx >= obj->efile.sec_cnt) {
4488 pr_warn("sec '%s': corrupted symbol #%zu pointing to invalid section #%zu for relo #%d\n",
4489 relo_sec_name, sym_idx, (size_t)sym->st_shndx, i);
4490 return -LIBBPF_ERRNO__FORMAT;
4491 }
4492
4493 if (rel->r_offset % BPF_INSN_SZ || rel->r_offset >= scn_data->d_size) {
4494 pr_warn("sec '%s': invalid offset 0x%zx for relo #%d\n",
4495 relo_sec_name, (size_t)rel->r_offset, i);
4496 return -LIBBPF_ERRNO__FORMAT;
4497 }
4498
4499 insn_idx = rel->r_offset / BPF_INSN_SZ;
4500 /* relocations against static functions are recorded as
4501 * relocations against the section that contains a function;
4502 * in such a case, the symbol will be STT_SECTION and sym.st_name
4503 * will point to an empty string (0), so fetch the section name
4504 * instead
4505 */
4506 if (ELF64_ST_TYPE(sym->st_info) == STT_SECTION && sym->st_name == 0)
4507 sym_name = elf_sec_name(obj, elf_sec_by_idx(obj, sym->st_shndx));
4508 else
4509 sym_name = elf_sym_str(obj, sym->st_name);
4510 sym_name = sym_name ?: "<?";
4511
4512 pr_debug("sec '%s': relo #%d: insn #%u against '%s'\n",
4513 relo_sec_name, i, insn_idx, sym_name);
4514
4515 prog = find_prog_by_sec_insn(obj, sec_idx, insn_idx);
4516 if (!prog) {
4517 pr_debug("sec '%s': relo #%d: couldn't find program in section '%s' for insn #%u, probably overridden weak function, skipping...\n",
4518 relo_sec_name, i, sec_name, insn_idx);
4519 continue;
4520 }
4521
4522 relos = libbpf_reallocarray(prog->reloc_desc,
4523 prog->nr_reloc + 1, sizeof(*relos));
4524 if (!relos)
4525 return -ENOMEM;
4526 prog->reloc_desc = relos;
4527
4528 /* adjust insn_idx to local BPF program frame of reference */
4529 insn_idx -= prog->sec_insn_off;
4530 err = bpf_program__record_reloc(prog, &relos[prog->nr_reloc],
4531 insn_idx, sym_name, sym, rel);
4532 if (err)
4533 return err;
4534
4535 prog->nr_reloc++;
4536 }
4537 return 0;
4538 }
4539
4540 static int map_fill_btf_type_info(struct bpf_object *obj, struct bpf_map *map)
4541 {
4542 int id;
4543
4544 if (!obj->btf)
4545 return -ENOENT;
4546
4547 /* if it's a BTF-defined map, we don't need to search for type IDs.
4548 * A struct_ops map does not need btf_key_type_id and
4549 * btf_value_type_id.
4550 */
4551 if (map->sec_idx == obj->efile.btf_maps_shndx || bpf_map__is_struct_ops(map))
4552 return 0;
4553
4554 /*
4555 * LLVM annotates global data differently in BTF, that is,
4556 * only as '.data', '.bss' or '.rodata'.
4557 */
4558 if (!bpf_map__is_internal(map))
4559 return -ENOENT;
4560
4561 id = btf__find_by_name(obj->btf, map->real_name);
4562 if (id < 0)
4563 return id;
4564
4565 map->btf_key_type_id = 0;
4566 map->btf_value_type_id = id;
4567 return 0;
4568 }
4569
4570 static int bpf_get_map_info_from_fdinfo(int fd, struct bpf_map_info *info)
4571 {
4572 char file[PATH_MAX], buff[4096];
4573 FILE *fp;
4574 __u32 val;
4575 int err;
4576
4577 snprintf(file, sizeof(file), "/proc/%d/fdinfo/%d", getpid(), fd);
4578 memset(info, 0, sizeof(*info));
4579
4580 fp = fopen(file, "re");
4581 if (!fp) {
4582 err = -errno;
4583 pr_warn("failed to open %s: %d. No procfs support?\n", file,
4584 err);
4585 return err;
4586 }
4587
4588 while (fgets(buff, sizeof(buff), fp)) {
4589 if (sscanf(buff, "map_type:\t%u", &val) == 1)
4590 info->type = val;
4591 else if (sscanf(buff, "key_size:\t%u", &val) == 1)
4592 info->key_size = val;
4593 else if (sscanf(buff, "value_size:\t%u", &val) == 1)
4594 info->value_size = val;
4595 else if (sscanf(buff, "max_entries:\t%u", &val) == 1)
4596 info->max_entries = val;
4597 else if (sscanf(buff, "map_flags:\t%i", &val) == 1)
4598 info->map_flags = val;
4599 }
4600
4601 fclose(fp);
4602
4603 return 0;
4604 }
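
/* The fdinfo text parsed above looks roughly like this for a BPF map FD
 * (values are illustrative; only the fields matched by sscanf() are shown):
 *
 *	map_type:	1
 *	key_size:	4
 *	value_size:	8
 *	max_entries:	1024
 *	map_flags:	0x0
 */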
4605
4606 bool bpf_map__autocreate(const struct bpf_map *map)
4607 {
4608 return map->autocreate;
4609 }
4610
4611 int bpf_map__set_autocreate(struct bpf_map *map, bool autocreate)
4612 {
4613 if (map->obj->loaded)
4614 return libbpf_err(-EBUSY);
4615
4616 map->autocreate = autocreate;
4617 return 0;
4618 }
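
/* Caller-side sketch: skip creation of an optional map (the map name is
 * hypothetical):
 *
 *	struct bpf_map *m = bpf_object__find_map_by_name(obj, "optional_stats");
 *
 *	err = bpf_map__set_autocreate(m, false);	// must happen before bpf_object__load()
 */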
4619
4620 int bpf_map__reuse_fd(struct bpf_map *map, int fd)
4621 {
4622 struct bpf_map_info info;
4623 __u32 len = sizeof(info), name_len;
4624 int new_fd, err;
4625 char *new_name;
4626
4627 memset(&info, 0, len);
4628 err = bpf_map_get_info_by_fd(fd, &info, &len);
4629 if (err && errno == EINVAL)
4630 err = bpf_get_map_info_from_fdinfo(fd, &info);
4631 if (err)
4632 return libbpf_err(err);
4633
4634 name_len = strlen(info.name);
4635 if (name_len == BPF_OBJ_NAME_LEN - 1 && strncmp(map->name, info.name, name_len) == 0)
4636 new_name = strdup(map->name);
4637 else
4638 new_name = strdup(info.name);
4639
4640 if (!new_name)
4641 return libbpf_err(-errno);
4642
4643 /*
4644 * Like dup(), but make sure new FD is >= 3 and has O_CLOEXEC set.
4645 * This is similar to what we do in ensure_good_fd(), but without
4646 * closing original FD.
4647 */
4648 new_fd = fcntl(fd, F_DUPFD_CLOEXEC, 3);
4649 if (new_fd < 0) {
4650 err = -errno;
4651 goto err_free_new_name;
4652 }
4653
4654 err = reuse_fd(map->fd, new_fd);
4655 if (err)
4656 goto err_free_new_name;
4657
4658 free(map->name);
4659
4660 map->name = new_name;
4661 map->def.type = info.type;
4662 map->def.key_size = info.key_size;
4663 map->def.value_size = info.value_size;
4664 map->def.max_entries = info.max_entries;
4665 map->def.map_flags = info.map_flags;
4666 map->btf_key_type_id = info.btf_key_type_id;
4667 map->btf_value_type_id = info.btf_value_type_id;
4668 map->reused = true;
4669 map->map_extra = info.map_extra;
4670
4671 return 0;
4672
4673 err_free_new_name:
4674 free(new_name);
4675 return libbpf_err(err);
4676 }
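
/* Caller-side sketch for sharing one pinned map across objects (the pin path
 * and map name are hypothetical):
 *
 *	int pin_fd = bpf_obj_get("/sys/fs/bpf/shared_counts");
 *
 *	if (pin_fd < 0)
 *		return pin_fd;
 *	err = bpf_map__reuse_fd(bpf_object__find_map_by_name(obj, "counts"), pin_fd);
 *	close(pin_fd);	// bpf_map__reuse_fd() dup'ed it, the original FD can be closed
 */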
4677
4678 __u32 bpf_map__max_entries(const struct bpf_map *map)
4679 {
4680 return map->def.max_entries;
4681 }
4682
4683 struct bpf_map *bpf_map__inner_map(struct bpf_map *map)
4684 {
4685 if (!bpf_map_type__is_map_in_map(map->def.type))
4686 return errno = EINVAL, NULL;
4687
4688 return map->inner_map;
4689 }
4690
4691 int bpf_map__set_max_entries(struct bpf_map *map, __u32 max_entries)
4692 {
4693 if (map->obj->loaded)
4694 return libbpf_err(-EBUSY);
4695
4696 map->def.max_entries = max_entries;
4697
4698 /* auto-adjust BPF ringbuf map max_entries to be a multiple of page size */
4699 if (map_is_ringbuf(map))
4700 map->def.max_entries = adjust_ringbuf_sz(map->def.max_entries);
4701
4702 return 0;
4703 }
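
/* Caller-side sketch: resize a ring buffer map before load (the map name is
 * hypothetical); for BPF_MAP_TYPE_RINGBUF the requested size is auto-adjusted
 * above to a page-size multiple suitable for ring buffers:
 *
 *	struct bpf_map *rb = bpf_object__find_map_by_name(obj, "events");
 *
 *	err = bpf_map__set_max_entries(rb, 512 * 1024);
 */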
4704
4705 static int bpf_object_prepare_token(struct bpf_object *obj)
4706 {
4707 const char *bpffs_path;
4708 int bpffs_fd = -1, token_fd, err;
4709 bool mandatory;
4710 enum libbpf_print_level level;
4711
4712 /* token is explicitly prevented */
4713 if (obj->token_path && obj->token_path[0] == '\0') {
4714 pr_debug("object '%s': token is prevented, skipping...\n", obj->name);
4715 return 0;
4716 }
4717
4718 mandatory = obj->token_path != NULL;
4719 level = mandatory ? LIBBPF_WARN : LIBBPF_DEBUG;
4720
4721 bpffs_path = obj->token_path ?: BPF_FS_DEFAULT_PATH;
4722 bpffs_fd = open(bpffs_path, O_DIRECTORY, O_RDWR);
4723 if (bpffs_fd < 0) {
4724 err = -errno;
4725 __pr(level, "object '%s': failed (%d) to open BPF FS mount at '%s'%s\n",
4726 obj->name, err, bpffs_path,
4727 mandatory ? "" : ", skipping optional step...");
4728 return mandatory ? err : 0;
4729 }
4730
4731 token_fd = bpf_token_create(bpffs_fd, 0);
4732 close(bpffs_fd);
4733 if (token_fd < 0) {
4734 if (!mandatory && token_fd == -ENOENT) {
4735 pr_debug("object '%s': BPF FS at '%s' doesn't have BPF token delegation set up, skipping...\n",
4736 obj->name, bpffs_path);
4737 return 0;
4738 }
4739 __pr(level, "object '%s': failed (%d) to create BPF token from '%s'%s\n",
4740 obj->name, token_fd, bpffs_path,
4741 mandatory ? "" : ", skipping optional step...");
4742 return mandatory ? token_fd : 0;
4743 }
4744
4745 obj->feat_cache = calloc(1, sizeof(*obj->feat_cache));
4746 if (!obj->feat_cache) {
4747 close(token_fd);
4748 return -ENOMEM;
4749 }
4750
4751 obj->token_fd = token_fd;
4752 obj->feat_cache->token_fd = token_fd;
4753
4754 return 0;
4755 }
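
/* Caller-side sketch for requesting a BPF token explicitly at open time
 * (assuming the bpf_token_path field of bpf_object_open_opts, and a bpffs
 * mount at /sys/fs/bpf with delegation options configured):
 *
 *	LIBBPF_OPTS(bpf_object_open_opts, opts, .bpf_token_path = "/sys/fs/bpf");
 *	struct bpf_object *obj = bpf_object__open_file("prog.bpf.o", &opts);
 */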
4756
4757 static int
4758 bpf_object__probe_loading(struct bpf_object *obj)
4759 {
4760 char *cp, errmsg[STRERR_BUFSIZE];
4761 struct bpf_insn insns[] = {
4762 BPF_MOV64_IMM(BPF_REG_0, 0),
4763 BPF_EXIT_INSN(),
4764 };
4765 int ret, insn_cnt = ARRAY_SIZE(insns);
4766 LIBBPF_OPTS(bpf_prog_load_opts, opts,
4767 .token_fd = obj->token_fd,
4768 .prog_flags = obj->token_fd ? BPF_F_TOKEN_FD : 0,
4769 );
4770
4771 if (obj->gen_loader)
4772 return 0;
4773
4774 ret = bump_rlimit_memlock();
4775 if (ret)
4776 pr_warn("Failed to bump RLIMIT_MEMLOCK (err = %d), you might need to do it explicitly!\n", ret);
4777
4778 /* make sure basic loading works */
4779 ret = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, NULL, "GPL", insns, insn_cnt, &opts);
4780 if (ret < 0)
4781 ret = bpf_prog_load(BPF_PROG_TYPE_TRACEPOINT, NULL, "GPL", insns, insn_cnt, &opts);
4782 if (ret < 0) {
4783 ret = errno;
4784 cp = libbpf_strerror_r(ret, errmsg, sizeof(errmsg));
4785 pr_warn("Error in %s():%s(%d). Couldn't load trivial BPF "
4786 "program. Make sure your kernel supports BPF "
4787 "(CONFIG_BPF_SYSCALL=y) and/or that RLIMIT_MEMLOCK is "
4788 "set to big enough value.\n", __func__, cp, ret);
4789 return -ret;
4790 }
4791 close(ret);
4792
4793 return 0;
4794 }
4795
4796 bool kernel_supports(const struct bpf_object *obj, enum kern_feature_id feat_id)
4797 {
4798 if (obj->gen_loader)
4799 /* To generate a loader program, assume the latest kernel
4800 * to avoid doing extra prog_load, map_create syscalls.
4801 */
4802 return true;
4803
4804 if (obj->token_fd)
4805 return feat_supported(obj->feat_cache, feat_id);
4806
4807 return feat_supported(NULL, feat_id);
4808 }
4809
4810 static bool map_is_reuse_compat(const struct bpf_map *map, int map_fd)
4811 {
4812 struct bpf_map_info map_info;
4813 char msg[STRERR_BUFSIZE];
4814 __u32 map_info_len = sizeof(map_info);
4815 int err;
4816
4817 memset(&map_info, 0, map_info_len);
4818 err = bpf_map_get_info_by_fd(map_fd, &map_info, &map_info_len);
4819 if (err && errno == EINVAL)
4820 err = bpf_get_map_info_from_fdinfo(map_fd, &map_info);
4821 if (err) {
4822 pr_warn("failed to get map info for map FD %d: %s\n", map_fd,
4823 libbpf_strerror_r(errno, msg, sizeof(msg)));
4824 return false;
4825 }
4826
4827 return (map_info.type == map->def.type &&
4828 map_info.key_size == map->def.key_size &&
4829 map_info.value_size == map->def.value_size &&
4830 map_info.max_entries == map->def.max_entries &&
4831 map_info.map_flags == map->def.map_flags &&
4832 map_info.map_extra == map->map_extra);
4833 }
4834
4835 static int
4836 bpf_object__reuse_map(struct bpf_map *map)
4837 {
4838 char *cp, errmsg[STRERR_BUFSIZE];
4839 int err, pin_fd;
4840
4841 pin_fd = bpf_obj_get(map->pin_path);
4842 if (pin_fd < 0) {
4843 err = -errno;
4844 if (err == -ENOENT) {
4845 pr_debug("found no pinned map to reuse at '%s'\n",
4846 map->pin_path);
4847 return 0;
4848 }
4849
4850 cp = libbpf_strerror_r(-err, errmsg, sizeof(errmsg));
4851 pr_warn("couldn't retrieve pinned map '%s': %s\n",
4852 map->pin_path, cp);
4853 return err;
4854 }
4855
4856 if (!map_is_reuse_compat(map, pin_fd)) {
4857 pr_warn("couldn't reuse pinned map at '%s': parameter mismatch\n",
4858 map->pin_path);
4859 close(pin_fd);
4860 return -EINVAL;
4861 }
4862
4863 err = bpf_map__reuse_fd(map, pin_fd);
4864 close(pin_fd);
4865 if (err)
4866 return err;
4867
4868 map->pinned = true;
4869 pr_debug("reused pinned map at '%s'\n", map->pin_path);
4870
4871 return 0;
4872 }
4873
4874 static int
4875 bpf_object__populate_internal_map(struct bpf_object *obj, struct bpf_map *map)
4876 {
4877 enum libbpf_map_type map_type = map->libbpf_type;
4878 char *cp, errmsg[STRERR_BUFSIZE];
4879 int err, zero = 0;
4880
4881 if (obj->gen_loader) {
4882 bpf_gen__map_update_elem(obj->gen_loader, map - obj->maps,
4883 map->mmaped, map->def.value_size);
4884 if (map_type == LIBBPF_MAP_RODATA || map_type == LIBBPF_MAP_KCONFIG)
4885 bpf_gen__map_freeze(obj->gen_loader, map - obj->maps);
4886 return 0;
4887 }
4888 err = bpf_map_update_elem(map->fd, &zero, map->mmaped, 0);
4889 if (err) {
4890 err = -errno;
4891 cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg));
4892 pr_warn("Error setting initial map(%s) contents: %s\n",
4893 map->name, cp);
4894 return err;
4895 }
4896
4897 /* Freeze .rodata and .kconfig map as read-only from syscall side. */
4898 if (map_type == LIBBPF_MAP_RODATA || map_type == LIBBPF_MAP_KCONFIG) {
4899 err = bpf_map_freeze(map->fd);
4900 if (err) {
4901 err = -errno;
4902 cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg));
4903 pr_warn("Error freezing map(%s) as read-only: %s\n",
4904 map->name, cp);
4905 return err;
4906 }
4907 }
4908 return 0;
4909 }
4910
4911 static void bpf_map__destroy(struct bpf_map *map);
4912
4913 static bool map_is_created(const struct bpf_map *map)
4914 {
4915 return map->obj->loaded || map->reused;
4916 }
4917
4918 static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map, bool is_inner)
4919 {
4920 LIBBPF_OPTS(bpf_map_create_opts, create_attr);
4921 struct bpf_map_def *def = &map->def;
4922 const char *map_name = NULL;
4923 int err = 0, map_fd;
4924
4925 if (kernel_supports(obj, FEAT_PROG_NAME))
4926 map_name = map->name;
4927 create_attr.map_ifindex = map->map_ifindex;
4928 create_attr.map_flags = def->map_flags;
4929 create_attr.numa_node = map->numa_node;
4930 create_attr.map_extra = map->map_extra;
4931 create_attr.token_fd = obj->token_fd;
4932 if (obj->token_fd)
4933 create_attr.map_flags |= BPF_F_TOKEN_FD;
4934
4935 if (bpf_map__is_struct_ops(map)) {
4936 create_attr.btf_vmlinux_value_type_id = map->btf_vmlinux_value_type_id;
4937 if (map->mod_btf_fd >= 0) {
4938 create_attr.value_type_btf_obj_fd = map->mod_btf_fd;
4939 create_attr.map_flags |= BPF_F_VTYPE_BTF_OBJ_FD;
4940 }
4941 }
4942
4943 if (obj->btf && btf__fd(obj->btf) >= 0) {
4944 create_attr.btf_fd = btf__fd(obj->btf);
4945 create_attr.btf_key_type_id = map->btf_key_type_id;
4946 create_attr.btf_value_type_id = map->btf_value_type_id;
4947 }
4948
4949 if (bpf_map_type__is_map_in_map(def->type)) {
4950 if (map->inner_map) {
4951 err = map_set_def_max_entries(map->inner_map);
4952 if (err)
4953 return err;
4954 err = bpf_object__create_map(obj, map->inner_map, true);
4955 if (err) {
4956 pr_warn("map '%s': failed to create inner map: %d\n",
4957 map->name, err);
4958 return err;
4959 }
4960 map->inner_map_fd = map->inner_map->fd;
4961 }
4962 if (map->inner_map_fd >= 0)
4963 create_attr.inner_map_fd = map->inner_map_fd;
4964 }
4965
4966 switch (def->type) {
4967 case BPF_MAP_TYPE_PERF_EVENT_ARRAY:
4968 case BPF_MAP_TYPE_CGROUP_ARRAY:
4969 case BPF_MAP_TYPE_STACK_TRACE:
4970 case BPF_MAP_TYPE_ARRAY_OF_MAPS:
4971 case BPF_MAP_TYPE_HASH_OF_MAPS:
4972 case BPF_MAP_TYPE_DEVMAP:
4973 case BPF_MAP_TYPE_DEVMAP_HASH:
4974 case BPF_MAP_TYPE_CPUMAP:
4975 case BPF_MAP_TYPE_XSKMAP:
4976 case BPF_MAP_TYPE_SOCKMAP:
4977 case BPF_MAP_TYPE_SOCKHASH:
4978 case BPF_MAP_TYPE_QUEUE:
4979 case BPF_MAP_TYPE_STACK:
4980 create_attr.btf_fd = 0;
4981 create_attr.btf_key_type_id = 0;
4982 create_attr.btf_value_type_id = 0;
4983 map->btf_key_type_id = 0;
4984 map->btf_value_type_id = 0;
4985 break;
4986 case BPF_MAP_TYPE_STRUCT_OPS:
4987 create_attr.btf_value_type_id = 0;
4988 break;
4989 default:
4990 break;
4991 }
4992
4993 if (obj->gen_loader) {
4994 bpf_gen__map_create(obj->gen_loader, def->type, map_name,
4995 def->key_size, def->value_size, def->max_entries,
4996 &create_attr, is_inner ? -1 : map - obj->maps);
4997 /* We keep pretending we have a valid FD to pass various fd >= 0
4998 * checks by just keeping original placeholder FDs in place.
4999 * See bpf_object__add_map() comment.
5000 * This placeholder fd will not be used with any syscall and
5001 * will be reset to -1 eventually.
5002 */
5003 map_fd = map->fd;
5004 } else {
5005 map_fd = bpf_map_create(def->type, map_name,
5006 def->key_size, def->value_size,
5007 def->max_entries, &create_attr);
5008 }
5009 if (map_fd < 0 && (create_attr.btf_key_type_id || create_attr.btf_value_type_id)) {
5010 char *cp, errmsg[STRERR_BUFSIZE];
5011
5012 err = -errno;
5013 cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg));
5014 pr_warn("Error in bpf_create_map_xattr(%s):%s(%d). Retrying without BTF.\n",
5015 map->name, cp, err);
5016 create_attr.btf_fd = 0;
5017 create_attr.btf_key_type_id = 0;
5018 create_attr.btf_value_type_id = 0;
5019 map->btf_key_type_id = 0;
5020 map->btf_value_type_id = 0;
5021 map_fd = bpf_map_create(def->type, map_name,
5022 def->key_size, def->value_size,
5023 def->max_entries, &create_attr);
5024 }
5025
5026 if (bpf_map_type__is_map_in_map(def->type) && map->inner_map) {
5027 if (obj->gen_loader)
5028 map->inner_map->fd = -1;
5029 bpf_map__destroy(map->inner_map);
5030 zfree(&map->inner_map);
5031 }
5032
5033 if (map_fd < 0)
5034 return map_fd;
5035
5036 /* obj->gen_loader case, prevent reuse_fd() from closing map_fd */
5037 if (map->fd == map_fd)
5038 return 0;
5039
5040 /* Keep placeholder FD value but now point it to the BPF map object.
5041 * This way everything that relied on this map's FD (e.g., relocated
5042 * ldimm64 instructions) will stay valid and won't need adjustments.
5043 * map->fd stays valid but now points to what map_fd points to.
5044 */
5045 return reuse_fd(map->fd, map_fd);
5046 }
5047
5048 static int init_map_in_map_slots(struct bpf_object *obj, struct bpf_map *map)
5049 {
5050 const struct bpf_map *targ_map;
5051 unsigned int i;
5052 int fd, err = 0;
5053
5054 for (i = 0; i < map->init_slots_sz; i++) {
5055 if (!map->init_slots[i])
5056 continue;
5057
5058 targ_map = map->init_slots[i];
5059 fd = targ_map->fd;
5060
5061 if (obj->gen_loader) {
5062 bpf_gen__populate_outer_map(obj->gen_loader,
5063 map - obj->maps, i,
5064 targ_map - obj->maps);
5065 } else {
5066 err = bpf_map_update_elem(map->fd, &i, &fd, 0);
5067 }
5068 if (err) {
5069 err = -errno;
5070 pr_warn("map '%s': failed to initialize slot [%d] to map '%s' fd=%d: %d\n",
5071 map->name, i, targ_map->name, fd, err);
5072 return err;
5073 }
5074 pr_debug("map '%s': slot [%d] set to map '%s' fd=%d\n",
5075 map->name, i, targ_map->name, fd);
5076 }
5077
5078 zfree(&map->init_slots);
5079 map->init_slots_sz = 0;
5080
5081 return 0;
5082 }
5083
5084 static int init_prog_array_slots(struct bpf_object *obj, struct bpf_map *map)
5085 {
5086 const struct bpf_program *targ_prog;
5087 unsigned int i;
5088 int fd, err;
5089
5090 if (obj->gen_loader)
5091 return -ENOTSUP;
5092
5093 for (i = 0; i < map->init_slots_sz; i++) {
5094 if (!map->init_slots[i])
5095 continue;
5096
5097 targ_prog = map->init_slots[i];
5098 fd = bpf_program__fd(targ_prog);
5099
5100 err = bpf_map_update_elem(map->fd, &i, &fd, 0);
5101 if (err) {
5102 err = -errno;
5103 pr_warn("map '%s': failed to initialize slot [%d] to prog '%s' fd=%d: %d\n",
5104 map->name, i, targ_prog->name, fd, err);
5105 return err;
5106 }
5107 pr_debug("map '%s': slot [%d] set to prog '%s' fd=%d\n",
5108 map->name, i, targ_prog->name, fd);
5109 }
5110
5111 zfree(&map->init_slots);
5112 map->init_slots_sz = 0;
5113
5114 return 0;
5115 }
5116
5117 static int bpf_object_init_prog_arrays(struct bpf_object *obj)
5118 {
5119 struct bpf_map *map;
5120 int i, err;
5121
5122 for (i = 0; i < obj->nr_maps; i++) {
5123 map = &obj->maps[i];
5124
5125 if (!map->init_slots_sz || map->def.type != BPF_MAP_TYPE_PROG_ARRAY)
5126 continue;
5127
5128 err = init_prog_array_slots(obj, map);
5129 if (err < 0)
5130 return err;
5131 }
5132 return 0;
5133 }
5134
5135 static int map_set_def_max_entries(struct bpf_map *map)
5136 {
5137 if (map->def.type == BPF_MAP_TYPE_PERF_EVENT_ARRAY && !map->def.max_entries) {
5138 int nr_cpus;
5139
5140 nr_cpus = libbpf_num_possible_cpus();
5141 if (nr_cpus < 0) {
5142 pr_warn("map '%s': failed to determine number of system CPUs: %d\n",
5143 map->name, nr_cpus);
5144 return nr_cpus;
5145 }
5146 pr_debug("map '%s': setting size to %d\n", map->name, nr_cpus);
5147 map->def.max_entries = nr_cpus;
5148 }
5149
5150 return 0;
5151 }
5152
5153 static int
5154 bpf_object__create_maps(struct bpf_object *obj)
5155 {
5156 struct bpf_map *map;
5157 char *cp, errmsg[STRERR_BUFSIZE];
5158 unsigned int i, j;
5159 int err;
5160 bool retried;
5161
5162 for (i = 0; i < obj->nr_maps; i++) {
5163 map = &obj->maps[i];
5164
5165 /* To support old kernels, we skip creating global data maps
5166 * (.rodata, .data, .kconfig, etc); later on, during program
5167 * loading, if we detect that at least one of the to-be-loaded
5168 * programs is referencing any global data map, we'll error
5169 * out with program name and relocation index logged.
5170 * This approach allows us to accommodate Clang emitting
5171 * unnecessary .rodata.str1.1 sections for string literals,
5172 * and also allows CO-RE applications to use
5173 * global variables in some BPF programs, but not others.
5174 * If those global variable-using programs are not loaded at
5175 * runtime due to bpf_program__set_autoload(prog, false),
5176 * bpf_object loading will succeed just fine even on old
5177 * kernels.
5178 */
5179 if (bpf_map__is_internal(map) && !kernel_supports(obj, FEAT_GLOBAL_DATA))
5180 map->autocreate = false;
5181
5182 if (!map->autocreate) {
5183 pr_debug("map '%s': skipped auto-creating...\n", map->name);
5184 continue;
5185 }
5186
5187 err = map_set_def_max_entries(map);
5188 if (err)
5189 goto err_out;
5190
5191 retried = false;
5192 retry:
5193 if (map->pin_path) {
5194 err = bpf_object__reuse_map(map);
5195 if (err) {
5196 pr_warn("map '%s': error reusing pinned map\n",
5197 map->name);
5198 goto err_out;
5199 }
5200 if (retried && map->fd < 0) {
5201 pr_warn("map '%s': cannot find pinned map\n",
5202 map->name);
5203 err = -ENOENT;
5204 goto err_out;
5205 }
5206 }
5207
5208 if (map->reused) {
5209 pr_debug("map '%s': skipping creation (preset fd=%d)\n",
5210 map->name, map->fd);
5211 } else {
5212 err = bpf_object__create_map(obj, map, false);
5213 if (err)
5214 goto err_out;
5215
5216 pr_debug("map '%s': created successfully, fd=%d\n",
5217 map->name, map->fd);
5218
5219 if (bpf_map__is_internal(map)) {
5220 err = bpf_object__populate_internal_map(obj, map);
5221 if (err < 0)
5222 goto err_out;
5223 }
5224
5225 if (map->init_slots_sz && map->def.type != BPF_MAP_TYPE_PROG_ARRAY) {
5226 err = init_map_in_map_slots(obj, map);
5227 if (err < 0)
5228 goto err_out;
5229 }
5230 }
5231
5232 if (map->pin_path && !map->pinned) {
5233 err = bpf_map__pin(map, NULL);
5234 if (err) {
5235 if (!retried && err == -EEXIST) {
5236 retried = true;
5237 goto retry;
5238 }
5239 pr_warn("map '%s': failed to auto-pin at '%s': %d\n",
5240 map->name, map->pin_path, err);
5241 goto err_out;
5242 }
5243 }
5244 }
5245
5246 return 0;
5247
5248 err_out:
5249 cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg));
5250 pr_warn("map '%s': failed to create: %s(%d)\n", map->name, cp, err);
5251 pr_perm_msg(err);
5252 for (j = 0; j < i; j++)
5253 zclose(obj->maps[j].fd);
5254 return err;
5255 }
5256
5257 static bool bpf_core_is_flavor_sep(const char *s)
5258 {
5259 /* check X___Y name pattern, where X and Y are not underscores */
5260 return s[0] != '_' && /* X */
5261 s[1] == '_' && s[2] == '_' && s[3] == '_' && /* ___ */
5262 s[4] != '_'; /* Y */
5263 }
5264
5265 /* Given 'some_struct_name___with_flavor' return the length of a name prefix
5266 * before the last triple underscore. The struct name part after the last
5267 * triple underscore is ignored during BPF CO-RE relocation matching.
5268 */
5269 size_t bpf_core_essential_name_len(const char *name)
5270 {
5271 size_t n = strlen(name);
5272 int i;
5273
5274 for (i = n - 5; i >= 0; i--) {
5275 if (bpf_core_is_flavor_sep(name + i))
5276 return i + 1;
5277 }
5278 return n;
5279 }
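
/* Example: both of these local "flavors" have the essential name
 * "task_struct" (the prefix before the last '___'), so CO-RE candidate
 * matching considers plain 'struct task_struct' in the target BTF for
 * either one (field layouts are illustrative):
 *
 *	struct task_struct___old { long state; } __attribute__((preserve_access_index));
 *	struct task_struct___new { unsigned int __state; } __attribute__((preserve_access_index));
 */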
5280
5281 void bpf_core_free_cands(struct bpf_core_cand_list *cands)
5282 {
5283 if (!cands)
5284 return;
5285
5286 free(cands->cands);
5287 free(cands);
5288 }
5289
5290 int bpf_core_add_cands(struct bpf_core_cand *local_cand,
5291 size_t local_essent_len,
5292 const struct btf *targ_btf,
5293 const char *targ_btf_name,
5294 int targ_start_id,
5295 struct bpf_core_cand_list *cands)
5296 {
5297 struct bpf_core_cand *new_cands, *cand;
5298 const struct btf_type *t, *local_t;
5299 const char *targ_name, *local_name;
5300 size_t targ_essent_len;
5301 int n, i;
5302
5303 local_t = btf__type_by_id(local_cand->btf, local_cand->id);
5304 local_name = btf__str_by_offset(local_cand->btf, local_t->name_off);
5305
5306 n = btf__type_cnt(targ_btf);
5307 for (i = targ_start_id; i < n; i++) {
5308 t = btf__type_by_id(targ_btf, i);
5309 if (!btf_kind_core_compat(t, local_t))
5310 continue;
5311
5312 targ_name = btf__name_by_offset(targ_btf, t->name_off);
5313 if (str_is_empty(targ_name))
5314 continue;
5315
5316 targ_essent_len = bpf_core_essential_name_len(targ_name);
5317 if (targ_essent_len != local_essent_len)
5318 continue;
5319
5320 if (strncmp(local_name, targ_name, local_essent_len) != 0)
5321 continue;
5322
5323 pr_debug("CO-RE relocating [%d] %s %s: found target candidate [%d] %s %s in [%s]\n",
5324 local_cand->id, btf_kind_str(local_t),
5325 local_name, i, btf_kind_str(t), targ_name,
5326 targ_btf_name);
5327 new_cands = libbpf_reallocarray(cands->cands, cands->len + 1,
5328 sizeof(*cands->cands));
5329 if (!new_cands)
5330 return -ENOMEM;
5331
5332 cand = &new_cands[cands->len];
5333 cand->btf = targ_btf;
5334 cand->id = i;
5335
5336 cands->cands = new_cands;
5337 cands->len++;
5338 }
5339 return 0;
5340 }
5341
5342 static int load_module_btfs(struct bpf_object *obj)
5343 {
5344 struct bpf_btf_info info;
5345 struct module_btf *mod_btf;
5346 struct btf *btf;
5347 char name[64];
5348 __u32 id = 0, len;
5349 int err, fd;
5350
5351 if (obj->btf_modules_loaded)
5352 return 0;
5353
5354 if (obj->gen_loader)
5355 return 0;
5356
5357 /* don't do this again, even if we find no module BTFs */
5358 obj->btf_modules_loaded = true;
5359
5360 /* kernel too old to support module BTFs */
5361 if (!kernel_supports(obj, FEAT_MODULE_BTF))
5362 return 0;
5363
5364 while (true) {
5365 err = bpf_btf_get_next_id(id, &id);
5366 if (err && errno == ENOENT)
5367 return 0;
5368 if (err && errno == EPERM) {
5369 pr_debug("skipping module BTFs loading, missing privileges\n");
5370 return 0;
5371 }
5372 if (err) {
5373 err = -errno;
5374 pr_warn("failed to iterate BTF objects: %d\n", err);
5375 return err;
5376 }
5377
5378 fd = bpf_btf_get_fd_by_id(id);
5379 if (fd < 0) {
5380 if (errno == ENOENT)
5381 continue; /* expected race: BTF was unloaded */
5382 err = -errno;
5383 pr_warn("failed to get BTF object #%d FD: %d\n", id, err);
5384 return err;
5385 }
5386
5387 len = sizeof(info);
5388 memset(&info, 0, sizeof(info));
5389 info.name = ptr_to_u64(name);
5390 info.name_len = sizeof(name);
5391
5392 err = bpf_btf_get_info_by_fd(fd, &info, &len);
5393 if (err) {
5394 err = -errno;
5395 pr_warn("failed to get BTF object #%d info: %d\n", id, err);
5396 goto err_out;
5397 }
5398
5399 /* ignore non-module BTFs */
5400 if (!info.kernel_btf || strcmp(name, "vmlinux") == 0) {
5401 close(fd);
5402 continue;
5403 }
5404
5405 btf = btf_get_from_fd(fd, obj->btf_vmlinux);
5406 err = libbpf_get_error(btf);
5407 if (err) {
5408 pr_warn("failed to load module [%s]'s BTF object #%d: %d\n",
5409 name, id, err);
5410 goto err_out;
5411 }
5412
5413 err = libbpf_ensure_mem((void **)&obj->btf_modules, &obj->btf_module_cap,
5414 sizeof(*obj->btf_modules), obj->btf_module_cnt + 1);
5415 if (err)
5416 goto err_out;
5417
5418 mod_btf = &obj->btf_modules[obj->btf_module_cnt++];
5419
5420 mod_btf->btf = btf;
5421 mod_btf->id = id;
5422 mod_btf->fd = fd;
5423 mod_btf->name = strdup(name);
5424 if (!mod_btf->name) {
5425 err = -ENOMEM;
5426 goto err_out;
5427 }
5428 continue;
5429
5430 err_out:
5431 close(fd);
5432 return err;
5433 }
5434
5435 return 0;
5436 }
5437
5438 static struct bpf_core_cand_list *
5439 bpf_core_find_cands(struct bpf_object *obj, const struct btf *local_btf, __u32 local_type_id)
5440 {
5441 struct bpf_core_cand local_cand = {};
5442 struct bpf_core_cand_list *cands;
5443 const struct btf *main_btf;
5444 const struct btf_type *local_t;
5445 const char *local_name;
5446 size_t local_essent_len;
5447 int err, i;
5448
5449 local_cand.btf = local_btf;
5450 local_cand.id = local_type_id;
5451 local_t = btf__type_by_id(local_btf, local_type_id);
5452 if (!local_t)
5453 return ERR_PTR(-EINVAL);
5454
5455 local_name = btf__name_by_offset(local_btf, local_t->name_off);
5456 if (str_is_empty(local_name))
5457 return ERR_PTR(-EINVAL);
5458 local_essent_len = bpf_core_essential_name_len(local_name);
5459
5460 cands = calloc(1, sizeof(*cands));
5461 if (!cands)
5462 return ERR_PTR(-ENOMEM);
5463
5464 /* Attempt to find target candidates in vmlinux BTF first */
5465 main_btf = obj->btf_vmlinux_override ?: obj->btf_vmlinux;
5466 err = bpf_core_add_cands(&local_cand, local_essent_len, main_btf, "vmlinux", 1, cands);
5467 if (err)
5468 goto err_out;
5469
5470 /* if vmlinux BTF has any candidate, don't go for module BTFs */
5471 if (cands->len)
5472 return cands;
5473
5474 /* if vmlinux BTF was overridden, don't attempt to load module BTFs */
5475 if (obj->btf_vmlinux_override)
5476 return cands;
5477
5478 /* now look through module BTFs, trying to still find candidates */
5479 err = load_module_btfs(obj);
5480 if (err)
5481 goto err_out;
5482
5483 for (i = 0; i < obj->btf_module_cnt; i++) {
5484 err = bpf_core_add_cands(&local_cand, local_essent_len,
5485 obj->btf_modules[i].btf,
5486 obj->btf_modules[i].name,
5487 btf__type_cnt(obj->btf_vmlinux),
5488 cands);
5489 if (err)
5490 goto err_out;
5491 }
5492
5493 return cands;
5494 err_out:
5495 bpf_core_free_cands(cands);
5496 return ERR_PTR(err);
5497 }
5498
5499 /* Check local and target types for compatibility. This check is used for
5500 * type-based CO-RE relocations and follows slightly different rules than
5501 * field-based relocations. This function assumes that root types were already
5502 * checked for name match. Beyond that initial root-level name check, names
5503 * are completely ignored. Compatibility rules are as follows:
5504 * - any two STRUCTs/UNIONs/FWDs/ENUMs/INTs are considered compatible, but
5505 * kind should match for local and target types (i.e., STRUCT is not
5506 * compatible with UNION);
5507 * - for ENUMs, the size is ignored;
5508 * - for INT, size and signedness are ignored;
5509 * - for ARRAY, dimensionality is ignored, element types are checked for
5510 * compatibility recursively;
5511 * - CONST/VOLATILE/RESTRICT modifiers are ignored;
5512 * - TYPEDEFs/PTRs are compatible if the types they point to are compatible;
5513 * - FUNC_PROTOs are compatible if they have compatible signature: same
5514 * number of input args and compatible return and argument types.
5515 * These rules are not set in stone and probably will be adjusted as we get
5516 * more experience with using BPF CO-RE relocations.
5517 */
5518 int bpf_core_types_are_compat(const struct btf *local_btf, __u32 local_id,
5519 const struct btf *targ_btf, __u32 targ_id)
5520 {
5521 return __bpf_core_types_are_compat(local_btf, local_id, targ_btf, targ_id, 32);
5522 }
5523
5524 int bpf_core_types_match(const struct btf *local_btf, __u32 local_id,
5525 const struct btf *targ_btf, __u32 targ_id)
5526 {
5527 return __bpf_core_types_match(local_btf, local_id, targ_btf, targ_id, false, 32);
5528 }
5529
5530 static size_t bpf_core_hash_fn(const long key, void *ctx)
5531 {
5532 return key;
5533 }
5534
5535 static bool bpf_core_equal_fn(const long k1, const long k2, void *ctx)
5536 {
5537 return k1 == k2;
5538 }
5539
5540 static int record_relo_core(struct bpf_program *prog,
5541 const struct bpf_core_relo *core_relo, int insn_idx)
5542 {
5543 struct reloc_desc *relos, *relo;
5544
5545 relos = libbpf_reallocarray(prog->reloc_desc,
5546 prog->nr_reloc + 1, sizeof(*relos));
5547 if (!relos)
5548 return -ENOMEM;
5549 relo = &relos[prog->nr_reloc];
5550 relo->type = RELO_CORE;
5551 relo->insn_idx = insn_idx;
5552 relo->core_relo = core_relo;
5553 prog->reloc_desc = relos;
5554 prog->nr_reloc++;
5555 return 0;
5556 }
5557
5558 static const struct bpf_core_relo *find_relo_core(struct bpf_program *prog, int insn_idx)
5559 {
5560 struct reloc_desc *relo;
5561 int i;
5562
5563 for (i = 0; i < prog->nr_reloc; i++) {
5564 relo = &prog->reloc_desc[i];
5565 if (relo->type != RELO_CORE || relo->insn_idx != insn_idx)
5566 continue;
5567
5568 return relo->core_relo;
5569 }
5570
5571 return NULL;
5572 }
5573
5574 static int bpf_core_resolve_relo(struct bpf_program *prog,
5575 const struct bpf_core_relo *relo,
5576 int relo_idx,
5577 const struct btf *local_btf,
5578 struct hashmap *cand_cache,
5579 struct bpf_core_relo_res *targ_res)
5580 {
5581 struct bpf_core_spec specs_scratch[3] = {};
5582 struct bpf_core_cand_list *cands = NULL;
5583 const char *prog_name = prog->name;
5584 const struct btf_type *local_type;
5585 const char *local_name;
5586 __u32 local_id = relo->type_id;
5587 int err;
5588
5589 local_type = btf__type_by_id(local_btf, local_id);
5590 if (!local_type)
5591 return -EINVAL;
5592
5593 local_name = btf__name_by_offset(local_btf, local_type->name_off);
5594 if (!local_name)
5595 return -EINVAL;
5596
5597 if (relo->kind != BPF_CORE_TYPE_ID_LOCAL &&
5598 !hashmap__find(cand_cache, local_id, &cands)) {
5599 cands = bpf_core_find_cands(prog->obj, local_btf, local_id);
5600 if (IS_ERR(cands)) {
5601 pr_warn("prog '%s': relo #%d: target candidate search failed for [%d] %s %s: %ld\n",
5602 prog_name, relo_idx, local_id, btf_kind_str(local_type),
5603 local_name, PTR_ERR(cands));
5604 return PTR_ERR(cands);
5605 }
5606 err = hashmap__set(cand_cache, local_id, cands, NULL, NULL);
5607 if (err) {
5608 bpf_core_free_cands(cands);
5609 return err;
5610 }
5611 }
5612
5613 return bpf_core_calc_relo_insn(prog_name, relo, relo_idx, local_btf, cands, specs_scratch,
5614 targ_res);
5615 }
5616
5617 static int
5618 bpf_object__relocate_core(struct bpf_object *obj, const char *targ_btf_path)
5619 {
5620 const struct btf_ext_info_sec *sec;
5621 struct bpf_core_relo_res targ_res;
5622 const struct bpf_core_relo *rec;
5623 const struct btf_ext_info *seg;
5624 struct hashmap_entry *entry;
5625 struct hashmap *cand_cache = NULL;
5626 struct bpf_program *prog;
5627 struct bpf_insn *insn;
5628 const char *sec_name;
5629 int i, err = 0, insn_idx, sec_idx, sec_num;
5630
5631 if (obj->btf_ext->core_relo_info.len == 0)
5632 return 0;
5633
5634 if (targ_btf_path) {
5635 obj->btf_vmlinux_override = btf__parse(targ_btf_path, NULL);
5636 err = libbpf_get_error(obj->btf_vmlinux_override);
5637 if (err) {
5638 pr_warn("failed to parse target BTF: %d\n", err);
5639 return err;
5640 }
5641 }
5642
5643 cand_cache = hashmap__new(bpf_core_hash_fn, bpf_core_equal_fn, NULL);
5644 if (IS_ERR(cand_cache)) {
5645 err = PTR_ERR(cand_cache);
5646 goto out;
5647 }
5648
5649 seg = &obj->btf_ext->core_relo_info;
5650 sec_num = 0;
5651 for_each_btf_ext_sec(seg, sec) {
5652 sec_idx = seg->sec_idxs[sec_num];
5653 sec_num++;
5654
5655 sec_name = btf__name_by_offset(obj->btf, sec->sec_name_off);
5656 if (str_is_empty(sec_name)) {
5657 err = -EINVAL;
5658 goto out;
5659 }
5660
5661 pr_debug("sec '%s': found %d CO-RE relocations\n", sec_name, sec->num_info);
5662
5663 for_each_btf_ext_rec(seg, sec, i, rec) {
5664 if (rec->insn_off % BPF_INSN_SZ)
5665 return -EINVAL;
5666 insn_idx = rec->insn_off / BPF_INSN_SZ;
5667 prog = find_prog_by_sec_insn(obj, sec_idx, insn_idx);
5668 if (!prog) {
5669 /* When __weak subprog is "overridden" by another instance
5670 * of the subprog from a different object file, linker still
5671 * appends all the .BTF.ext info that used to belong to that
5672 * eliminated subprogram.
5673 * This is similar to what x86-64 linker does for relocations.
5674 * So ignore such relocations, just like we ignore
5675 * subprog instructions when discovering subprograms.
5676 */
5677 pr_debug("sec '%s': skipping CO-RE relocation #%d for insn #%d belonging to eliminated weak subprogram\n",
5678 sec_name, i, insn_idx);
5679 continue;
5680 }
5681 /* no need to apply CO-RE relocation if the program is
5682 * not going to be loaded
5683 */
5684 if (!prog->autoload)
5685 continue;
5686
5687 /* adjust insn_idx from section frame of reference to the local
5688 * program's frame of reference; (sub-)program code is not yet
5689 * relocated, so it's enough to just subtract in-section offset
5690 */
5691 insn_idx = insn_idx - prog->sec_insn_off;
5692 if (insn_idx >= prog->insns_cnt)
5693 return -EINVAL;
5694 insn = &prog->insns[insn_idx];
5695
5696 err = record_relo_core(prog, rec, insn_idx);
5697 if (err) {
5698 pr_warn("prog '%s': relo #%d: failed to record relocation: %d\n",
5699 prog->name, i, err);
5700 goto out;
5701 }
5702
5703 if (prog->obj->gen_loader)
5704 continue;
5705
5706 err = bpf_core_resolve_relo(prog, rec, i, obj->btf, cand_cache, &targ_res);
5707 if (err) {
5708 pr_warn("prog '%s': relo #%d: failed to relocate: %d\n",
5709 prog->name, i, err);
5710 goto out;
5711 }
5712
5713 err = bpf_core_patch_insn(prog->name, insn, insn_idx, rec, i, &targ_res);
5714 if (err) {
5715 pr_warn("prog '%s': relo #%d: failed to patch insn #%u: %d\n",
5716 prog->name, i, insn_idx, err);
5717 goto out;
5718 }
5719 }
5720 }
5721
5722 out:
5723 /* obj->btf_vmlinux and module BTFs are freed after object load */
5724 btf__free(obj->btf_vmlinux_override);
5725 obj->btf_vmlinux_override = NULL;
5726
5727 if (!IS_ERR_OR_NULL(cand_cache)) {
5728 hashmap__for_each_entry(cand_cache, entry, i) {
5729 bpf_core_free_cands(entry->pvalue);
5730 }
5731 hashmap__free(cand_cache);
5732 }
5733 return err;
5734 }
5735
5736 /* base map load ldimm64 special constant, used also for log fixup logic */
5737 #define POISON_LDIMM64_MAP_BASE 2001000000
5738 #define POISON_LDIMM64_MAP_PFX "200100"
5739
5740 static void poison_map_ldimm64(struct bpf_program *prog, int relo_idx,
5741 int insn_idx, struct bpf_insn *insn,
5742 int map_idx, const struct bpf_map *map)
5743 {
5744 int i;
5745
5746 pr_debug("prog '%s': relo #%d: poisoning insn #%d that loads map #%d '%s'\n",
5747 prog->name, relo_idx, insn_idx, map_idx, map->name);
5748
5749 /* we turn single ldimm64 into two identical invalid calls */
5750 for (i = 0; i < 2; i++) {
5751 insn->code = BPF_JMP | BPF_CALL;
5752 insn->dst_reg = 0;
5753 insn->src_reg = 0;
5754 insn->off = 0;
5755 /* if this instruction is reachable (not dead code),
5756 * verifier will complain with something like:
5757 * invalid func unknown#2001000123
5758 * where the lower 123 is the map index into obj->maps[] array
5759 */
5760 insn->imm = POISON_LDIMM64_MAP_BASE + map_idx;
5761
5762 insn++;
5763 }
5764 }
5765
5766 /* unresolved kfunc call special constant, used also for log fixup logic */
5767 #define POISON_CALL_KFUNC_BASE 2002000000
5768 #define POISON_CALL_KFUNC_PFX "2002"
5769
5770 static void poison_kfunc_call(struct bpf_program *prog, int relo_idx,
5771 int insn_idx, struct bpf_insn *insn,
5772 int ext_idx, const struct extern_desc *ext)
5773 {
5774 pr_debug("prog '%s': relo #%d: poisoning insn #%d that calls kfunc '%s'\n",
5775 prog->name, relo_idx, insn_idx, ext->name);
5776
5777 /* we turn kfunc call into invalid helper call with identifiable constant */
5778 insn->code = BPF_JMP | BPF_CALL;
5779 insn->dst_reg = 0;
5780 insn->src_reg = 0;
5781 insn->off = 0;
5782 /* if this instruction is reachable (not dead code),
5783 * verifier will complain with something like:
5784 * invalid func unknown#2002000123
5785 * where the lower 123 is the extern index into obj->externs[] array
5786 */
5787 insn->imm = POISON_CALL_KFUNC_BASE + ext_idx;
5788 }
5789
5790 /* Relocate data references within program code:
5791 * - map references;
5792 * - global variable references;
5793 * - extern references.
5794 */
5795 static int
5796 bpf_object__relocate_data(struct bpf_object *obj, struct bpf_program *prog)
5797 {
5798 int i;
5799
5800 for (i = 0; i < prog->nr_reloc; i++) {
5801 struct reloc_desc *relo = &prog->reloc_desc[i];
5802 struct bpf_insn *insn = &prog->insns[relo->insn_idx];
5803 const struct bpf_map *map;
5804 struct extern_desc *ext;
5805
5806 switch (relo->type) {
5807 case RELO_LD64:
5808 map = &obj->maps[relo->map_idx];
5809 if (obj->gen_loader) {
5810 insn[0].src_reg = BPF_PSEUDO_MAP_IDX;
5811 insn[0].imm = relo->map_idx;
5812 } else if (map->autocreate) {
5813 insn[0].src_reg = BPF_PSEUDO_MAP_FD;
5814 insn[0].imm = map->fd;
5815 } else {
5816 poison_map_ldimm64(prog, i, relo->insn_idx, insn,
5817 relo->map_idx, map);
5818 }
5819 break;
5820 case RELO_DATA:
5821 map = &obj->maps[relo->map_idx];
5822 insn[1].imm = insn[0].imm + relo->sym_off;
5823 if (obj->gen_loader) {
5824 insn[0].src_reg = BPF_PSEUDO_MAP_IDX_VALUE;
5825 insn[0].imm = relo->map_idx;
5826 } else if (map->autocreate) {
5827 insn[0].src_reg = BPF_PSEUDO_MAP_VALUE;
5828 insn[0].imm = map->fd;
5829 } else {
5830 poison_map_ldimm64(prog, i, relo->insn_idx, insn,
5831 relo->map_idx, map);
5832 }
5833 break;
5834 case RELO_EXTERN_LD64:
5835 ext = &obj->externs[relo->ext_idx];
5836 if (ext->type == EXT_KCFG) {
5837 if (obj->gen_loader) {
5838 insn[0].src_reg = BPF_PSEUDO_MAP_IDX_VALUE;
5839 insn[0].imm = obj->kconfig_map_idx;
5840 } else {
5841 insn[0].src_reg = BPF_PSEUDO_MAP_VALUE;
5842 insn[0].imm = obj->maps[obj->kconfig_map_idx].fd;
5843 }
5844 insn[1].imm = ext->kcfg.data_off;
5845 } else /* EXT_KSYM */ {
5846 if (ext->ksym.type_id && ext->is_set) { /* typed ksyms */
5847 insn[0].src_reg = BPF_PSEUDO_BTF_ID;
5848 insn[0].imm = ext->ksym.kernel_btf_id;
5849 insn[1].imm = ext->ksym.kernel_btf_obj_fd;
5850 } else { /* typeless ksyms or unresolved typed ksyms */
5851 insn[0].imm = (__u32)ext->ksym.addr;
5852 insn[1].imm = ext->ksym.addr >> 32;
5853 }
5854 }
5855 break;
5856 case RELO_EXTERN_CALL:
5857 ext = &obj->externs[relo->ext_idx];
5858 insn[0].src_reg = BPF_PSEUDO_KFUNC_CALL;
5859 if (ext->is_set) {
5860 insn[0].imm = ext->ksym.kernel_btf_id;
5861 insn[0].off = ext->ksym.btf_fd_idx;
5862 } else { /* unresolved weak kfunc call */
5863 poison_kfunc_call(prog, i, relo->insn_idx, insn,
5864 relo->ext_idx, ext);
5865 }
5866 break;
5867 case RELO_SUBPROG_ADDR:
5868 if (insn[0].src_reg != BPF_PSEUDO_FUNC) {
5869 pr_warn("prog '%s': relo #%d: bad insn\n",
5870 prog->name, i);
5871 return -EINVAL;
5872 }
5873 /* handled already */
5874 break;
5875 case RELO_CALL:
5876 /* handled already */
5877 break;
5878 case RELO_CORE:
5879 /* will be handled by bpf_program_record_relos() */
5880 break;
5881 default:
5882 pr_warn("prog '%s': relo #%d: bad relo type %d\n",
5883 prog->name, i, relo->type);
5884 return -EINVAL;
5885 }
5886 }
5887
5888 return 0;
5889 }
5890
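/* Copy the .BTF.ext func/line info records belonging to a given (sub-)program
 * into the main program's accumulated info, converting Clang's byte-based
 * instruction offsets into insn-based offsets relative to the main program.
 */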
5891 static int adjust_prog_btf_ext_info(const struct bpf_object *obj,
5892 const struct bpf_program *prog,
5893 const struct btf_ext_info *ext_info,
5894 void **prog_info, __u32 *prog_rec_cnt,
5895 __u32 *prog_rec_sz)
5896 {
5897 void *copy_start = NULL, *copy_end = NULL;
5898 void *rec, *rec_end, *new_prog_info;
5899 const struct btf_ext_info_sec *sec;
5900 size_t old_sz, new_sz;
5901 int i, sec_num, sec_idx, off_adj;
5902
5903 sec_num = 0;
5904 for_each_btf_ext_sec(ext_info, sec) {
5905 sec_idx = ext_info->sec_idxs[sec_num];
5906 sec_num++;
5907 if (prog->sec_idx != sec_idx)
5908 continue;
5909
5910 for_each_btf_ext_rec(ext_info, sec, i, rec) {
5911 __u32 insn_off = *(__u32 *)rec / BPF_INSN_SZ;
5912
5913 if (insn_off < prog->sec_insn_off)
5914 continue;
5915 if (insn_off >= prog->sec_insn_off + prog->sec_insn_cnt)
5916 break;
5917
5918 if (!copy_start)
5919 copy_start = rec;
5920 copy_end = rec + ext_info->rec_size;
5921 }
5922
5923 if (!copy_start)
5924 return -ENOENT;
5925
5926 /* append func/line info of a given (sub-)program to the main
5927 * program's func/line info
5928 */
5929 old_sz = (size_t)(*prog_rec_cnt) * ext_info->rec_size;
5930 new_sz = old_sz + (copy_end - copy_start);
5931 new_prog_info = realloc(*prog_info, new_sz);
5932 if (!new_prog_info)
5933 return -ENOMEM;
5934 *prog_info = new_prog_info;
5935 *prog_rec_cnt = new_sz / ext_info->rec_size;
5936 memcpy(new_prog_info + old_sz, copy_start, copy_end - copy_start);
5937
5938 /* Kernel instruction offsets are in units of 8-byte
5939 * instructions, while .BTF.ext instruction offsets generated
5940 * by Clang are in units of bytes. So convert Clang offsets
5941 * into kernel offsets and adjust offset according to program
5942 * relocated position.
5943 */
5944 off_adj = prog->sub_insn_off - prog->sec_insn_off;
5945 rec = new_prog_info + old_sz;
5946 rec_end = new_prog_info + new_sz;
5947 for (; rec < rec_end; rec += ext_info->rec_size) {
5948 __u32 *insn_off = rec;
5949
5950 *insn_off = *insn_off / BPF_INSN_SZ + off_adj;
5951 }
5952 *prog_rec_sz = ext_info->rec_size;
5953 return 0;
5954 }
5955
5956 return -ENOENT;
5957 }
5958
5959 static int
5960 reloc_prog_func_and_line_info(const struct bpf_object *obj,
5961 struct bpf_program *main_prog,
5962 const struct bpf_program *prog)
5963 {
5964 int err;
5965
5966 /* no .BTF.ext relocation if .BTF.ext is missing or kernel doesn't
5967 * support func/line info
5968 */
5969 if (!obj->btf_ext || !kernel_supports(obj, FEAT_BTF_FUNC))
5970 return 0;
5971
5972 /* only attempt func info relocation if main program's func_info
5973 * relocation was successful
5974 */
5975 if (main_prog != prog && !main_prog->func_info)
5976 goto line_info;
5977
5978 err = adjust_prog_btf_ext_info(obj, prog, &obj->btf_ext->func_info,
5979 &main_prog->func_info,
5980 &main_prog->func_info_cnt,
5981 &main_prog->func_info_rec_size);
5982 if (err) {
5983 if (err != -ENOENT) {
5984 pr_warn("prog '%s': error relocating .BTF.ext function info: %d\n",
5985 prog->name, err);
5986 return err;
5987 }
5988 if (main_prog->func_info) {
5989 /*
5990 * Some info has already been found, but the last btf_ext
5991 * reloc had a problem. We must error out.
5992 */
5993 pr_warn("prog '%s': missing .BTF.ext function info.\n", prog->name);
5994 return err;
5995 }
5996 /* There was a problem loading the very first info. Ignore the rest. */
5997 pr_warn("prog '%s': missing .BTF.ext function info for the main program, skipping all of .BTF.ext func info.\n",
5998 prog->name);
5999 }
6000
6001 line_info:
6002 /* don't relocate line info if main program's relocation failed */
6003 if (main_prog != prog && !main_prog->line_info)
6004 return 0;
6005
6006 err = adjust_prog_btf_ext_info(obj, prog, &obj->btf_ext->line_info,
6007 &main_prog->line_info,
6008 &main_prog->line_info_cnt,
6009 &main_prog->line_info_rec_size);
6010 if (err) {
6011 if (err != -ENOENT) {
6012 pr_warn("prog '%s': error relocating .BTF.ext line info: %d\n",
6013 prog->name, err);
6014 return err;
6015 }
6016 if (main_prog->line_info) {
6017 /*
6018 * Some info has already been found, but the last btf_ext
6019 * reloc had a problem. We must error out.
6020 */
6021 pr_warn("prog '%s': missing .BTF.ext line info.\n", prog->name);
6022 return err;
6023 }
6024 /* There was a problem loading the very first info. Ignore the rest. */
6025 pr_warn("prog '%s': missing .BTF.ext line info for the main program, skipping all of .BTF.ext line info.\n",
6026 prog->name);
6027 }
6028 return 0;
6029 }
6030
6031 static int cmp_relo_by_insn_idx(const void *key, const void *elem)
6032 {
6033 size_t insn_idx = *(const size_t *)key;
6034 const struct reloc_desc *relo = elem;
6035
6036 if (insn_idx == relo->insn_idx)
6037 return 0;
6038 return insn_idx < relo->insn_idx ? -1 : 1;
6039 }
6040
6041 static struct reloc_desc *find_prog_insn_relo(const struct bpf_program *prog, size_t insn_idx)
6042 {
6043 if (!prog->nr_reloc)
6044 return NULL;
6045 return bsearch(&insn_idx, prog->reloc_desc, prog->nr_reloc,
6046 sizeof(*prog->reloc_desc), cmp_relo_by_insn_idx);
6047 }
6048
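/* Append subprog's relocation descriptors to the main program's list,
 * shifting their instruction indices by the offset at which the subprog's
 * code was appended.
 */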
6049 static int append_subprog_relos(struct bpf_program *main_prog, struct bpf_program *subprog)
6050 {
6051 int new_cnt = main_prog->nr_reloc + subprog->nr_reloc;
6052 struct reloc_desc *relos;
6053 int i;
6054
6055 if (main_prog == subprog)
6056 return 0;
6057 relos = libbpf_reallocarray(main_prog->reloc_desc, new_cnt, sizeof(*relos));
6058 /* if new count is zero, reallocarray can return a valid NULL result;
6059 * in this case the previous pointer will be freed, so we *have to*
6060 * reassign old pointer to the new value (even if it's NULL)
6061 */
6062 if (!relos && new_cnt)
6063 return -ENOMEM;
6064 if (subprog->nr_reloc)
6065 memcpy(relos + main_prog->nr_reloc, subprog->reloc_desc,
6066 sizeof(*relos) * subprog->nr_reloc);
6067
6068 for (i = main_prog->nr_reloc; i < new_cnt; i++)
6069 relos[i].insn_idx += subprog->sub_insn_off;
6070 /* After insn_idx adjustment the 'relos' array is still sorted
6071 * by insn_idx and doesn't break bsearch.
6072 */
6073 main_prog->reloc_desc = relos;
6074 main_prog->nr_reloc = new_cnt;
6075 return 0;
6076 }
6077
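/* Append subprog's instructions at the end of the main program's instruction
 * array, record the resulting offset in subprog->sub_insn_off, and carry over
 * the subprog's relocations.
 */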
6078 static int
6079 bpf_object__append_subprog_code(struct bpf_object *obj, struct bpf_program *main_prog,
6080 struct bpf_program *subprog)
6081 {
6082 struct bpf_insn *insns;
6083 size_t new_cnt;
6084 int err;
6085
6086 subprog->sub_insn_off = main_prog->insns_cnt;
6087
6088 new_cnt = main_prog->insns_cnt + subprog->insns_cnt;
6089 insns = libbpf_reallocarray(main_prog->insns, new_cnt, sizeof(*insns));
6090 if (!insns) {
6091 pr_warn("prog '%s': failed to realloc prog code\n", main_prog->name);
6092 return -ENOMEM;
6093 }
6094 main_prog->insns = insns;
6095 main_prog->insns_cnt = new_cnt;
6096
6097 memcpy(main_prog->insns + subprog->sub_insn_off, subprog->insns,
6098 subprog->insns_cnt * sizeof(*insns));
6099
6100 pr_debug("prog '%s': added %zu insns from sub-prog '%s'\n",
6101 main_prog->name, subprog->insns_cnt, subprog->name);
6102
6103 /* The subprog insns are now appended. Append its relos too. */
6104 err = append_subprog_relos(main_prog, subprog);
6105 if (err)
6106 return err;
6107 return 0;
6108 }
6109
6110 static int
6111 bpf_object__reloc_code(struct bpf_object *obj, struct bpf_program *main_prog,
6112 struct bpf_program *prog)
6113 {
6114 size_t sub_insn_idx, insn_idx;
6115 struct bpf_program *subprog;
6116 struct reloc_desc *relo;
6117 struct bpf_insn *insn;
6118 int err;
6119
6120 err = reloc_prog_func_and_line_info(obj, main_prog, prog);
6121 if (err)
6122 return err;
6123
6124 for (insn_idx = 0; insn_idx < prog->sec_insn_cnt; insn_idx++) {
6125 insn = &main_prog->insns[prog->sub_insn_off + insn_idx];
6126 if (!insn_is_subprog_call(insn) && !insn_is_pseudo_func(insn))
6127 continue;
6128
6129 relo = find_prog_insn_relo(prog, insn_idx);
6130 if (relo && relo->type == RELO_EXTERN_CALL)
6131 /* kfunc relocations will be handled later
6132 * in bpf_object__relocate_data()
6133 */
6134 continue;
6135 if (relo && relo->type != RELO_CALL && relo->type != RELO_SUBPROG_ADDR) {
6136 pr_warn("prog '%s': unexpected relo for insn #%zu, type %d\n",
6137 prog->name, insn_idx, relo->type);
6138 return -LIBBPF_ERRNO__RELOC;
6139 }
6140 if (relo) {
6141 /* sub-program instruction index is a combination of
6142 * an offset of a symbol pointed to by relocation and
6143 * call instruction's imm field; for global functions,
6144 * call always has imm = -1, but for static functions
6145 * relocation is against STT_SECTION and insn->imm
6146 * points to a start of a static function
6147 *
6148 * for subprog addr relocation, the relo->sym_off + insn->imm is
6149 * the byte offset in the corresponding section.
6150 */
6151 if (relo->type == RELO_CALL)
6152 sub_insn_idx = relo->sym_off / BPF_INSN_SZ + insn->imm + 1;
6153 else
6154 sub_insn_idx = (relo->sym_off + insn->imm) / BPF_INSN_SZ;
6155 } else if (insn_is_pseudo_func(insn)) {
6156 /*
6157 * RELO_SUBPROG_ADDR relo is always emitted even if both
6158 * functions are in the same section, so it shouldn't reach here.
6159 */
6160 pr_warn("prog '%s': missing subprog addr relo for insn #%zu\n",
6161 prog->name, insn_idx);
6162 return -LIBBPF_ERRNO__RELOC;
6163 } else {
6164 /* if subprogram call is to a static function within
6165 * the same ELF section, there won't be any relocation
6166 * emitted, but it also means there is no additional
6167 * offset necessary, insns->imm is relative to
6168 * instruction's original position within the section
6169 */
6170 sub_insn_idx = prog->sec_insn_off + insn_idx + insn->imm + 1;
6171 }
6172
6173 /* we enforce that sub-programs should be in .text section */
6174 subprog = find_prog_by_sec_insn(obj, obj->efile.text_shndx, sub_insn_idx);
6175 if (!subprog) {
6176 pr_warn("prog '%s': no .text section found yet sub-program call exists\n",
6177 prog->name);
6178 return -LIBBPF_ERRNO__RELOC;
6179 }
6180
6181 /* if it's the first call instruction calling into this
6182 * subprogram (meaning this subprog hasn't been processed
6183 * yet) within the context of current main program:
6184 * - append it at the end of main program's instruction block;
6185 * - process it recursively, while current program is put on hold;
6186 * - if that subprogram calls some other not-yet-processed
6187 * subprogram, the same thing will happen recursively until
6188 * there are no more unprocessed subprograms left to append
6189 * and relocate.
6190 */
6191 if (subprog->sub_insn_off == 0) {
6192 err = bpf_object__append_subprog_code(obj, main_prog, subprog);
6193 if (err)
6194 return err;
6195 err = bpf_object__reloc_code(obj, main_prog, subprog);
6196 if (err)
6197 return err;
6198 }
6199
6200 /* main_prog->insns memory could have been re-allocated, so
6201 * calculate pointer again
6202 */
6203 insn = &main_prog->insns[prog->sub_insn_off + insn_idx];
6204 /* calculate correct instruction position within current main
6205 * prog; each main prog can have a different set of
6206 * subprograms appended (potentially in different order as
6207 * well), so position of any subprog can be different for
6208 * different main programs
6209 */
6210 insn->imm = subprog->sub_insn_off - (prog->sub_insn_off + insn_idx) - 1;
6211
6212 pr_debug("prog '%s': insn #%zu relocated, imm %d points to subprog '%s' (now at %zu offset)\n",
6213 prog->name, insn_idx, insn->imm, subprog->name, subprog->sub_insn_off);
6214 }
6215
6216 return 0;
6217 }
6218
6219 /*
6220 * Relocate sub-program calls.
6221 *
6222 * Algorithm operates as follows. Each entry-point BPF program (referred to as
6223 * main prog) is processed separately. Each subprog (a non-entry function
6224 * that can be called from either entry progs or other subprogs) gets its
6225 * sub_insn_off reset to zero. This serves as an indicator that this subprogram
6226 * hasn't yet been appended and relocated within the current main prog. Once it's
6227 * relocated, sub_insn_off will point at the position within the current main prog
6228 * where the given subprog was appended. This will further be used to relocate all
6229 * the call instructions jumping into this subprog.
6230 *
6231 * We start with main program and process all call instructions. If the call
6232 * is into a subprog that hasn't been processed (i.e., subprog->sub_insn_off
6233 * is zero), subprog instructions are appended at the end of main program's
6234 * instruction array. Then main program is "put on hold" while we recursively
6235 * process newly appended subprogram. If that subprogram calls into another
6236 * subprogram that hasn't been appended, new subprogram is appended again to
6237 * the *main* prog's instructions (subprog's instructions are always left
6238 * untouched, as they need to be in unmodified state for subsequent main progs
6239 * and subprog instructions are always sent only as part of a main prog) and
6240 * the process continues recursively. Once all the subprogs called from a main
6241 * prog or any of its subprogs are appended (and relocated), all their
6242 * positions within finalized instructions array are known, so it's easy to
6243 * rewrite call instructions with correct relative offsets, corresponding to
6244 * desired target subprog.
6245 *
6246 * It's important to realize that some subprogs might not be called from a given
6247 * main prog or any of its called/used subprogs. Those will keep their
6248 * subprog->sub_insn_off as zero at all times, won't be appended to the current
6249 * main prog, and won't be relocated within the context of the current main prog.
6250 * They might still be used from other main progs later.
6251 *
6252 * Visually this process can be shown as below. Suppose we have two main
6253 * programs mainA and mainB and BPF object contains three subprogs: subA,
6254 * subB, and subC. mainA calls only subA, mainB calls only subC, but subA and
6255 * subC both call subB:
6256 *
6257 * +--------+ +-------+
6258 * | v v |
6259 * +--+---+ +--+-+-+ +---+--+
6260 * | subA | | subB | | subC |
6261 * +--+---+ +------+ +---+--+
6262 * ^ ^
6263 * | |
6264 * +---+-------+ +------+----+
6265 * | mainA | | mainB |
6266 * +-----------+ +-----------+
6267 *
6268 * We'll start relocating mainA, find subA, append it, and start
6269 * processing subA recursively:
6270 *
6271 * +-----------+------+
6272 * | mainA | subA |
6273 * +-----------+------+
6274 *
6275 * At this point we notice that subB is used from subA, so we append it and
6276 * relocate (there are no further subcalls from subB):
6277 *
6278 * +-----------+------+------+
6279 * | mainA | subA | subB |
6280 * +-----------+------+------+
6281 *
6282 * At this point, we relocate subA's calls, then go one level up and finish with
6283 * relocating mainA's calls. mainA is done.
6284 *
6285 * For mainB, the process is similar but results in a different order. We start with
6286 * mainB and skip subA and subB, as mainB never calls them (at least
6287 * directly), but we see subC is needed, so we append and start processing it:
6288 *
6289 * +-----------+------+
6290 * | mainB | subC |
6291 * +-----------+------+
6292 * Now we see subC needs subB, so we go back to it, append and relocate it:
6293 *
6294 * +-----------+------+------+
6295 * | mainB | subC | subB |
6296 * +-----------+------+------+
6297 *
6298 * At this point we unwind recursion, relocate calls in subC, then in mainB.
6299 */
6300 static int
6301 bpf_object__relocate_calls(struct bpf_object *obj, struct bpf_program *prog)
6302 {
6303 struct bpf_program *subprog;
6304 int i, err;
6305
6306 /* mark all subprogs as not relocated (yet) within the context of
6307 * current main program
6308 */
6309 for (i = 0; i < obj->nr_programs; i++) {
6310 subprog = &obj->programs[i];
6311 if (!prog_is_subprog(obj, subprog))
6312 continue;
6313
6314 subprog->sub_insn_off = 0;
6315 }
6316
6317 err = bpf_object__reloc_code(obj, prog, prog);
6318 if (err)
6319 return err;
6320
6321 return 0;
6322 }
6323
6324 static void
6325 bpf_object__free_relocs(struct bpf_object *obj)
6326 {
6327 struct bpf_program *prog;
6328 int i;
6329
6330 /* free up relocation descriptors */
6331 for (i = 0; i < obj->nr_programs; i++) {
6332 prog = &obj->programs[i];
6333 zfree(&prog->reloc_desc);
6334 prog->nr_reloc = 0;
6335 }
6336 }
6337
6338 static int cmp_relocs(const void *_a, const void *_b)
6339 {
6340 const struct reloc_desc *a = _a;
6341 const struct reloc_desc *b = _b;
6342
6343 if (a->insn_idx != b->insn_idx)
6344 return a->insn_idx < b->insn_idx ? -1 : 1;
6345
6346 /* no two relocations should have the same insn_idx, but ... */
6347 if (a->type != b->type)
6348 return a->type < b->type ? -1 : 1;
6349
6350 return 0;
6351 }
6352
6353 static void bpf_object__sort_relos(struct bpf_object *obj)
6354 {
6355 int i;
6356
6357 for (i = 0; i < obj->nr_programs; i++) {
6358 struct bpf_program *p = &obj->programs[i];
6359
6360 if (!p->nr_reloc)
6361 continue;
6362
6363 qsort(p->reloc_desc, p->nr_reloc, sizeof(*p->reloc_desc), cmp_relocs);
6364 }
6365 }
6366
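/* Find the subprog designated as this program's exception callback via the
 * "exception_callback:<name>" BTF decl tag attached to the main program, and
 * remember its index so it can be appended during relocation.
 */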
6367 static int bpf_prog_assign_exc_cb(struct bpf_object *obj, struct bpf_program *prog)
6368 {
6369 const char *str = "exception_callback:";
6370 size_t pfx_len = strlen(str);
6371 int i, j, n;
6372
6373 if (!obj->btf || !kernel_supports(obj, FEAT_BTF_DECL_TAG))
6374 return 0;
6375
6376 n = btf__type_cnt(obj->btf);
6377 for (i = 1; i < n; i++) {
6378 const char *name;
6379 struct btf_type *t;
6380
6381 t = btf_type_by_id(obj->btf, i);
6382 if (!btf_is_decl_tag(t) || btf_decl_tag(t)->component_idx != -1)
6383 continue;
6384
6385 name = btf__str_by_offset(obj->btf, t->name_off);
6386 if (strncmp(name, str, pfx_len) != 0)
6387 continue;
6388
6389 t = btf_type_by_id(obj->btf, t->type);
6390 if (!btf_is_func(t) || btf_func_linkage(t) != BTF_FUNC_GLOBAL) {
6391 pr_warn("prog '%s': exception_callback:<value> decl tag not applied to the main program\n",
6392 prog->name);
6393 return -EINVAL;
6394 }
6395 if (strcmp(prog->name, btf__str_by_offset(obj->btf, t->name_off)) != 0)
6396 continue;
6397 /* If multiple callbacks are specified for the same prog,
6398 * the verifier will eventually return an error for this
6399 * case, so simply skip appending a subprog.
6400 */
6401 if (prog->exception_cb_idx >= 0) {
6402 prog->exception_cb_idx = -1;
6403 break;
6404 }
6405
6406 name += pfx_len;
6407 if (str_is_empty(name)) {
6408 pr_warn("prog '%s': exception_callback:<value> decl tag contains empty value\n",
6409 prog->name);
6410 return -EINVAL;
6411 }
6412
6413 for (j = 0; j < obj->nr_programs; j++) {
6414 struct bpf_program *subprog = &obj->programs[j];
6415
6416 if (!prog_is_subprog(obj, subprog))
6417 continue;
6418 if (strcmp(name, subprog->name) != 0)
6419 continue;
6420 /* Enforce that the callback is non-hidden: the
6421 * verifier expects a global function, whereas
6422 * mark_btf_static() fixes up its linkage as static.
6423 */
6424 if (!subprog->sym_global || subprog->mark_btf_static) {
6425 pr_warn("prog '%s': exception callback %s must be a global non-hidden function\n",
6426 prog->name, subprog->name);
6427 return -EINVAL;
6428 }
6429 /* Let's see if we already saw a static exception callback with the same name */
6430 if (prog->exception_cb_idx >= 0) {
6431 pr_warn("prog '%s': multiple subprogs with same name as exception callback '%s'\n",
6432 prog->name, subprog->name);
6433 return -EINVAL;
6434 }
6435 prog->exception_cb_idx = j;
6436 break;
6437 }
6438
6439 if (prog->exception_cb_idx >= 0)
6440 continue;
6441
6442 pr_warn("prog '%s': cannot find exception callback '%s'\n", prog->name, name);
6443 return -ENOENT;
6444 }
6445
6446 return 0;
6447 }
6448
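/* Mapping from BPF program type to the name of the kernel context struct the
 * verifier expects for it; used by the __arg_ctx fixup fallback below.
 */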
6449 static struct {
6450 enum bpf_prog_type prog_type;
6451 const char *ctx_name;
6452 } global_ctx_map[] = {
6453 { BPF_PROG_TYPE_CGROUP_DEVICE, "bpf_cgroup_dev_ctx" },
6454 { BPF_PROG_TYPE_CGROUP_SKB, "__sk_buff" },
6455 { BPF_PROG_TYPE_CGROUP_SOCK, "bpf_sock" },
6456 { BPF_PROG_TYPE_CGROUP_SOCK_ADDR, "bpf_sock_addr" },
6457 { BPF_PROG_TYPE_CGROUP_SOCKOPT, "bpf_sockopt" },
6458 { BPF_PROG_TYPE_CGROUP_SYSCTL, "bpf_sysctl" },
6459 { BPF_PROG_TYPE_FLOW_DISSECTOR, "__sk_buff" },
6460 { BPF_PROG_TYPE_KPROBE, "bpf_user_pt_regs_t" },
6461 { BPF_PROG_TYPE_LWT_IN, "__sk_buff" },
6462 { BPF_PROG_TYPE_LWT_OUT, "__sk_buff" },
6463 { BPF_PROG_TYPE_LWT_SEG6LOCAL, "__sk_buff" },
6464 { BPF_PROG_TYPE_LWT_XMIT, "__sk_buff" },
6465 { BPF_PROG_TYPE_NETFILTER, "bpf_nf_ctx" },
6466 { BPF_PROG_TYPE_PERF_EVENT, "bpf_perf_event_data" },
6467 { BPF_PROG_TYPE_RAW_TRACEPOINT, "bpf_raw_tracepoint_args" },
6468 { BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE, "bpf_raw_tracepoint_args" },
6469 { BPF_PROG_TYPE_SCHED_ACT, "__sk_buff" },
6470 { BPF_PROG_TYPE_SCHED_CLS, "__sk_buff" },
6471 { BPF_PROG_TYPE_SK_LOOKUP, "bpf_sk_lookup" },
6472 { BPF_PROG_TYPE_SK_MSG, "sk_msg_md" },
6473 { BPF_PROG_TYPE_SK_REUSEPORT, "sk_reuseport_md" },
6474 { BPF_PROG_TYPE_SK_SKB, "__sk_buff" },
6475 { BPF_PROG_TYPE_SOCK_OPS, "bpf_sock_ops" },
6476 { BPF_PROG_TYPE_SOCKET_FILTER, "__sk_buff" },
6477 { BPF_PROG_TYPE_XDP, "xdp_md" },
6478 /* all other program types don't have "named" context structs */
6479 };
6480
6481 /* forward declarations for arch-specific underlying types of bpf_user_pt_regs_t typedef,
6482 * for below __builtin_types_compatible_p() checks;
6483 * with this approach we don't need any extra arch-specific #ifdef guards
6484 */
6485 struct pt_regs;
6486 struct user_pt_regs;
6487 struct user_regs_struct;
6488
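/* Decide whether a global subprog's context argument BTF needs to be rewritten
 * into a pointer to the canonical context struct so that the kernel recognizes
 * it as PTR_TO_CTX.
 */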
6489 static bool need_func_arg_type_fixup(const struct btf *btf, const struct bpf_program *prog,
6490 const char *subprog_name, int arg_idx,
6491 int arg_type_id, const char *ctx_name)
6492 {
6493 const struct btf_type *t;
6494 const char *tname;
6495
6496 /* check if existing parameter already matches verifier expectations */
6497 t = skip_mods_and_typedefs(btf, arg_type_id, NULL);
6498 if (!btf_is_ptr(t))
6499 goto out_warn;
6500
6501 /* typedef bpf_user_pt_regs_t is a special PITA case, valid for kprobe
6502 * and perf_event programs, so check this case early on and forget
6503 * about it for subsequent checks
6504 */
6505 while (btf_is_mod(t))
6506 t = btf__type_by_id(btf, t->type);
6507 if (btf_is_typedef(t) &&
6508 (prog->type == BPF_PROG_TYPE_KPROBE || prog->type == BPF_PROG_TYPE_PERF_EVENT)) {
6509 tname = btf__str_by_offset(btf, t->name_off) ?: "<anon>";
6510 if (strcmp(tname, "bpf_user_pt_regs_t") == 0)
6511 return false; /* canonical type for kprobe/perf_event */
6512 }
6513
6514 /* now we can ignore typedefs moving forward */
6515 t = skip_mods_and_typedefs(btf, t->type, NULL);
6516
6517 /* if it's `void *`, definitely fix up BTF info */
6518 if (btf_is_void(t))
6519 return true;
6520
6521 /* if it's already proper canonical type, no need to fix up */
6522 tname = btf__str_by_offset(btf, t->name_off) ?: "<anon>";
6523 if (btf_is_struct(t) && strcmp(tname, ctx_name) == 0)
6524 return false;
6525
6526 /* special cases */
6527 switch (prog->type) {
6528 case BPF_PROG_TYPE_KPROBE:
6529 /* `struct pt_regs *` is expected, but we need to fix up */
6530 if (btf_is_struct(t) && strcmp(tname, "pt_regs") == 0)
6531 return true;
6532 break;
6533 case BPF_PROG_TYPE_PERF_EVENT:
6534 if (__builtin_types_compatible_p(bpf_user_pt_regs_t, struct pt_regs) &&
6535 btf_is_struct(t) && strcmp(tname, "pt_regs") == 0)
6536 return true;
6537 if (__builtin_types_compatible_p(bpf_user_pt_regs_t, struct user_pt_regs) &&
6538 btf_is_struct(t) && strcmp(tname, "user_pt_regs") == 0)
6539 return true;
6540 if (__builtin_types_compatible_p(bpf_user_pt_regs_t, struct user_regs_struct) &&
6541 btf_is_struct(t) && strcmp(tname, "user_regs_struct") == 0)
6542 return true;
6543 break;
6544 case BPF_PROG_TYPE_RAW_TRACEPOINT:
6545 case BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE:
6546 /* allow u64* as ctx */
6547 if (btf_is_int(t) && t->size == 8)
6548 return true;
6549 break;
6550 default:
6551 break;
6552 }
6553
6554 out_warn:
6555 pr_warn("prog '%s': subprog '%s' arg#%d is expected to be of `struct %s *` type\n",
6556 prog->name, subprog_name, arg_idx, ctx_name);
6557 return false;
6558 }
6559
6560 static int clone_func_btf_info(struct btf *btf, int orig_fn_id, struct bpf_program *prog)
6561 {
6562 int fn_id, fn_proto_id, ret_type_id, orig_proto_id;
6563 int i, err, arg_cnt, fn_name_off, linkage;
6564 struct btf_type *fn_t, *fn_proto_t, *t;
6565 struct btf_param *p;
6566
6567 /* caller already validated FUNC -> FUNC_PROTO validity */
6568 fn_t = btf_type_by_id(btf, orig_fn_id);
6569 fn_proto_t = btf_type_by_id(btf, fn_t->type);
6570
6571 /* Note that each btf__add_xxx() operation invalidates
6572 * all btf_type and string pointers, so we need to be
6573 * very careful when cloning BTF types. BTF type
6574 * pointers have to be always refetched. And to avoid
6575 * problems with invalidated string pointers, we
6576 * add empty strings initially, then just fix up
6577 * name_off offsets in place. Offsets are stable for
6578 * existing strings, so that works out.
6579 */
6580 fn_name_off = fn_t->name_off; /* we are about to invalidate fn_t */
6581 linkage = btf_func_linkage(fn_t);
6582 orig_proto_id = fn_t->type; /* original FUNC_PROTO ID */
6583 ret_type_id = fn_proto_t->type; /* fn_proto_t will be invalidated */
6584 arg_cnt = btf_vlen(fn_proto_t);
6585
6586 /* clone FUNC_PROTO and its params */
6587 fn_proto_id = btf__add_func_proto(btf, ret_type_id);
6588 if (fn_proto_id < 0)
6589 return -EINVAL;
6590
6591 for (i = 0; i < arg_cnt; i++) {
6592 int name_off;
6593
6594 /* copy original parameter data */
6595 t = btf_type_by_id(btf, orig_proto_id);
6596 p = &btf_params(t)[i];
6597 name_off = p->name_off;
6598
6599 err = btf__add_func_param(btf, "", p->type);
6600 if (err)
6601 return err;
6602
6603 fn_proto_t = btf_type_by_id(btf, fn_proto_id);
6604 p = &btf_params(fn_proto_t)[i];
6605 p->name_off = name_off; /* use remembered str offset */
6606 }
6607
6608 /* clone FUNC now, btf__add_func() enforces non-empty name, so use
6609 * entry program's name as a placeholder, which we replace immediately
6610 * with original name_off
6611 */
6612 fn_id = btf__add_func(btf, prog->name, linkage, fn_proto_id);
6613 if (fn_id < 0)
6614 return -EINVAL;
6615
6616 fn_t = btf_type_by_id(btf, fn_id);
6617 fn_t->name_off = fn_name_off; /* reuse original string */
6618
6619 return fn_id;
6620 }
6621
6622 /* Check if main program or global subprog's function prototype has `arg:ctx`
6623 * argument tags and, if necessary, substitute the correct type to match what the
6624 * BPF verifier would expect, taking into account the specific program type. This
6625 * allows supporting the __arg_ctx tag transparently on old kernels that don't yet
6626 * have native support for it in the verifier, making the user's life much
6627 * easier.
6628 */
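/* An illustrative sketch (assuming the __arg_ctx convenience macro, which
 * expands to the "arg:ctx" decl tag) of a global subprog relying on this
 * fallback:
 *
 *   __noinline int handle_ctx(void *ctx __arg_ctx) { ... }
 *
 * On kernels without native __arg_ctx support, the logic below rewrites the
 * `void *` argument's BTF into a pointer to the program type's canonical
 * context struct, so the verifier treats it as PTR_TO_CTX.
 */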
6629 static int bpf_program_fixup_func_info(struct bpf_object *obj, struct bpf_program *prog)
6630 {
6631 const char *ctx_name = NULL, *ctx_tag = "arg:ctx", *fn_name;
6632 struct bpf_func_info_min *func_rec;
6633 struct btf_type *fn_t, *fn_proto_t;
6634 struct btf *btf = obj->btf;
6635 const struct btf_type *t;
6636 struct btf_param *p;
6637 int ptr_id = 0, struct_id, tag_id, orig_fn_id;
6638 int i, n, arg_idx, arg_cnt, err, rec_idx;
6639 int *orig_ids;
6640
6641 /* no .BTF.ext, no problem */
6642 if (!obj->btf_ext || !prog->func_info)
6643 return 0;
6644
6645 /* don't do any fix ups if kernel natively supports __arg_ctx */
6646 if (kernel_supports(obj, FEAT_ARG_CTX_TAG))
6647 return 0;
6648
6649 /* some BPF program types just don't have named context structs, so
6650 * this fallback mechanism doesn't work for them
6651 */
6652 for (i = 0; i < ARRAY_SIZE(global_ctx_map); i++) {
6653 if (global_ctx_map[i].prog_type != prog->type)
6654 continue;
6655 ctx_name = global_ctx_map[i].ctx_name;
6656 break;
6657 }
6658 if (!ctx_name)
6659 return 0;
6660
6661 /* remember original func BTF IDs to detect if we already cloned them */
6662 orig_ids = calloc(prog->func_info_cnt, sizeof(*orig_ids));
6663 if (!orig_ids)
6664 return -ENOMEM;
6665 for (i = 0; i < prog->func_info_cnt; i++) {
6666 func_rec = prog->func_info + prog->func_info_rec_size * i;
6667 orig_ids[i] = func_rec->type_id;
6668 }
6669
6670 /* go through each DECL_TAG with "arg:ctx" and see if it points to one
6671 * of our subprogs; if yes and subprog is global and needs adjustment,
6672 * clone and adjust FUNC -> FUNC_PROTO combo
6673 */
6674 for (i = 1, n = btf__type_cnt(btf); i < n; i++) {
6675 /* only DECL_TAG with "arg:ctx" value are interesting */
6676 t = btf__type_by_id(btf, i);
6677 if (!btf_is_decl_tag(t))
6678 continue;
6679 if (strcmp(btf__str_by_offset(btf, t->name_off), ctx_tag) != 0)
6680 continue;
6681
6682 /* only global funcs need adjustment, if at all */
6683 orig_fn_id = t->type;
6684 fn_t = btf_type_by_id(btf, orig_fn_id);
6685 if (!btf_is_func(fn_t) || btf_func_linkage(fn_t) != BTF_FUNC_GLOBAL)
6686 continue;
6687
6688 /* sanity check FUNC -> FUNC_PROTO chain, just in case */
6689 fn_proto_t = btf_type_by_id(btf, fn_t->type);
6690 if (!fn_proto_t || !btf_is_func_proto(fn_proto_t))
6691 continue;
6692
6693 /* find corresponding func_info record */
6694 func_rec = NULL;
6695 for (rec_idx = 0; rec_idx < prog->func_info_cnt; rec_idx++) {
6696 if (orig_ids[rec_idx] == t->type) {
6697 func_rec = prog->func_info + prog->func_info_rec_size * rec_idx;
6698 break;
6699 }
6700 }
6701 /* current main program doesn't call into this subprog */
6702 if (!func_rec)
6703 continue;
6704
6705 /* some more sanity checking of DECL_TAG */
6706 arg_cnt = btf_vlen(fn_proto_t);
6707 arg_idx = btf_decl_tag(t)->component_idx;
6708 if (arg_idx < 0 || arg_idx >= arg_cnt)
6709 continue;
6710
6711 /* check if we should fix up argument type */
6712 p = &btf_params(fn_proto_t)[arg_idx];
6713 fn_name = btf__str_by_offset(btf, fn_t->name_off) ?: "<anon>";
6714 if (!need_func_arg_type_fixup(btf, prog, fn_name, arg_idx, p->type, ctx_name))
6715 continue;
6716
6717 /* clone fn/fn_proto, unless we already did it for another arg */
6718 if (func_rec->type_id == orig_fn_id) {
6719 int fn_id;
6720
6721 fn_id = clone_func_btf_info(btf, orig_fn_id, prog);
6722 if (fn_id < 0) {
6723 err = fn_id;
6724 goto err_out;
6725 }
6726
6727 /* point func_info record to a cloned FUNC type */
6728 func_rec->type_id = fn_id;
6729 }
6730
6731 /* create PTR -> STRUCT type chain to mark PTR_TO_CTX argument;
6732 * we do it just once per main BPF program, as all global
6733 * funcs share the same program type, so we need only one PTR ->
6734 * STRUCT type chain
6735 */
6736 if (ptr_id == 0) {
6737 struct_id = btf__add_struct(btf, ctx_name, 0);
6738 ptr_id = btf__add_ptr(btf, struct_id);
6739 if (ptr_id < 0 || struct_id < 0) {
6740 err = -EINVAL;
6741 goto err_out;
6742 }
6743 }
6744
6745 /* for completeness, clone DECL_TAG and point it to cloned param */
6746 tag_id = btf__add_decl_tag(btf, ctx_tag, func_rec->type_id, arg_idx);
6747 if (tag_id < 0) {
6748 err = -EINVAL;
6749 goto err_out;
6750 }
6751
6752 /* all the BTF manipulations invalidated pointers, refetch them */
6753 fn_t = btf_type_by_id(btf, func_rec->type_id);
6754 fn_proto_t = btf_type_by_id(btf, fn_t->type);
6755
6756 /* fix up type ID pointed to by param */
6757 p = &btf_params(fn_proto_t)[arg_idx];
6758 p->type = ptr_id;
6759 }
6760
6761 free(orig_ids);
6762 return 0;
6763 err_out:
6764 free(orig_ids);
6765 return err;
6766 }
6767
6768 static int bpf_object__relocate(struct bpf_object *obj, const char *targ_btf_path)
6769 {
6770 struct bpf_program *prog;
6771 size_t i, j;
6772 int err;
6773
6774 if (obj->btf_ext) {
6775 err = bpf_object__relocate_core(obj, targ_btf_path);
6776 if (err) {
6777 pr_warn("failed to perform CO-RE relocations: %d\n",
6778 err);
6779 return err;
6780 }
6781 bpf_object__sort_relos(obj);
6782 }
6783
6784 /* Before relocating calls, pre-process relocations and mark
6785 * the few ld_imm64 instructions that point to subprogs.
6786 * Otherwise bpf_object__reloc_code() would later have to consider
6787 * all ld_imm64 insns as relocation candidates. That would
6788 * slow down relocation, since the number of find_prog_insn_relo()
6789 * calls would increase and most of them would fail to find a relo.
6790 */
6791 for (i = 0; i < obj->nr_programs; i++) {
6792 prog = &obj->programs[i];
6793 for (j = 0; j < prog->nr_reloc; j++) {
6794 struct reloc_desc *relo = &prog->reloc_desc[j];
6795 struct bpf_insn *insn = &prog->insns[relo->insn_idx];
6796
6797 /* mark the insn, so it's recognized by insn_is_pseudo_func() */
6798 if (relo->type == RELO_SUBPROG_ADDR)
6799 insn[0].src_reg = BPF_PSEUDO_FUNC;
6800 }
6801 }
6802
6803 /* relocate subprogram calls and append used subprograms to main
6804 * programs; each copy of subprogram code needs to be relocated
6805 * differently for each main program, because its code location might
6806 * have changed.
6807 * Append subprog relos to main programs to allow data relos to be
6808 * processed after text is completely relocated.
6809 */
6810 for (i = 0; i < obj->nr_programs; i++) {
6811 prog = &obj->programs[i];
6812 /* sub-program's sub-calls are relocated within the context of
6813 * its main program only
6814 */
6815 if (prog_is_subprog(obj, prog))
6816 continue;
6817 if (!prog->autoload)
6818 continue;
6819
6820 err = bpf_object__relocate_calls(obj, prog);
6821 if (err) {
6822 pr_warn("prog '%s': failed to relocate calls: %d\n",
6823 prog->name, err);
6824 return err;
6825 }
6826
6827 err = bpf_prog_assign_exc_cb(obj, prog);
6828 if (err)
6829 return err;
6830 /* Now, also append exception callback if it has not been done already. */
6831 if (prog->exception_cb_idx >= 0) {
6832 struct bpf_program *subprog = &obj->programs[prog->exception_cb_idx];
6833
6834 /* Calling the exception callback directly is disallowed, and the
6835 * verifier will reject it later. If it was processed already,
6836 * we can skip this step; otherwise, for all other valid cases, we
6837 * have to append the exception callback now.
6838 */
6839 if (subprog->sub_insn_off == 0) {
6840 err = bpf_object__append_subprog_code(obj, prog, subprog);
6841 if (err)
6842 return err;
6843 err = bpf_object__reloc_code(obj, prog, subprog);
6844 if (err)
6845 return err;
6846 }
6847 }
6848 }
6849 for (i = 0; i < obj->nr_programs; i++) {
6850 prog = &obj->programs[i];
6851 if (prog_is_subprog(obj, prog))
6852 continue;
6853 if (!prog->autoload)
6854 continue;
6855
6856 /* Process data relos for main programs */
6857 err = bpf_object__relocate_data(obj, prog);
6858 if (err) {
6859 pr_warn("prog '%s': failed to relocate data references: %d\n",
6860 prog->name, err);
6861 return err;
6862 }
6863
6864 /* Fix up .BTF.ext information, if necessary */
6865 err = bpf_program_fixup_func_info(obj, prog);
6866 if (err) {
6867 pr_warn("prog '%s': failed to perform .BTF.ext fix ups: %d\n",
6868 prog->name, err);
6869 return err;
6870 }
6871 }
6872
6873 return 0;
6874 }
6875
6876 static int bpf_object__collect_st_ops_relos(struct bpf_object *obj,
6877 Elf64_Shdr *shdr, Elf_Data *data);
6878
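/* Collect relocations within the .maps section: each relocation initializes a
 * slot of a map-in-map's or prog-array's "values" array with a reference to
 * another BTF-defined map or an entry-point program, respectively.
 */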
6879 static int bpf_object__collect_map_relos(struct bpf_object *obj,
6880 Elf64_Shdr *shdr, Elf_Data *data)
6881 {
6882 const int bpf_ptr_sz = 8, host_ptr_sz = sizeof(void *);
6883 int i, j, nrels, new_sz;
6884 const struct btf_var_secinfo *vi = NULL;
6885 const struct btf_type *sec, *var, *def;
6886 struct bpf_map *map = NULL, *targ_map = NULL;
6887 struct bpf_program *targ_prog = NULL;
6888 bool is_prog_array, is_map_in_map;
6889 const struct btf_member *member;
6890 const char *name, *mname, *type;
6891 unsigned int moff;
6892 Elf64_Sym *sym;
6893 Elf64_Rel *rel;
6894 void *tmp;
6895
6896 if (!obj->efile.btf_maps_sec_btf_id || !obj->btf)
6897 return -EINVAL;
6898 sec = btf__type_by_id(obj->btf, obj->efile.btf_maps_sec_btf_id);
6899 if (!sec)
6900 return -EINVAL;
6901
6902 nrels = shdr->sh_size / shdr->sh_entsize;
6903 for (i = 0; i < nrels; i++) {
6904 rel = elf_rel_by_idx(data, i);
6905 if (!rel) {
6906 pr_warn(".maps relo #%d: failed to get ELF relo\n", i);
6907 return -LIBBPF_ERRNO__FORMAT;
6908 }
6909
6910 sym = elf_sym_by_idx(obj, ELF64_R_SYM(rel->r_info));
6911 if (!sym) {
6912 pr_warn(".maps relo #%d: symbol %zx not found\n",
6913 i, (size_t)ELF64_R_SYM(rel->r_info));
6914 return -LIBBPF_ERRNO__FORMAT;
6915 }
6916 name = elf_sym_str(obj, sym->st_name) ?: "<?>";
6917
6918 pr_debug(".maps relo #%d: for %zd value %zd rel->r_offset %zu name %d ('%s')\n",
6919 i, (ssize_t)(rel->r_info >> 32), (size_t)sym->st_value,
6920 (size_t)rel->r_offset, sym->st_name, name);
6921
6922 for (j = 0; j < obj->nr_maps; j++) {
6923 map = &obj->maps[j];
6924 if (map->sec_idx != obj->efile.btf_maps_shndx)
6925 continue;
6926
6927 vi = btf_var_secinfos(sec) + map->btf_var_idx;
6928 if (vi->offset <= rel->r_offset &&
6929 rel->r_offset + bpf_ptr_sz <= vi->offset + vi->size)
6930 break;
6931 }
6932 if (j == obj->nr_maps) {
6933 pr_warn(".maps relo #%d: cannot find map '%s' at rel->r_offset %zu\n",
6934 i, name, (size_t)rel->r_offset);
6935 return -EINVAL;
6936 }
6937
6938 is_map_in_map = bpf_map_type__is_map_in_map(map->def.type);
6939 is_prog_array = map->def.type == BPF_MAP_TYPE_PROG_ARRAY;
6940 type = is_map_in_map ? "map" : "prog";
6941 if (is_map_in_map) {
6942 if (sym->st_shndx != obj->efile.btf_maps_shndx) {
6943 pr_warn(".maps relo #%d: '%s' isn't a BTF-defined map\n",
6944 i, name);
6945 return -LIBBPF_ERRNO__RELOC;
6946 }
6947 if (map->def.type == BPF_MAP_TYPE_HASH_OF_MAPS &&
6948 map->def.key_size != sizeof(int)) {
6949 pr_warn(".maps relo #%d: hash-of-maps '%s' should have key size %zu.\n",
6950 i, map->name, sizeof(int));
6951 return -EINVAL;
6952 }
6953 targ_map = bpf_object__find_map_by_name(obj, name);
6954 if (!targ_map) {
6955 pr_warn(".maps relo #%d: '%s' isn't a valid map reference\n",
6956 i, name);
6957 return -ESRCH;
6958 }
6959 } else if (is_prog_array) {
6960 targ_prog = bpf_object__find_program_by_name(obj, name);
6961 if (!targ_prog) {
6962 pr_warn(".maps relo #%d: '%s' isn't a valid program reference\n",
6963 i, name);
6964 return -ESRCH;
6965 }
6966 if (targ_prog->sec_idx != sym->st_shndx ||
6967 targ_prog->sec_insn_off * 8 != sym->st_value ||
6968 prog_is_subprog(obj, targ_prog)) {
6969 pr_warn(".maps relo #%d: '%s' isn't an entry-point program\n",
6970 i, name);
6971 return -LIBBPF_ERRNO__RELOC;
6972 }
6973 } else {
6974 return -EINVAL;
6975 }
6976
6977 var = btf__type_by_id(obj->btf, vi->type);
6978 def = skip_mods_and_typedefs(obj->btf, var->type, NULL);
6979 if (btf_vlen(def) == 0)
6980 return -EINVAL;
6981 member = btf_members(def) + btf_vlen(def) - 1;
6982 mname = btf__name_by_offset(obj->btf, member->name_off);
6983 if (strcmp(mname, "values"))
6984 return -EINVAL;
6985
6986 moff = btf_member_bit_offset(def, btf_vlen(def) - 1) / 8;
6987 if (rel->r_offset - vi->offset < moff)
6988 return -EINVAL;
6989
6990 moff = rel->r_offset - vi->offset - moff;
6991 /* here we use the BPF pointer size, which is always 64-bit, as we
6992 * are parsing an ELF that was built for the BPF target
6993 */
6994 if (moff % bpf_ptr_sz)
6995 return -EINVAL;
6996 moff /= bpf_ptr_sz;
6997 if (moff >= map->init_slots_sz) {
6998 new_sz = moff + 1;
6999 tmp = libbpf_reallocarray(map->init_slots, new_sz, host_ptr_sz);
7000 if (!tmp)
7001 return -ENOMEM;
7002 map->init_slots = tmp;
7003 memset(map->init_slots + map->init_slots_sz, 0,
7004 (new_sz - map->init_slots_sz) * host_ptr_sz);
7005 map->init_slots_sz = new_sz;
7006 }
7007 map->init_slots[moff] = is_map_in_map ? (void *)targ_map : (void *)targ_prog;
7008
7009 pr_debug(".maps relo #%d: map '%s' slot [%d] points to %s '%s'\n",
7010 i, map->name, moff, type, name);
7011 }
7012
7013 return 0;
7014 }
7015
7016 static int bpf_object__collect_relos(struct bpf_object *obj)
7017 {
7018 int i, err;
7019
7020 for (i = 0; i < obj->efile.sec_cnt; i++) {
7021 struct elf_sec_desc *sec_desc = &obj->efile.secs[i];
7022 Elf64_Shdr *shdr;
7023 Elf_Data *data;
7024 int idx;
7025
7026 if (sec_desc->sec_type != SEC_RELO)
7027 continue;
7028
7029 shdr = sec_desc->shdr;
7030 data = sec_desc->data;
7031 idx = shdr->sh_info;
7032
7033 if (shdr->sh_type != SHT_REL || idx < 0 || idx >= obj->efile.sec_cnt) {
7034 pr_warn("internal error at %d\n", __LINE__);
7035 return -LIBBPF_ERRNO__INTERNAL;
7036 }
7037
7038 if (obj->efile.secs[idx].sec_type == SEC_ST_OPS)
7039 err = bpf_object__collect_st_ops_relos(obj, shdr, data);
7040 else if (idx == obj->efile.btf_maps_shndx)
7041 err = bpf_object__collect_map_relos(obj, shdr, data);
7042 else
7043 err = bpf_object__collect_prog_relos(obj, shdr, data);
7044 if (err)
7045 return err;
7046 }
7047
7048 bpf_object__sort_relos(obj);
7049 return 0;
7050 }
7051
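/* Check whether the instruction is a call to a BPF helper (src_reg == 0, as
 * opposed to a subprog or kfunc pseudo call) and, if so, return the helper ID.
 */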
7052 static bool insn_is_helper_call(struct bpf_insn *insn, enum bpf_func_id *func_id)
7053 {
7054 if (BPF_CLASS(insn->code) == BPF_JMP &&
7055 BPF_OP(insn->code) == BPF_CALL &&
7056 BPF_SRC(insn->code) == BPF_K &&
7057 insn->src_reg == 0 &&
7058 insn->dst_reg == 0) {
7059 *func_id = insn->imm;
7060 return true;
7061 }
7062 return false;
7063 }
7064
7065 static int bpf_object__sanitize_prog(struct bpf_object *obj, struct bpf_program *prog)
7066 {
7067 struct bpf_insn *insn = prog->insns;
7068 enum bpf_func_id func_id;
7069 int i;
7070
7071 if (obj->gen_loader)
7072 return 0;
7073
7074 for (i = 0; i < prog->insns_cnt; i++, insn++) {
7075 if (!insn_is_helper_call(insn, &func_id))
7076 continue;
7077
7078 /* on kernels that don't yet support
7079 * bpf_probe_read_{kernel,user}[_str] helpers, fall back
7080 * to bpf_probe_read() which works well for old kernels
7081 */
7082 switch (func_id) {
7083 case BPF_FUNC_probe_read_kernel:
7084 case BPF_FUNC_probe_read_user:
7085 if (!kernel_supports(obj, FEAT_PROBE_READ_KERN))
7086 insn->imm = BPF_FUNC_probe_read;
7087 break;
7088 case BPF_FUNC_probe_read_kernel_str:
7089 case BPF_FUNC_probe_read_user_str:
7090 if (!kernel_supports(obj, FEAT_PROBE_READ_KERN))
7091 insn->imm = BPF_FUNC_probe_read_str;
7092 break;
7093 default:
7094 break;
7095 }
7096 }
7097 return 0;
7098 }
7099
7100 static int libbpf_find_attach_btf_id(struct bpf_program *prog, const char *attach_name,
7101 int *btf_obj_fd, int *btf_type_id);
7102
7103 /* this is called as prog->sec_def->prog_prepare_load_fn for libbpf-supported sec_defs */
7104 static int libbpf_prepare_prog_load(struct bpf_program *prog,
7105 struct bpf_prog_load_opts *opts, long cookie)
7106 {
7107 enum sec_def_flags def = cookie;
7108
7109 /* old kernels might not support specifying expected_attach_type */
7110 if ((def & SEC_EXP_ATTACH_OPT) && !kernel_supports(prog->obj, FEAT_EXP_ATTACH_TYPE))
7111 opts->expected_attach_type = 0;
7112
7113 if (def & SEC_SLEEPABLE)
7114 opts->prog_flags |= BPF_F_SLEEPABLE;
7115
7116 if (prog->type == BPF_PROG_TYPE_XDP && (def & SEC_XDP_FRAGS))
7117 opts->prog_flags |= BPF_F_XDP_HAS_FRAGS;
7118
7119 /* special check for usdt to use uprobe_multi link */
7120 if ((def & SEC_USDT) && kernel_supports(prog->obj, FEAT_UPROBE_MULTI_LINK))
7121 prog->expected_attach_type = BPF_TRACE_UPROBE_MULTI;
7122
7123 if ((def & SEC_ATTACH_BTF) && !prog->attach_btf_id) {
7124 int btf_obj_fd = 0, btf_type_id = 0, err;
7125 const char *attach_name;
7126
7127 attach_name = strchr(prog->sec_name, '/');
7128 if (!attach_name) {
7129 /* if BPF program is annotated with just SEC("fentry")
7130 * (or similar) without declaratively specifying
7131 * target, then it is expected that target will be
7132 * specified with bpf_program__set_attach_target() at
7133 * runtime before BPF object load step. If not, then
7134 * there is nothing to load into the kernel as the BPF
7135 * verifier won't be able to validate BPF program
7136 * correctness anyway.
7137 */
7138 pr_warn("prog '%s': no BTF-based attach target is specified, use bpf_program__set_attach_target()\n",
7139 prog->name);
7140 return -EINVAL;
7141 }
7142 attach_name++; /* skip over / */
7143
7144 err = libbpf_find_attach_btf_id(prog, attach_name, &btf_obj_fd, &btf_type_id);
7145 if (err)
7146 return err;
7147
7148 /* cache resolved BTF FD and BTF type ID in the prog */
7149 prog->attach_btf_obj_fd = btf_obj_fd;
7150 prog->attach_btf_id = btf_type_id;
7151
7152 /* but by now libbpf common logic is not utilizing
7153 * prog->attach_btf_obj_fd/prog->attach_btf_id anymore because
7154 * this callback is called after opts were populated by
7155 * libbpf, so this callback has to update opts explicitly here
7156 */
7157 opts->attach_btf_obj_fd = btf_obj_fd;
7158 opts->attach_btf_id = btf_type_id;
7159 }
7160 return 0;
7161 }
7162
7163 static void fixup_verifier_log(struct bpf_program *prog, char *buf, size_t buf_sz);
7164
7165 static int bpf_object_load_prog(struct bpf_object *obj, struct bpf_program *prog,
7166 struct bpf_insn *insns, int insns_cnt,
7167 const char *license, __u32 kern_version, int *prog_fd)
7168 {
7169 LIBBPF_OPTS(bpf_prog_load_opts, load_attr);
7170 const char *prog_name = NULL;
7171 char *cp, errmsg[STRERR_BUFSIZE];
7172 size_t log_buf_size = 0;
7173 char *log_buf = NULL, *tmp;
7174 int btf_fd, ret, err;
7175 bool own_log_buf = true;
7176 __u32 log_level = prog->log_level;
7177
7178 if (prog->type == BPF_PROG_TYPE_UNSPEC) {
7179 /*
7180 * The program type must be set. Most likely we couldn't find a proper
7181 * section definition at load time, and thus we didn't infer the type.
7182 */
7183 pr_warn("prog '%s': missing BPF prog type, check ELF section name '%s'\n",
7184 prog->name, prog->sec_name);
7185 return -EINVAL;
7186 }
7187
7188 if (!insns || !insns_cnt)
7189 return -EINVAL;
7190
7191 if (kernel_supports(obj, FEAT_PROG_NAME))
7192 prog_name = prog->name;
7193 load_attr.attach_prog_fd = prog->attach_prog_fd;
7194 load_attr.attach_btf_obj_fd = prog->attach_btf_obj_fd;
7195 load_attr.attach_btf_id = prog->attach_btf_id;
7196 load_attr.kern_version = kern_version;
7197 load_attr.prog_ifindex = prog->prog_ifindex;
7198
7199 /* specify func_info/line_info only if kernel supports them */
7200 btf_fd = btf__fd(obj->btf);
7201 if (btf_fd >= 0 && kernel_supports(obj, FEAT_BTF_FUNC)) {
7202 load_attr.prog_btf_fd = btf_fd;
7203 load_attr.func_info = prog->func_info;
7204 load_attr.func_info_rec_size = prog->func_info_rec_size;
7205 load_attr.func_info_cnt = prog->func_info_cnt;
7206 load_attr.line_info = prog->line_info;
7207 load_attr.line_info_rec_size = prog->line_info_rec_size;
7208 load_attr.line_info_cnt = prog->line_info_cnt;
7209 }
7210 load_attr.log_level = log_level;
7211 load_attr.prog_flags = prog->prog_flags;
7212 load_attr.fd_array = obj->fd_array;
7213
7214 load_attr.token_fd = obj->token_fd;
7215 if (obj->token_fd)
7216 load_attr.prog_flags |= BPF_F_TOKEN_FD;
7217
7218 /* adjust load_attr if sec_def provides custom preload callback */
7219 if (prog->sec_def && prog->sec_def->prog_prepare_load_fn) {
7220 err = prog->sec_def->prog_prepare_load_fn(prog, &load_attr, prog->sec_def->cookie);
7221 if (err < 0) {
7222 pr_warn("prog '%s': failed to prepare load attributes: %d\n",
7223 prog->name, err);
7224 return err;
7225 }
7226 insns = prog->insns;
7227 insns_cnt = prog->insns_cnt;
7228 }
7229
7230 /* allow prog_prepare_load_fn to change expected_attach_type */
7231 load_attr.expected_attach_type = prog->expected_attach_type;
7232
7233 if (obj->gen_loader) {
7234 bpf_gen__prog_load(obj->gen_loader, prog->type, prog->name,
7235 license, insns, insns_cnt, &load_attr,
7236 prog - obj->programs);
7237 *prog_fd = -1;
7238 return 0;
7239 }
7240
7241 retry_load:
7242 /* if log_level is zero, we don't request logs initially even if
7243 * custom log_buf is specified; if the program load fails, then we'll
7244 * bump log_level to 1 and use either custom log_buf or we'll allocate
7245 * our own and retry the load to get details on what failed
7246 */
7247 if (log_level) {
7248 if (prog->log_buf) {
7249 log_buf = prog->log_buf;
7250 log_buf_size = prog->log_size;
7251 own_log_buf = false;
7252 } else if (obj->log_buf) {
7253 log_buf = obj->log_buf;
7254 log_buf_size = obj->log_size;
7255 own_log_buf = false;
7256 } else {
7257 log_buf_size = max((size_t)BPF_LOG_BUF_SIZE, log_buf_size * 2);
7258 tmp = realloc(log_buf, log_buf_size);
7259 if (!tmp) {
7260 ret = -ENOMEM;
7261 goto out;
7262 }
7263 log_buf = tmp;
7264 log_buf[0] = '\0';
7265 own_log_buf = true;
7266 }
7267 }
7268
7269 load_attr.log_buf = log_buf;
7270 load_attr.log_size = log_buf_size;
7271 load_attr.log_level = log_level;
7272
7273 ret = bpf_prog_load(prog->type, prog_name, license, insns, insns_cnt, &load_attr);
7274 if (ret >= 0) {
7275 if (log_level && own_log_buf) {
7276 pr_debug("prog '%s': -- BEGIN PROG LOAD LOG --\n%s-- END PROG LOAD LOG --\n",
7277 prog->name, log_buf);
7278 }
7279
7280 if (obj->has_rodata && kernel_supports(obj, FEAT_PROG_BIND_MAP)) {
7281 struct bpf_map *map;
7282 int i;
7283
7284 for (i = 0; i < obj->nr_maps; i++) {
7285 map = &prog->obj->maps[i];
7286 if (map->libbpf_type != LIBBPF_MAP_RODATA)
7287 continue;
7288
7289 if (bpf_prog_bind_map(ret, map->fd, NULL)) {
7290 cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg));
7291 pr_warn("prog '%s': failed to bind map '%s': %s\n",
7292 prog->name, map->real_name, cp);
7293 /* Don't fail hard if can't bind rodata. */
7294 }
7295 }
7296 }
7297
7298 *prog_fd = ret;
7299 ret = 0;
7300 goto out;
7301 }
7302
7303 if (log_level == 0) {
7304 log_level = 1;
7305 goto retry_load;
7306 }
7307 /* On ENOSPC, increase log buffer size and retry, unless custom
7308 * log_buf is specified.
7309 * Be careful to not overflow u32, though. Kernel's log buf size limit
7310 * isn't part of UAPI so it can always be bumped to full 4GB. So don't
7311 * multiply by 2 unless we are sure we'll fit within 32 bits.
7312 * Currently, we'll get -EINVAL when we reach (UINT_MAX >> 2).
7313 */
7314 if (own_log_buf && errno == ENOSPC && log_buf_size <= UINT_MAX / 2)
7315 goto retry_load;
7316
7317 ret = -errno;
7318
7319 /* post-process verifier log to improve error descriptions */
7320 fixup_verifier_log(prog, log_buf, log_buf_size);
7321
7322 cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg));
7323 pr_warn("prog '%s': BPF program load failed: %s\n", prog->name, cp);
7324 pr_perm_msg(ret);
7325
7326 if (own_log_buf && log_buf && log_buf[0] != '\0') {
7327 pr_warn("prog '%s': -- BEGIN PROG LOAD LOG --\n%s-- END PROG LOAD LOG --\n",
7328 prog->name, log_buf);
7329 }
7330
7331 out:
7332 if (own_log_buf)
7333 free(log_buf);
7334 return ret;
7335 }
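
/* Usage sketch (illustrative, not part of libbpf itself): instead of relying
 * on the allocate-and-retry path above, a caller can pre-set a per-program
 * log buffer and level before load; the object path and program name below
 * are hypothetical.
 *
 *	static char vlog[1024 * 1024];
 *	struct bpf_object *obj = bpf_object__open_file("prog.bpf.o", NULL);
 *	struct bpf_program *p = bpf_object__find_program_by_name(obj, "handler");
 *
 *	bpf_program__set_log_buf(p, vlog, sizeof(vlog));
 *	bpf_program__set_log_level(p, 1);
 *	if (bpf_object__load(obj))
 *		fprintf(stderr, "%s\n", vlog);
 */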
7336
7337 static char *find_prev_line(char *buf, char *cur)
7338 {
7339 char *p;
7340
7341 if (cur == buf) /* start of the log buf, no previous line */
7342 return NULL;
7343
7344 p = cur - 1;
7345 while (p - 1 >= buf && *(p - 1) != '\n')
7346 p--;
7347
7348 return p;
7349 }
7350
7351 static void patch_log(char *buf, size_t buf_sz, size_t log_sz,
7352 char *orig, size_t orig_sz, const char *patch)
7353 {
7354 /* size of the remaining log content to the right from the to-be-replaced part */
7355 size_t rem_sz = (buf + log_sz) - (orig + orig_sz);
7356 size_t patch_sz = strlen(patch);
7357
7358 if (patch_sz != orig_sz) {
7359 /* If patch line(s) are longer than original piece of verifier log,
7360 * shift log contents by (patch_sz - orig_sz) bytes to the right
7361 * starting from after to-be-replaced part of the log.
7362 *
7363 * If patch line(s) are shorter than original piece of verifier log,
7364 * shift log contents by (orig_sz - patch_sz) bytes to the left
7365 * starting from after to-be-replaced part of the log
7366 *
7367 * We need to be careful about not overflowing available
7368 * buf_sz capacity. If that's the case, we'll truncate the end
7369 * of the original log, as necessary.
7370 */
7371 if (patch_sz > orig_sz) {
7372 if (orig + patch_sz >= buf + buf_sz) {
7373 /* patch is big enough to cover remaining space completely */
7374 patch_sz -= (orig + patch_sz) - (buf + buf_sz) + 1;
7375 rem_sz = 0;
7376 } else if (patch_sz - orig_sz > buf_sz - log_sz) {
7377 /* patch causes part of remaining log to be truncated */
7378 rem_sz -= (patch_sz - orig_sz) - (buf_sz - log_sz);
7379 }
7380 }
7381 /* shift remaining log to the right by calculated amount */
7382 memmove(orig + patch_sz, orig + orig_sz, rem_sz);
7383 }
7384
7385 memcpy(orig, patch, patch_sz);
7386 }
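
/* Worked example (hedged, for illustration only): with buf_sz = 16,
 * log_sz = 12 and log contents "AAAA\nBBB\nCC", patching the "BBB" piece
 * (orig_sz = 3) with "XXXXX" (patch_sz = 5) shifts the trailing "\nCC\0"
 * right by 2 bytes, producing "AAAA\nXXXXX\nCC" with the log effectively
 * growing to 14 bytes, still within buf_sz. If the shifted tail would not
 * fit into buf_sz, the end of the original log is truncated instead.
 */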
7387
7388 static void fixup_log_failed_core_relo(struct bpf_program *prog,
7389 char *buf, size_t buf_sz, size_t log_sz,
7390 char *line1, char *line2, char *line3)
7391 {
7392 /* Expected log for failed and not properly guarded CO-RE relocation:
7393 * line1 -> 123: (85) call unknown#195896080
7394 * line2 -> invalid func unknown#195896080
7395 * line3 -> <anything else or end of buffer>
7396 *
7397 * "123" is the index of the instruction that was poisoned. We extract
7398 * instruction index to find corresponding CO-RE relocation and
7399 * replace this part of the log with more relevant information about
7400 * failed CO-RE relocation.
7401 */
7402 const struct bpf_core_relo *relo;
7403 struct bpf_core_spec spec;
7404 char patch[512], spec_buf[256];
7405 int insn_idx, err, spec_len;
7406
7407 if (sscanf(line1, "%d: (%*d) call unknown#195896080\n", &insn_idx) != 1)
7408 return;
7409
7410 relo = find_relo_core(prog, insn_idx);
7411 if (!relo)
7412 return;
7413
7414 err = bpf_core_parse_spec(prog->name, prog->obj->btf, relo, &spec);
7415 if (err)
7416 return;
7417
7418 spec_len = bpf_core_format_spec(spec_buf, sizeof(spec_buf), &spec);
7419 snprintf(patch, sizeof(patch),
7420 "%d: <invalid CO-RE relocation>\n"
7421 "failed to resolve CO-RE relocation %s%s\n",
7422 insn_idx, spec_buf, spec_len >= sizeof(spec_buf) ? "..." : "");
7423
7424 patch_log(buf, buf_sz, log_sz, line1, line3 - line1, patch);
7425 }
7426
7427 static void fixup_log_missing_map_load(struct bpf_program *prog,
7428 char *buf, size_t buf_sz, size_t log_sz,
7429 char *line1, char *line2, char *line3)
7430 {
7431 /* Expected log for failed and not properly guarded map reference:
7432 * line1 -> 123: (85) call unknown#2001000345
7433 * line2 -> invalid func unknown#2001000345
7434 * line3 -> <anything else or end of buffer>
7435 *
7436 * "123" is the index of the instruction that was poisoned.
7437 * "345" in "2001000345" is a map index in obj->maps to fetch map name.
7438 */
7439 struct bpf_object *obj = prog->obj;
7440 const struct bpf_map *map;
7441 int insn_idx, map_idx;
7442 char patch[128];
7443
7444 if (sscanf(line1, "%d: (%*d) call unknown#%d\n", &insn_idx, &map_idx) != 2)
7445 return;
7446
7447 map_idx -= POISON_LDIMM64_MAP_BASE;
7448 if (map_idx < 0 || map_idx >= obj->nr_maps)
7449 return;
7450 map = &obj->maps[map_idx];
7451
7452 snprintf(patch, sizeof(patch),
7453 "%d: <invalid BPF map reference>\n"
7454 "BPF map '%s' is referenced but wasn't created\n",
7455 insn_idx, map->name);
7456
7457 patch_log(buf, buf_sz, log_sz, line1, line3 - line1, patch);
7458 }
7459
7460 static void fixup_log_missing_kfunc_call(struct bpf_program *prog,
7461 char *buf, size_t buf_sz, size_t log_sz,
7462 char *line1, char *line2, char *line3)
7463 {
7464 /* Expected log for failed and not properly guarded kfunc call:
7465 * line1 -> 123: (85) call unknown#2002000345
7466 * line2 -> invalid func unknown#2002000345
7467 * line3 -> <anything else or end of buffer>
7468 *
7469 * "123" is the index of the instruction that was poisoned.
7470 * "345" in "2002000345" is an extern index in obj->externs to fetch kfunc name.
7471 */
7472 struct bpf_object *obj = prog->obj;
7473 const struct extern_desc *ext;
7474 int insn_idx, ext_idx;
7475 char patch[128];
7476
7477 if (sscanf(line1, "%d: (%*d) call unknown#%d\n", &insn_idx, &ext_idx) != 2)
7478 return;
7479
7480 ext_idx -= POISON_CALL_KFUNC_BASE;
7481 if (ext_idx < 0 || ext_idx >= obj->nr_extern)
7482 return;
7483 ext = &obj->externs[ext_idx];
7484
7485 snprintf(patch, sizeof(patch),
7486 "%d: <invalid kfunc call>\n"
7487 "kfunc '%s' is referenced but wasn't resolved\n",
7488 insn_idx, ext->name);
7489
7490 patch_log(buf, buf_sz, log_sz, line1, line3 - line1, patch);
7491 }
7492
7493 static void fixup_verifier_log(struct bpf_program *prog, char *buf, size_t buf_sz)
7494 {
7495 /* look for familiar error patterns in last N lines of the log */
7496 const size_t max_last_line_cnt = 10;
7497 char *prev_line, *cur_line, *next_line;
7498 size_t log_sz;
7499 int i;
7500
7501 if (!buf)
7502 return;
7503
7504 log_sz = strlen(buf) + 1;
7505 next_line = buf + log_sz - 1;
7506
7507 for (i = 0; i < max_last_line_cnt; i++, next_line = cur_line) {
7508 cur_line = find_prev_line(buf, next_line);
7509 if (!cur_line)
7510 return;
7511
7512 if (str_has_pfx(cur_line, "invalid func unknown#195896080\n")) {
7513 prev_line = find_prev_line(buf, cur_line);
7514 if (!prev_line)
7515 continue;
7516
7517 /* failed CO-RE relocation case */
7518 fixup_log_failed_core_relo(prog, buf, buf_sz, log_sz,
7519 prev_line, cur_line, next_line);
7520 return;
7521 } else if (str_has_pfx(cur_line, "invalid func unknown#"POISON_LDIMM64_MAP_PFX)) {
7522 prev_line = find_prev_line(buf, cur_line);
7523 if (!prev_line)
7524 continue;
7525
7526 /* reference to uncreated BPF map */
7527 fixup_log_missing_map_load(prog, buf, buf_sz, log_sz,
7528 prev_line, cur_line, next_line);
7529 return;
7530 } else if (str_has_pfx(cur_line, "invalid func unknown#"POISON_CALL_KFUNC_PFX)) {
7531 prev_line = find_prev_line(buf, cur_line);
7532 if (!prev_line)
7533 continue;
7534
7535 /* reference to unresolved kfunc */
7536 fixup_log_missing_kfunc_call(prog, buf, buf_sz, log_sz,
7537 prev_line, cur_line, next_line);
7538 return;
7539 }
7540 }
7541 }
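
/* For example (hedged illustration), a failed and unguarded CO-RE relocation
 * that the verifier reports as:
 *	123: (85) call unknown#195896080
 *	invalid func unknown#195896080
 * gets rewritten in place into something along the lines of:
 *	123: <invalid CO-RE relocation>
 *	failed to resolve CO-RE relocation <byte_off> [7] struct task_struct.pid (0:24)
 * with the spec string produced by bpf_core_format_spec(); the exact spec
 * text depends on the relocation kind and target type.
 */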
7542
7543 static int bpf_program_record_relos(struct bpf_program *prog)
7544 {
7545 struct bpf_object *obj = prog->obj;
7546 int i;
7547
7548 for (i = 0; i < prog->nr_reloc; i++) {
7549 struct reloc_desc *relo = &prog->reloc_desc[i];
7550 struct extern_desc *ext = &obj->externs[relo->ext_idx];
7551 int kind;
7552
7553 switch (relo->type) {
7554 case RELO_EXTERN_LD64:
7555 if (ext->type != EXT_KSYM)
7556 continue;
7557 kind = btf_is_var(btf__type_by_id(obj->btf, ext->btf_id)) ?
7558 BTF_KIND_VAR : BTF_KIND_FUNC;
7559 bpf_gen__record_extern(obj->gen_loader, ext->name,
7560 ext->is_weak, !ext->ksym.type_id,
7561 true, kind, relo->insn_idx);
7562 break;
7563 case RELO_EXTERN_CALL:
7564 bpf_gen__record_extern(obj->gen_loader, ext->name,
7565 ext->is_weak, false, false, BTF_KIND_FUNC,
7566 relo->insn_idx);
7567 break;
7568 case RELO_CORE: {
7569 struct bpf_core_relo cr = {
7570 .insn_off = relo->insn_idx * 8,
7571 .type_id = relo->core_relo->type_id,
7572 .access_str_off = relo->core_relo->access_str_off,
7573 .kind = relo->core_relo->kind,
7574 };
7575
7576 bpf_gen__record_relo_core(obj->gen_loader, &cr);
7577 break;
7578 }
7579 default:
7580 continue;
7581 }
7582 }
7583 return 0;
7584 }
7585
7586 static int
7587 bpf_object__load_progs(struct bpf_object *obj, int log_level)
7588 {
7589 struct bpf_program *prog;
7590 size_t i;
7591 int err;
7592
7593 for (i = 0; i < obj->nr_programs; i++) {
7594 prog = &obj->programs[i];
7595 err = bpf_object__sanitize_prog(obj, prog);
7596 if (err)
7597 return err;
7598 }
7599
7600 for (i = 0; i < obj->nr_programs; i++) {
7601 prog = &obj->programs[i];
7602 if (prog_is_subprog(obj, prog))
7603 continue;
7604 if (!prog->autoload) {
7605 pr_debug("prog '%s': skipped loading\n", prog->name);
7606 continue;
7607 }
7608 prog->log_level |= log_level;
7609
7610 if (obj->gen_loader)
7611 bpf_program_record_relos(prog);
7612
7613 err = bpf_object_load_prog(obj, prog, prog->insns, prog->insns_cnt,
7614 obj->license, obj->kern_version, &prog->fd);
7615 if (err) {
7616 pr_warn("prog '%s': failed to load: %d\n", prog->name, err);
7617 return err;
7618 }
7619 }
7620
7621 bpf_object__free_relocs(obj);
7622 return 0;
7623 }
7624
7625 static const struct bpf_sec_def *find_sec_def(const char *sec_name);
7626
7627 static int bpf_object_init_progs(struct bpf_object *obj, const struct bpf_object_open_opts *opts)
7628 {
7629 struct bpf_program *prog;
7630 int err;
7631
7632 bpf_object__for_each_program(prog, obj) {
7633 prog->sec_def = find_sec_def(prog->sec_name);
7634 if (!prog->sec_def) {
7635 /* couldn't guess, but user might manually specify */
7636 pr_debug("prog '%s': unrecognized ELF section name '%s'\n",
7637 prog->name, prog->sec_name);
7638 continue;
7639 }
7640
7641 prog->type = prog->sec_def->prog_type;
7642 prog->expected_attach_type = prog->sec_def->expected_attach_type;
7643
7644 /* sec_def can have custom callback which should be called
7645 * after bpf_program is initialized to adjust its properties
7646 */
7647 if (prog->sec_def->prog_setup_fn) {
7648 err = prog->sec_def->prog_setup_fn(prog, prog->sec_def->cookie);
7649 if (err < 0) {
7650 pr_warn("prog '%s': failed to initialize: %d\n",
7651 prog->name, err);
7652 return err;
7653 }
7654 }
7655 }
7656
7657 return 0;
7658 }
7659
7660 static struct bpf_object *bpf_object_open(const char *path, const void *obj_buf, size_t obj_buf_sz,
7661 const struct bpf_object_open_opts *opts)
7662 {
7663 const char *obj_name, *kconfig, *btf_tmp_path, *token_path;
7664 struct bpf_object *obj;
7665 char tmp_name[64];
7666 int err;
7667 char *log_buf;
7668 size_t log_size;
7669 __u32 log_level;
7670
7671 if (elf_version(EV_CURRENT) == EV_NONE) {
7672 pr_warn("failed to init libelf for %s\n",
7673 path ? : "(mem buf)");
7674 return ERR_PTR(-LIBBPF_ERRNO__LIBELF);
7675 }
7676
7677 if (!OPTS_VALID(opts, bpf_object_open_opts))
7678 return ERR_PTR(-EINVAL);
7679
7680 obj_name = OPTS_GET(opts, object_name, NULL);
7681 if (obj_buf) {
7682 if (!obj_name) {
7683 snprintf(tmp_name, sizeof(tmp_name), "%lx-%lx",
7684 (unsigned long)obj_buf,
7685 (unsigned long)obj_buf_sz);
7686 obj_name = tmp_name;
7687 }
7688 path = obj_name;
7689 pr_debug("loading object '%s' from buffer\n", obj_name);
7690 }
7691
7692 log_buf = OPTS_GET(opts, kernel_log_buf, NULL);
7693 log_size = OPTS_GET(opts, kernel_log_size, 0);
7694 log_level = OPTS_GET(opts, kernel_log_level, 0);
7695 if (log_size > UINT_MAX)
7696 return ERR_PTR(-EINVAL);
7697 if (log_size && !log_buf)
7698 return ERR_PTR(-EINVAL);
7699
7700 token_path = OPTS_GET(opts, bpf_token_path, NULL);
7701 /* if user didn't specify bpf_token_path explicitly, check if
7702 * LIBBPF_BPF_TOKEN_PATH envvar was set and treat it as bpf_token_path
7703 * option
7704 */
7705 if (!token_path)
7706 token_path = getenv("LIBBPF_BPF_TOKEN_PATH");
7707 if (token_path && strlen(token_path) >= PATH_MAX)
7708 return ERR_PTR(-ENAMETOOLONG);
7709
7710 obj = bpf_object__new(path, obj_buf, obj_buf_sz, obj_name);
7711 if (IS_ERR(obj))
7712 return obj;
7713
7714 obj->log_buf = log_buf;
7715 obj->log_size = log_size;
7716 obj->log_level = log_level;
7717
7718 if (token_path) {
7719 obj->token_path = strdup(token_path);
7720 if (!obj->token_path) {
7721 err = -ENOMEM;
7722 goto out;
7723 }
7724 }
7725
7726 btf_tmp_path = OPTS_GET(opts, btf_custom_path, NULL);
7727 if (btf_tmp_path) {
7728 if (strlen(btf_tmp_path) >= PATH_MAX) {
7729 err = -ENAMETOOLONG;
7730 goto out;
7731 }
7732 obj->btf_custom_path = strdup(btf_tmp_path);
7733 if (!obj->btf_custom_path) {
7734 err = -ENOMEM;
7735 goto out;
7736 }
7737 }
7738
7739 kconfig = OPTS_GET(opts, kconfig, NULL);
7740 if (kconfig) {
7741 obj->kconfig = strdup(kconfig);
7742 if (!obj->kconfig) {
7743 err = -ENOMEM;
7744 goto out;
7745 }
7746 }
7747
7748 err = bpf_object__elf_init(obj);
7749 err = err ? : bpf_object__check_endianness(obj);
7750 err = err ? : bpf_object__elf_collect(obj);
7751 err = err ? : bpf_object__collect_externs(obj);
7752 err = err ? : bpf_object_fixup_btf(obj);
7753 err = err ? : bpf_object__init_maps(obj, opts);
7754 err = err ? : bpf_object_init_progs(obj, opts);
7755 err = err ? : bpf_object__collect_relos(obj);
7756 if (err)
7757 goto out;
7758
7759 bpf_object__elf_finish(obj);
7760
7761 return obj;
7762 out:
7763 bpf_object__close(obj);
7764 return ERR_PTR(err);
7765 }
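
/* Usage sketch (illustrative): callers typically fill these options through
 * the LIBBPF_OPTS() helper; the file name, buffer and token path below are
 * just examples.
 *
 *	static char klog[64 * 1024];
 *	LIBBPF_OPTS(bpf_object_open_opts, opts,
 *		.kernel_log_buf = klog,
 *		.kernel_log_size = sizeof(klog),
 *		.kernel_log_level = 1,
 *		.bpf_token_path = "/sys/fs/bpf",
 *	);
 *	struct bpf_object *obj = bpf_object__open_file("prog.bpf.o", &opts);
 */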
7766
7767 struct bpf_object *
7768 bpf_object__open_file(const char *path, const struct bpf_object_open_opts *opts)
7769 {
7770 if (!path)
7771 return libbpf_err_ptr(-EINVAL);
7772
7773 pr_debug("loading %s\n", path);
7774
7775 return libbpf_ptr(bpf_object_open(path, NULL, 0, opts));
7776 }
7777
7778 struct bpf_object *bpf_object__open(const char *path)
7779 {
7780 return bpf_object__open_file(path, NULL);
7781 }
7782
7783 struct bpf_object *
7784 bpf_object__open_mem(const void *obj_buf, size_t obj_buf_sz,
7785 const struct bpf_object_open_opts *opts)
7786 {
7787 if (!obj_buf || obj_buf_sz == 0)
7788 return libbpf_err_ptr(-EINVAL);
7789
7790 return libbpf_ptr(bpf_object_open(NULL, obj_buf, obj_buf_sz, opts));
7791 }
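
/* Usage sketch (illustrative): bpf_object__open_mem() is what generated
 * skeletons use under the hood; a hand-rolled equivalent could embed the
 * object file and open it from memory (symbol names below are hypothetical,
 * e.g. produced by objcopy or `ld -r -b binary`):
 *
 *	extern const char _binary_prog_bpf_o_start[];
 *	extern const char _binary_prog_bpf_o_end[];
 *	size_t sz = _binary_prog_bpf_o_end - _binary_prog_bpf_o_start;
 *	struct bpf_object *obj =
 *		bpf_object__open_mem(_binary_prog_bpf_o_start, sz, NULL);
 */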
7792
7793 static int bpf_object_unload(struct bpf_object *obj)
7794 {
7795 size_t i;
7796
7797 if (!obj)
7798 return libbpf_err(-EINVAL);
7799
7800 for (i = 0; i < obj->nr_maps; i++) {
7801 zclose(obj->maps[i].fd);
7802 if (obj->maps[i].st_ops)
7803 zfree(&obj->maps[i].st_ops->kern_vdata);
7804 }
7805
7806 for (i = 0; i < obj->nr_programs; i++)
7807 bpf_program__unload(&obj->programs[i]);
7808
7809 return 0;
7810 }
7811
7812 static int bpf_object__sanitize_maps(struct bpf_object *obj)
7813 {
7814 struct bpf_map *m;
7815
7816 bpf_object__for_each_map(m, obj) {
7817 if (!bpf_map__is_internal(m))
7818 continue;
7819 if (!kernel_supports(obj, FEAT_ARRAY_MMAP))
7820 m->def.map_flags &= ~BPF_F_MMAPABLE;
7821 }
7822
7823 return 0;
7824 }
7825
7826 int libbpf_kallsyms_parse(kallsyms_cb_t cb, void *ctx)
7827 {
7828 char sym_type, sym_name[500];
7829 unsigned long long sym_addr;
7830 int ret, err = 0;
7831 FILE *f;
7832
7833 f = fopen("/proc/kallsyms", "re");
7834 if (!f) {
7835 err = -errno;
7836 pr_warn("failed to open /proc/kallsyms: %d\n", err);
7837 return err;
7838 }
7839
7840 while (true) {
7841 ret = fscanf(f, "%llx %c %499s%*[^\n]\n",
7842 &sym_addr, &sym_type, sym_name);
7843 if (ret == EOF && feof(f))
7844 break;
7845 if (ret != 3) {
7846 pr_warn("failed to read kallsyms entry: %d\n", ret);
7847 err = -EINVAL;
7848 break;
7849 }
7850
7851 err = cb(sym_addr, sym_type, sym_name, ctx);
7852 if (err)
7853 break;
7854 }
7855
7856 fclose(f);
7857 return err;
7858 }
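
/* Usage sketch (illustrative): a callback that records the address of one
 * symbol; the symbol name is just an example.
 *
 *	static int find_sym(unsigned long long addr, char type,
 *			    const char *name, void *ctx)
 *	{
 *		if (strcmp(name, "bpf_prog_load") == 0)
 *			*(unsigned long long *)ctx = addr;
 *		return 0;
 *	}
 *
 *	unsigned long long addr = 0;
 *	libbpf_kallsyms_parse(find_sym, &addr);
 */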
7859
7860 static int kallsyms_cb(unsigned long long sym_addr, char sym_type,
7861 const char *sym_name, void *ctx)
7862 {
7863 struct bpf_object *obj = ctx;
7864 const struct btf_type *t;
7865 struct extern_desc *ext;
7866
7867 ext = find_extern_by_name(obj, sym_name);
7868 if (!ext || ext->type != EXT_KSYM)
7869 return 0;
7870
7871 t = btf__type_by_id(obj->btf, ext->btf_id);
7872 if (!btf_is_var(t))
7873 return 0;
7874
7875 if (ext->is_set && ext->ksym.addr != sym_addr) {
7876 pr_warn("extern (ksym) '%s': resolution is ambiguous: 0x%llx or 0x%llx\n",
7877 sym_name, ext->ksym.addr, sym_addr);
7878 return -EINVAL;
7879 }
7880 if (!ext->is_set) {
7881 ext->is_set = true;
7882 ext->ksym.addr = sym_addr;
7883 pr_debug("extern (ksym) '%s': set to 0x%llx\n", sym_name, sym_addr);
7884 }
7885 return 0;
7886 }
7887
7888 static int bpf_object__read_kallsyms_file(struct bpf_object *obj)
7889 {
7890 return libbpf_kallsyms_parse(kallsyms_cb, obj);
7891 }
7892
7893 static int find_ksym_btf_id(struct bpf_object *obj, const char *ksym_name,
7894 __u16 kind, struct btf **res_btf,
7895 struct module_btf **res_mod_btf)
7896 {
7897 struct module_btf *mod_btf;
7898 struct btf *btf;
7899 int i, id, err;
7900
7901 btf = obj->btf_vmlinux;
7902 mod_btf = NULL;
7903 id = btf__find_by_name_kind(btf, ksym_name, kind);
7904
7905 if (id == -ENOENT) {
7906 err = load_module_btfs(obj);
7907 if (err)
7908 return err;
7909
7910 for (i = 0; i < obj->btf_module_cnt; i++) {
7911 /* we assume module_btf's BTF FD is always >0 */
7912 mod_btf = &obj->btf_modules[i];
7913 btf = mod_btf->btf;
7914 id = btf__find_by_name_kind_own(btf, ksym_name, kind);
7915 if (id != -ENOENT)
7916 break;
7917 }
7918 }
7919 if (id <= 0)
7920 return -ESRCH;
7921
7922 *res_btf = btf;
7923 *res_mod_btf = mod_btf;
7924 return id;
7925 }
7926
7927 static int bpf_object__resolve_ksym_var_btf_id(struct bpf_object *obj,
7928 struct extern_desc *ext)
7929 {
7930 const struct btf_type *targ_var, *targ_type;
7931 __u32 targ_type_id, local_type_id;
7932 struct module_btf *mod_btf = NULL;
7933 const char *targ_var_name;
7934 struct btf *btf = NULL;
7935 int id, err;
7936
7937 id = find_ksym_btf_id(obj, ext->name, BTF_KIND_VAR, &btf, &mod_btf);
7938 if (id < 0) {
7939 if (id == -ESRCH && ext->is_weak)
7940 return 0;
7941 pr_warn("extern (var ksym) '%s': not found in kernel BTF\n",
7942 ext->name);
7943 return id;
7944 }
7945
7946 /* find local type_id */
7947 local_type_id = ext->ksym.type_id;
7948
7949 /* find target type_id */
7950 targ_var = btf__type_by_id(btf, id);
7951 targ_var_name = btf__name_by_offset(btf, targ_var->name_off);
7952 targ_type = skip_mods_and_typedefs(btf, targ_var->type, &targ_type_id);
7953
7954 err = bpf_core_types_are_compat(obj->btf, local_type_id,
7955 btf, targ_type_id);
7956 if (err <= 0) {
7957 const struct btf_type *local_type;
7958 const char *targ_name, *local_name;
7959
7960 local_type = btf__type_by_id(obj->btf, local_type_id);
7961 local_name = btf__name_by_offset(obj->btf, local_type->name_off);
7962 targ_name = btf__name_by_offset(btf, targ_type->name_off);
7963
7964 pr_warn("extern (var ksym) '%s': incompatible types, expected [%d] %s %s, but kernel has [%d] %s %s\n",
7965 ext->name, local_type_id,
7966 btf_kind_str(local_type), local_name, targ_type_id,
7967 btf_kind_str(targ_type), targ_name);
7968 return -EINVAL;
7969 }
7970
7971 ext->is_set = true;
7972 ext->ksym.kernel_btf_obj_fd = mod_btf ? mod_btf->fd : 0;
7973 ext->ksym.kernel_btf_id = id;
7974 pr_debug("extern (var ksym) '%s': resolved to [%d] %s %s\n",
7975 ext->name, id, btf_kind_str(targ_var), targ_var_name);
7976
7977 return 0;
7978 }
7979
7980 static int bpf_object__resolve_ksym_func_btf_id(struct bpf_object *obj,
7981 struct extern_desc *ext)
7982 {
7983 int local_func_proto_id, kfunc_proto_id, kfunc_id;
7984 struct module_btf *mod_btf = NULL;
7985 const struct btf_type *kern_func;
7986 struct btf *kern_btf = NULL;
7987 int ret;
7988
7989 local_func_proto_id = ext->ksym.type_id;
7990
7991 kfunc_id = find_ksym_btf_id(obj, ext->essent_name ?: ext->name, BTF_KIND_FUNC, &kern_btf,
7992 &mod_btf);
7993 if (kfunc_id < 0) {
7994 if (kfunc_id == -ESRCH && ext->is_weak)
7995 return 0;
7996 pr_warn("extern (func ksym) '%s': not found in kernel or module BTFs\n",
7997 ext->name);
7998 return kfunc_id;
7999 }
8000
8001 kern_func = btf__type_by_id(kern_btf, kfunc_id);
8002 kfunc_proto_id = kern_func->type;
8003
8004 ret = bpf_core_types_are_compat(obj->btf, local_func_proto_id,
8005 kern_btf, kfunc_proto_id);
8006 if (ret <= 0) {
8007 if (ext->is_weak)
8008 return 0;
8009
8010 pr_warn("extern (func ksym) '%s': func_proto [%d] incompatible with %s [%d]\n",
8011 ext->name, local_func_proto_id,
8012 mod_btf ? mod_btf->name : "vmlinux", kfunc_proto_id);
8013 return -EINVAL;
8014 }
8015
8016 /* set index for module BTF fd in fd_array, if unset */
8017 if (mod_btf && !mod_btf->fd_array_idx) {
8018 /* insn->off is s16 */
8019 if (obj->fd_array_cnt == INT16_MAX) {
8020 pr_warn("extern (func ksym) '%s': module BTF fd index %d too big to fit in bpf_insn offset\n",
8021 ext->name, mod_btf->fd_array_idx);
8022 return -E2BIG;
8023 }
8024 /* Cannot use index 0 for module BTF fd */
8025 if (!obj->fd_array_cnt)
8026 obj->fd_array_cnt = 1;
8027
8028 ret = libbpf_ensure_mem((void **)&obj->fd_array, &obj->fd_array_cap, sizeof(int),
8029 obj->fd_array_cnt + 1);
8030 if (ret)
8031 return ret;
8032 mod_btf->fd_array_idx = obj->fd_array_cnt;
8033 /* we assume module BTF FD is always >0 */
8034 obj->fd_array[obj->fd_array_cnt++] = mod_btf->fd;
8035 }
8036
8037 ext->is_set = true;
8038 ext->ksym.kernel_btf_id = kfunc_id;
8039 ext->ksym.btf_fd_idx = mod_btf ? mod_btf->fd_array_idx : 0;
8040 /* Also set kernel_btf_obj_fd to make sure that bpf_object__relocate_data()
8041 * populates FD into ld_imm64 insn when it's used to point to kfunc.
8042 * {kernel_btf_id, btf_fd_idx} -> fixup bpf_call.
8043 * {kernel_btf_id, kernel_btf_obj_fd} -> fixup ld_imm64.
8044 */
8045 ext->ksym.kernel_btf_obj_fd = mod_btf ? mod_btf->fd : 0;
8046 pr_debug("extern (func ksym) '%s': resolved to %s [%d]\n",
8047 ext->name, mod_btf ? mod_btf->name : "vmlinux", kfunc_id);
8048
8049 return 0;
8050 }
8051
8052 static int bpf_object__resolve_ksyms_btf_id(struct bpf_object *obj)
8053 {
8054 const struct btf_type *t;
8055 struct extern_desc *ext;
8056 int i, err;
8057
8058 for (i = 0; i < obj->nr_extern; i++) {
8059 ext = &obj->externs[i];
8060 if (ext->type != EXT_KSYM || !ext->ksym.type_id)
8061 continue;
8062
8063 if (obj->gen_loader) {
8064 ext->is_set = true;
8065 ext->ksym.kernel_btf_obj_fd = 0;
8066 ext->ksym.kernel_btf_id = 0;
8067 continue;
8068 }
8069 t = btf__type_by_id(obj->btf, ext->btf_id);
8070 if (btf_is_var(t))
8071 err = bpf_object__resolve_ksym_var_btf_id(obj, ext);
8072 else
8073 err = bpf_object__resolve_ksym_func_btf_id(obj, ext);
8074 if (err)
8075 return err;
8076 }
8077 return 0;
8078 }
8079
8080 static int bpf_object__resolve_externs(struct bpf_object *obj,
8081 const char *extra_kconfig)
8082 {
8083 bool need_config = false, need_kallsyms = false;
8084 bool need_vmlinux_btf = false;
8085 struct extern_desc *ext;
8086 void *kcfg_data = NULL;
8087 int err, i;
8088
8089 if (obj->nr_extern == 0)
8090 return 0;
8091
8092 if (obj->kconfig_map_idx >= 0)
8093 kcfg_data = obj->maps[obj->kconfig_map_idx].mmaped;
8094
8095 for (i = 0; i < obj->nr_extern; i++) {
8096 ext = &obj->externs[i];
8097
8098 if (ext->type == EXT_KSYM) {
8099 if (ext->ksym.type_id)
8100 need_vmlinux_btf = true;
8101 else
8102 need_kallsyms = true;
8103 continue;
8104 } else if (ext->type == EXT_KCFG) {
8105 void *ext_ptr = kcfg_data + ext->kcfg.data_off;
8106 __u64 value = 0;
8107
8108 /* Kconfig externs need actual /proc/config.gz */
8109 if (str_has_pfx(ext->name, "CONFIG_")) {
8110 need_config = true;
8111 continue;
8112 }
8113
8114 /* Virtual kcfg externs are handled specially by libbpf */
8115 if (strcmp(ext->name, "LINUX_KERNEL_VERSION") == 0) {
8116 value = get_kernel_version();
8117 if (!value) {
8118 pr_warn("extern (kcfg) '%s': failed to get kernel version\n", ext->name);
8119 return -EINVAL;
8120 }
8121 } else if (strcmp(ext->name, "LINUX_HAS_BPF_COOKIE") == 0) {
8122 value = kernel_supports(obj, FEAT_BPF_COOKIE);
8123 } else if (strcmp(ext->name, "LINUX_HAS_SYSCALL_WRAPPER") == 0) {
8124 value = kernel_supports(obj, FEAT_SYSCALL_WRAPPER);
8125 } else if (!str_has_pfx(ext->name, "LINUX_") || !ext->is_weak) {
8126 /* Currently libbpf supports only CONFIG_ and LINUX_ prefixed
8127 * __kconfig externs, where LINUX_ ones are virtual and filled out
8128 * by libbpf itself (their values don't come from Kconfig).
8129 * If a LINUX_xxx variable is not recognized by libbpf, but is marked
8130 * __weak, it defaults to zero value, just like for CONFIG_xxx
8131 * externs.
8132 */
8133 pr_warn("extern (kcfg) '%s': unrecognized virtual extern\n", ext->name);
8134 return -EINVAL;
8135 }
8136
8137 err = set_kcfg_value_num(ext, ext_ptr, value);
8138 if (err)
8139 return err;
8140 pr_debug("extern (kcfg) '%s': set to 0x%llx\n",
8141 ext->name, (long long)value);
8142 } else {
8143 pr_warn("extern '%s': unrecognized extern kind\n", ext->name);
8144 return -EINVAL;
8145 }
8146 }
8147 if (need_config && extra_kconfig) {
8148 err = bpf_object__read_kconfig_mem(obj, extra_kconfig, kcfg_data);
8149 if (err)
8150 return -EINVAL;
8151 need_config = false;
8152 for (i = 0; i < obj->nr_extern; i++) {
8153 ext = &obj->externs[i];
8154 if (ext->type == EXT_KCFG && !ext->is_set) {
8155 need_config = true;
8156 break;
8157 }
8158 }
8159 }
8160 if (need_config) {
8161 err = bpf_object__read_kconfig_file(obj, kcfg_data);
8162 if (err)
8163 return -EINVAL;
8164 }
8165 if (need_kallsyms) {
8166 err = bpf_object__read_kallsyms_file(obj);
8167 if (err)
8168 return -EINVAL;
8169 }
8170 if (need_vmlinux_btf) {
8171 err = bpf_object__resolve_ksyms_btf_id(obj);
8172 if (err)
8173 return -EINVAL;
8174 }
8175 for (i = 0; i < obj->nr_extern; i++) {
8176 ext = &obj->externs[i];
8177
8178 if (!ext->is_set && !ext->is_weak) {
8179 pr_warn("extern '%s' (strong): not resolved\n", ext->name);
8180 return -ESRCH;
8181 } else if (!ext->is_set) {
8182 pr_debug("extern '%s' (weak): not resolved, defaulting to zero\n",
8183 ext->name);
8184 }
8185 }
8186
8187 return 0;
8188 }
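
/* For illustration (hedged sketch of the BPF-program side), the externs
 * resolved above are declared in BPF C roughly like:
 *
 *	extern unsigned int LINUX_KERNEL_VERSION __kconfig;
 *	extern int CONFIG_HZ __kconfig __weak;
 *	extern const void bpf_prog_fops __ksym __weak;
 *	extern void bpf_task_release(struct task_struct *p) __ksym __weak;
 *
 * Strong externs that remain unresolved fail the load with -ESRCH; weak ones
 * default to zero, as the loop above warns and logs.
 */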
8189
8190 static void bpf_map_prepare_vdata(const struct bpf_map *map)
8191 {
8192 struct bpf_struct_ops *st_ops;
8193 __u32 i;
8194
8195 st_ops = map->st_ops;
8196 for (i = 0; i < btf_vlen(st_ops->type); i++) {
8197 struct bpf_program *prog = st_ops->progs[i];
8198 void *kern_data;
8199 int prog_fd;
8200
8201 if (!prog)
8202 continue;
8203
8204 prog_fd = bpf_program__fd(prog);
8205 kern_data = st_ops->kern_vdata + st_ops->kern_func_off[i];
8206 *(unsigned long *)kern_data = prog_fd;
8207 }
8208 }
8209
8210 static int bpf_object_prepare_struct_ops(struct bpf_object *obj)
8211 {
8212 struct bpf_map *map;
8213 int i;
8214
8215 for (i = 0; i < obj->nr_maps; i++) {
8216 map = &obj->maps[i];
8217
8218 if (!bpf_map__is_struct_ops(map))
8219 continue;
8220
8221 if (!map->autocreate)
8222 continue;
8223
8224 bpf_map_prepare_vdata(map);
8225 }
8226
8227 return 0;
8228 }
8229
8230 static int bpf_object_load(struct bpf_object *obj, int extra_log_level, const char *target_btf_path)
8231 {
8232 int err, i;
8233
8234 if (!obj)
8235 return libbpf_err(-EINVAL);
8236
8237 if (obj->loaded) {
8238 pr_warn("object '%s': load can't be attempted twice\n", obj->name);
8239 return libbpf_err(-EINVAL);
8240 }
8241
8242 if (obj->gen_loader)
8243 bpf_gen__init(obj->gen_loader, extra_log_level, obj->nr_programs, obj->nr_maps);
8244
8245 err = bpf_object_prepare_token(obj);
8246 err = err ? : bpf_object__probe_loading(obj);
8247 err = err ? : bpf_object__load_vmlinux_btf(obj, false);
8248 err = err ? : bpf_object__resolve_externs(obj, obj->kconfig);
8249 err = err ? : bpf_object__sanitize_maps(obj);
8250 err = err ? : bpf_object__init_kern_struct_ops_maps(obj);
8251 err = err ? : bpf_object_adjust_struct_ops_autoload(obj);
8252 err = err ? : bpf_object__relocate(obj, obj->btf_custom_path ? : target_btf_path);
8253 err = err ? : bpf_object__sanitize_and_load_btf(obj);
8254 err = err ? : bpf_object__create_maps(obj);
8255 err = err ? : bpf_object__load_progs(obj, extra_log_level);
8256 err = err ? : bpf_object_init_prog_arrays(obj);
8257 err = err ? : bpf_object_prepare_struct_ops(obj);
8258
8259 if (obj->gen_loader) {
8260 /* reset FDs */
8261 if (obj->btf)
8262 btf__set_fd(obj->btf, -1);
8263 if (!err)
8264 err = bpf_gen__finish(obj->gen_loader, obj->nr_programs, obj->nr_maps);
8265 }
8266
8267 /* clean up fd_array */
8268 zfree(&obj->fd_array);
8269
8270 /* clean up module BTFs */
8271 for (i = 0; i < obj->btf_module_cnt; i++) {
8272 close(obj->btf_modules[i].fd);
8273 btf__free(obj->btf_modules[i].btf);
8274 free(obj->btf_modules[i].name);
8275 }
8276 free(obj->btf_modules);
8277
8278 /* clean up vmlinux BTF */
8279 btf__free(obj->btf_vmlinux);
8280 obj->btf_vmlinux = NULL;
8281
8282 obj->loaded = true; /* whether the load succeeded or not */
8283
8284 if (err)
8285 goto out;
8286
8287 return 0;
8288 out:
8289 /* unpin any maps that were auto-pinned during load */
8290 for (i = 0; i < obj->nr_maps; i++)
8291 if (obj->maps[i].pinned && !obj->maps[i].reused)
8292 bpf_map__unpin(&obj->maps[i], NULL);
8293
8294 bpf_object_unload(obj);
8295 pr_warn("failed to load object '%s'\n", obj->path);
8296 return libbpf_err(err);
8297 }
8298
8299 int bpf_object__load(struct bpf_object *obj)
8300 {
8301 return bpf_object_load(obj, 0, NULL);
8302 }
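
/* Typical lifecycle from the caller's point of view (illustrative sketch;
 * the object path and program name are hypothetical):
 *
 *	struct bpf_object *obj = bpf_object__open_file("prog.bpf.o", NULL);
 *	if (!obj || bpf_object__load(obj))
 *		goto err;
 *	struct bpf_program *p = bpf_object__find_program_by_name(obj, "handler");
 *	struct bpf_link *link = bpf_program__attach(p);
 *	...
 *	bpf_link__destroy(link);
 *	bpf_object__close(obj);
 */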
8303
8304 static int make_parent_dir(const char *path)
8305 {
8306 char *cp, errmsg[STRERR_BUFSIZE];
8307 char *dname, *dir;
8308 int err = 0;
8309
8310 dname = strdup(path);
8311 if (dname == NULL)
8312 return -ENOMEM;
8313
8314 dir = dirname(dname);
8315 if (mkdir(dir, 0700) && errno != EEXIST)
8316 err = -errno;
8317
8318 free(dname);
8319 if (err) {
8320 cp = libbpf_strerror_r(-err, errmsg, sizeof(errmsg));
8321 pr_warn("failed to mkdir %s: %s\n", path, cp);
8322 }
8323 return err;
8324 }
8325
8326 static int check_path(const char *path)
8327 {
8328 char *cp, errmsg[STRERR_BUFSIZE];
8329 struct statfs st_fs;
8330 char *dname, *dir;
8331 int err = 0;
8332
8333 if (path == NULL)
8334 return -EINVAL;
8335
8336 dname = strdup(path);
8337 if (dname == NULL)
8338 return -ENOMEM;
8339
8340 dir = dirname(dname);
8341 if (statfs(dir, &st_fs)) {
8342 cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg));
8343 pr_warn("failed to statfs %s: %s\n", dir, cp);
8344 err = -errno;
8345 }
8346 free(dname);
8347
8348 if (!err && st_fs.f_type != BPF_FS_MAGIC) {
8349 pr_warn("specified path %s is not on BPF FS\n", path);
8350 err = -EINVAL;
8351 }
8352
8353 return err;
8354 }
8355
8356 int bpf_program__pin(struct bpf_program *prog, const char *path)
8357 {
8358 char *cp, errmsg[STRERR_BUFSIZE];
8359 int err;
8360
8361 if (prog->fd < 0) {
8362 pr_warn("prog '%s': can't pin program that wasn't loaded\n", prog->name);
8363 return libbpf_err(-EINVAL);
8364 }
8365
8366 err = make_parent_dir(path);
8367 if (err)
8368 return libbpf_err(err);
8369
8370 err = check_path(path);
8371 if (err)
8372 return libbpf_err(err);
8373
8374 if (bpf_obj_pin(prog->fd, path)) {
8375 err = -errno;
8376 cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg));
8377 pr_warn("prog '%s': failed to pin at '%s': %s\n", prog->name, path, cp);
8378 return libbpf_err(err);
8379 }
8380
8381 pr_debug("prog '%s': pinned at '%s'\n", prog->name, path);
8382 return 0;
8383 }
8384
8385 int bpf_program__unpin(struct bpf_program *prog, const char *path)
8386 {
8387 int err;
8388
8389 if (prog->fd < 0) {
8390 pr_warn("prog '%s': can't unpin program that wasn't loaded\n", prog->name);
8391 return libbpf_err(-EINVAL);
8392 }
8393
8394 err = check_path(path);
8395 if (err)
8396 return libbpf_err(err);
8397
8398 err = unlink(path);
8399 if (err)
8400 return libbpf_err(-errno);
8401
8402 pr_debug("prog '%s': unpinned from '%s'\n", prog->name, path);
8403 return 0;
8404 }
8405
8406 int bpf_map__pin(struct bpf_map *map, const char *path)
8407 {
8408 char *cp, errmsg[STRERR_BUFSIZE];
8409 int err;
8410
8411 if (map == NULL) {
8412 pr_warn("invalid map pointer\n");
8413 return libbpf_err(-EINVAL);
8414 }
8415
8416 if (map->pin_path) {
8417 if (path && strcmp(path, map->pin_path)) {
8418 pr_warn("map '%s' already has pin path '%s' different from '%s'\n",
8419 bpf_map__name(map), map->pin_path, path);
8420 return libbpf_err(-EINVAL);
8421 } else if (map->pinned) {
8422 pr_debug("map '%s' already pinned at '%s'; not re-pinning\n",
8423 bpf_map__name(map), map->pin_path);
8424 return 0;
8425 }
8426 } else {
8427 if (!path) {
8428 pr_warn("missing a path to pin map '%s' at\n",
8429 bpf_map__name(map));
8430 return libbpf_err(-EINVAL);
8431 } else if (map->pinned) {
8432 pr_warn("map '%s' already pinned\n", bpf_map__name(map));
8433 return libbpf_err(-EEXIST);
8434 }
8435
8436 map->pin_path = strdup(path);
8437 if (!map->pin_path) {
8438 err = -errno;
8439 goto out_err;
8440 }
8441 }
8442
8443 err = make_parent_dir(map->pin_path);
8444 if (err)
8445 return libbpf_err(err);
8446
8447 err = check_path(map->pin_path);
8448 if (err)
8449 return libbpf_err(err);
8450
8451 if (bpf_obj_pin(map->fd, map->pin_path)) {
8452 err = -errno;
8453 goto out_err;
8454 }
8455
8456 map->pinned = true;
8457 pr_debug("pinned map '%s'\n", map->pin_path);
8458
8459 return 0;
8460
8461 out_err:
8462 cp = libbpf_strerror_r(-err, errmsg, sizeof(errmsg));
8463 pr_warn("failed to pin map: %s\n", cp);
8464 return libbpf_err(err);
8465 }
8466
8467 int bpf_map__unpin(struct bpf_map *map, const char *path)
8468 {
8469 int err;
8470
8471 if (map == NULL) {
8472 pr_warn("invalid map pointer\n");
8473 return libbpf_err(-EINVAL);
8474 }
8475
8476 if (map->pin_path) {
8477 if (path && strcmp(path, map->pin_path)) {
8478 pr_warn("map '%s' already has pin path '%s' different from '%s'\n",
8479 bpf_map__name(map), map->pin_path, path);
8480 return libbpf_err(-EINVAL);
8481 }
8482 path = map->pin_path;
8483 } else if (!path) {
8484 pr_warn("no path to unpin map '%s' from\n",
8485 bpf_map__name(map));
8486 return libbpf_err(-EINVAL);
8487 }
8488
8489 err = check_path(path);
8490 if (err)
8491 return libbpf_err(err);
8492
8493 err = unlink(path);
8494 if (err != 0)
8495 return libbpf_err(-errno);
8496
8497 map->pinned = false;
8498 pr_debug("unpinned map '%s' from '%s'\n", bpf_map__name(map), path);
8499
8500 return 0;
8501 }
8502
8503 int bpf_map__set_pin_path(struct bpf_map *map, const char *path)
8504 {
8505 char *new = NULL;
8506
8507 if (path) {
8508 new = strdup(path);
8509 if (!new)
8510 return libbpf_err(-errno);
8511 }
8512
8513 free(map->pin_path);
8514 map->pin_path = new;
8515 return 0;
8516 }
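
/* Usage sketch (illustrative; the path is hypothetical): setting a pin path
 * before load makes map creation reuse an already-pinned map at that
 * location, or pin the newly created one there:
 *
 *	struct bpf_map *m = bpf_object__find_map_by_name(obj, "my_map");
 *	bpf_map__set_pin_path(m, "/sys/fs/bpf/my_map");
 *	bpf_object__load(obj);
 */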
8517
8518 __alias(bpf_map__pin_path)
8519 const char *bpf_map__get_pin_path(const struct bpf_map *map);
8520
8521 const char *bpf_map__pin_path(const struct bpf_map *map)
8522 {
8523 return map->pin_path;
8524 }
8525
8526 bool bpf_map__is_pinned(const struct bpf_map *map)
8527 {
8528 return map->pinned;
8529 }
8530
8531 static void sanitize_pin_path(char *s)
8532 {
8533 /* bpffs disallows periods in path names */
8534 while (*s) {
8535 if (*s == '.')
8536 *s = '_';
8537 s++;
8538 }
8539 }
8540
8541 int bpf_object__pin_maps(struct bpf_object *obj, const char *path)
8542 {
8543 struct bpf_map *map;
8544 int err;
8545
8546 if (!obj)
8547 return libbpf_err(-ENOENT);
8548
8549 if (!obj->loaded) {
8550 pr_warn("object not yet loaded; load it first\n");
8551 return libbpf_err(-ENOENT);
8552 }
8553
8554 bpf_object__for_each_map(map, obj) {
8555 char *pin_path = NULL;
8556 char buf[PATH_MAX];
8557
8558 if (!map->autocreate)
8559 continue;
8560
8561 if (path) {
8562 err = pathname_concat(buf, sizeof(buf), path, bpf_map__name(map));
8563 if (err)
8564 goto err_unpin_maps;
8565 sanitize_pin_path(buf);
8566 pin_path = buf;
8567 } else if (!map->pin_path) {
8568 continue;
8569 }
8570
8571 err = bpf_map__pin(map, pin_path);
8572 if (err)
8573 goto err_unpin_maps;
8574 }
8575
8576 return 0;
8577
8578 err_unpin_maps:
8579 while ((map = bpf_object__prev_map(obj, map))) {
8580 if (!map->pin_path)
8581 continue;
8582
8583 bpf_map__unpin(map, NULL);
8584 }
8585
8586 return libbpf_err(err);
8587 }
8588
8589 int bpf_object__unpin_maps(struct bpf_object *obj, const char *path)
8590 {
8591 struct bpf_map *map;
8592 int err;
8593
8594 if (!obj)
8595 return libbpf_err(-ENOENT);
8596
8597 bpf_object__for_each_map(map, obj) {
8598 char *pin_path = NULL;
8599 char buf[PATH_MAX];
8600
8601 if (path) {
8602 err = pathname_concat(buf, sizeof(buf), path, bpf_map__name(map));
8603 if (err)
8604 return libbpf_err(err);
8605 sanitize_pin_path(buf);
8606 pin_path = buf;
8607 } else if (!map->pin_path) {
8608 continue;
8609 }
8610
8611 err = bpf_map__unpin(map, pin_path);
8612 if (err)
8613 return libbpf_err(err);
8614 }
8615
8616 return 0;
8617 }
8618
8619 int bpf_object__pin_programs(struct bpf_object *obj, const char *path)
8620 {
8621 struct bpf_program *prog;
8622 char buf[PATH_MAX];
8623 int err;
8624
8625 if (!obj)
8626 return libbpf_err(-ENOENT);
8627
8628 if (!obj->loaded) {
8629 pr_warn("object not yet loaded; load it first\n");
8630 return libbpf_err(-ENOENT);
8631 }
8632
8633 bpf_object__for_each_program(prog, obj) {
8634 err = pathname_concat(buf, sizeof(buf), path, prog->name);
8635 if (err)
8636 goto err_unpin_programs;
8637
8638 err = bpf_program__pin(prog, buf);
8639 if (err)
8640 goto err_unpin_programs;
8641 }
8642
8643 return 0;
8644
8645 err_unpin_programs:
8646 while ((prog = bpf_object__prev_program(obj, prog))) {
8647 if (pathname_concat(buf, sizeof(buf), path, prog->name))
8648 continue;
8649
8650 bpf_program__unpin(prog, buf);
8651 }
8652
8653 return libbpf_err(err);
8654 }
8655
8656 int bpf_object__unpin_programs(struct bpf_object *obj, const char *path)
8657 {
8658 struct bpf_program *prog;
8659 int err;
8660
8661 if (!obj)
8662 return libbpf_err(-ENOENT);
8663
8664 bpf_object__for_each_program(prog, obj) {
8665 char buf[PATH_MAX];
8666
8667 err = pathname_concat(buf, sizeof(buf), path, prog->name);
8668 if (err)
8669 return libbpf_err(err);
8670
8671 err = bpf_program__unpin(prog, buf);
8672 if (err)
8673 return libbpf_err(err);
8674 }
8675
8676 return 0;
8677 }
8678
8679 int bpf_object__pin(struct bpf_object *obj, const char *path)
8680 {
8681 int err;
8682
8683 err = bpf_object__pin_maps(obj, path);
8684 if (err)
8685 return libbpf_err(err);
8686
8687 err = bpf_object__pin_programs(obj, path);
8688 if (err) {
8689 bpf_object__unpin_maps(obj, path);
8690 return libbpf_err(err);
8691 }
8692
8693 return 0;
8694 }
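
/* Usage sketch (illustrative; the bpffs directory and error helper are
 * hypothetical):
 *
 *	if (bpf_object__pin(obj, "/sys/fs/bpf/myapp"))
 *		handle_error();
 *
 * Maps end up at /sys/fs/bpf/myapp/<map_name> (periods sanitized to '_'),
 * programs at /sys/fs/bpf/myapp/<prog_name>; on failure, anything pinned so
 * far is unpinned again before the error is returned, as implemented above.
 */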
8695
8696 int bpf_object__unpin(struct bpf_object *obj, const char *path)
8697 {
8698 int err;
8699
8700 err = bpf_object__unpin_programs(obj, path);
8701 if (err)
8702 return libbpf_err(err);
8703
8704 err = bpf_object__unpin_maps(obj, path);
8705 if (err)
8706 return libbpf_err(err);
8707
8708 return 0;
8709 }
8710
8711 static void bpf_map__destroy(struct bpf_map *map)
8712 {
8713 if (map->inner_map) {
8714 bpf_map__destroy(map->inner_map);
8715 zfree(&map->inner_map);
8716 }
8717
8718 zfree(&map->init_slots);
8719 map->init_slots_sz = 0;
8720
8721 if (map->mmaped) {
8722 size_t mmap_sz;
8723
8724 mmap_sz = bpf_map_mmap_sz(map->def.value_size, map->def.max_entries);
8725 munmap(map->mmaped, mmap_sz);
8726 map->mmaped = NULL;
8727 }
8728
8729 if (map->st_ops) {
8730 zfree(&map->st_ops->data);
8731 zfree(&map->st_ops->progs);
8732 zfree(&map->st_ops->kern_func_off);
8733 zfree(&map->st_ops);
8734 }
8735
8736 zfree(&map->name);
8737 zfree(&map->real_name);
8738 zfree(&map->pin_path);
8739
8740 if (map->fd >= 0)
8741 zclose(map->fd);
8742 }
8743
8744 void bpf_object__close(struct bpf_object *obj)
8745 {
8746 size_t i;
8747
8748 if (IS_ERR_OR_NULL(obj))
8749 return;
8750
8751 usdt_manager_free(obj->usdt_man);
8752 obj->usdt_man = NULL;
8753
8754 bpf_gen__free(obj->gen_loader);
8755 bpf_object__elf_finish(obj);
8756 bpf_object_unload(obj);
8757 btf__free(obj->btf);
8758 btf__free(obj->btf_vmlinux);
8759 btf_ext__free(obj->btf_ext);
8760
8761 for (i = 0; i < obj->nr_maps; i++)
8762 bpf_map__destroy(&obj->maps[i]);
8763
8764 zfree(&obj->btf_custom_path);
8765 zfree(&obj->kconfig);
8766
8767 for (i = 0; i < obj->nr_extern; i++)
8768 zfree(&obj->externs[i].essent_name);
8769
8770 zfree(&obj->externs);
8771 obj->nr_extern = 0;
8772
8773 zfree(&obj->maps);
8774 obj->nr_maps = 0;
8775
8776 if (obj->programs && obj->nr_programs) {
8777 for (i = 0; i < obj->nr_programs; i++)
8778 bpf_program__exit(&obj->programs[i]);
8779 }
8780 zfree(&obj->programs);
8781
8782 zfree(&obj->feat_cache);
8783 zfree(&obj->token_path);
8784 if (obj->token_fd > 0)
8785 close(obj->token_fd);
8786
8787 free(obj);
8788 }
8789
8790 const char *bpf_object__name(const struct bpf_object *obj)
8791 {
8792 return obj ? obj->name : libbpf_err_ptr(-EINVAL);
8793 }
8794
8795 unsigned int bpf_object__kversion(const struct bpf_object *obj)
8796 {
8797 return obj ? obj->kern_version : 0;
8798 }
8799
8800 struct btf *bpf_object__btf(const struct bpf_object *obj)
8801 {
8802 return obj ? obj->btf : NULL;
8803 }
8804
8805 int bpf_object__btf_fd(const struct bpf_object *obj)
8806 {
8807 return obj->btf ? btf__fd(obj->btf) : -1;
8808 }
8809
8810 int bpf_object__set_kversion(struct bpf_object *obj, __u32 kern_version)
8811 {
8812 if (obj->loaded)
8813 return libbpf_err(-EINVAL);
8814
8815 obj->kern_version = kern_version;
8816
8817 return 0;
8818 }
8819
8820 int bpf_object__gen_loader(struct bpf_object *obj, struct gen_loader_opts *opts)
8821 {
8822 struct bpf_gen *gen;
8823
8824 if (!opts)
8825 return -EFAULT;
8826 if (!OPTS_VALID(opts, gen_loader_opts))
8827 return -EINVAL;
8828 gen = calloc(sizeof(*gen), 1);
8829 if (!gen)
8830 return -ENOMEM;
8831 gen->opts = opts;
8832 obj->gen_loader = gen;
8833 return 0;
8834 }
8835
8836 static struct bpf_program *
8837 __bpf_program__iter(const struct bpf_program *p, const struct bpf_object *obj,
8838 bool forward)
8839 {
8840 size_t nr_programs = obj->nr_programs;
8841 ssize_t idx;
8842
8843 if (!nr_programs)
8844 return NULL;
8845
8846 if (!p)
8847 /* Iter from the beginning */
8848 return forward ? &obj->programs[0] :
8849 &obj->programs[nr_programs - 1];
8850
8851 if (p->obj != obj) {
8852 pr_warn("error: program handler doesn't match object\n");
8853 return errno = EINVAL, NULL;
8854 }
8855
8856 idx = (p - obj->programs) + (forward ? 1 : -1);
8857 if (idx >= obj->nr_programs || idx < 0)
8858 return NULL;
8859 return &obj->programs[idx];
8860 }
8861
8862 struct bpf_program *
8863 bpf_object__next_program(const struct bpf_object *obj, struct bpf_program *prev)
8864 {
8865 struct bpf_program *prog = prev;
8866
8867 do {
8868 prog = __bpf_program__iter(prog, obj, true);
8869 } while (prog && prog_is_subprog(obj, prog));
8870
8871 return prog;
8872 }
8873
8874 struct bpf_program *
8875 bpf_object__prev_program(const struct bpf_object *obj, struct bpf_program *next)
8876 {
8877 struct bpf_program *prog = next;
8878
8879 do {
8880 prog = __bpf_program__iter(prog, obj, false);
8881 } while (prog && prog_is_subprog(obj, prog));
8882
8883 return prog;
8884 }
8885
8886 void bpf_program__set_ifindex(struct bpf_program *prog, __u32 ifindex)
8887 {
8888 prog->prog_ifindex = ifindex;
8889 }
8890
8891 const char *bpf_program__name(const struct bpf_program *prog)
8892 {
8893 return prog->name;
8894 }
8895
8896 const char *bpf_program__section_name(const struct bpf_program *prog)
8897 {
8898 return prog->sec_name;
8899 }
8900
8901 bool bpf_program__autoload(const struct bpf_program *prog)
8902 {
8903 return prog->autoload;
8904 }
8905
8906 int bpf_program__set_autoload(struct bpf_program *prog, bool autoload)
8907 {
8908 if (prog->obj->loaded)
8909 return libbpf_err(-EINVAL);
8910
8911 prog->autoload = autoload;
8912 return 0;
8913 }
8914
8915 bool bpf_program__autoattach(const struct bpf_program *prog)
8916 {
8917 return prog->autoattach;
8918 }
8919
8920 void bpf_program__set_autoattach(struct bpf_program *prog, bool autoattach)
8921 {
8922 prog->autoattach = autoattach;
8923 }
8924
8925 const struct bpf_insn *bpf_program__insns(const struct bpf_program *prog)
8926 {
8927 return prog->insns;
8928 }
8929
8930 size_t bpf_program__insn_cnt(const struct bpf_program *prog)
8931 {
8932 return prog->insns_cnt;
8933 }
8934
8935 int bpf_program__set_insns(struct bpf_program *prog,
8936 struct bpf_insn *new_insns, size_t new_insn_cnt)
8937 {
8938 struct bpf_insn *insns;
8939
8940 if (prog->obj->loaded)
8941 return -EBUSY;
8942
8943 insns = libbpf_reallocarray(prog->insns, new_insn_cnt, sizeof(*insns));
8944 /* NULL is a valid return from reallocarray if the new count is zero */
8945 if (!insns && new_insn_cnt) {
8946 pr_warn("prog '%s': failed to realloc prog code\n", prog->name);
8947 return -ENOMEM;
8948 }
8949 memcpy(insns, new_insns, new_insn_cnt * sizeof(*insns));
8950
8951 prog->insns = insns;
8952 prog->insns_cnt = new_insn_cnt;
8953 return 0;
8954 }
8955
8956 int bpf_program__fd(const struct bpf_program *prog)
8957 {
8958 if (!prog)
8959 return libbpf_err(-EINVAL);
8960
8961 if (prog->fd < 0)
8962 return libbpf_err(-ENOENT);
8963
8964 return prog->fd;
8965 }
8966
8967 __alias(bpf_program__type)
8968 enum bpf_prog_type bpf_program__get_type(const struct bpf_program *prog);
8969
8970 enum bpf_prog_type bpf_program__type(const struct bpf_program *prog)
8971 {
8972 return prog->type;
8973 }
8974
8975 static size_t custom_sec_def_cnt;
8976 static struct bpf_sec_def *custom_sec_defs;
8977 static struct bpf_sec_def custom_fallback_def;
8978 static bool has_custom_fallback_def;
8979 static int last_custom_sec_def_handler_id;
8980
8981 int bpf_program__set_type(struct bpf_program *prog, enum bpf_prog_type type)
8982 {
8983 if (prog->obj->loaded)
8984 return libbpf_err(-EBUSY);
8985
8986 /* if type is not changed, do nothing */
8987 if (prog->type == type)
8988 return 0;
8989
8990 prog->type = type;
8991
8992 /* If a program type was changed, we need to reset associated SEC()
8993 * handler, as it will be invalid now. The only exception is a generic
8994 * fallback handler, which by definition is program type-agnostic and
8995 * is a catch-all custom handler, optionally set by the application,
8996 * so should be able to handle any type of BPF program.
8997 */
8998 if (prog->sec_def != &custom_fallback_def)
8999 prog->sec_def = NULL;
9000 return 0;
9001 }
9002
9003 __alias(bpf_program__expected_attach_type)
9004 enum bpf_attach_type bpf_program__get_expected_attach_type(const struct bpf_program *prog);
9005
9006 enum bpf_attach_type bpf_program__expected_attach_type(const struct bpf_program *prog)
9007 {
9008 return prog->expected_attach_type;
9009 }
9010
9011 int bpf_program__set_expected_attach_type(struct bpf_program *prog,
9012 enum bpf_attach_type type)
9013 {
9014 if (prog->obj->loaded)
9015 return libbpf_err(-EBUSY);
9016
9017 prog->expected_attach_type = type;
9018 return 0;
9019 }
9020
9021 __u32 bpf_program__flags(const struct bpf_program *prog)
9022 {
9023 return prog->prog_flags;
9024 }
9025
9026 int bpf_program__set_flags(struct bpf_program *prog, __u32 flags)
9027 {
9028 if (prog->obj->loaded)
9029 return libbpf_err(-EBUSY);
9030
9031 prog->prog_flags = flags;
9032 return 0;
9033 }
9034
9035 __u32 bpf_program__log_level(const struct bpf_program *prog)
9036 {
9037 return prog->log_level;
9038 }
9039
9040 int bpf_program__set_log_level(struct bpf_program *prog, __u32 log_level)
9041 {
9042 if (prog->obj->loaded)
9043 return libbpf_err(-EBUSY);
9044
9045 prog->log_level = log_level;
9046 return 0;
9047 }
9048
9049 const char *bpf_program__log_buf(const struct bpf_program *prog, size_t *log_size)
9050 {
9051 *log_size = prog->log_size;
9052 return prog->log_buf;
9053 }
9054
9055 int bpf_program__set_log_buf(struct bpf_program *prog, char *log_buf, size_t log_size)
9056 {
9057 if (log_size && !log_buf)
9058 return -EINVAL;
9059 if (prog->log_size > UINT_MAX)
9060 return -EINVAL;
9061 if (prog->obj->loaded)
9062 return -EBUSY;
9063
9064 prog->log_buf = log_buf;
9065 prog->log_size = log_size;
9066 return 0;
9067 }
9068
9069 #define SEC_DEF(sec_pfx, ptype, atype, flags, ...) { \
9070 .sec = (char *)sec_pfx, \
9071 .prog_type = BPF_PROG_TYPE_##ptype, \
9072 .expected_attach_type = atype, \
9073 .cookie = (long)(flags), \
9074 .prog_prepare_load_fn = libbpf_prepare_prog_load, \
9075 __VA_ARGS__ \
9076 }
9077
9078 static int attach_kprobe(const struct bpf_program *prog, long cookie, struct bpf_link **link);
9079 static int attach_uprobe(const struct bpf_program *prog, long cookie, struct bpf_link **link);
9080 static int attach_ksyscall(const struct bpf_program *prog, long cookie, struct bpf_link **link);
9081 static int attach_usdt(const struct bpf_program *prog, long cookie, struct bpf_link **link);
9082 static int attach_tp(const struct bpf_program *prog, long cookie, struct bpf_link **link);
9083 static int attach_raw_tp(const struct bpf_program *prog, long cookie, struct bpf_link **link);
9084 static int attach_trace(const struct bpf_program *prog, long cookie, struct bpf_link **link);
9085 static int attach_kprobe_multi(const struct bpf_program *prog, long cookie, struct bpf_link **link);
9086 static int attach_uprobe_multi(const struct bpf_program *prog, long cookie, struct bpf_link **link);
9087 static int attach_lsm(const struct bpf_program *prog, long cookie, struct bpf_link **link);
9088 static int attach_iter(const struct bpf_program *prog, long cookie, struct bpf_link **link);
9089
9090 static const struct bpf_sec_def section_defs[] = {
9091 SEC_DEF("socket", SOCKET_FILTER, 0, SEC_NONE),
9092 SEC_DEF("sk_reuseport/migrate", SK_REUSEPORT, BPF_SK_REUSEPORT_SELECT_OR_MIGRATE, SEC_ATTACHABLE),
9093 SEC_DEF("sk_reuseport", SK_REUSEPORT, BPF_SK_REUSEPORT_SELECT, SEC_ATTACHABLE),
9094 SEC_DEF("kprobe+", KPROBE, 0, SEC_NONE, attach_kprobe),
9095 SEC_DEF("uprobe+", KPROBE, 0, SEC_NONE, attach_uprobe),
9096 SEC_DEF("uprobe.s+", KPROBE, 0, SEC_SLEEPABLE, attach_uprobe),
9097 SEC_DEF("kretprobe+", KPROBE, 0, SEC_NONE, attach_kprobe),
9098 SEC_DEF("uretprobe+", KPROBE, 0, SEC_NONE, attach_uprobe),
9099 SEC_DEF("uretprobe.s+", KPROBE, 0, SEC_SLEEPABLE, attach_uprobe),
9100 SEC_DEF("kprobe.multi+", KPROBE, BPF_TRACE_KPROBE_MULTI, SEC_NONE, attach_kprobe_multi),
9101 SEC_DEF("kretprobe.multi+", KPROBE, BPF_TRACE_KPROBE_MULTI, SEC_NONE, attach_kprobe_multi),
9102 SEC_DEF("uprobe.multi+", KPROBE, BPF_TRACE_UPROBE_MULTI, SEC_NONE, attach_uprobe_multi),
9103 SEC_DEF("uretprobe.multi+", KPROBE, BPF_TRACE_UPROBE_MULTI, SEC_NONE, attach_uprobe_multi),
9104 SEC_DEF("uprobe.multi.s+", KPROBE, BPF_TRACE_UPROBE_MULTI, SEC_SLEEPABLE, attach_uprobe_multi),
9105 SEC_DEF("uretprobe.multi.s+", KPROBE, BPF_TRACE_UPROBE_MULTI, SEC_SLEEPABLE, attach_uprobe_multi),
9106 SEC_DEF("ksyscall+", KPROBE, 0, SEC_NONE, attach_ksyscall),
9107 SEC_DEF("kretsyscall+", KPROBE, 0, SEC_NONE, attach_ksyscall),
9108 SEC_DEF("usdt+", KPROBE, 0, SEC_USDT, attach_usdt),
9109 SEC_DEF("usdt.s+", KPROBE, 0, SEC_USDT | SEC_SLEEPABLE, attach_usdt),
9110 SEC_DEF("tc/ingress", SCHED_CLS, BPF_TCX_INGRESS, SEC_NONE), /* alias for tcx */
9111 SEC_DEF("tc/egress", SCHED_CLS, BPF_TCX_EGRESS, SEC_NONE), /* alias for tcx */
9112 SEC_DEF("tcx/ingress", SCHED_CLS, BPF_TCX_INGRESS, SEC_NONE),
9113 SEC_DEF("tcx/egress", SCHED_CLS, BPF_TCX_EGRESS, SEC_NONE),
9114 SEC_DEF("tc", SCHED_CLS, 0, SEC_NONE), /* deprecated / legacy, use tcx */
9115 SEC_DEF("classifier", SCHED_CLS, 0, SEC_NONE), /* deprecated / legacy, use tcx */
9116 SEC_DEF("action", SCHED_ACT, 0, SEC_NONE), /* deprecated / legacy, use tcx */
9117 SEC_DEF("netkit/primary", SCHED_CLS, BPF_NETKIT_PRIMARY, SEC_NONE),
9118 SEC_DEF("netkit/peer", SCHED_CLS, BPF_NETKIT_PEER, SEC_NONE),
9119 SEC_DEF("tracepoint+", TRACEPOINT, 0, SEC_NONE, attach_tp),
9120 SEC_DEF("tp+", TRACEPOINT, 0, SEC_NONE, attach_tp),
9121 SEC_DEF("raw_tracepoint+", RAW_TRACEPOINT, 0, SEC_NONE, attach_raw_tp),
9122 SEC_DEF("raw_tp+", RAW_TRACEPOINT, 0, SEC_NONE, attach_raw_tp),
9123 SEC_DEF("raw_tracepoint.w+", RAW_TRACEPOINT_WRITABLE, 0, SEC_NONE, attach_raw_tp),
9124 SEC_DEF("raw_tp.w+", RAW_TRACEPOINT_WRITABLE, 0, SEC_NONE, attach_raw_tp),
9125 SEC_DEF("tp_btf+", TRACING, BPF_TRACE_RAW_TP, SEC_ATTACH_BTF, attach_trace),
9126 SEC_DEF("fentry+", TRACING, BPF_TRACE_FENTRY, SEC_ATTACH_BTF, attach_trace),
9127 SEC_DEF("fmod_ret+", TRACING, BPF_MODIFY_RETURN, SEC_ATTACH_BTF, attach_trace),
9128 SEC_DEF("fexit+", TRACING, BPF_TRACE_FEXIT, SEC_ATTACH_BTF, attach_trace),
9129 SEC_DEF("fentry.s+", TRACING, BPF_TRACE_FENTRY, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_trace),
9130 SEC_DEF("fmod_ret.s+", TRACING, BPF_MODIFY_RETURN, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_trace),
9131 SEC_DEF("fexit.s+", TRACING, BPF_TRACE_FEXIT, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_trace),
9132 SEC_DEF("freplace+", EXT, 0, SEC_ATTACH_BTF, attach_trace),
9133 SEC_DEF("lsm+", LSM, BPF_LSM_MAC, SEC_ATTACH_BTF, attach_lsm),
9134 SEC_DEF("lsm.s+", LSM, BPF_LSM_MAC, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_lsm),
9135 SEC_DEF("lsm_cgroup+", LSM, BPF_LSM_CGROUP, SEC_ATTACH_BTF),
9136 SEC_DEF("iter+", TRACING, BPF_TRACE_ITER, SEC_ATTACH_BTF, attach_iter),
9137 SEC_DEF("iter.s+", TRACING, BPF_TRACE_ITER, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_iter),
9138 SEC_DEF("syscall", SYSCALL, 0, SEC_SLEEPABLE),
9139 SEC_DEF("xdp.frags/devmap", XDP, BPF_XDP_DEVMAP, SEC_XDP_FRAGS),
9140 SEC_DEF("xdp/devmap", XDP, BPF_XDP_DEVMAP, SEC_ATTACHABLE),
9141 SEC_DEF("xdp.frags/cpumap", XDP, BPF_XDP_CPUMAP, SEC_XDP_FRAGS),
9142 SEC_DEF("xdp/cpumap", XDP, BPF_XDP_CPUMAP, SEC_ATTACHABLE),
9143 SEC_DEF("xdp.frags", XDP, BPF_XDP, SEC_XDP_FRAGS),
9144 SEC_DEF("xdp", XDP, BPF_XDP, SEC_ATTACHABLE_OPT),
9145 SEC_DEF("perf_event", PERF_EVENT, 0, SEC_NONE),
9146 SEC_DEF("lwt_in", LWT_IN, 0, SEC_NONE),
9147 SEC_DEF("lwt_out", LWT_OUT, 0, SEC_NONE),
9148 SEC_DEF("lwt_xmit", LWT_XMIT, 0, SEC_NONE),
9149 SEC_DEF("lwt_seg6local", LWT_SEG6LOCAL, 0, SEC_NONE),
9150 SEC_DEF("sockops", SOCK_OPS, BPF_CGROUP_SOCK_OPS, SEC_ATTACHABLE_OPT),
9151 SEC_DEF("sk_skb/stream_parser", SK_SKB, BPF_SK_SKB_STREAM_PARSER, SEC_ATTACHABLE_OPT),
9152 SEC_DEF("sk_skb/stream_verdict",SK_SKB, BPF_SK_SKB_STREAM_VERDICT, SEC_ATTACHABLE_OPT),
9153 SEC_DEF("sk_skb", SK_SKB, 0, SEC_NONE),
9154 SEC_DEF("sk_msg", SK_MSG, BPF_SK_MSG_VERDICT, SEC_ATTACHABLE_OPT),
9155 SEC_DEF("lirc_mode2", LIRC_MODE2, BPF_LIRC_MODE2, SEC_ATTACHABLE_OPT),
9156 SEC_DEF("flow_dissector", FLOW_DISSECTOR, BPF_FLOW_DISSECTOR, SEC_ATTACHABLE_OPT),
9157 SEC_DEF("cgroup_skb/ingress", CGROUP_SKB, BPF_CGROUP_INET_INGRESS, SEC_ATTACHABLE_OPT),
9158 SEC_DEF("cgroup_skb/egress", CGROUP_SKB, BPF_CGROUP_INET_EGRESS, SEC_ATTACHABLE_OPT),
9159 SEC_DEF("cgroup/skb", CGROUP_SKB, 0, SEC_NONE),
9160 SEC_DEF("cgroup/sock_create", CGROUP_SOCK, BPF_CGROUP_INET_SOCK_CREATE, SEC_ATTACHABLE),
9161 SEC_DEF("cgroup/sock_release", CGROUP_SOCK, BPF_CGROUP_INET_SOCK_RELEASE, SEC_ATTACHABLE),
9162 SEC_DEF("cgroup/sock", CGROUP_SOCK, BPF_CGROUP_INET_SOCK_CREATE, SEC_ATTACHABLE_OPT),
9163 SEC_DEF("cgroup/post_bind4", CGROUP_SOCK, BPF_CGROUP_INET4_POST_BIND, SEC_ATTACHABLE),
9164 SEC_DEF("cgroup/post_bind6", CGROUP_SOCK, BPF_CGROUP_INET6_POST_BIND, SEC_ATTACHABLE),
9165 SEC_DEF("cgroup/bind4", CGROUP_SOCK_ADDR, BPF_CGROUP_INET4_BIND, SEC_ATTACHABLE),
9166 SEC_DEF("cgroup/bind6", CGROUP_SOCK_ADDR, BPF_CGROUP_INET6_BIND, SEC_ATTACHABLE),
9167 SEC_DEF("cgroup/connect4", CGROUP_SOCK_ADDR, BPF_CGROUP_INET4_CONNECT, SEC_ATTACHABLE),
9168 SEC_DEF("cgroup/connect6", CGROUP_SOCK_ADDR, BPF_CGROUP_INET6_CONNECT, SEC_ATTACHABLE),
9169 SEC_DEF("cgroup/connect_unix", CGROUP_SOCK_ADDR, BPF_CGROUP_UNIX_CONNECT, SEC_ATTACHABLE),
9170 SEC_DEF("cgroup/sendmsg4", CGROUP_SOCK_ADDR, BPF_CGROUP_UDP4_SENDMSG, SEC_ATTACHABLE),
9171 SEC_DEF("cgroup/sendmsg6", CGROUP_SOCK_ADDR, BPF_CGROUP_UDP6_SENDMSG, SEC_ATTACHABLE),
9172 SEC_DEF("cgroup/sendmsg_unix", CGROUP_SOCK_ADDR, BPF_CGROUP_UNIX_SENDMSG, SEC_ATTACHABLE),
9173 SEC_DEF("cgroup/recvmsg4", CGROUP_SOCK_ADDR, BPF_CGROUP_UDP4_RECVMSG, SEC_ATTACHABLE),
9174 SEC_DEF("cgroup/recvmsg6", CGROUP_SOCK_ADDR, BPF_CGROUP_UDP6_RECVMSG, SEC_ATTACHABLE),
9175 SEC_DEF("cgroup/recvmsg_unix", CGROUP_SOCK_ADDR, BPF_CGROUP_UNIX_RECVMSG, SEC_ATTACHABLE),
9176 SEC_DEF("cgroup/getpeername4", CGROUP_SOCK_ADDR, BPF_CGROUP_INET4_GETPEERNAME, SEC_ATTACHABLE),
9177 SEC_DEF("cgroup/getpeername6", CGROUP_SOCK_ADDR, BPF_CGROUP_INET6_GETPEERNAME, SEC_ATTACHABLE),
9178 SEC_DEF("cgroup/getpeername_unix", CGROUP_SOCK_ADDR, BPF_CGROUP_UNIX_GETPEERNAME, SEC_ATTACHABLE),
9179 SEC_DEF("cgroup/getsockname4", CGROUP_SOCK_ADDR, BPF_CGROUP_INET4_GETSOCKNAME, SEC_ATTACHABLE),
9180 SEC_DEF("cgroup/getsockname6", CGROUP_SOCK_ADDR, BPF_CGROUP_INET6_GETSOCKNAME, SEC_ATTACHABLE),
9181 SEC_DEF("cgroup/getsockname_unix", CGROUP_SOCK_ADDR, BPF_CGROUP_UNIX_GETSOCKNAME, SEC_ATTACHABLE),
9182 SEC_DEF("cgroup/sysctl", CGROUP_SYSCTL, BPF_CGROUP_SYSCTL, SEC_ATTACHABLE),
9183 SEC_DEF("cgroup/getsockopt", CGROUP_SOCKOPT, BPF_CGROUP_GETSOCKOPT, SEC_ATTACHABLE),
9184 SEC_DEF("cgroup/setsockopt", CGROUP_SOCKOPT, BPF_CGROUP_SETSOCKOPT, SEC_ATTACHABLE),
9185 SEC_DEF("cgroup/dev", CGROUP_DEVICE, BPF_CGROUP_DEVICE, SEC_ATTACHABLE_OPT),
9186 SEC_DEF("struct_ops+", STRUCT_OPS, 0, SEC_NONE),
9187 SEC_DEF("struct_ops.s+", STRUCT_OPS, 0, SEC_SLEEPABLE),
9188 SEC_DEF("sk_lookup", SK_LOOKUP, BPF_SK_LOOKUP, SEC_ATTACHABLE),
9189 SEC_DEF("netfilter", NETFILTER, BPF_NETFILTER, SEC_NONE),
9190 };
9191
9192 int libbpf_register_prog_handler(const char *sec,
9193 enum bpf_prog_type prog_type,
9194 enum bpf_attach_type exp_attach_type,
9195 const struct libbpf_prog_handler_opts *opts)
9196 {
9197 struct bpf_sec_def *sec_def;
9198
9199 if (!OPTS_VALID(opts, libbpf_prog_handler_opts))
9200 return libbpf_err(-EINVAL);
9201
9202 if (last_custom_sec_def_handler_id == INT_MAX) /* prevent overflow */
9203 return libbpf_err(-E2BIG);
9204
9205 if (sec) {
9206 sec_def = libbpf_reallocarray(custom_sec_defs, custom_sec_def_cnt + 1,
9207 sizeof(*sec_def));
9208 if (!sec_def)
9209 return libbpf_err(-ENOMEM);
9210
9211 custom_sec_defs = sec_def;
9212 sec_def = &custom_sec_defs[custom_sec_def_cnt];
9213 } else {
9214 if (has_custom_fallback_def)
9215 return libbpf_err(-EBUSY);
9216
9217 sec_def = &custom_fallback_def;
9218 }
9219
9220 sec_def->sec = sec ? strdup(sec) : NULL;
9221 if (sec && !sec_def->sec)
9222 return libbpf_err(-ENOMEM);
9223
9224 sec_def->prog_type = prog_type;
9225 sec_def->expected_attach_type = exp_attach_type;
9226 sec_def->cookie = OPTS_GET(opts, cookie, 0);
9227
9228 sec_def->prog_setup_fn = OPTS_GET(opts, prog_setup_fn, NULL);
9229 sec_def->prog_prepare_load_fn = OPTS_GET(opts, prog_prepare_load_fn, NULL);
9230 sec_def->prog_attach_fn = OPTS_GET(opts, prog_attach_fn, NULL);
9231
9232 sec_def->handler_id = ++last_custom_sec_def_handler_id;
9233
9234 if (sec)
9235 custom_sec_def_cnt++;
9236 else
9237 has_custom_fallback_def = true;
9238
9239 return sec_def->handler_id;
9240 }
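/* Editorial usage sketch for libbpf_register_prog_handler(): the section name
 * "my_sec+" and the cookie value are hypothetical, not libbpf defaults.
 *
 *	LIBBPF_OPTS(libbpf_prog_handler_opts, handler_opts, .cookie = 1);
 *	int id;
 *
 *	id = libbpf_register_prog_handler("my_sec+", BPF_PROG_TYPE_KPROBE,
 *					  0, &handler_opts);
 *	if (id < 0)
 *		... handle error ...
 *	...
 *	libbpf_unregister_prog_handler(id);
 *
 * Passing sec == NULL registers the single custom fallback handler used when
 * no other section definition matches (see has_custom_fallback_def).
 */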
9241
9242 int libbpf_unregister_prog_handler(int handler_id)
9243 {
9244 struct bpf_sec_def *sec_defs;
9245 int i;
9246
9247 if (handler_id <= 0)
9248 return libbpf_err(-EINVAL);
9249
9250 if (has_custom_fallback_def && custom_fallback_def.handler_id == handler_id) {
9251 memset(&custom_fallback_def, 0, sizeof(custom_fallback_def));
9252 has_custom_fallback_def = false;
9253 return 0;
9254 }
9255
9256 for (i = 0; i < custom_sec_def_cnt; i++) {
9257 if (custom_sec_defs[i].handler_id == handler_id)
9258 break;
9259 }
9260
9261 if (i == custom_sec_def_cnt)
9262 return libbpf_err(-ENOENT);
9263
9264 free(custom_sec_defs[i].sec);
9265 for (i = i + 1; i < custom_sec_def_cnt; i++)
9266 custom_sec_defs[i - 1] = custom_sec_defs[i];
9267 custom_sec_def_cnt--;
9268
9269 /* try to shrink the array, but it's ok if we couldn't */
9270 sec_defs = libbpf_reallocarray(custom_sec_defs, custom_sec_def_cnt, sizeof(*sec_defs));
9271 /* if new count is zero, reallocarray can return a valid NULL result;
9272 * in this case the previous pointer will be freed, so we *have to*
9273 * reassign old pointer to the new value (even if it's NULL)
9274 */
9275 if (sec_defs || custom_sec_def_cnt == 0)
9276 custom_sec_defs = sec_defs;
9277
9278 return 0;
9279 }
9280
9281 static bool sec_def_matches(const struct bpf_sec_def *sec_def, const char *sec_name)
9282 {
9283 size_t len = strlen(sec_def->sec);
9284
9285 /* "type/" always has to have proper SEC("type/extras") form */
9286 if (sec_def->sec[len - 1] == '/') {
9287 if (str_has_pfx(sec_name, sec_def->sec))
9288 return true;
9289 return false;
9290 }
9291
9292 /* "type+" means it can be either exact SEC("type") or
9293 * well-formed SEC("type/extras") with proper '/' separator
9294 */
9295 if (sec_def->sec[len - 1] == '+') {
9296 len--;
9297 /* not even a prefix */
9298 if (strncmp(sec_name, sec_def->sec, len) != 0)
9299 return false;
9300 /* exact match or has '/' separator */
9301 if (sec_name[len] == '\0' || sec_name[len] == '/')
9302 return true;
9303 return false;
9304 }
9305
9306 return strcmp(sec_name, sec_def->sec) == 0;
9307 }
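/* Editorial examples of the matching rules above (the "do_unlinkat" extras
 * below are illustrative):
 *   - a definition ending in '/' (e.g. "type/") matches only "type/extras";
 *   - "fentry+" matches both "fentry" and "fentry/do_unlinkat", but not
 *     "fentryfoo";
 *   - "xdp" (no suffix) matches only the exact string "xdp".
 */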
9308
9309 static const struct bpf_sec_def *find_sec_def(const char *sec_name)
9310 {
9311 const struct bpf_sec_def *sec_def;
9312 int i, n;
9313
9314 n = custom_sec_def_cnt;
9315 for (i = 0; i < n; i++) {
9316 sec_def = &custom_sec_defs[i];
9317 if (sec_def_matches(sec_def, sec_name))
9318 return sec_def;
9319 }
9320
9321 n = ARRAY_SIZE(section_defs);
9322 for (i = 0; i < n; i++) {
9323 sec_def = &section_defs[i];
9324 if (sec_def_matches(sec_def, sec_name))
9325 return sec_def;
9326 }
9327
9328 if (has_custom_fallback_def)
9329 return &custom_fallback_def;
9330
9331 return NULL;
9332 }
9333
9334 #define MAX_TYPE_NAME_SIZE 32
9335
9336 static char *libbpf_get_type_names(bool attach_type)
9337 {
9338 int i, len = ARRAY_SIZE(section_defs) * MAX_TYPE_NAME_SIZE;
9339 char *buf;
9340
9341 buf = malloc(len);
9342 if (!buf)
9343 return NULL;
9344
9345 buf[0] = '\0';
9346 /* Fill buf with all available section names */
9347 for (i = 0; i < ARRAY_SIZE(section_defs); i++) {
9348 const struct bpf_sec_def *sec_def = &section_defs[i];
9349
9350 if (attach_type) {
9351 if (sec_def->prog_prepare_load_fn != libbpf_prepare_prog_load)
9352 continue;
9353
9354 if (!(sec_def->cookie & SEC_ATTACHABLE))
9355 continue;
9356 }
9357
9358 if (strlen(buf) + strlen(section_defs[i].sec) + 2 > len) {
9359 free(buf);
9360 return NULL;
9361 }
9362 strcat(buf, " ");
9363 strcat(buf, section_defs[i].sec);
9364 }
9365
9366 return buf;
9367 }
9368
9369 int libbpf_prog_type_by_name(const char *name, enum bpf_prog_type *prog_type,
9370 enum bpf_attach_type *expected_attach_type)
9371 {
9372 const struct bpf_sec_def *sec_def;
9373 char *type_names;
9374
9375 if (!name)
9376 return libbpf_err(-EINVAL);
9377
9378 sec_def = find_sec_def(name);
9379 if (sec_def) {
9380 *prog_type = sec_def->prog_type;
9381 *expected_attach_type = sec_def->expected_attach_type;
9382 return 0;
9383 }
9384
9385 pr_debug("failed to guess program type from ELF section '%s'\n", name);
9386 type_names = libbpf_get_type_names(false);
9387 if (type_names != NULL) {
9388 pr_debug("supported section(type) names are:%s\n", type_names);
9389 free(type_names);
9390 }
9391
9392 return libbpf_err(-ESRCH);
9393 }
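/* Editorial usage sketch (the section name below is hypothetical):
 *
 *	enum bpf_prog_type prog_type;
 *	enum bpf_attach_type attach_type;
 *
 *	if (!libbpf_prog_type_by_name("fentry/do_unlinkat", &prog_type, &attach_type)) {
 *		... prog_type == BPF_PROG_TYPE_TRACING,
 *		    attach_type == BPF_TRACE_FENTRY ...
 *	}
 *
 * On no match the function returns -ESRCH and logs the list of supported
 * section names at debug level.
 */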
9394
9395 const char *libbpf_bpf_attach_type_str(enum bpf_attach_type t)
9396 {
9397 if (t < 0 || t >= ARRAY_SIZE(attach_type_name))
9398 return NULL;
9399
9400 return attach_type_name[t];
9401 }
9402
9403 const char *libbpf_bpf_link_type_str(enum bpf_link_type t)
9404 {
9405 if (t < 0 || t >= ARRAY_SIZE(link_type_name))
9406 return NULL;
9407
9408 return link_type_name[t];
9409 }
9410
9411 const char *libbpf_bpf_map_type_str(enum bpf_map_type t)
9412 {
9413 if (t < 0 || t >= ARRAY_SIZE(map_type_name))
9414 return NULL;
9415
9416 return map_type_name[t];
9417 }
9418
9419 const char *libbpf_bpf_prog_type_str(enum bpf_prog_type t)
9420 {
9421 if (t < 0 || t >= ARRAY_SIZE(prog_type_name))
9422 return NULL;
9423
9424 return prog_type_name[t];
9425 }
9426
9427 static struct bpf_map *find_struct_ops_map_by_offset(struct bpf_object *obj,
9428 int sec_idx,
9429 size_t offset)
9430 {
9431 struct bpf_map *map;
9432 size_t i;
9433
9434 for (i = 0; i < obj->nr_maps; i++) {
9435 map = &obj->maps[i];
9436 if (!bpf_map__is_struct_ops(map))
9437 continue;
9438 if (map->sec_idx == sec_idx &&
9439 map->sec_offset <= offset &&
9440 offset - map->sec_offset < map->def.value_size)
9441 return map;
9442 }
9443
9444 return NULL;
9445 }
9446
9447 /* Collect the relocations from ELF, populate st_ops->progs[], and update
9448 * st_ops->data for the shadow type.
9449 */
9450 static int bpf_object__collect_st_ops_relos(struct bpf_object *obj,
9451 Elf64_Shdr *shdr, Elf_Data *data)
9452 {
9453 const struct btf_member *member;
9454 struct bpf_struct_ops *st_ops;
9455 struct bpf_program *prog;
9456 unsigned int shdr_idx;
9457 const struct btf *btf;
9458 struct bpf_map *map;
9459 unsigned int moff, insn_idx;
9460 const char *name;
9461 __u32 member_idx;
9462 Elf64_Sym *sym;
9463 Elf64_Rel *rel;
9464 int i, nrels;
9465
9466 btf = obj->btf;
9467 nrels = shdr->sh_size / shdr->sh_entsize;
9468 for (i = 0; i < nrels; i++) {
9469 rel = elf_rel_by_idx(data, i);
9470 if (!rel) {
9471 pr_warn("struct_ops reloc: failed to get %d reloc\n", i);
9472 return -LIBBPF_ERRNO__FORMAT;
9473 }
9474
9475 sym = elf_sym_by_idx(obj, ELF64_R_SYM(rel->r_info));
9476 if (!sym) {
9477 pr_warn("struct_ops reloc: symbol %zx not found\n",
9478 (size_t)ELF64_R_SYM(rel->r_info));
9479 return -LIBBPF_ERRNO__FORMAT;
9480 }
9481
9482 name = elf_sym_str(obj, sym->st_name) ?: "<?>";
9483 map = find_struct_ops_map_by_offset(obj, shdr->sh_info, rel->r_offset);
9484 if (!map) {
9485 pr_warn("struct_ops reloc: cannot find map at rel->r_offset %zu\n",
9486 (size_t)rel->r_offset);
9487 return -EINVAL;
9488 }
9489
9490 moff = rel->r_offset - map->sec_offset;
9491 shdr_idx = sym->st_shndx;
9492 st_ops = map->st_ops;
9493 pr_debug("struct_ops reloc %s: for %lld value %lld shdr_idx %u rel->r_offset %zu map->sec_offset %zu name %d (\'%s\')\n",
9494 map->name,
9495 (long long)(rel->r_info >> 32),
9496 (long long)sym->st_value,
9497 shdr_idx, (size_t)rel->r_offset,
9498 map->sec_offset, sym->st_name, name);
9499
9500 if (shdr_idx >= SHN_LORESERVE) {
9501 pr_warn("struct_ops reloc %s: rel->r_offset %zu shdr_idx %u unsupported non-static function\n",
9502 map->name, (size_t)rel->r_offset, shdr_idx);
9503 return -LIBBPF_ERRNO__RELOC;
9504 }
9505 if (sym->st_value % BPF_INSN_SZ) {
9506 pr_warn("struct_ops reloc %s: invalid target program offset %llu\n",
9507 map->name, (unsigned long long)sym->st_value);
9508 return -LIBBPF_ERRNO__FORMAT;
9509 }
9510 insn_idx = sym->st_value / BPF_INSN_SZ;
9511
9512 member = find_member_by_offset(st_ops->type, moff * 8);
9513 if (!member) {
9514 pr_warn("struct_ops reloc %s: cannot find member at moff %u\n",
9515 map->name, moff);
9516 return -EINVAL;
9517 }
9518 member_idx = member - btf_members(st_ops->type);
9519 name = btf__name_by_offset(btf, member->name_off);
9520
9521 if (!resolve_func_ptr(btf, member->type, NULL)) {
9522 pr_warn("struct_ops reloc %s: cannot relocate non func ptr %s\n",
9523 map->name, name);
9524 return -EINVAL;
9525 }
9526
9527 prog = find_prog_by_sec_insn(obj, shdr_idx, insn_idx);
9528 if (!prog) {
9529 pr_warn("struct_ops reloc %s: cannot find prog at shdr_idx %u to relocate func ptr %s\n",
9530 map->name, shdr_idx, name);
9531 return -EINVAL;
9532 }
9533
9534 /* prevent the use of BPF prog with invalid type */
9535 if (prog->type != BPF_PROG_TYPE_STRUCT_OPS) {
9536 pr_warn("struct_ops reloc %s: prog %s is not struct_ops BPF program\n",
9537 map->name, prog->name);
9538 return -EINVAL;
9539 }
9540
9541 st_ops->progs[member_idx] = prog;
9542
9543 /* st_ops->data will be exposed to users, being returned by
9544 * bpf_map__initial_value() as a pointer to the shadow
9545 * type. All function pointers in the original struct type
9546 * should be converted to a pointer to struct bpf_program
9547 * in the shadow type.
9548 */
9549 *((struct bpf_program **)(st_ops->data + moff)) = prog;
9550 }
9551
9552 return 0;
9553 }
9554
9555 #define BTF_TRACE_PREFIX "btf_trace_"
9556 #define BTF_LSM_PREFIX "bpf_lsm_"
9557 #define BTF_ITER_PREFIX "bpf_iter_"
9558 #define BTF_MAX_NAME_SIZE 128
9559
9560 void btf_get_kernel_prefix_kind(enum bpf_attach_type attach_type,
9561 const char **prefix, int *kind)
9562 {
9563 switch (attach_type) {
9564 case BPF_TRACE_RAW_TP:
9565 *prefix = BTF_TRACE_PREFIX;
9566 *kind = BTF_KIND_TYPEDEF;
9567 break;
9568 case BPF_LSM_MAC:
9569 case BPF_LSM_CGROUP:
9570 *prefix = BTF_LSM_PREFIX;
9571 *kind = BTF_KIND_FUNC;
9572 break;
9573 case BPF_TRACE_ITER:
9574 *prefix = BTF_ITER_PREFIX;
9575 *kind = BTF_KIND_FUNC;
9576 break;
9577 default:
9578 *prefix = "";
9579 *kind = BTF_KIND_FUNC;
9580 }
9581 }
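/* Editorial examples of the prefix/kind pairs above (attach names are
 * hypothetical):
 *   BPF_TRACE_RAW_TP + "sched_switch" -> TYPEDEF "btf_trace_sched_switch"
 *   BPF_LSM_MAC      + "file_open"    -> FUNC    "bpf_lsm_file_open"
 *   BPF_TRACE_ITER   + "task"         -> FUNC    "bpf_iter_task"
 *   anything else    + "do_unlinkat"  -> FUNC    "do_unlinkat" (no prefix)
 * The concatenated name is then resolved with btf__find_by_name_kind() in
 * find_btf_by_prefix_kind() below.
 */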
9582
9583 static int find_btf_by_prefix_kind(const struct btf *btf, const char *prefix,
9584 const char *name, __u32 kind)
9585 {
9586 char btf_type_name[BTF_MAX_NAME_SIZE];
9587 int ret;
9588
9589 ret = snprintf(btf_type_name, sizeof(btf_type_name),
9590 "%s%s", prefix, name);
9591 /* snprintf returns the number of characters written excluding the
9592 * terminating null. So, if >= BTF_MAX_NAME_SIZE characters are written, it
9593 * indicates truncation.
9594 */
9595 if (ret < 0 || ret >= sizeof(btf_type_name))
9596 return -ENAMETOOLONG;
9597 return btf__find_by_name_kind(btf, btf_type_name, kind);
9598 }
9599
9600 static inline int find_attach_btf_id(struct btf *btf, const char *name,
9601 enum bpf_attach_type attach_type)
9602 {
9603 const char *prefix;
9604 int kind;
9605
9606 btf_get_kernel_prefix_kind(attach_type, &prefix, &kind);
9607 return find_btf_by_prefix_kind(btf, prefix, name, kind);
9608 }
9609
9610 int libbpf_find_vmlinux_btf_id(const char *name,
9611 enum bpf_attach_type attach_type)
9612 {
9613 struct btf *btf;
9614 int err;
9615
9616 btf = btf__load_vmlinux_btf();
9617 err = libbpf_get_error(btf);
9618 if (err) {
9619 pr_warn("vmlinux BTF is not found\n");
9620 return libbpf_err(err);
9621 }
9622
9623 err = find_attach_btf_id(btf, name, attach_type);
9624 if (err <= 0)
9625 pr_warn("%s is not found in vmlinux BTF\n", name);
9626
9627 btf__free(btf);
9628 return libbpf_err(err);
9629 }
9630
9631 static int libbpf_find_prog_btf_id(const char *name, __u32 attach_prog_fd)
9632 {
9633 struct bpf_prog_info info;
9634 __u32 info_len = sizeof(info);
9635 struct btf *btf;
9636 int err;
9637
9638 memset(&info, 0, info_len);
9639 err = bpf_prog_get_info_by_fd(attach_prog_fd, &info, &info_len);
9640 if (err) {
9641 pr_warn("failed bpf_prog_get_info_by_fd for FD %d: %d\n",
9642 attach_prog_fd, err);
9643 return err;
9644 }
9645
9646 err = -EINVAL;
9647 if (!info.btf_id) {
9648 pr_warn("The target program doesn't have BTF\n");
9649 goto out;
9650 }
9651 btf = btf__load_from_kernel_by_id(info.btf_id);
9652 err = libbpf_get_error(btf);
9653 if (err) {
9654 pr_warn("Failed to get BTF %d of the program: %d\n", info.btf_id, err);
9655 goto out;
9656 }
9657 err = btf__find_by_name_kind(btf, name, BTF_KIND_FUNC);
9658 btf__free(btf);
9659 if (err <= 0) {
9660 pr_warn("%s is not found in prog's BTF\n", name);
9661 goto out;
9662 }
9663 out:
9664 return err;
9665 }
9666
9667 static int find_kernel_btf_id(struct bpf_object *obj, const char *attach_name,
9668 enum bpf_attach_type attach_type,
9669 int *btf_obj_fd, int *btf_type_id)
9670 {
9671 int ret, i;
9672
9673 ret = find_attach_btf_id(obj->btf_vmlinux, attach_name, attach_type);
9674 if (ret > 0) {
9675 *btf_obj_fd = 0; /* vmlinux BTF */
9676 *btf_type_id = ret;
9677 return 0;
9678 }
9679 if (ret != -ENOENT)
9680 return ret;
9681
9682 ret = load_module_btfs(obj);
9683 if (ret)
9684 return ret;
9685
9686 for (i = 0; i < obj->btf_module_cnt; i++) {
9687 const struct module_btf *mod = &obj->btf_modules[i];
9688
9689 ret = find_attach_btf_id(mod->btf, attach_name, attach_type);
9690 if (ret > 0) {
9691 *btf_obj_fd = mod->fd;
9692 *btf_type_id = ret;
9693 return 0;
9694 }
9695 if (ret == -ENOENT)
9696 continue;
9697
9698 return ret;
9699 }
9700
9701 return -ESRCH;
9702 }
9703
9704 static int libbpf_find_attach_btf_id(struct bpf_program *prog, const char *attach_name,
9705 int *btf_obj_fd, int *btf_type_id)
9706 {
9707 enum bpf_attach_type attach_type = prog->expected_attach_type;
9708 __u32 attach_prog_fd = prog->attach_prog_fd;
9709 int err = 0;
9710
9711 /* BPF program's BTF ID */
9712 if (prog->type == BPF_PROG_TYPE_EXT || attach_prog_fd) {
9713 if (!attach_prog_fd) {
9714 pr_warn("prog '%s': attach program FD is not set\n", prog->name);
9715 return -EINVAL;
9716 }
9717 err = libbpf_find_prog_btf_id(attach_name, attach_prog_fd);
9718 if (err < 0) {
9719 pr_warn("prog '%s': failed to find BPF program (FD %d) BTF ID for '%s': %d\n",
9720 prog->name, attach_prog_fd, attach_name, err);
9721 return err;
9722 }
9723 *btf_obj_fd = 0;
9724 *btf_type_id = err;
9725 return 0;
9726 }
9727
9728 /* kernel/module BTF ID */
9729 if (prog->obj->gen_loader) {
9730 bpf_gen__record_attach_target(prog->obj->gen_loader, attach_name, attach_type);
9731 *btf_obj_fd = 0;
9732 *btf_type_id = 1;
9733 } else {
9734 err = find_kernel_btf_id(prog->obj, attach_name,
9735 attach_type, btf_obj_fd,
9736 btf_type_id);
9737 }
9738 if (err) {
9739 pr_warn("prog '%s': failed to find kernel BTF type ID of '%s': %d\n",
9740 prog->name, attach_name, err);
9741 return err;
9742 }
9743 return 0;
9744 }
9745
9746 int libbpf_attach_type_by_name(const char *name,
9747 enum bpf_attach_type *attach_type)
9748 {
9749 char *type_names;
9750 const struct bpf_sec_def *sec_def;
9751
9752 if (!name)
9753 return libbpf_err(-EINVAL);
9754
9755 sec_def = find_sec_def(name);
9756 if (!sec_def) {
9757 pr_debug("failed to guess attach type based on ELF section name '%s'\n", name);
9758 type_names = libbpf_get_type_names(true);
9759 if (type_names != NULL) {
9760 pr_debug("attachable section(type) names are:%s\n", type_names);
9761 free(type_names);
9762 }
9763
9764 return libbpf_err(-EINVAL);
9765 }
9766
9767 if (sec_def->prog_prepare_load_fn != libbpf_prepare_prog_load)
9768 return libbpf_err(-EINVAL);
9769 if (!(sec_def->cookie & SEC_ATTACHABLE))
9770 return libbpf_err(-EINVAL);
9771
9772 *attach_type = sec_def->expected_attach_type;
9773 return 0;
9774 }
9775
9776 int bpf_map__fd(const struct bpf_map *map)
9777 {
9778 if (!map)
9779 return libbpf_err(-EINVAL);
9780 if (!map_is_created(map))
9781 return -1;
9782 return map->fd;
9783 }
9784
9785 static bool map_uses_real_name(const struct bpf_map *map)
9786 {
9787 /* Since libbpf started to support custom .data.* and .rodata.* maps,
9788 * their user-visible name differs from kernel-visible name. Users see
9789 * such map's corresponding ELF section name as a map name.
9790 * This check distinguishes .data/.rodata from .data.* and .rodata.*
9791 * maps to know which name has to be returned to the user.
9792 */
9793 if (map->libbpf_type == LIBBPF_MAP_DATA && strcmp(map->real_name, DATA_SEC) != 0)
9794 return true;
9795 if (map->libbpf_type == LIBBPF_MAP_RODATA && strcmp(map->real_name, RODATA_SEC) != 0)
9796 return true;
9797 return false;
9798 }
9799
9800 const char *bpf_map__name(const struct bpf_map *map)
9801 {
9802 if (!map)
9803 return NULL;
9804
9805 if (map_uses_real_name(map))
9806 return map->real_name;
9807
9808 return map->name;
9809 }
9810
9811 enum bpf_map_type bpf_map__type(const struct bpf_map *map)
9812 {
9813 return map->def.type;
9814 }
9815
9816 int bpf_map__set_type(struct bpf_map *map, enum bpf_map_type type)
9817 {
9818 if (map_is_created(map))
9819 return libbpf_err(-EBUSY);
9820 map->def.type = type;
9821 return 0;
9822 }
9823
9824 __u32 bpf_map__map_flags(const struct bpf_map *map)
9825 {
9826 return map->def.map_flags;
9827 }
9828
9829 int bpf_map__set_map_flags(struct bpf_map *map, __u32 flags)
9830 {
9831 if (map_is_created(map))
9832 return libbpf_err(-EBUSY);
9833 map->def.map_flags = flags;
9834 return 0;
9835 }
9836
9837 __u64 bpf_map__map_extra(const struct bpf_map *map)
9838 {
9839 return map->map_extra;
9840 }
9841
9842 int bpf_map__set_map_extra(struct bpf_map *map, __u64 map_extra)
9843 {
9844 if (map_is_created(map))
9845 return libbpf_err(-EBUSY);
9846 map->map_extra = map_extra;
9847 return 0;
9848 }
9849
9850 __u32 bpf_map__numa_node(const struct bpf_map *map)
9851 {
9852 return map->numa_node;
9853 }
9854
9855 int bpf_map__set_numa_node(struct bpf_map *map, __u32 numa_node)
9856 {
9857 if (map_is_created(map))
9858 return libbpf_err(-EBUSY);
9859 map->numa_node = numa_node;
9860 return 0;
9861 }
9862
9863 __u32 bpf_map__key_size(const struct bpf_map *map)
9864 {
9865 return map->def.key_size;
9866 }
9867
9868 int bpf_map__set_key_size(struct bpf_map *map, __u32 size)
9869 {
9870 if (map_is_created(map))
9871 return libbpf_err(-EBUSY);
9872 map->def.key_size = size;
9873 return 0;
9874 }
9875
9876 __u32 bpf_map__value_size(const struct bpf_map *map)
9877 {
9878 return map->def.value_size;
9879 }
9880
9881 static int map_btf_datasec_resize(struct bpf_map *map, __u32 size)
9882 {
9883 struct btf *btf;
9884 struct btf_type *datasec_type, *var_type;
9885 struct btf_var_secinfo *var;
9886 const struct btf_type *array_type;
9887 const struct btf_array *array;
9888 int vlen, element_sz, new_array_id;
9889 __u32 nr_elements;
9890
9891 /* check btf existence */
9892 btf = bpf_object__btf(map->obj);
9893 if (!btf)
9894 return -ENOENT;
9895
9896 /* verify map is datasec */
9897 datasec_type = btf_type_by_id(btf, bpf_map__btf_value_type_id(map));
9898 if (!btf_is_datasec(datasec_type)) {
9899 pr_warn("map '%s': cannot be resized, map value type is not a datasec\n",
9900 bpf_map__name(map));
9901 return -EINVAL;
9902 }
9903
9904 /* verify datasec has at least one var */
9905 vlen = btf_vlen(datasec_type);
9906 if (vlen == 0) {
9907 pr_warn("map '%s': cannot be resized, map value datasec is empty\n",
9908 bpf_map__name(map));
9909 return -EINVAL;
9910 }
9911
9912 /* verify last var in the datasec is an array */
9913 var = &btf_var_secinfos(datasec_type)[vlen - 1];
9914 var_type = btf_type_by_id(btf, var->type);
9915 array_type = skip_mods_and_typedefs(btf, var_type->type, NULL);
9916 if (!btf_is_array(array_type)) {
9917 pr_warn("map '%s': cannot be resized, last var must be an array\n",
9918 bpf_map__name(map));
9919 return -EINVAL;
9920 }
9921
9922 /* verify request size aligns with array */
9923 array = btf_array(array_type);
9924 element_sz = btf__resolve_size(btf, array->type);
9925 if (element_sz <= 0 || (size - var->offset) % element_sz != 0) {
9926 pr_warn("map '%s': cannot be resized, element size (%d) doesn't align with new total size (%u)\n",
9927 bpf_map__name(map), element_sz, size);
9928 return -EINVAL;
9929 }
9930
9931 /* create a new array based on the existing array, but with new length */
9932 nr_elements = (size - var->offset) / element_sz;
9933 new_array_id = btf__add_array(btf, array->index_type, array->type, nr_elements);
9934 if (new_array_id < 0)
9935 return new_array_id;
9936
9937 /* adding a new btf type invalidates existing pointers to btf objects,
9938 * so refresh pointers before proceeding
9939 */
9940 datasec_type = btf_type_by_id(btf, map->btf_value_type_id);
9941 var = &btf_var_secinfos(datasec_type)[vlen - 1];
9942 var_type = btf_type_by_id(btf, var->type);
9943
9944 /* finally update btf info */
9945 datasec_type->size = size;
9946 var->size = size - var->offset;
9947 var_type->type = new_array_id;
9948
9949 return 0;
9950 }
9951
9952 int bpf_map__set_value_size(struct bpf_map *map, __u32 size)
9953 {
9954 if (map->obj->loaded || map->reused)
9955 return libbpf_err(-EBUSY);
9956
9957 if (map->mmaped) {
9958 int err;
9959 size_t mmap_old_sz, mmap_new_sz;
9960
9961 mmap_old_sz = bpf_map_mmap_sz(map->def.value_size, map->def.max_entries);
9962 mmap_new_sz = bpf_map_mmap_sz(size, map->def.max_entries);
9963 err = bpf_map_mmap_resize(map, mmap_old_sz, mmap_new_sz);
9964 if (err) {
9965 pr_warn("map '%s': failed to resize memory-mapped region: %d\n",
9966 bpf_map__name(map), err);
9967 return err;
9968 }
9969 err = map_btf_datasec_resize(map, size);
9970 if (err && err != -ENOENT) {
9971 pr_warn("map '%s': failed to adjust resized BTF, clearing BTF key/value info: %d\n",
9972 bpf_map__name(map), err);
9973 map->btf_value_type_id = 0;
9974 map->btf_key_type_id = 0;
9975 }
9976 }
9977
9978 map->def.value_size = size;
9979 return 0;
9980 }
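/* Editorial usage sketch: growing a global-data map before load. The map name
 * is hypothetical; the datasec's last variable must be an array, as enforced
 * by map_btf_datasec_resize() above.
 *
 *	struct bpf_map *map = bpf_object__find_map_by_name(obj, ".data.my_buf");
 *
 *	if (map)
 *		err = bpf_map__set_value_size(map, 2 * bpf_map__value_size(map));
 *
 * This has to happen after bpf_object__open*() but before bpf_object__load().
 */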
9981
9982 __u32 bpf_map__btf_key_type_id(const struct bpf_map *map)
9983 {
9984 return map ? map->btf_key_type_id : 0;
9985 }
9986
9987 __u32 bpf_map__btf_value_type_id(const struct bpf_map *map)
9988 {
9989 return map ? map->btf_value_type_id : 0;
9990 }
9991
9992 int bpf_map__set_initial_value(struct bpf_map *map,
9993 const void *data, size_t size)
9994 {
9995 if (map->obj->loaded || map->reused)
9996 return libbpf_err(-EBUSY);
9997
9998 if (!map->mmaped || map->libbpf_type == LIBBPF_MAP_KCONFIG ||
9999 size != map->def.value_size)
10000 return libbpf_err(-EINVAL);
10001
10002 memcpy(map->mmaped, data, size);
10003 return 0;
10004 }
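/* Editorial usage sketch: seeding .rodata before load (struct and variable
 * names are hypothetical, e.g. generated by a skeleton):
 *
 *	struct my_rodata cfg = { .debug = 1 };
 *	struct bpf_map *ro = bpf_object__find_map_by_name(obj, ".rodata");
 *
 *	if (ro)
 *		err = bpf_map__set_initial_value(ro, &cfg, sizeof(cfg));
 *
 * The size must equal bpf_map__value_size(), and KCONFIG maps are rejected.
 */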
10005
10006 void *bpf_map__initial_value(struct bpf_map *map, size_t *psize)
10007 {
10008 if (bpf_map__is_struct_ops(map)) {
10009 if (psize)
10010 *psize = map->def.value_size;
10011 return map->st_ops->data;
10012 }
10013
10014 if (!map->mmaped)
10015 return NULL;
10016 if (psize) *psize = map->def.value_size;
10017 return map->mmaped;
10018 }
10019
10020 bool bpf_map__is_internal(const struct bpf_map *map)
10021 {
10022 return map->libbpf_type != LIBBPF_MAP_UNSPEC;
10023 }
10024
10025 __u32 bpf_map__ifindex(const struct bpf_map *map)
10026 {
10027 return map->map_ifindex;
10028 }
10029
10030 int bpf_map__set_ifindex(struct bpf_map *map, __u32 ifindex)
10031 {
10032 if (map_is_created(map))
10033 return libbpf_err(-EBUSY);
10034 map->map_ifindex = ifindex;
10035 return 0;
10036 }
10037
10038 int bpf_map__set_inner_map_fd(struct bpf_map *map, int fd)
10039 {
10040 if (!bpf_map_type__is_map_in_map(map->def.type)) {
10041 pr_warn("error: unsupported map type\n");
10042 return libbpf_err(-EINVAL);
10043 }
10044 if (map->inner_map_fd != -1) {
10045 pr_warn("error: inner_map_fd already specified\n");
10046 return libbpf_err(-EINVAL);
10047 }
10048 if (map->inner_map) {
10049 bpf_map__destroy(map->inner_map);
10050 zfree(&map->inner_map);
10051 }
10052 map->inner_map_fd = fd;
10053 return 0;
10054 }
10055
10056 static struct bpf_map *
10057 __bpf_map__iter(const struct bpf_map *m, const struct bpf_object *obj, int i)
10058 {
10059 ssize_t idx;
10060 struct bpf_map *s, *e;
10061
10062 if (!obj || !obj->maps)
10063 return errno = EINVAL, NULL;
10064
10065 s = obj->maps;
10066 e = obj->maps + obj->nr_maps;
10067
10068 if ((m < s) || (m >= e)) {
10069 pr_warn("error in %s: map handler doesn't belong to object\n",
10070 __func__);
10071 return errno = EINVAL, NULL;
10072 }
10073
10074 idx = (m - obj->maps) + i;
10075 if (idx >= obj->nr_maps || idx < 0)
10076 return NULL;
10077 return &obj->maps[idx];
10078 }
10079
10080 struct bpf_map *
10081 bpf_object__next_map(const struct bpf_object *obj, const struct bpf_map *prev)
10082 {
10083 if (prev == NULL)
10084 return obj->maps;
10085
10086 return __bpf_map__iter(prev, obj, 1);
10087 }
10088
10089 struct bpf_map *
10090 bpf_object__prev_map(const struct bpf_object *obj, const struct bpf_map *next)
10091 {
10092 if (next == NULL) {
10093 if (!obj->nr_maps)
10094 return NULL;
10095 return obj->maps + obj->nr_maps - 1;
10096 }
10097
10098 return __bpf_map__iter(next, obj, -1);
10099 }
10100
10101 struct bpf_map *
10102 bpf_object__find_map_by_name(const struct bpf_object *obj, const char *name)
10103 {
10104 struct bpf_map *pos;
10105
10106 bpf_object__for_each_map(pos, obj) {
10107 /* if it's a special internal map name (which always starts
10108 * with dot) then check if that special name matches the
10109 * real map name (ELF section name)
10110 */
10111 if (name[0] == '.') {
10112 if (pos->real_name && strcmp(pos->real_name, name) == 0)
10113 return pos;
10114 continue;
10115 }
10116 /* otherwise map name has to be an exact match */
10117 if (map_uses_real_name(pos)) {
10118 if (strcmp(pos->real_name, name) == 0)
10119 return pos;
10120 continue;
10121 }
10122 if (strcmp(pos->name, name) == 0)
10123 return pos;
10124 }
10125 return errno = ENOENT, NULL;
10126 }
10127
10128 int
10129 bpf_object__find_map_fd_by_name(const struct bpf_object *obj, const char *name)
10130 {
10131 return bpf_map__fd(bpf_object__find_map_by_name(obj, name));
10132 }
10133
10134 static int validate_map_op(const struct bpf_map *map, size_t key_sz,
10135 size_t value_sz, bool check_value_sz)
10136 {
10137 if (!map_is_created(map)) /* map is not yet created */
10138 return -ENOENT;
10139
10140 if (map->def.key_size != key_sz) {
10141 pr_warn("map '%s': unexpected key size %zu provided, expected %u\n",
10142 map->name, key_sz, map->def.key_size);
10143 return -EINVAL;
10144 }
10145
10146 if (!check_value_sz)
10147 return 0;
10148
10149 switch (map->def.type) {
10150 case BPF_MAP_TYPE_PERCPU_ARRAY:
10151 case BPF_MAP_TYPE_PERCPU_HASH:
10152 case BPF_MAP_TYPE_LRU_PERCPU_HASH:
10153 case BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE: {
10154 int num_cpu = libbpf_num_possible_cpus();
10155 size_t elem_sz = roundup(map->def.value_size, 8);
10156
10157 if (value_sz != num_cpu * elem_sz) {
10158 pr_warn("map '%s': unexpected value size %zu provided for per-CPU map, expected %d * %zu = %zd\n",
10159 map->name, value_sz, num_cpu, elem_sz, num_cpu * elem_sz);
10160 return -EINVAL;
10161 }
10162 break;
10163 }
10164 default:
10165 if (map->def.value_size != value_sz) {
10166 pr_warn("map '%s': unexpected value size %zu provided, expected %u\n",
10167 map->name, value_sz, map->def.value_size);
10168 return -EINVAL;
10169 }
10170 break;
10171 }
10172 return 0;
10173 }
10174
10175 int bpf_map__lookup_elem(const struct bpf_map *map,
10176 const void *key, size_t key_sz,
10177 void *value, size_t value_sz, __u64 flags)
10178 {
10179 int err;
10180
10181 err = validate_map_op(map, key_sz, value_sz, true);
10182 if (err)
10183 return libbpf_err(err);
10184
10185 return bpf_map_lookup_elem_flags(map->fd, key, value, flags);
10186 }
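/* Editorial sketch of the per-CPU case validated above (map and key are
 * hypothetical, the value type is assumed to be __u64):
 *
 *	int ncpus = libbpf_num_possible_cpus();
 *	size_t elem_sz = 8;	(roundup(sizeof(__u64), 8))
 *	__u64 *vals = calloc(ncpus, elem_sz);
 *	__u32 key = 0;
 *
 *	if (vals)
 *		err = bpf_map__lookup_elem(map, &key, sizeof(key),
 *					   vals, ncpus * elem_sz, 0);
 */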
10187
10188 int bpf_map__update_elem(const struct bpf_map *map,
10189 const void *key, size_t key_sz,
10190 const void *value, size_t value_sz, __u64 flags)
10191 {
10192 int err;
10193
10194 err = validate_map_op(map, key_sz, value_sz, true);
10195 if (err)
10196 return libbpf_err(err);
10197
10198 return bpf_map_update_elem(map->fd, key, value, flags);
10199 }
10200
10201 int bpf_map__delete_elem(const struct bpf_map *map,
10202 const void *key, size_t key_sz, __u64 flags)
10203 {
10204 int err;
10205
10206 err = validate_map_op(map, key_sz, 0, false /* check_value_sz */);
10207 if (err)
10208 return libbpf_err(err);
10209
10210 return bpf_map_delete_elem_flags(map->fd, key, flags);
10211 }
10212
10213 int bpf_map__lookup_and_delete_elem(const struct bpf_map *map,
10214 const void *key, size_t key_sz,
10215 void *value, size_t value_sz, __u64 flags)
10216 {
10217 int err;
10218
10219 err = validate_map_op(map, key_sz, value_sz, true);
10220 if (err)
10221 return libbpf_err(err);
10222
10223 return bpf_map_lookup_and_delete_elem_flags(map->fd, key, value, flags);
10224 }
10225
10226 int bpf_map__get_next_key(const struct bpf_map *map,
10227 const void *cur_key, void *next_key, size_t key_sz)
10228 {
10229 int err;
10230
10231 err = validate_map_op(map, key_sz, 0, false /* check_value_sz */);
10232 if (err)
10233 return libbpf_err(err);
10234
10235 return bpf_map_get_next_key(map->fd, cur_key, next_key);
10236 }
10237
10238 long libbpf_get_error(const void *ptr)
10239 {
10240 if (!IS_ERR_OR_NULL(ptr))
10241 return 0;
10242
10243 if (IS_ERR(ptr))
10244 errno = -PTR_ERR(ptr);
10245
10246 /* If ptr == NULL, then errno should be already set by the failing
10247 * API, because libbpf never returns NULL on success and it now always
10248 * sets errno on error. So no extra errno handling for ptr == NULL
10249 * case.
10250 */
10251 return -errno;
10252 }
10253
10254 /* Replace link's underlying BPF program with the new one */
10255 int bpf_link__update_program(struct bpf_link *link, struct bpf_program *prog)
10256 {
10257 int ret;
10258
10259 ret = bpf_link_update(bpf_link__fd(link), bpf_program__fd(prog), NULL);
10260 return libbpf_err_errno(ret);
10261 }
10262
10263 /* Release "ownership" of underlying BPF resource (typically, BPF program
10264 * attached to some BPF hook, e.g., tracepoint, kprobe, etc). A disconnected
10265 * link, when destroyed through a bpf_link__destroy() call, won't attempt to
10266 * detach/unregister that BPF resource. This is useful in situations where,
10267 * say, an attached BPF program has to outlive the userspace program that
10268 * attached it. Depending on the type of BPF program, though, there might be
10269 * additional steps (like pinning the BPF program in BPF FS) necessary to
10270 * ensure that the exit of the userspace program doesn't trigger automatic
10271 * detachment and clean-up inside the kernel.
10272 */
10273 void bpf_link__disconnect(struct bpf_link *link)
10274 {
10275 link->disconnected = true;
10276 }
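/* Editorial usage sketch: keeping an attachment alive after this process
 * exits. Pinning first and then disconnecting is one common pattern (the pin
 * path is hypothetical):
 *
 *	if (!bpf_link__pin(link, "/sys/fs/bpf/my_link"))
 *		bpf_link__disconnect(link);
 *	bpf_link__destroy(link);	(frees memory, no detach happens now)
 */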
10277
10278 int bpf_link__destroy(struct bpf_link *link)
10279 {
10280 int err = 0;
10281
10282 if (IS_ERR_OR_NULL(link))
10283 return 0;
10284
10285 if (!link->disconnected && link->detach)
10286 err = link->detach(link);
10287 if (link->pin_path)
10288 free(link->pin_path);
10289 if (link->dealloc)
10290 link->dealloc(link);
10291 else
10292 free(link);
10293
10294 return libbpf_err(err);
10295 }
10296
10297 int bpf_link__fd(const struct bpf_link *link)
10298 {
10299 return link->fd;
10300 }
10301
10302 const char *bpf_link__pin_path(const struct bpf_link *link)
10303 {
10304 return link->pin_path;
10305 }
10306
10307 static int bpf_link__detach_fd(struct bpf_link *link)
10308 {
10309 return libbpf_err_errno(close(link->fd));
10310 }
10311
10312 struct bpf_link *bpf_link__open(const char *path)
10313 {
10314 struct bpf_link *link;
10315 int fd;
10316
10317 fd = bpf_obj_get(path);
10318 if (fd < 0) {
10319 fd = -errno;
10320 pr_warn("failed to open link at %s: %d\n", path, fd);
10321 return libbpf_err_ptr(fd);
10322 }
10323
10324 link = calloc(1, sizeof(*link));
10325 if (!link) {
10326 close(fd);
10327 return libbpf_err_ptr(-ENOMEM);
10328 }
10329 link->detach = &bpf_link__detach_fd;
10330 link->fd = fd;
10331
10332 link->pin_path = strdup(path);
10333 if (!link->pin_path) {
10334 bpf_link__destroy(link);
10335 return libbpf_err_ptr(-ENOMEM);
10336 }
10337
10338 return link;
10339 }
10340
10341 int bpf_link__detach(struct bpf_link *link)
10342 {
10343 return bpf_link_detach(link->fd) ? -errno : 0;
10344 }
10345
10346 int bpf_link__pin(struct bpf_link *link, const char *path)
10347 {
10348 int err;
10349
10350 if (link->pin_path)
10351 return libbpf_err(-EBUSY);
10352 err = make_parent_dir(path);
10353 if (err)
10354 return libbpf_err(err);
10355 err = check_path(path);
10356 if (err)
10357 return libbpf_err(err);
10358
10359 link->pin_path = strdup(path);
10360 if (!link->pin_path)
10361 return libbpf_err(-ENOMEM);
10362
10363 if (bpf_obj_pin(link->fd, link->pin_path)) {
10364 err = -errno;
10365 zfree(&link->pin_path);
10366 return libbpf_err(err);
10367 }
10368
10369 pr_debug("link fd=%d: pinned at %s\n", link->fd, link->pin_path);
10370 return 0;
10371 }
10372
10373 int bpf_link__unpin(struct bpf_link *link)
10374 {
10375 int err;
10376
10377 if (!link->pin_path)
10378 return libbpf_err(-EINVAL);
10379
10380 err = unlink(link->pin_path);
10381 if (err != 0)
10382 return -errno;
10383
10384 pr_debug("link fd=%d: unpinned from %s\n", link->fd, link->pin_path);
10385 zfree(&link->pin_path);
10386 return 0;
10387 }
10388
10389 struct bpf_link_perf {
10390 struct bpf_link link;
10391 int perf_event_fd;
10392 /* legacy kprobe support: keep track of probe identifier and type */
10393 char *legacy_probe_name;
10394 bool legacy_is_kprobe;
10395 bool legacy_is_retprobe;
10396 };
10397
10398 static int remove_kprobe_event_legacy(const char *probe_name, bool retprobe);
10399 static int remove_uprobe_event_legacy(const char *probe_name, bool retprobe);
10400
10401 static int bpf_link_perf_detach(struct bpf_link *link)
10402 {
10403 struct bpf_link_perf *perf_link = container_of(link, struct bpf_link_perf, link);
10404 int err = 0;
10405
10406 if (ioctl(perf_link->perf_event_fd, PERF_EVENT_IOC_DISABLE, 0) < 0)
10407 err = -errno;
10408
10409 if (perf_link->perf_event_fd != link->fd)
10410 close(perf_link->perf_event_fd);
10411 close(link->fd);
10412
10413 /* legacy uprobe/kprobe needs to be removed after perf event fd closure */
10414 if (perf_link->legacy_probe_name) {
10415 if (perf_link->legacy_is_kprobe) {
10416 err = remove_kprobe_event_legacy(perf_link->legacy_probe_name,
10417 perf_link->legacy_is_retprobe);
10418 } else {
10419 err = remove_uprobe_event_legacy(perf_link->legacy_probe_name,
10420 perf_link->legacy_is_retprobe);
10421 }
10422 }
10423
10424 return err;
10425 }
10426
10427 static void bpf_link_perf_dealloc(struct bpf_link *link)
10428 {
10429 struct bpf_link_perf *perf_link = container_of(link, struct bpf_link_perf, link);
10430
10431 free(perf_link->legacy_probe_name);
10432 free(perf_link);
10433 }
10434
10435 struct bpf_link *bpf_program__attach_perf_event_opts(const struct bpf_program *prog, int pfd,
10436 const struct bpf_perf_event_opts *opts)
10437 {
10438 char errmsg[STRERR_BUFSIZE];
10439 struct bpf_link_perf *link;
10440 int prog_fd, link_fd = -1, err;
10441 bool force_ioctl_attach;
10442
10443 if (!OPTS_VALID(opts, bpf_perf_event_opts))
10444 return libbpf_err_ptr(-EINVAL);
10445
10446 if (pfd < 0) {
10447 pr_warn("prog '%s': invalid perf event FD %d\n",
10448 prog->name, pfd);
10449 return libbpf_err_ptr(-EINVAL);
10450 }
10451 prog_fd = bpf_program__fd(prog);
10452 if (prog_fd < 0) {
10453 pr_warn("prog '%s': can't attach BPF program w/o FD (did you load it?)\n",
10454 prog->name);
10455 return libbpf_err_ptr(-EINVAL);
10456 }
10457
10458 link = calloc(1, sizeof(*link));
10459 if (!link)
10460 return libbpf_err_ptr(-ENOMEM);
10461 link->link.detach = &bpf_link_perf_detach;
10462 link->link.dealloc = &bpf_link_perf_dealloc;
10463 link->perf_event_fd = pfd;
10464
10465 force_ioctl_attach = OPTS_GET(opts, force_ioctl_attach, false);
10466 if (kernel_supports(prog->obj, FEAT_PERF_LINK) && !force_ioctl_attach) {
10467 DECLARE_LIBBPF_OPTS(bpf_link_create_opts, link_opts,
10468 .perf_event.bpf_cookie = OPTS_GET(opts, bpf_cookie, 0));
10469
10470 link_fd = bpf_link_create(prog_fd, pfd, BPF_PERF_EVENT, &link_opts);
10471 if (link_fd < 0) {
10472 err = -errno;
10473 pr_warn("prog '%s': failed to create BPF link for perf_event FD %d: %d (%s)\n",
10474 prog->name, pfd,
10475 err, libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
10476 goto err_out;
10477 }
10478 link->link.fd = link_fd;
10479 } else {
10480 if (OPTS_GET(opts, bpf_cookie, 0)) {
10481 pr_warn("prog '%s': user context value is not supported\n", prog->name);
10482 err = -EOPNOTSUPP;
10483 goto err_out;
10484 }
10485
10486 if (ioctl(pfd, PERF_EVENT_IOC_SET_BPF, prog_fd) < 0) {
10487 err = -errno;
10488 pr_warn("prog '%s': failed to attach to perf_event FD %d: %s\n",
10489 prog->name, pfd, libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
10490 if (err == -EPROTO)
10491 pr_warn("prog '%s': try add PERF_SAMPLE_CALLCHAIN to or remove exclude_callchain_[kernel|user] from pfd %d\n",
10492 prog->name, pfd);
10493 goto err_out;
10494 }
10495 link->link.fd = pfd;
10496 }
10497 if (ioctl(pfd, PERF_EVENT_IOC_ENABLE, 0) < 0) {
10498 err = -errno;
10499 pr_warn("prog '%s': failed to enable perf_event FD %d: %s\n",
10500 prog->name, pfd, libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
10501 goto err_out;
10502 }
10503
10504 return &link->link;
10505 err_out:
10506 if (link_fd >= 0)
10507 close(link_fd);
10508 free(link);
10509 return libbpf_err_ptr(err);
10510 }
10511
10512 struct bpf_link *bpf_program__attach_perf_event(const struct bpf_program *prog, int pfd)
10513 {
10514 return bpf_program__attach_perf_event_opts(prog, pfd, NULL);
10515 }
10516
10517 /*
10518 * this function is expected to parse an integer in the range of [0, 2^31-1]
10519 * from the given file using scanf format string fmt. If the actual parsed
10520 * value is negative, the result might be indistinguishable from an error
10521 */
10522 static int parse_uint_from_file(const char *file, const char *fmt)
10523 {
10524 char buf[STRERR_BUFSIZE];
10525 int err, ret;
10526 FILE *f;
10527
10528 f = fopen(file, "re");
10529 if (!f) {
10530 err = -errno;
10531 pr_debug("failed to open '%s': %s\n", file,
10532 libbpf_strerror_r(err, buf, sizeof(buf)));
10533 return err;
10534 }
10535 err = fscanf(f, fmt, &ret);
10536 if (err != 1) {
10537 err = err == EOF ? -EIO : -errno;
10538 pr_debug("failed to parse '%s': %s\n", file,
10539 libbpf_strerror_r(err, buf, sizeof(buf)));
10540 fclose(f);
10541 return err;
10542 }
10543 fclose(f);
10544 return ret;
10545 }
10546
10547 static int determine_kprobe_perf_type(void)
10548 {
10549 const char *file = "/sys/bus/event_source/devices/kprobe/type";
10550
10551 return parse_uint_from_file(file, "%d\n");
10552 }
10553
10554 static int determine_uprobe_perf_type(void)
10555 {
10556 const char *file = "/sys/bus/event_source/devices/uprobe/type";
10557
10558 return parse_uint_from_file(file, "%d\n");
10559 }
10560
10561 static int determine_kprobe_retprobe_bit(void)
10562 {
10563 const char *file = "/sys/bus/event_source/devices/kprobe/format/retprobe";
10564
10565 return parse_uint_from_file(file, "config:%d\n");
10566 }
10567
10568 static int determine_uprobe_retprobe_bit(void)
10569 {
10570 const char *file = "/sys/bus/event_source/devices/uprobe/format/retprobe";
10571
10572 return parse_uint_from_file(file, "config:%d\n");
10573 }
10574
10575 #define PERF_UPROBE_REF_CTR_OFFSET_BITS 32
10576 #define PERF_UPROBE_REF_CTR_OFFSET_SHIFT 32
10577
10578 static int perf_event_open_probe(bool uprobe, bool retprobe, const char *name,
10579 uint64_t offset, int pid, size_t ref_ctr_off)
10580 {
10581 const size_t attr_sz = sizeof(struct perf_event_attr);
10582 struct perf_event_attr attr;
10583 char errmsg[STRERR_BUFSIZE];
10584 int type, pfd;
10585
10586 if ((__u64)ref_ctr_off >= (1ULL << PERF_UPROBE_REF_CTR_OFFSET_BITS))
10587 return -EINVAL;
10588
10589 memset(&attr, 0, attr_sz);
10590
10591 type = uprobe ? determine_uprobe_perf_type()
10592 : determine_kprobe_perf_type();
10593 if (type < 0) {
10594 pr_warn("failed to determine %s perf type: %s\n",
10595 uprobe ? "uprobe" : "kprobe",
10596 libbpf_strerror_r(type, errmsg, sizeof(errmsg)));
10597 return type;
10598 }
10599 if (retprobe) {
10600 int bit = uprobe ? determine_uprobe_retprobe_bit()
10601 : determine_kprobe_retprobe_bit();
10602
10603 if (bit < 0) {
10604 pr_warn("failed to determine %s retprobe bit: %s\n",
10605 uprobe ? "uprobe" : "kprobe",
10606 libbpf_strerror_r(bit, errmsg, sizeof(errmsg)));
10607 return bit;
10608 }
10609 attr.config |= 1 << bit;
10610 }
10611 attr.size = attr_sz;
10612 attr.type = type;
10613 attr.config |= (__u64)ref_ctr_off << PERF_UPROBE_REF_CTR_OFFSET_SHIFT;
10614 attr.config1 = ptr_to_u64(name); /* kprobe_func or uprobe_path */
10615 attr.config2 = offset; /* kprobe_addr or probe_offset */
10616
10617 /* pid filter is meaningful only for uprobes */
10618 pfd = syscall(__NR_perf_event_open, &attr,
10619 pid < 0 ? -1 : pid /* pid */,
10620 pid == -1 ? 0 : -1 /* cpu */,
10621 -1 /* group_fd */, PERF_FLAG_FD_CLOEXEC);
10622 return pfd >= 0 ? pfd : -errno;
10623 }
10624
10625 static int append_to_file(const char *file, const char *fmt, ...)
10626 {
10627 int fd, n, err = 0;
10628 va_list ap;
10629 char buf[1024];
10630
10631 va_start(ap, fmt);
10632 n = vsnprintf(buf, sizeof(buf), fmt, ap);
10633 va_end(ap);
10634
10635 if (n < 0 || n >= sizeof(buf))
10636 return -EINVAL;
10637
10638 fd = open(file, O_WRONLY | O_APPEND | O_CLOEXEC, 0);
10639 if (fd < 0)
10640 return -errno;
10641
10642 if (write(fd, buf, n) < 0)
10643 err = -errno;
10644
10645 close(fd);
10646 return err;
10647 }
10648
10649 #define DEBUGFS "/sys/kernel/debug/tracing"
10650 #define TRACEFS "/sys/kernel/tracing"
10651
10652 static bool use_debugfs(void)
10653 {
10654 static int has_debugfs = -1;
10655
10656 if (has_debugfs < 0)
10657 has_debugfs = faccessat(AT_FDCWD, DEBUGFS, F_OK, AT_EACCESS) == 0;
10658
10659 return has_debugfs == 1;
10660 }
10661
10662 static const char *tracefs_path(void)
10663 {
10664 return use_debugfs() ? DEBUGFS : TRACEFS;
10665 }
10666
10667 static const char *tracefs_kprobe_events(void)
10668 {
10669 return use_debugfs() ? DEBUGFS"/kprobe_events" : TRACEFS"/kprobe_events";
10670 }
10671
10672 static const char *tracefs_uprobe_events(void)
10673 {
10674 return use_debugfs() ? DEBUGFS"/uprobe_events" : TRACEFS"/uprobe_events";
10675 }
10676
10677 static const char *tracefs_available_filter_functions(void)
10678 {
10679 return use_debugfs() ? DEBUGFS"/available_filter_functions"
10680 : TRACEFS"/available_filter_functions";
10681 }
10682
10683 static const char *tracefs_available_filter_functions_addrs(void)
10684 {
10685 return use_debugfs() ? DEBUGFS"/available_filter_functions_addrs"
10686 : TRACEFS"/available_filter_functions_addrs";
10687 }
10688
10689 static void gen_kprobe_legacy_event_name(char *buf, size_t buf_sz,
10690 const char *kfunc_name, size_t offset)
10691 {
10692 static int index = 0;
10693 int i;
10694
10695 snprintf(buf, buf_sz, "libbpf_%u_%s_0x%zx_%d", getpid(), kfunc_name, offset,
10696 __sync_fetch_and_add(&index, 1));
10697
10698 /* sanitize the generated event name: replace non-alphanumeric chars with '_' */
10699 for (i = 0; buf[i]; i++) {
10700 if (!isalnum(buf[i]))
10701 buf[i] = '_';
10702 }
10703 }
10704
10705 static int add_kprobe_event_legacy(const char *probe_name, bool retprobe,
10706 const char *kfunc_name, size_t offset)
10707 {
10708 return append_to_file(tracefs_kprobe_events(), "%c:%s/%s %s+0x%zx",
10709 retprobe ? 'r' : 'p',
10710 retprobe ? "kretprobes" : "kprobes",
10711 probe_name, kfunc_name, offset);
10712 }
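/* Editorial example of the text appended above (the probe name is a made-up
 * instance of what gen_kprobe_legacy_event_name() produces):
 *
 *	p:kprobes/libbpf_1234_do_unlinkat_0x0_0 do_unlinkat+0x0
 *
 * and for a retprobe:
 *
 *	r:kretprobes/libbpf_1234_do_unlinkat_0x0_0 do_unlinkat+0x0
 */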
10713
10714 static int remove_kprobe_event_legacy(const char *probe_name, bool retprobe)
10715 {
10716 return append_to_file(tracefs_kprobe_events(), "-:%s/%s",
10717 retprobe ? "kretprobes" : "kprobes", probe_name);
10718 }
10719
10720 static int determine_kprobe_perf_type_legacy(const char *probe_name, bool retprobe)
10721 {
10722 char file[256];
10723
10724 snprintf(file, sizeof(file), "%s/events/%s/%s/id",
10725 tracefs_path(), retprobe ? "kretprobes" : "kprobes", probe_name);
10726
10727 return parse_uint_from_file(file, "%d\n");
10728 }
10729
10730 static int perf_event_kprobe_open_legacy(const char *probe_name, bool retprobe,
10731 const char *kfunc_name, size_t offset, int pid)
10732 {
10733 const size_t attr_sz = sizeof(struct perf_event_attr);
10734 struct perf_event_attr attr;
10735 char errmsg[STRERR_BUFSIZE];
10736 int type, pfd, err;
10737
10738 err = add_kprobe_event_legacy(probe_name, retprobe, kfunc_name, offset);
10739 if (err < 0) {
10740 pr_warn("failed to add legacy kprobe event for '%s+0x%zx': %s\n",
10741 kfunc_name, offset,
10742 libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
10743 return err;
10744 }
10745 type = determine_kprobe_perf_type_legacy(probe_name, retprobe);
10746 if (type < 0) {
10747 err = type;
10748 pr_warn("failed to determine legacy kprobe event id for '%s+0x%zx': %s\n",
10749 kfunc_name, offset,
10750 libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
10751 goto err_clean_legacy;
10752 }
10753
10754 memset(&attr, 0, attr_sz);
10755 attr.size = attr_sz;
10756 attr.config = type;
10757 attr.type = PERF_TYPE_TRACEPOINT;
10758
10759 pfd = syscall(__NR_perf_event_open, &attr,
10760 pid < 0 ? -1 : pid, /* pid */
10761 pid == -1 ? 0 : -1, /* cpu */
10762 -1 /* group_fd */, PERF_FLAG_FD_CLOEXEC);
10763 if (pfd < 0) {
10764 err = -errno;
10765 pr_warn("legacy kprobe perf_event_open() failed: %s\n",
10766 libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
10767 goto err_clean_legacy;
10768 }
10769 return pfd;
10770
10771 err_clean_legacy:
10772 /* Clear the newly added legacy kprobe_event */
10773 remove_kprobe_event_legacy(probe_name, retprobe);
10774 return err;
10775 }
10776
10777 static const char *arch_specific_syscall_pfx(void)
10778 {
10779 #if defined(__x86_64__)
10780 return "x64";
10781 #elif defined(__i386__)
10782 return "ia32";
10783 #elif defined(__s390x__)
10784 return "s390x";
10785 #elif defined(__s390__)
10786 return "s390";
10787 #elif defined(__arm__)
10788 return "arm";
10789 #elif defined(__aarch64__)
10790 return "arm64";
10791 #elif defined(__mips__)
10792 return "mips";
10793 #elif defined(__riscv)
10794 return "riscv";
10795 #elif defined(__powerpc__)
10796 return "powerpc";
10797 #elif defined(__powerpc64__)
10798 return "powerpc64";
10799 #else
10800 return NULL;
10801 #endif
10802 }
10803
10804 int probe_kern_syscall_wrapper(int token_fd)
10805 {
10806 char syscall_name[64];
10807 const char *ksys_pfx;
10808
10809 ksys_pfx = arch_specific_syscall_pfx();
10810 if (!ksys_pfx)
10811 return 0;
10812
10813 snprintf(syscall_name, sizeof(syscall_name), "__%s_sys_bpf", ksys_pfx);
10814
10815 if (determine_kprobe_perf_type() >= 0) {
10816 int pfd;
10817
10818 pfd = perf_event_open_probe(false, false, syscall_name, 0, getpid(), 0);
10819 if (pfd >= 0)
10820 close(pfd);
10821
10822 return pfd >= 0 ? 1 : 0;
10823 } else { /* legacy mode */
10824 char probe_name[128];
10825
10826 gen_kprobe_legacy_event_name(probe_name, sizeof(probe_name), syscall_name, 0);
10827 if (add_kprobe_event_legacy(probe_name, false, syscall_name, 0) < 0)
10828 return 0;
10829
10830 (void)remove_kprobe_event_legacy(probe_name, false);
10831 return 1;
10832 }
10833 }
10834
10835 struct bpf_link *
10836 bpf_program__attach_kprobe_opts(const struct bpf_program *prog,
10837 const char *func_name,
10838 const struct bpf_kprobe_opts *opts)
10839 {
10840 DECLARE_LIBBPF_OPTS(bpf_perf_event_opts, pe_opts);
10841 enum probe_attach_mode attach_mode;
10842 char errmsg[STRERR_BUFSIZE];
10843 char *legacy_probe = NULL;
10844 struct bpf_link *link;
10845 size_t offset;
10846 bool retprobe, legacy;
10847 int pfd, err;
10848
10849 if (!OPTS_VALID(opts, bpf_kprobe_opts))
10850 return libbpf_err_ptr(-EINVAL);
10851
10852 attach_mode = OPTS_GET(opts, attach_mode, PROBE_ATTACH_MODE_DEFAULT);
10853 retprobe = OPTS_GET(opts, retprobe, false);
10854 offset = OPTS_GET(opts, offset, 0);
10855 pe_opts.bpf_cookie = OPTS_GET(opts, bpf_cookie, 0);
10856
10857 legacy = determine_kprobe_perf_type() < 0;
10858 switch (attach_mode) {
10859 case PROBE_ATTACH_MODE_LEGACY:
10860 legacy = true;
10861 pe_opts.force_ioctl_attach = true;
10862 break;
10863 case PROBE_ATTACH_MODE_PERF:
10864 if (legacy)
10865 return libbpf_err_ptr(-ENOTSUP);
10866 pe_opts.force_ioctl_attach = true;
10867 break;
10868 case PROBE_ATTACH_MODE_LINK:
10869 if (legacy || !kernel_supports(prog->obj, FEAT_PERF_LINK))
10870 return libbpf_err_ptr(-ENOTSUP);
10871 break;
10872 case PROBE_ATTACH_MODE_DEFAULT:
10873 break;
10874 default:
10875 return libbpf_err_ptr(-EINVAL);
10876 }
10877
10878 if (!legacy) {
10879 pfd = perf_event_open_probe(false /* uprobe */, retprobe,
10880 func_name, offset,
10881 -1 /* pid */, 0 /* ref_ctr_off */);
10882 } else {
10883 char probe_name[256];
10884
10885 gen_kprobe_legacy_event_name(probe_name, sizeof(probe_name),
10886 func_name, offset);
10887
10888 legacy_probe = strdup(probe_name);
10889 if (!legacy_probe)
10890 return libbpf_err_ptr(-ENOMEM);
10891
10892 pfd = perf_event_kprobe_open_legacy(legacy_probe, retprobe, func_name,
10893 offset, -1 /* pid */);
10894 }
10895 if (pfd < 0) {
10896 err = -errno;
10897 pr_warn("prog '%s': failed to create %s '%s+0x%zx' perf event: %s\n",
10898 prog->name, retprobe ? "kretprobe" : "kprobe",
10899 func_name, offset,
10900 libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
10901 goto err_out;
10902 }
10903 link = bpf_program__attach_perf_event_opts(prog, pfd, &pe_opts);
10904 err = libbpf_get_error(link);
10905 if (err) {
10906 close(pfd);
10907 pr_warn("prog '%s': failed to attach to %s '%s+0x%zx': %s\n",
10908 prog->name, retprobe ? "kretprobe" : "kprobe",
10909 func_name, offset,
10910 libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
10911 goto err_clean_legacy;
10912 }
10913 if (legacy) {
10914 struct bpf_link_perf *perf_link = container_of(link, struct bpf_link_perf, link);
10915
10916 perf_link->legacy_probe_name = legacy_probe;
10917 perf_link->legacy_is_kprobe = true;
10918 perf_link->legacy_is_retprobe = retprobe;
10919 }
10920
10921 return link;
10922
10923 err_clean_legacy:
10924 if (legacy)
10925 remove_kprobe_event_legacy(legacy_probe, retprobe);
10926 err_out:
10927 free(legacy_probe);
10928 return libbpf_err_ptr(err);
10929 }
10930
10931 struct bpf_link *bpf_program__attach_kprobe(const struct bpf_program *prog,
10932 bool retprobe,
10933 const char *func_name)
10934 {
10935 DECLARE_LIBBPF_OPTS(bpf_kprobe_opts, opts,
10936 .retprobe = retprobe,
10937 );
10938
10939 return bpf_program__attach_kprobe_opts(prog, func_name, &opts);
10940 }
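/* Editorial usage sketch of the two attach flavors above (the kernel function
 * name is hypothetical):
 *
 *	LIBBPF_OPTS(bpf_kprobe_opts, opts,
 *		.retprobe = true,
 *	);
 *	struct bpf_link *l1 = bpf_program__attach_kprobe(prog, false, "do_unlinkat");
 *	struct bpf_link *l2 = bpf_program__attach_kprobe_opts(prog, "do_unlinkat", &opts);
 *
 * Failures are reported through libbpf_err_ptr(); check the result with
 * libbpf_get_error() before use.
 */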
10941
10942 struct bpf_link *bpf_program__attach_ksyscall(const struct bpf_program *prog,
10943 const char *syscall_name,
10944 const struct bpf_ksyscall_opts *opts)
10945 {
10946 LIBBPF_OPTS(bpf_kprobe_opts, kprobe_opts);
10947 char func_name[128];
10948
10949 if (!OPTS_VALID(opts, bpf_ksyscall_opts))
10950 return libbpf_err_ptr(-EINVAL);
10951
10952 if (kernel_supports(prog->obj, FEAT_SYSCALL_WRAPPER)) {
10953 /* arch_specific_syscall_pfx() should never return NULL here
10954 * because it is guarded by kernel_supports(). However, since the
10955 * compiler does not know that, we keep an explicit NULL fallback
10956 * (the "?:" expression below) as well.
10957 */
10958 snprintf(func_name, sizeof(func_name), "__%s_sys_%s",
10959 arch_specific_syscall_pfx() ? : "", syscall_name);
10960 } else {
10961 snprintf(func_name, sizeof(func_name), "__se_sys_%s", syscall_name);
10962 }
10963
10964 kprobe_opts.retprobe = OPTS_GET(opts, retprobe, false);
10965 kprobe_opts.bpf_cookie = OPTS_GET(opts, bpf_cookie, 0);
10966
10967 return bpf_program__attach_kprobe_opts(prog, func_name, &kprobe_opts);
10968 }
10969
10970 /* Adapted from perf/util/string.c */
10971 bool glob_match(const char *str, const char *pat)
10972 {
10973 while (*str && *pat && *pat != '*') {
10974 if (*pat == '?') { /* Matches any single character */
10975 str++;
10976 pat++;
10977 continue;
10978 }
10979 if (*str != *pat)
10980 return false;
10981 str++;
10982 pat++;
10983 }
10984 /* Check wild card */
10985 if (*pat == '*') {
10986 while (*pat == '*')
10987 pat++;
10988 if (!*pat) /* Tail wild card matches all */
10989 return true;
10990 while (*str)
10991 if (glob_match(str++, pat))
10992 return true;
10993 }
10994 return !*str && !*pat;
10995 }
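/* Editorial examples for glob_match() (patterns are illustrative):
 *   glob_match("sys_openat2", "sys_*")     -> true
 *   glob_match("sys_openat2", "sys_open?") -> false ('?' matches exactly one
 *                                             character, two remain here)
 *   glob_match("tcp_v4_rcv", "*_rcv")      -> true
 */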
10996
10997 struct kprobe_multi_resolve {
10998 const char *pattern;
10999 unsigned long *addrs;
11000 size_t cap;
11001 size_t cnt;
11002 };
11003
11004 struct avail_kallsyms_data {
11005 char **syms;
11006 size_t cnt;
11007 struct kprobe_multi_resolve *res;
11008 };
11009
11010 static int avail_func_cmp(const void *a, const void *b)
11011 {
11012 return strcmp(*(const char **)a, *(const char **)b);
11013 }
11014
11015 static int avail_kallsyms_cb(unsigned long long sym_addr, char sym_type,
11016 const char *sym_name, void *ctx)
11017 {
11018 struct avail_kallsyms_data *data = ctx;
11019 struct kprobe_multi_resolve *res = data->res;
11020 int err;
11021
11022 if (!bsearch(&sym_name, data->syms, data->cnt, sizeof(*data->syms), avail_func_cmp))
11023 return 0;
11024
11025 err = libbpf_ensure_mem((void **)&res->addrs, &res->cap, sizeof(*res->addrs), res->cnt + 1);
11026 if (err)
11027 return err;
11028
11029 res->addrs[res->cnt++] = (unsigned long)sym_addr;
11030 return 0;
11031 }
11032
11033 static int libbpf_available_kallsyms_parse(struct kprobe_multi_resolve *res)
11034 {
11035 const char *available_functions_file = tracefs_available_filter_functions();
11036 struct avail_kallsyms_data data;
11037 char sym_name[500];
11038 FILE *f;
11039 int err = 0, ret, i;
11040 char **syms = NULL;
11041 size_t cap = 0, cnt = 0;
11042
11043 f = fopen(available_functions_file, "re");
11044 if (!f) {
11045 err = -errno;
11046 pr_warn("failed to open %s: %d\n", available_functions_file, err);
11047 return err;
11048 }
11049
11050 while (true) {
11051 char *name;
11052
11053 ret = fscanf(f, "%499s%*[^\n]\n", sym_name);
11054 if (ret == EOF && feof(f))
11055 break;
11056
11057 if (ret != 1) {
11058 pr_warn("failed to parse available_filter_functions entry: %d\n", ret);
11059 err = -EINVAL;
11060 goto cleanup;
11061 }
11062
11063 if (!glob_match(sym_name, res->pattern))
11064 continue;
11065
11066 err = libbpf_ensure_mem((void **)&syms, &cap, sizeof(*syms), cnt + 1);
11067 if (err)
11068 goto cleanup;
11069
11070 name = strdup(sym_name);
11071 if (!name) {
11072 err = -errno;
11073 goto cleanup;
11074 }
11075
11076 syms[cnt++] = name;
11077 }
11078
11079 /* no entries found, bail out */
11080 if (cnt == 0) {
11081 err = -ENOENT;
11082 goto cleanup;
11083 }
11084
11085 /* sort available functions */
11086 qsort(syms, cnt, sizeof(*syms), avail_func_cmp);
11087
11088 data.syms = syms;
11089 data.res = res;
11090 data.cnt = cnt;
11091 libbpf_kallsyms_parse(avail_kallsyms_cb, &data);
11092
11093 if (res->cnt == 0)
11094 err = -ENOENT;
11095
11096 cleanup:
11097 for (i = 0; i < cnt; i++)
11098 free((char *)syms[i]);
11099 free(syms);
11100
11101 fclose(f);
11102 return err;
11103 }
11104
11105 static bool has_available_filter_functions_addrs(void)
11106 {
11107 return access(tracefs_available_filter_functions_addrs(), R_OK) != -1;
11108 }
11109
11110 static int libbpf_available_kprobes_parse(struct kprobe_multi_resolve *res)
11111 {
11112 const char *available_path = tracefs_available_filter_functions_addrs();
11113 char sym_name[500];
11114 FILE *f;
11115 int ret, err = 0;
11116 unsigned long long sym_addr;
11117
11118 f = fopen(available_path, "re");
11119 if (!f) {
11120 err = -errno;
11121 pr_warn("failed to open %s: %d\n", available_path, err);
11122 return err;
11123 }
11124
11125 while (true) {
11126 ret = fscanf(f, "%llx %499s%*[^\n]\n", &sym_addr, sym_name);
11127 if (ret == EOF && feof(f))
11128 break;
11129
11130 if (ret != 2) {
11131 pr_warn("failed to parse available_filter_functions_addrs entry: %d\n",
11132 ret);
11133 err = -EINVAL;
11134 goto cleanup;
11135 }
11136
11137 if (!glob_match(sym_name, res->pattern))
11138 continue;
11139
11140 err = libbpf_ensure_mem((void **)&res->addrs, &res->cap,
11141 sizeof(*res->addrs), res->cnt + 1);
11142 if (err)
11143 goto cleanup;
11144
11145 res->addrs[res->cnt++] = (unsigned long)sym_addr;
11146 }
11147
11148 if (res->cnt == 0)
11149 err = -ENOENT;
11150
11151 cleanup:
11152 fclose(f);
11153 return err;
11154 }
11155
11156 struct bpf_link *
11157 bpf_program__attach_kprobe_multi_opts(const struct bpf_program *prog,
11158 const char *pattern,
11159 const struct bpf_kprobe_multi_opts *opts)
11160 {
11161 LIBBPF_OPTS(bpf_link_create_opts, lopts);
11162 struct kprobe_multi_resolve res = {
11163 .pattern = pattern,
11164 };
11165 struct bpf_link *link = NULL;
11166 char errmsg[STRERR_BUFSIZE];
11167 const unsigned long *addrs;
11168 int err, link_fd, prog_fd;
11169 const __u64 *cookies;
11170 const char **syms;
11171 bool retprobe;
11172 size_t cnt;
11173
11174 if (!OPTS_VALID(opts, bpf_kprobe_multi_opts))
11175 return libbpf_err_ptr(-EINVAL);
11176
11177 syms = OPTS_GET(opts, syms, false);
11178 addrs = OPTS_GET(opts, addrs, false);
11179 cnt = OPTS_GET(opts, cnt, false);
11180 cookies = OPTS_GET(opts, cookies, false);
11181
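/* Callers must pass either a glob pattern or an explicit syms/addrs array
 * with cnt, but not both; syms and addrs are mutually exclusive, and cookies
 * are only meaningful together with the array form.
 */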
11182 if (!pattern && !addrs && !syms)
11183 return libbpf_err_ptr(-EINVAL);
11184 if (pattern && (addrs || syms || cookies || cnt))
11185 return libbpf_err_ptr(-EINVAL);
11186 if (!pattern && !cnt)
11187 return libbpf_err_ptr(-EINVAL);
11188 if (addrs && syms)
11189 return libbpf_err_ptr(-EINVAL);
11190
11191 if (pattern) {
11192 if (has_available_filter_functions_addrs())
11193 err = libbpf_available_kprobes_parse(&res);
11194 else
11195 err = libbpf_available_kallsyms_parse(&res);
11196 if (err)
11197 goto error;
11198 addrs = res.addrs;
11199 cnt = res.cnt;
11200 }
11201
11202 retprobe = OPTS_GET(opts, retprobe, false);
11203
11204 lopts.kprobe_multi.syms = syms;
11205 lopts.kprobe_multi.addrs = addrs;
11206 lopts.kprobe_multi.cookies = cookies;
11207 lopts.kprobe_multi.cnt = cnt;
11208 lopts.kprobe_multi.flags = retprobe ? BPF_F_KPROBE_MULTI_RETURN : 0;
11209
11210 link = calloc(1, sizeof(*link));
11211 if (!link) {
11212 err = -ENOMEM;
11213 goto error;
11214 }
11215 link->detach = &bpf_link__detach_fd;
11216
11217 prog_fd = bpf_program__fd(prog);
11218 link_fd = bpf_link_create(prog_fd, 0, BPF_TRACE_KPROBE_MULTI, &lopts);
11219 if (link_fd < 0) {
11220 err = -errno;
11221 pr_warn("prog '%s': failed to attach: %s\n",
11222 prog->name, libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
11223 goto error;
11224 }
11225 link->fd = link_fd;
11226 free(res.addrs);
11227 return link;
11228
11229 error:
11230 free(link);
11231 free(res.addrs);
11232 return libbpf_err_ptr(err);
11233 }
11234
11235 static int attach_kprobe(const struct bpf_program *prog, long cookie, struct bpf_link **link)
11236 {
11237 DECLARE_LIBBPF_OPTS(bpf_kprobe_opts, opts);
11238 unsigned long offset = 0;
11239 const char *func_name;
11240 char *func;
11241 int n;
11242
11243 *link = NULL;
11244
11245 /* no auto-attach for SEC("kprobe") and SEC("kretprobe") */
11246 if (strcmp(prog->sec_name, "kprobe") == 0 || strcmp(prog->sec_name, "kretprobe") == 0)
11247 return 0;
11248
11249 opts.retprobe = str_has_pfx(prog->sec_name, "kretprobe/");
11250 if (opts.retprobe)
11251 func_name = prog->sec_name + sizeof("kretprobe/") - 1;
11252 else
11253 func_name = prog->sec_name + sizeof("kprobe/") - 1;
11254
11255 n = sscanf(func_name, "%m[a-zA-Z0-9_.]+%li", &func, &offset);
11256 if (n < 1) {
11257 pr_warn("kprobe name is invalid: %s\n", func_name);
11258 return -EINVAL;
11259 }
11260 if (opts.retprobe && offset != 0) {
11261 free(func);
11262 pr_warn("kretprobes do not support offset specification\n");
11263 return -EINVAL;
11264 }
11265
11266 opts.offset = offset;
11267 *link = bpf_program__attach_kprobe_opts(prog, func, &opts);
11268 free(func);
11269 return libbpf_get_error(*link);
11270 }
11271
11272 static int attach_ksyscall(const struct bpf_program *prog, long cookie, struct bpf_link **link)
11273 {
11274 LIBBPF_OPTS(bpf_ksyscall_opts, opts);
11275 const char *syscall_name;
11276
11277 *link = NULL;
11278
11279 /* no auto-attach for SEC("ksyscall") and SEC("kretsyscall") */
11280 if (strcmp(prog->sec_name, "ksyscall") == 0 || strcmp(prog->sec_name, "kretsyscall") == 0)
11281 return 0;
11282
11283 opts.retprobe = str_has_pfx(prog->sec_name, "kretsyscall/");
11284 if (opts.retprobe)
11285 syscall_name = prog->sec_name + sizeof("kretsyscall/") - 1;
11286 else
11287 syscall_name = prog->sec_name + sizeof("ksyscall/") - 1;
11288
11289 *link = bpf_program__attach_ksyscall(prog, syscall_name, &opts);
11290 return *link ? 0 : -errno;
11291 }
11292
11293 static int attach_kprobe_multi(const struct bpf_program *prog, long cookie, struct bpf_link **link)
11294 {
11295 LIBBPF_OPTS(bpf_kprobe_multi_opts, opts);
11296 const char *spec;
11297 char *pattern;
11298 int n;
11299
11300 *link = NULL;
11301
11302 /* no auto-attach for SEC("kprobe.multi") and SEC("kretprobe.multi") */
11303 if (strcmp(prog->sec_name, "kprobe.multi") == 0 ||
11304 strcmp(prog->sec_name, "kretprobe.multi") == 0)
11305 return 0;
11306
11307 opts.retprobe = str_has_pfx(prog->sec_name, "kretprobe.multi/");
11308 if (opts.retprobe)
11309 spec = prog->sec_name + sizeof("kretprobe.multi/") - 1;
11310 else
11311 spec = prog->sec_name + sizeof("kprobe.multi/") - 1;
11312
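/* e.g. SEC("kprobe.multi/tcp_*") yields the glob pattern "tcp_*"; '*' and
 * '?' wildcards are accepted (an illustrative section name, not one required
 * by libbpf).
 */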
11313 n = sscanf(spec, "%m[a-zA-Z0-9_.*?]", &pattern);
11314 if (n < 1) {
11315 pr_warn("kprobe multi pattern is invalid: %s\n", spec);
11316 return -EINVAL;
11317 }
11318
11319 *link = bpf_program__attach_kprobe_multi_opts(prog, pattern, &opts);
11320 free(pattern);
11321 return libbpf_get_error(*link);
11322 }
11323
11324 static int attach_uprobe_multi(const struct bpf_program *prog, long cookie, struct bpf_link **link)
11325 {
11326 char *probe_type = NULL, *binary_path = NULL, *func_name = NULL;
11327 LIBBPF_OPTS(bpf_uprobe_multi_opts, opts);
11328 int n, ret = -EINVAL;
11329
11330 *link = NULL;
11331
11332 n = sscanf(prog->sec_name, "%m[^/]/%m[^:]:%m[^\n]",
11333 &probe_type, &binary_path, &func_name);
11334 switch (n) {
11335 case 1:
11336 /* handle SEC("u[ret]probe") - format is valid, but auto-attach is impossible. */
11337 ret = 0;
11338 break;
11339 case 3:
11340 opts.retprobe = strcmp(probe_type, "uretprobe.multi") == 0;
11341 *link = bpf_program__attach_uprobe_multi(prog, -1, binary_path, func_name, &opts);
11342 ret = libbpf_get_error(*link);
11343 break;
11344 default:
11345 pr_warn("prog '%s': invalid format of section definition '%s'\n", prog->name,
11346 prog->sec_name);
11347 break;
11348 }
11349 free(probe_type);
11350 free(binary_path);
11351 free(func_name);
11352 return ret;
11353 }
11354
11355 static void gen_uprobe_legacy_event_name(char *buf, size_t buf_sz,
11356 const char *binary_path, uint64_t offset)
11357 {
11358 int i;
11359
11360 snprintf(buf, buf_sz, "libbpf_%u_%s_0x%zx", getpid(), binary_path, (size_t)offset);
11361
11362 /* sanitize binary_path in the probe name */
11363 for (i = 0; buf[i]; i++) {
11364 if (!isalnum(buf[i]))
11365 buf[i] = '_';
11366 }
11367 }
11368
11369 static inline int add_uprobe_event_legacy(const char *probe_name, bool retprobe,
11370 const char *binary_path, size_t offset)
11371 {
11372 return append_to_file(tracefs_uprobe_events(), "%c:%s/%s %s:0x%zx",
11373 retprobe ? 'r' : 'p',
11374 retprobe ? "uretprobes" : "uprobes",
11375 probe_name, binary_path, offset);
11376 }
11377
11378 static inline int remove_uprobe_event_legacy(const char *probe_name, bool retprobe)
11379 {
11380 return append_to_file(tracefs_uprobe_events(), "-:%s/%s",
11381 retprobe ? "uretprobes" : "uprobes", probe_name);
11382 }
11383
11384 static int determine_uprobe_perf_type_legacy(const char *probe_name, bool retprobe)
11385 {
11386 char file[512];
11387
11388 snprintf(file, sizeof(file), "%s/events/%s/%s/id",
11389 tracefs_path(), retprobe ? "uretprobes" : "uprobes", probe_name);
11390
11391 return parse_uint_from_file(file, "%d\n");
11392 }
11393
11394 static int perf_event_uprobe_open_legacy(const char *probe_name, bool retprobe,
11395 const char *binary_path, size_t offset, int pid)
11396 {
11397 const size_t attr_sz = sizeof(struct perf_event_attr);
11398 struct perf_event_attr attr;
11399 int type, pfd, err;
11400
11401 err = add_uprobe_event_legacy(probe_name, retprobe, binary_path, offset);
11402 if (err < 0) {
11403 pr_warn("failed to add legacy uprobe event for %s:0x%zx: %d\n",
11404 binary_path, (size_t)offset, err);
11405 return err;
11406 }
11407 type = determine_uprobe_perf_type_legacy(probe_name, retprobe);
11408 if (type < 0) {
11409 err = type;
11410 pr_warn("failed to determine legacy uprobe event id for %s:0x%zx: %d\n",
11411 binary_path, offset, err);
11412 goto err_clean_legacy;
11413 }
11414
11415 memset(&attr, 0, attr_sz);
11416 attr.size = attr_sz;
11417 attr.config = type;
11418 attr.type = PERF_TYPE_TRACEPOINT;
11419
11420 pfd = syscall(__NR_perf_event_open, &attr,
11421 pid < 0 ? -1 : pid, /* pid */
11422 pid == -1 ? 0 : -1, /* cpu */
11423 -1 /* group_fd */, PERF_FLAG_FD_CLOEXEC);
11424 if (pfd < 0) {
11425 err = -errno;
11426 pr_warn("legacy uprobe perf_event_open() failed: %d\n", err);
11427 goto err_clean_legacy;
11428 }
11429 return pfd;
11430
11431 err_clean_legacy:
11432 /* Clear the newly added legacy uprobe_event */
11433 remove_uprobe_event_legacy(probe_name, retprobe);
11434 return err;
11435 }
11436
11437 /* Find offset of function name in archive specified by path. Currently
11438 * supported are .zip files that do not compress their contents, as used on
11439 * Android in the form of APKs, for example. "file_name" is the name of the ELF
11440 * file inside the archive. "func_name" matches symbol name or name@@LIB for
11441 * library functions.
11442 *
11443 * An overview of the APK format specifically provided here:
11444 * https://en.wikipedia.org/w/index.php?title=Apk_(file_format)&oldid=1139099120#Package_contents
11445 */
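/* Hypothetical example, purely for illustration:
 *   archive_path = "/data/app/com.example/base.apk"
 *   file_name    = "lib/arm64-v8a/libexample.so"
 *   func_name    = "process_request"
 * The value returned is the symbol's offset within the ELF file plus the
 * entry's data offset inside the archive.
 */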
11446 static long elf_find_func_offset_from_archive(const char *archive_path, const char *file_name,
11447 const char *func_name)
11448 {
11449 struct zip_archive *archive;
11450 struct zip_entry entry;
11451 long ret;
11452 Elf *elf;
11453
11454 archive = zip_archive_open(archive_path);
11455 if (IS_ERR(archive)) {
11456 ret = PTR_ERR(archive);
11457 pr_warn("zip: failed to open %s: %ld\n", archive_path, ret);
11458 return ret;
11459 }
11460
11461 ret = zip_archive_find_entry(archive, file_name, &entry);
11462 if (ret) {
11463 pr_warn("zip: could not find archive member %s in %s: %ld\n", file_name,
11464 archive_path, ret);
11465 goto out;
11466 }
11467 pr_debug("zip: found entry for %s in %s at 0x%lx\n", file_name, archive_path,
11468 (unsigned long)entry.data_offset);
11469
11470 if (entry.compression) {
11471 pr_warn("zip: entry %s of %s is compressed and cannot be handled\n", file_name,
11472 archive_path);
11473 ret = -LIBBPF_ERRNO__FORMAT;
11474 goto out;
11475 }
11476
11477 elf = elf_memory((void *)entry.data, entry.data_length);
11478 if (!elf) {
11479 pr_warn("elf: could not read elf file %s from %s: %s\n", file_name, archive_path,
11480 elf_errmsg(-1));
11481 ret = -LIBBPF_ERRNO__LIBELF;
11482 goto out;
11483 }
11484
11485 ret = elf_find_func_offset(elf, file_name, func_name);
11486 if (ret > 0) {
11487 pr_debug("elf: symbol address match for %s of %s in %s: 0x%x + 0x%lx = 0x%lx\n",
11488 func_name, file_name, archive_path, entry.data_offset, ret,
11489 ret + entry.data_offset);
11490 ret += entry.data_offset;
11491 }
11492 elf_end(elf);
11493
11494 out:
11495 zip_archive_close(archive);
11496 return ret;
11497 }
11498
11499 static const char *arch_specific_lib_paths(void)
11500 {
11501 /*
11502 * Based on https://packages.debian.org/sid/libc6.
11503 *
11504 * Assume that the traced program is built for the same architecture
11505 * as libbpf, which should cover the vast majority of cases.
11506 */
11507 #if defined(__x86_64__)
11508 return "/lib/x86_64-linux-gnu";
11509 #elif defined(__i386__)
11510 return "/lib/i386-linux-gnu";
11511 #elif defined(__s390x__)
11512 return "/lib/s390x-linux-gnu";
11513 #elif defined(__s390__)
11514 return "/lib/s390-linux-gnu";
11515 #elif defined(__arm__) && defined(__SOFTFP__)
11516 return "/lib/arm-linux-gnueabi";
11517 #elif defined(__arm__) && !defined(__SOFTFP__)
11518 return "/lib/arm-linux-gnueabihf";
11519 #elif defined(__aarch64__)
11520 return "/lib/aarch64-linux-gnu";
11521 #elif defined(__mips__) && defined(__MIPSEL__) && _MIPS_SZLONG == 64
11522 return "/lib/mips64el-linux-gnuabi64";
11523 #elif defined(__mips__) && defined(__MIPSEL__) && _MIPS_SZLONG == 32
11524 return "/lib/mipsel-linux-gnu";
11525 #elif defined(__powerpc64__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
11526 return "/lib/powerpc64le-linux-gnu";
11527 #elif defined(__sparc__) && defined(__arch64__)
11528 return "/lib/sparc64-linux-gnu";
11529 #elif defined(__riscv) && __riscv_xlen == 64
11530 return "/lib/riscv64-linux-gnu";
11531 #else
11532 return NULL;
11533 #endif
11534 }
11535
11536 /* Get full path to program/shared library. */
11537 static int resolve_full_path(const char *file, char *result, size_t result_sz)
11538 {
11539 const char *search_paths[3] = {};
11540 int i, perm;
11541
11542 if (str_has_sfx(file, ".so") || strstr(file, ".so.")) {
11543 search_paths[0] = getenv("LD_LIBRARY_PATH");
11544 search_paths[1] = "/usr/lib64:/usr/lib";
11545 search_paths[2] = arch_specific_lib_paths();
11546 perm = R_OK;
11547 } else {
11548 search_paths[0] = getenv("PATH");
11549 search_paths[1] = "/usr/bin:/usr/sbin";
11550 perm = R_OK | X_OK;
11551 }
11552
11553 for (i = 0; i < ARRAY_SIZE(search_paths); i++) {
11554 const char *s;
11555
11556 if (!search_paths[i])
11557 continue;
11558 for (s = search_paths[i]; s != NULL; s = strchr(s, ':')) {
11559 char *next_path;
11560 int seg_len;
11561
11562 if (s[0] == ':')
11563 s++;
11564 next_path = strchr(s, ':');
11565 seg_len = next_path ? next_path - s : strlen(s);
11566 if (!seg_len)
11567 continue;
11568 snprintf(result, result_sz, "%.*s/%s", seg_len, s, file);
11569 /* ensure it has required permissions */
11570 if (faccessat(AT_FDCWD, result, perm, AT_EACCESS) < 0)
11571 continue;
11572 pr_debug("resolved '%s' to '%s'\n", file, result);
11573 return 0;
11574 }
11575 }
11576 return -ENOENT;
11577 }
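/* For example, resolving "libc.so.6" would typically produce something like
 * "/lib/x86_64-linux-gnu/libc.so.6" on an x86-64 Debian-style system, while
 * resolving "bash" searches $PATH plus /usr/bin:/usr/sbin (illustrative
 * outcomes only; the result depends on the host's filesystem layout).
 */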
11578
11579 struct bpf_link *
11580 bpf_program__attach_uprobe_multi(const struct bpf_program *prog,
11581 pid_t pid,
11582 const char *path,
11583 const char *func_pattern,
11584 const struct bpf_uprobe_multi_opts *opts)
11585 {
11586 const unsigned long *ref_ctr_offsets = NULL, *offsets = NULL;
11587 LIBBPF_OPTS(bpf_link_create_opts, lopts);
11588 unsigned long *resolved_offsets = NULL;
11589 int err = 0, link_fd, prog_fd;
11590 struct bpf_link *link = NULL;
11591 char errmsg[STRERR_BUFSIZE];
11592 char full_path[PATH_MAX];
11593 const __u64 *cookies;
11594 const char **syms;
11595 size_t cnt;
11596
11597 if (!OPTS_VALID(opts, bpf_uprobe_multi_opts))
11598 return libbpf_err_ptr(-EINVAL);
11599
11600 syms = OPTS_GET(opts, syms, NULL);
11601 offsets = OPTS_GET(opts, offsets, NULL);
11602 ref_ctr_offsets = OPTS_GET(opts, ref_ctr_offsets, NULL);
11603 cookies = OPTS_GET(opts, cookies, NULL);
11604 cnt = OPTS_GET(opts, cnt, 0);
11605
11606 /*
11607 * The user can specify two mutually exclusive sets of inputs:
11608 *
11609 * 1) use only path/func_pattern/pid arguments
11610 *
11611 * 2) use path/pid with allowed combinations of:
11612 * syms/offsets/ref_ctr_offsets/cookies/cnt
11613 *
11614 * - syms and offsets are mutually exclusive
11615 * - ref_ctr_offsets and cookies are optional
11616 *
11617 * Any other usage results in error.
11618 */
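/* Hypothetical calls illustrating the two input styles (paths and names are
 * examples only):
 *
 *   bpf_program__attach_uprobe_multi(prog, -1, "/usr/lib/libc.so.6",
 *                                    "pthread_*", NULL);
 *
 *   LIBBPF_OPTS(bpf_uprobe_multi_opts, my_opts, .syms = syms, .cnt = n);
 *   bpf_program__attach_uprobe_multi(prog, -1, "/usr/lib/libc.so.6",
 *                                    NULL, &my_opts);
 */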
11619
11620 if (!path)
11621 return libbpf_err_ptr(-EINVAL);
11622 if (!func_pattern && cnt == 0)
11623 return libbpf_err_ptr(-EINVAL);
11624
11625 if (func_pattern) {
11626 if (syms || offsets || ref_ctr_offsets || cookies || cnt)
11627 return libbpf_err_ptr(-EINVAL);
11628 } else {
11629 if (!!syms == !!offsets)
11630 return libbpf_err_ptr(-EINVAL);
11631 }
11632
11633 if (func_pattern) {
11634 if (!strchr(path, '/')) {
11635 err = resolve_full_path(path, full_path, sizeof(full_path));
11636 if (err) {
11637 pr_warn("prog '%s': failed to resolve full path for '%s': %d\n",
11638 prog->name, path, err);
11639 return libbpf_err_ptr(err);
11640 }
11641 path = full_path;
11642 }
11643
11644 err = elf_resolve_pattern_offsets(path, func_pattern,
11645 &resolved_offsets, &cnt);
11646 if (err < 0)
11647 return libbpf_err_ptr(err);
11648 offsets = resolved_offsets;
11649 } else if (syms) {
11650 err = elf_resolve_syms_offsets(path, cnt, syms, &resolved_offsets, STT_FUNC);
11651 if (err < 0)
11652 return libbpf_err_ptr(err);
11653 offsets = resolved_offsets;
11654 }
11655
11656 lopts.uprobe_multi.path = path;
11657 lopts.uprobe_multi.offsets = offsets;
11658 lopts.uprobe_multi.ref_ctr_offsets = ref_ctr_offsets;
11659 lopts.uprobe_multi.cookies = cookies;
11660 lopts.uprobe_multi.cnt = cnt;
11661 lopts.uprobe_multi.flags = OPTS_GET(opts, retprobe, false) ? BPF_F_UPROBE_MULTI_RETURN : 0;
11662
11663 if (pid == 0)
11664 pid = getpid();
11665 if (pid > 0)
11666 lopts.uprobe_multi.pid = pid;
11667
11668 link = calloc(1, sizeof(*link));
11669 if (!link) {
11670 err = -ENOMEM;
11671 goto error;
11672 }
11673 link->detach = &bpf_link__detach_fd;
11674
11675 prog_fd = bpf_program__fd(prog);
11676 link_fd = bpf_link_create(prog_fd, 0, BPF_TRACE_UPROBE_MULTI, &lopts);
11677 if (link_fd < 0) {
11678 err = -errno;
11679 pr_warn("prog '%s': failed to attach multi-uprobe: %s\n",
11680 prog->name, libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
11681 goto error;
11682 }
11683 link->fd = link_fd;
11684 free(resolved_offsets);
11685 return link;
11686
11687 error:
11688 free(resolved_offsets);
11689 free(link);
11690 return libbpf_err_ptr(err);
11691 }
11692
11693 LIBBPF_API struct bpf_link *
11694 bpf_program__attach_uprobe_opts(const struct bpf_program *prog, pid_t pid,
11695 const char *binary_path, size_t func_offset,
11696 const struct bpf_uprobe_opts *opts)
11697 {
11698 const char *archive_path = NULL, *archive_sep = NULL;
11699 char errmsg[STRERR_BUFSIZE], *legacy_probe = NULL;
11700 DECLARE_LIBBPF_OPTS(bpf_perf_event_opts, pe_opts);
11701 enum probe_attach_mode attach_mode;
11702 char full_path[PATH_MAX];
11703 struct bpf_link *link;
11704 size_t ref_ctr_off;
11705 int pfd, err;
11706 bool retprobe, legacy;
11707 const char *func_name;
11708
11709 if (!OPTS_VALID(opts, bpf_uprobe_opts))
11710 return libbpf_err_ptr(-EINVAL);
11711
11712 attach_mode = OPTS_GET(opts, attach_mode, PROBE_ATTACH_MODE_DEFAULT);
11713 retprobe = OPTS_GET(opts, retprobe, false);
11714 ref_ctr_off = OPTS_GET(opts, ref_ctr_offset, 0);
11715 pe_opts.bpf_cookie = OPTS_GET(opts, bpf_cookie, 0);
11716
11717 if (!binary_path)
11718 return libbpf_err_ptr(-EINVAL);
11719
11720 /* Check if "binary_path" refers to an archive. */
11721 archive_sep = strstr(binary_path, "!/");
11722 if (archive_sep) {
11723 full_path[0] = '\0';
11724 libbpf_strlcpy(full_path, binary_path,
11725 min(sizeof(full_path), (size_t)(archive_sep - binary_path + 1)));
11726 archive_path = full_path;
11727 binary_path = archive_sep + 2;
11728 } else if (!strchr(binary_path, '/')) {
11729 err = resolve_full_path(binary_path, full_path, sizeof(full_path));
11730 if (err) {
11731 pr_warn("prog '%s': failed to resolve full path for '%s': %d\n",
11732 prog->name, binary_path, err);
11733 return libbpf_err_ptr(err);
11734 }
11735 binary_path = full_path;
11736 }
11737 func_name = OPTS_GET(opts, func_name, NULL);
11738 if (func_name) {
11739 long sym_off;
11740
11741 if (archive_path) {
11742 sym_off = elf_find_func_offset_from_archive(archive_path, binary_path,
11743 func_name);
11744 binary_path = archive_path;
11745 } else {
11746 sym_off = elf_find_func_offset_from_file(binary_path, func_name);
11747 }
11748 if (sym_off < 0)
11749 return libbpf_err_ptr(sym_off);
11750 func_offset += sym_off;
11751 }
11752
11753 legacy = determine_uprobe_perf_type() < 0;
11754 switch (attach_mode) {
11755 case PROBE_ATTACH_MODE_LEGACY:
11756 legacy = true;
11757 pe_opts.force_ioctl_attach = true;
11758 break;
11759 case PROBE_ATTACH_MODE_PERF:
11760 if (legacy)
11761 return libbpf_err_ptr(-ENOTSUP);
11762 pe_opts.force_ioctl_attach = true;
11763 break;
11764 case PROBE_ATTACH_MODE_LINK:
11765 if (legacy || !kernel_supports(prog->obj, FEAT_PERF_LINK))
11766 return libbpf_err_ptr(-ENOTSUP);
11767 break;
11768 case PROBE_ATTACH_MODE_DEFAULT:
11769 break;
11770 default:
11771 return libbpf_err_ptr(-EINVAL);
11772 }
11773
11774 if (!legacy) {
11775 pfd = perf_event_open_probe(true /* uprobe */, retprobe, binary_path,
11776 func_offset, pid, ref_ctr_off);
11777 } else {
11778 char probe_name[PATH_MAX + 64];
11779
11780 if (ref_ctr_off)
11781 return libbpf_err_ptr(-EINVAL);
11782
11783 gen_uprobe_legacy_event_name(probe_name, sizeof(probe_name),
11784 binary_path, func_offset);
11785
11786 legacy_probe = strdup(probe_name);
11787 if (!legacy_probe)
11788 return libbpf_err_ptr(-ENOMEM);
11789
11790 pfd = perf_event_uprobe_open_legacy(legacy_probe, retprobe,
11791 binary_path, func_offset, pid);
11792 }
11793 if (pfd < 0) {
11794 err = -errno;
11795 pr_warn("prog '%s': failed to create %s '%s:0x%zx' perf event: %s\n",
11796 prog->name, retprobe ? "uretprobe" : "uprobe",
11797 binary_path, func_offset,
11798 libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
11799 goto err_out;
11800 }
11801
11802 link = bpf_program__attach_perf_event_opts(prog, pfd, &pe_opts);
11803 err = libbpf_get_error(link);
11804 if (err) {
11805 close(pfd);
11806 pr_warn("prog '%s': failed to attach to %s '%s:0x%zx': %s\n",
11807 prog->name, retprobe ? "uretprobe" : "uprobe",
11808 binary_path, func_offset,
11809 libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
11810 goto err_clean_legacy;
11811 }
11812 if (legacy) {
11813 struct bpf_link_perf *perf_link = container_of(link, struct bpf_link_perf, link);
11814
11815 perf_link->legacy_probe_name = legacy_probe;
11816 perf_link->legacy_is_kprobe = false;
11817 perf_link->legacy_is_retprobe = retprobe;
11818 }
11819 return link;
11820
11821 err_clean_legacy:
11822 if (legacy)
11823 remove_uprobe_event_legacy(legacy_probe, retprobe);
11824 err_out:
11825 free(legacy_probe);
11826 return libbpf_err_ptr(err);
11827 }
11828
11829 /* Format of u[ret]probe section definition supporting auto-attach:
11830 * u[ret]probe/binary:function[+offset]
11831 *
11832 * binary can be an absolute/relative path or a filename; the latter is resolved to a
11833 * full binary path via bpf_program__attach_uprobe_opts.
11834 *
11835 * Specifying uprobe+ ensures we carry out strict matching; either "uprobe" must be
11836 * specified (and auto-attach is not possible) or the above format is specified for
11837 * auto-attach.
11838 */
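/* A few illustrative section names accepted by the parser below (the
 * binaries and functions are examples only):
 *   SEC("uprobe//usr/lib/libc.so.6:malloc")
 *   SEC("uretprobe/libc.so.6:free")
 *   SEC("uprobe//usr/bin/bash:readline+0x10")
 */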
11839 static int attach_uprobe(const struct bpf_program *prog, long cookie, struct bpf_link **link)
11840 {
11841 DECLARE_LIBBPF_OPTS(bpf_uprobe_opts, opts);
11842 char *probe_type = NULL, *binary_path = NULL, *func_name = NULL, *func_off;
11843 int n, c, ret = -EINVAL;
11844 long offset = 0;
11845
11846 *link = NULL;
11847
11848 n = sscanf(prog->sec_name, "%m[^/]/%m[^:]:%m[^\n]",
11849 &probe_type, &binary_path, &func_name);
11850 switch (n) {
11851 case 1:
11852 /* handle SEC("u[ret]probe") - format is valid, but auto-attach is impossible. */
11853 ret = 0;
11854 break;
11855 case 2:
11856 pr_warn("prog '%s': section '%s' missing ':function[+offset]' specification\n",
11857 prog->name, prog->sec_name);
11858 break;
11859 case 3:
11860 /* check if the user specified `+offset`; if so, it must be the
11861 * last part of the string, so make sure sscanf read up to EOL
11862 */
11863 func_off = strrchr(func_name, '+');
11864 if (func_off) {
11865 n = sscanf(func_off, "+%li%n", &offset, &c);
11866 if (n == 1 && *(func_off + c) == '\0')
11867 func_off[0] = '\0';
11868 else
11869 offset = 0;
11870 }
11871 opts.retprobe = strcmp(probe_type, "uretprobe") == 0 ||
11872 strcmp(probe_type, "uretprobe.s") == 0;
11873 if (opts.retprobe && offset != 0) {
11874 pr_warn("prog '%s': uretprobes do not support offset specification\n",
11875 prog->name);
11876 break;
11877 }
11878 opts.func_name = func_name;
11879 *link = bpf_program__attach_uprobe_opts(prog, -1, binary_path, offset, &opts);
11880 ret = libbpf_get_error(*link);
11881 break;
11882 default:
11883 pr_warn("prog '%s': invalid format of section definition '%s'\n", prog->name,
11884 prog->sec_name);
11885 break;
11886 }
11887 free(probe_type);
11888 free(binary_path);
11889 free(func_name);
11890
11891 return ret;
11892 }
11893
11894 struct bpf_link *bpf_program__attach_uprobe(const struct bpf_program *prog,
11895 bool retprobe, pid_t pid,
11896 const char *binary_path,
11897 size_t func_offset)
11898 {
11899 DECLARE_LIBBPF_OPTS(bpf_uprobe_opts, opts, .retprobe = retprobe);
11900
11901 return bpf_program__attach_uprobe_opts(prog, pid, binary_path, func_offset, &opts);
11902 }
11903
11904 struct bpf_link *bpf_program__attach_usdt(const struct bpf_program *prog,
11905 pid_t pid, const char *binary_path,
11906 const char *usdt_provider, const char *usdt_name,
11907 const struct bpf_usdt_opts *opts)
11908 {
11909 char resolved_path[512];
11910 struct bpf_object *obj = prog->obj;
11911 struct bpf_link *link;
11912 __u64 usdt_cookie;
11913 int err;
11914
11915 if (!OPTS_VALID(opts, bpf_usdt_opts))
11916 return libbpf_err_ptr(-EINVAL);
11917
11918 if (bpf_program__fd(prog) < 0) {
11919 pr_warn("prog '%s': can't attach BPF program w/o FD (did you load it?)\n",
11920 prog->name);
11921 return libbpf_err_ptr(-EINVAL);
11922 }
11923
11924 if (!binary_path)
11925 return libbpf_err_ptr(-EINVAL);
11926
11927 if (!strchr(binary_path, '/')) {
11928 err = resolve_full_path(binary_path, resolved_path, sizeof(resolved_path));
11929 if (err) {
11930 pr_warn("prog '%s': failed to resolve full path for '%s': %d\n",
11931 prog->name, binary_path, err);
11932 return libbpf_err_ptr(err);
11933 }
11934 binary_path = resolved_path;
11935 }
11936
11937 /* USDT manager is instantiated lazily on first USDT attach. It will
11938 * be destroyed together with BPF object in bpf_object__close().
11939 */
11940 if (IS_ERR(obj->usdt_man))
11941 return libbpf_ptr(obj->usdt_man);
11942 if (!obj->usdt_man) {
11943 obj->usdt_man = usdt_manager_new(obj);
11944 if (IS_ERR(obj->usdt_man))
11945 return libbpf_ptr(obj->usdt_man);
11946 }
11947
11948 usdt_cookie = OPTS_GET(opts, usdt_cookie, 0);
11949 link = usdt_manager_attach_usdt(obj->usdt_man, prog, pid, binary_path,
11950 usdt_provider, usdt_name, usdt_cookie);
11951 err = libbpf_get_error(link);
11952 if (err)
11953 return libbpf_err_ptr(err);
11954 return link;
11955 }
11956
11957 static int attach_usdt(const struct bpf_program *prog, long cookie, struct bpf_link **link)
11958 {
11959 char *path = NULL, *provider = NULL, *name = NULL;
11960 const char *sec_name;
11961 int n, err;
11962
11963 sec_name = bpf_program__section_name(prog);
11964 if (strcmp(sec_name, "usdt") == 0) {
11965 /* no auto-attach for just SEC("usdt") */
11966 *link = NULL;
11967 return 0;
11968 }
11969
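/* e.g. SEC("usdt/libc.so.6:libc:setjmp") targets the libc:setjmp USDT in
 * libc.so.6 (an illustrative example; any <path>:<provider>:<name> triple
 * matching the format below is accepted).
 */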
11970 n = sscanf(sec_name, "usdt/%m[^:]:%m[^:]:%m[^:]", &path, &provider, &name);
11971 if (n != 3) {
11972 pr_warn("invalid section '%s', expected SEC(\"usdt/<path>:<provider>:<name>\")\n",
11973 sec_name);
11974 err = -EINVAL;
11975 } else {
11976 *link = bpf_program__attach_usdt(prog, -1 /* any process */, path,
11977 provider, name, NULL);
11978 err = libbpf_get_error(*link);
11979 }
11980 free(path);
11981 free(provider);
11982 free(name);
11983 return err;
11984 }
11985
11986 static int determine_tracepoint_id(const char *tp_category,
11987 const char *tp_name)
11988 {
11989 char file[PATH_MAX];
11990 int ret;
11991
11992 ret = snprintf(file, sizeof(file), "%s/events/%s/%s/id",
11993 tracefs_path(), tp_category, tp_name);
11994 if (ret < 0)
11995 return -errno;
11996 if (ret >= sizeof(file)) {
11997 pr_debug("tracepoint %s/%s path is too long\n",
11998 tp_category, tp_name);
11999 return -E2BIG;
12000 }
12001 return parse_uint_from_file(file, "%d\n");
12002 }
12003
12004 static int perf_event_open_tracepoint(const char *tp_category,
12005 const char *tp_name)
12006 {
12007 const size_t attr_sz = sizeof(struct perf_event_attr);
12008 struct perf_event_attr attr;
12009 char errmsg[STRERR_BUFSIZE];
12010 int tp_id, pfd, err;
12011
12012 tp_id = determine_tracepoint_id(tp_category, tp_name);
12013 if (tp_id < 0) {
12014 pr_warn("failed to determine tracepoint '%s/%s' perf event ID: %s\n",
12015 tp_category, tp_name,
12016 libbpf_strerror_r(tp_id, errmsg, sizeof(errmsg)));
12017 return tp_id;
12018 }
12019
12020 memset(&attr, 0, attr_sz);
12021 attr.type = PERF_TYPE_TRACEPOINT;
12022 attr.size = attr_sz;
12023 attr.config = tp_id;
12024
12025 pfd = syscall(__NR_perf_event_open, &attr, -1 /* pid */, 0 /* cpu */,
12026 -1 /* group_fd */, PERF_FLAG_FD_CLOEXEC);
12027 if (pfd < 0) {
12028 err = -errno;
12029 pr_warn("tracepoint '%s/%s' perf_event_open() failed: %s\n",
12030 tp_category, tp_name,
12031 libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
12032 return err;
12033 }
12034 return pfd;
12035 }
12036
12037 struct bpf_link *bpf_program__attach_tracepoint_opts(const struct bpf_program *prog,
12038 const char *tp_category,
12039 const char *tp_name,
12040 const struct bpf_tracepoint_opts *opts)
12041 {
12042 DECLARE_LIBBPF_OPTS(bpf_perf_event_opts, pe_opts);
12043 char errmsg[STRERR_BUFSIZE];
12044 struct bpf_link *link;
12045 int pfd, err;
12046
12047 if (!OPTS_VALID(opts, bpf_tracepoint_opts))
12048 return libbpf_err_ptr(-EINVAL);
12049
12050 pe_opts.bpf_cookie = OPTS_GET(opts, bpf_cookie, 0);
12051
12052 pfd = perf_event_open_tracepoint(tp_category, tp_name);
12053 if (pfd < 0) {
12054 pr_warn("prog '%s': failed to create tracepoint '%s/%s' perf event: %s\n",
12055 prog->name, tp_category, tp_name,
12056 libbpf_strerror_r(pfd, errmsg, sizeof(errmsg)));
12057 return libbpf_err_ptr(pfd);
12058 }
12059 link = bpf_program__attach_perf_event_opts(prog, pfd, &pe_opts);
12060 err = libbpf_get_error(link);
12061 if (err) {
12062 close(pfd);
12063 pr_warn("prog '%s': failed to attach to tracepoint '%s/%s': %s\n",
12064 prog->name, tp_category, tp_name,
12065 libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
12066 return libbpf_err_ptr(err);
12067 }
12068 return link;
12069 }
12070
12071 struct bpf_link *bpf_program__attach_tracepoint(const struct bpf_program *prog,
12072 const char *tp_category,
12073 const char *tp_name)
12074 {
12075 return bpf_program__attach_tracepoint_opts(prog, tp_category, tp_name, NULL);
12076 }
12077
12078 static int attach_tp(const struct bpf_program *prog, long cookie, struct bpf_link **link)
12079 {
12080 char *sec_name, *tp_cat, *tp_name;
12081
12082 *link = NULL;
12083
12084 /* no auto-attach for SEC("tp") or SEC("tracepoint") */
12085 if (strcmp(prog->sec_name, "tp") == 0 || strcmp(prog->sec_name, "tracepoint") == 0)
12086 return 0;
12087
12088 sec_name = strdup(prog->sec_name);
12089 if (!sec_name)
12090 return -ENOMEM;
12091
12092 /* extract "tp/<category>/<name>" or "tracepoint/<category>/<name>" */
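/* e.g. SEC("tp/syscalls/sys_enter_openat") -> category "syscalls",
 * name "sys_enter_openat" (illustrative; any existing tracepoint works)
 */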
12093 if (str_has_pfx(prog->sec_name, "tp/"))
12094 tp_cat = sec_name + sizeof("tp/") - 1;
12095 else
12096 tp_cat = sec_name + sizeof("tracepoint/") - 1;
12097 tp_name = strchr(tp_cat, '/');
12098 if (!tp_name) {
12099 free(sec_name);
12100 return -EINVAL;
12101 }
12102 *tp_name = '\0';
12103 tp_name++;
12104
12105 *link = bpf_program__attach_tracepoint(prog, tp_cat, tp_name);
12106 free(sec_name);
12107 return libbpf_get_error(*link);
12108 }
12109
12110 struct bpf_link *bpf_program__attach_raw_tracepoint(const struct bpf_program *prog,
12111 const char *tp_name)
12112 {
12113 char errmsg[STRERR_BUFSIZE];
12114 struct bpf_link *link;
12115 int prog_fd, pfd;
12116
12117 prog_fd = bpf_program__fd(prog);
12118 if (prog_fd < 0) {
12119 pr_warn("prog '%s': can't attach before loaded\n", prog->name);
12120 return libbpf_err_ptr(-EINVAL);
12121 }
12122
12123 link = calloc(1, sizeof(*link));
12124 if (!link)
12125 return libbpf_err_ptr(-ENOMEM);
12126 link->detach = &bpf_link__detach_fd;
12127
12128 pfd = bpf_raw_tracepoint_open(tp_name, prog_fd);
12129 if (pfd < 0) {
12130 pfd = -errno;
12131 free(link);
12132 pr_warn("prog '%s': failed to attach to raw tracepoint '%s': %s\n",
12133 prog->name, tp_name, libbpf_strerror_r(pfd, errmsg, sizeof(errmsg)));
12134 return libbpf_err_ptr(pfd);
12135 }
12136 link->fd = pfd;
12137 return link;
12138 }
12139
12140 static int attach_raw_tp(const struct bpf_program *prog, long cookie, struct bpf_link **link)
12141 {
12142 static const char *const prefixes[] = {
12143 "raw_tp",
12144 "raw_tracepoint",
12145 "raw_tp.w",
12146 "raw_tracepoint.w",
12147 };
12148 size_t i;
12149 const char *tp_name = NULL;
12150
12151 *link = NULL;
12152
12153 for (i = 0; i < ARRAY_SIZE(prefixes); i++) {
12154 size_t pfx_len;
12155
12156 if (!str_has_pfx(prog->sec_name, prefixes[i]))
12157 continue;
12158
12159 pfx_len = strlen(prefixes[i]);
12160 /* no auto-attach for a bare prefix, e.g., SEC("raw_tp") */
12161 if (prog->sec_name[pfx_len] == '\0')
12162 return 0;
12163
12164 if (prog->sec_name[pfx_len] != '/')
12165 continue;
12166
12167 tp_name = prog->sec_name + pfx_len + 1;
12168 break;
12169 }
12170
12171 if (!tp_name) {
12172 pr_warn("prog '%s': invalid section name '%s'\n",
12173 prog->name, prog->sec_name);
12174 return -EINVAL;
12175 }
12176
12177 *link = bpf_program__attach_raw_tracepoint(prog, tp_name);
12178 return libbpf_get_error(*link);
12179 }
12180
12181 /* Common logic for all BPF program types that attach to a btf_id */
12182 static struct bpf_link *bpf_program__attach_btf_id(const struct bpf_program *prog,
12183 const struct bpf_trace_opts *opts)
12184 {
12185 LIBBPF_OPTS(bpf_link_create_opts, link_opts);
12186 char errmsg[STRERR_BUFSIZE];
12187 struct bpf_link *link;
12188 int prog_fd, pfd;
12189
12190 if (!OPTS_VALID(opts, bpf_trace_opts))
12191 return libbpf_err_ptr(-EINVAL);
12192
12193 prog_fd = bpf_program__fd(prog);
12194 if (prog_fd < 0) {
12195 pr_warn("prog '%s': can't attach before loaded\n", prog->name);
12196 return libbpf_err_ptr(-EINVAL);
12197 }
12198
12199 link = calloc(1, sizeof(*link));
12200 if (!link)
12201 return libbpf_err_ptr(-ENOMEM);
12202 link->detach = &bpf_link__detach_fd;
12203
12204 /* libbpf is smart enough to redirect to BPF_RAW_TRACEPOINT_OPEN on old kernels */
12205 link_opts.tracing.cookie = OPTS_GET(opts, cookie, 0);
12206 pfd = bpf_link_create(prog_fd, 0, bpf_program__expected_attach_type(prog), &link_opts);
12207 if (pfd < 0) {
12208 pfd = -errno;
12209 free(link);
12210 pr_warn("prog '%s': failed to attach: %s\n",
12211 prog->name, libbpf_strerror_r(pfd, errmsg, sizeof(errmsg)));
12212 return libbpf_err_ptr(pfd);
12213 }
12214 link->fd = pfd;
12215 return link;
12216 }
12217
12218 struct bpf_link *bpf_program__attach_trace(const struct bpf_program *prog)
12219 {
12220 return bpf_program__attach_btf_id(prog, NULL);
12221 }
12222
12223 struct bpf_link *bpf_program__attach_trace_opts(const struct bpf_program *prog,
12224 const struct bpf_trace_opts *opts)
12225 {
12226 return bpf_program__attach_btf_id(prog, opts);
12227 }
12228
12229 struct bpf_link *bpf_program__attach_lsm(const struct bpf_program *prog)
12230 {
12231 return bpf_program__attach_btf_id(prog, NULL);
12232 }
12233
12234 static int attach_trace(const struct bpf_program *prog, long cookie, struct bpf_link **link)
12235 {
12236 *link = bpf_program__attach_trace(prog);
12237 return libbpf_get_error(*link);
12238 }
12239
12240 static int attach_lsm(const struct bpf_program *prog, long cookie, struct bpf_link **link)
12241 {
12242 *link = bpf_program__attach_lsm(prog);
12243 return libbpf_get_error(*link);
12244 }
12245
12246 static struct bpf_link *
12247 bpf_program_attach_fd(const struct bpf_program *prog,
12248 int target_fd, const char *target_name,
12249 const struct bpf_link_create_opts *opts)
12250 {
12251 enum bpf_attach_type attach_type;
12252 char errmsg[STRERR_BUFSIZE];
12253 struct bpf_link *link;
12254 int prog_fd, link_fd;
12255
12256 prog_fd = bpf_program__fd(prog);
12257 if (prog_fd < 0) {
12258 pr_warn("prog '%s': can't attach before loaded\n", prog->name);
12259 return libbpf_err_ptr(-EINVAL);
12260 }
12261
12262 link = calloc(1, sizeof(*link));
12263 if (!link)
12264 return libbpf_err_ptr(-ENOMEM);
12265 link->detach = &bpf_link__detach_fd;
12266
12267 attach_type = bpf_program__expected_attach_type(prog);
12268 link_fd = bpf_link_create(prog_fd, target_fd, attach_type, opts);
12269 if (link_fd < 0) {
12270 link_fd = -errno;
12271 free(link);
12272 pr_warn("prog '%s': failed to attach to %s: %s\n",
12273 prog->name, target_name,
12274 libbpf_strerror_r(link_fd, errmsg, sizeof(errmsg)));
12275 return libbpf_err_ptr(link_fd);
12276 }
12277 link->fd = link_fd;
12278 return link;
12279 }
12280
12281 struct bpf_link *
12282 bpf_program__attach_cgroup(const struct bpf_program *prog, int cgroup_fd)
12283 {
12284 return bpf_program_attach_fd(prog, cgroup_fd, "cgroup", NULL);
12285 }
12286
12287 struct bpf_link *
12288 bpf_program__attach_netns(const struct bpf_program *prog, int netns_fd)
12289 {
12290 return bpf_program_attach_fd(prog, netns_fd, "netns", NULL);
12291 }
12292
12293 struct bpf_link *bpf_program__attach_xdp(const struct bpf_program *prog, int ifindex)
12294 {
12295 /* target_fd/target_ifindex use the same field in LINK_CREATE */
12296 return bpf_program_attach_fd(prog, ifindex, "xdp", NULL);
12297 }
12298
12299 struct bpf_link *
12300 bpf_program__attach_tcx(const struct bpf_program *prog, int ifindex,
12301 const struct bpf_tcx_opts *opts)
12302 {
12303 LIBBPF_OPTS(bpf_link_create_opts, link_create_opts);
12304 __u32 relative_id;
12305 int relative_fd;
12306
12307 if (!OPTS_VALID(opts, bpf_tcx_opts))
12308 return libbpf_err_ptr(-EINVAL);
12309
12310 relative_id = OPTS_GET(opts, relative_id, 0);
12311 relative_fd = OPTS_GET(opts, relative_fd, 0);
12312
12313 /* validate we don't have unexpected combinations of non-zero fields */
12314 if (!ifindex) {
12315 pr_warn("prog '%s': target netdevice ifindex cannot be zero\n",
12316 prog->name);
12317 return libbpf_err_ptr(-EINVAL);
12318 }
12319 if (relative_fd && relative_id) {
12320 pr_warn("prog '%s': relative_fd and relative_id cannot be set at the same time\n",
12321 prog->name);
12322 return libbpf_err_ptr(-EINVAL);
12323 }
12324
12325 link_create_opts.tcx.expected_revision = OPTS_GET(opts, expected_revision, 0);
12326 link_create_opts.tcx.relative_fd = relative_fd;
12327 link_create_opts.tcx.relative_id = relative_id;
12328 link_create_opts.flags = OPTS_GET(opts, flags, 0);
12329
12330 /* target_fd/target_ifindex use the same field in LINK_CREATE */
12331 return bpf_program_attach_fd(prog, ifindex, "tcx", &link_create_opts);
12332 }
12333
12334 struct bpf_link *
12335 bpf_program__attach_netkit(const struct bpf_program *prog, int ifindex,
12336 const struct bpf_netkit_opts *opts)
12337 {
12338 LIBBPF_OPTS(bpf_link_create_opts, link_create_opts);
12339 __u32 relative_id;
12340 int relative_fd;
12341
12342 if (!OPTS_VALID(opts, bpf_netkit_opts))
12343 return libbpf_err_ptr(-EINVAL);
12344
12345 relative_id = OPTS_GET(opts, relative_id, 0);
12346 relative_fd = OPTS_GET(opts, relative_fd, 0);
12347
12348 /* validate we don't have unexpected combinations of non-zero fields */
12349 if (!ifindex) {
12350 pr_warn("prog '%s': target netdevice ifindex cannot be zero\n",
12351 prog->name);
12352 return libbpf_err_ptr(-EINVAL);
12353 }
12354 if (relative_fd && relative_id) {
12355 pr_warn("prog '%s': relative_fd and relative_id cannot be set at the same time\n",
12356 prog->name);
12357 return libbpf_err_ptr(-EINVAL);
12358 }
12359
12360 link_create_opts.netkit.expected_revision = OPTS_GET(opts, expected_revision, 0);
12361 link_create_opts.netkit.relative_fd = relative_fd;
12362 link_create_opts.netkit.relative_id = relative_id;
12363 link_create_opts.flags = OPTS_GET(opts, flags, 0);
12364
12365 return bpf_program_attach_fd(prog, ifindex, "netkit", &link_create_opts);
12366 }
12367
12368 struct bpf_link *bpf_program__attach_freplace(const struct bpf_program *prog,
12369 int target_fd,
12370 const char *attach_func_name)
12371 {
12372 int btf_id;
12373
12374 if (!!target_fd != !!attach_func_name) {
12375 pr_warn("prog '%s': supply none or both of target_fd and attach_func_name\n",
12376 prog->name);
12377 return libbpf_err_ptr(-EINVAL);
12378 }
12379
12380 if (prog->type != BPF_PROG_TYPE_EXT) {
12381 pr_warn("prog '%s': only BPF_PROG_TYPE_EXT can attach as freplace",
12382 prog->name);
12383 return libbpf_err_ptr(-EINVAL);
12384 }
12385
12386 if (target_fd) {
12387 LIBBPF_OPTS(bpf_link_create_opts, target_opts);
12388
12389 btf_id = libbpf_find_prog_btf_id(attach_func_name, target_fd);
12390 if (btf_id < 0)
12391 return libbpf_err_ptr(btf_id);
12392
12393 target_opts.target_btf_id = btf_id;
12394
12395 return bpf_program_attach_fd(prog, target_fd, "freplace",
12396 &target_opts);
12397 } else {
12398 /* no target, so use raw_tracepoint_open for compatibility
12399 * with old kernels
12400 */
12401 return bpf_program__attach_trace(prog);
12402 }
12403 }
12404
12405 struct bpf_link *
12406 bpf_program__attach_iter(const struct bpf_program *prog,
12407 const struct bpf_iter_attach_opts *opts)
12408 {
12409 DECLARE_LIBBPF_OPTS(bpf_link_create_opts, link_create_opts);
12410 char errmsg[STRERR_BUFSIZE];
12411 struct bpf_link *link;
12412 int prog_fd, link_fd;
12413 __u32 target_fd = 0;
12414
12415 if (!OPTS_VALID(opts, bpf_iter_attach_opts))
12416 return libbpf_err_ptr(-EINVAL);
12417
12418 link_create_opts.iter_info = OPTS_GET(opts, link_info, (void *)0);
12419 link_create_opts.iter_info_len = OPTS_GET(opts, link_info_len, 0);
12420
12421 prog_fd = bpf_program__fd(prog);
12422 if (prog_fd < 0) {
12423 pr_warn("prog '%s': can't attach before loaded\n", prog->name);
12424 return libbpf_err_ptr(-EINVAL);
12425 }
12426
12427 link = calloc(1, sizeof(*link));
12428 if (!link)
12429 return libbpf_err_ptr(-ENOMEM);
12430 link->detach = &bpf_link__detach_fd;
12431
12432 link_fd = bpf_link_create(prog_fd, target_fd, BPF_TRACE_ITER,
12433 &link_create_opts);
12434 if (link_fd < 0) {
12435 link_fd = -errno;
12436 free(link);
12437 pr_warn("prog '%s': failed to attach to iterator: %s\n",
12438 prog->name, libbpf_strerror_r(link_fd, errmsg, sizeof(errmsg)));
12439 return libbpf_err_ptr(link_fd);
12440 }
12441 link->fd = link_fd;
12442 return link;
12443 }
12444
12445 static int attach_iter(const struct bpf_program *prog, long cookie, struct bpf_link **link)
12446 {
12447 *link = bpf_program__attach_iter(prog, NULL);
12448 return libbpf_get_error(*link);
12449 }
12450
12451 struct bpf_link *bpf_program__attach_netfilter(const struct bpf_program *prog,
12452 const struct bpf_netfilter_opts *opts)
12453 {
12454 LIBBPF_OPTS(bpf_link_create_opts, lopts);
12455 struct bpf_link *link;
12456 int prog_fd, link_fd;
12457
12458 if (!OPTS_VALID(opts, bpf_netfilter_opts))
12459 return libbpf_err_ptr(-EINVAL);
12460
12461 prog_fd = bpf_program__fd(prog);
12462 if (prog_fd < 0) {
12463 pr_warn("prog '%s': can't attach before loaded\n", prog->name);
12464 return libbpf_err_ptr(-EINVAL);
12465 }
12466
12467 link = calloc(1, sizeof(*link));
12468 if (!link)
12469 return libbpf_err_ptr(-ENOMEM);
12470
12471 link->detach = &bpf_link__detach_fd;
12472
12473 lopts.netfilter.pf = OPTS_GET(opts, pf, 0);
12474 lopts.netfilter.hooknum = OPTS_GET(opts, hooknum, 0);
12475 lopts.netfilter.priority = OPTS_GET(opts, priority, 0);
12476 lopts.netfilter.flags = OPTS_GET(opts, flags, 0);
12477
12478 link_fd = bpf_link_create(prog_fd, 0, BPF_NETFILTER, &lopts);
12479 if (link_fd < 0) {
12480 char errmsg[STRERR_BUFSIZE];
12481
12482 link_fd = -errno;
12483 free(link);
12484 pr_warn("prog '%s': failed to attach to netfilter: %s\n",
12485 prog->name, libbpf_strerror_r(link_fd, errmsg, sizeof(errmsg)));
12486 return libbpf_err_ptr(link_fd);
12487 }
12488 link->fd = link_fd;
12489
12490 return link;
12491 }
12492
12493 struct bpf_link *bpf_program__attach(const struct bpf_program *prog)
12494 {
12495 struct bpf_link *link = NULL;
12496 int err;
12497
12498 if (!prog->sec_def || !prog->sec_def->prog_attach_fn)
12499 return libbpf_err_ptr(-EOPNOTSUPP);
12500
12501 err = prog->sec_def->prog_attach_fn(prog, prog->sec_def->cookie, &link);
12502 if (err)
12503 return libbpf_err_ptr(err);
12504
12505 /* When calling bpf_program__attach() explicitly, auto-attach support
12506 * is expected to work, so a NULL returned link is considered an error.
12507 * This is different for skeleton's attach, see comment in
12508 * bpf_object__attach_skeleton().
12509 */
12510 if (!link)
12511 return libbpf_err_ptr(-EOPNOTSUPP);
12512
12513 return link;
12514 }
12515
12516 struct bpf_link_struct_ops {
12517 struct bpf_link link;
12518 int map_fd;
12519 };
12520
12521 static int bpf_link__detach_struct_ops(struct bpf_link *link)
12522 {
12523 struct bpf_link_struct_ops *st_link;
12524 __u32 zero = 0;
12525
12526 st_link = container_of(link, struct bpf_link_struct_ops, link);
12527
12528 if (st_link->map_fd < 0)
12529 /* w/o a real link */
12530 return bpf_map_delete_elem(link->fd, &zero);
12531
12532 return close(link->fd);
12533 }
12534
12535 struct bpf_link *bpf_map__attach_struct_ops(const struct bpf_map *map)
12536 {
12537 struct bpf_link_struct_ops *link;
12538 __u32 zero = 0;
12539 int err, fd;
12540
12541 if (!bpf_map__is_struct_ops(map) || map->fd == -1)
12542 return libbpf_err_ptr(-EINVAL);
12543
12544 link = calloc(1, sizeof(*link));
12545 if (!link)
12546 return libbpf_err_ptr(-EINVAL);
12547
12548 /* kern_vdata should be prepared during the loading phase. */
12549 err = bpf_map_update_elem(map->fd, &zero, map->st_ops->kern_vdata, 0);
12550 /* It can be EBUSY if the map has been used to create or
12551 * update a link before. We don't allow updating the value of
12552 * a struct_ops once it is set. That ensures that the value
12553 * never changes. So, it is safe to skip EBUSY.
12554 */
12555 if (err && (!(map->def.map_flags & BPF_F_LINK) || err != -EBUSY)) {
12556 free(link);
12557 return libbpf_err_ptr(err);
12558 }
12559
12560 link->link.detach = bpf_link__detach_struct_ops;
12561
12562 if (!(map->def.map_flags & BPF_F_LINK)) {
12563 /* w/o a real link */
12564 link->link.fd = map->fd;
12565 link->map_fd = -1;
12566 return &link->link;
12567 }
12568
12569 fd = bpf_link_create(map->fd, 0, BPF_STRUCT_OPS, NULL);
12570 if (fd < 0) {
12571 free(link);
12572 return libbpf_err_ptr(fd);
12573 }
12574
12575 link->link.fd = fd;
12576 link->map_fd = map->fd;
12577
12578 return &link->link;
12579 }
12580
12581 /*
12582 * Swap the backing struct_ops map of a link with a new struct_ops map.
12583 */
12584 int bpf_link__update_map(struct bpf_link *link, const struct bpf_map *map)
12585 {
12586 struct bpf_link_struct_ops *st_ops_link;
12587 __u32 zero = 0;
12588 int err;
12589
12590 if (!bpf_map__is_struct_ops(map) || !map_is_created(map))
12591 return -EINVAL;
12592
12593 st_ops_link = container_of(link, struct bpf_link_struct_ops, link);
12594 /* Ensure the type of a link is correct */
12595 if (st_ops_link->map_fd < 0)
12596 return -EINVAL;
12597
12598 err = bpf_map_update_elem(map->fd, &zero, map->st_ops->kern_vdata, 0);
12599 /* It can be EBUSY if the map has been used to create or
12600 * update a link before. We don't allow updating the value of
12601 * a struct_ops once it is set. That ensures that the value
12602 * never changes. So, it is safe to skip EBUSY.
12603 */
12604 if (err && err != -EBUSY)
12605 return err;
12606
12607 err = bpf_link_update(link->fd, map->fd, NULL);
12608 if (err < 0)
12609 return err;
12610
12611 st_ops_link->map_fd = map->fd;
12612
12613 return 0;
12614 }
12615
12616 typedef enum bpf_perf_event_ret (*bpf_perf_event_print_t)(struct perf_event_header *hdr,
12617 void *private_data);
12618
12619 static enum bpf_perf_event_ret
12620 perf_event_read_simple(void *mmap_mem, size_t mmap_size, size_t page_size,
12621 void **copy_mem, size_t *copy_size,
12622 bpf_perf_event_print_t fn, void *private_data)
12623 {
12624 struct perf_event_mmap_page *header = mmap_mem;
12625 __u64 data_head = ring_buffer_read_head(header);
12626 __u64 data_tail = header->data_tail;
12627 void *base = ((__u8 *)header) + page_size;
12628 int ret = LIBBPF_PERF_EVENT_CONT;
12629 struct perf_event_header *ehdr;
12630 size_t ehdr_size;
12631
12632 while (data_head != data_tail) {
12633 ehdr = base + (data_tail & (mmap_size - 1));
12634 ehdr_size = ehdr->size;
12635
12636 if (((void *)ehdr) + ehdr_size > base + mmap_size) {
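/* record wraps around the end of the mmap'ed ring: reassemble it into
 * the (re)allocated copy_mem bounce buffer before invoking the callback
 */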
12637 void *copy_start = ehdr;
12638 size_t len_first = base + mmap_size - copy_start;
12639 size_t len_secnd = ehdr_size - len_first;
12640
12641 if (*copy_size < ehdr_size) {
12642 free(*copy_mem);
12643 *copy_mem = malloc(ehdr_size);
12644 if (!*copy_mem) {
12645 *copy_size = 0;
12646 ret = LIBBPF_PERF_EVENT_ERROR;
12647 break;
12648 }
12649 *copy_size = ehdr_size;
12650 }
12651
12652 memcpy(*copy_mem, copy_start, len_first);
12653 memcpy(*copy_mem + len_first, base, len_secnd);
12654 ehdr = *copy_mem;
12655 }
12656
12657 ret = fn(ehdr, private_data);
12658 data_tail += ehdr_size;
12659 if (ret != LIBBPF_PERF_EVENT_CONT)
12660 break;
12661 }
12662
12663 ring_buffer_write_tail(header, data_tail);
12664 return libbpf_err(ret);
12665 }
12666
12667 struct perf_buffer;
12668
12669 struct perf_buffer_params {
12670 struct perf_event_attr *attr;
12671 /* if event_cb is specified, it takes precedence */
12672 perf_buffer_event_fn event_cb;
12673 /* sample_cb and lost_cb are higher-level common-case callbacks */
12674 perf_buffer_sample_fn sample_cb;
12675 perf_buffer_lost_fn lost_cb;
12676 void *ctx;
12677 int cpu_cnt;
12678 int *cpus;
12679 int *map_keys;
12680 };
12681
12682 struct perf_cpu_buf {
12683 struct perf_buffer *pb;
12684 void *base; /* mmap()'ed memory */
12685 void *buf; /* for reconstructing segmented data */
12686 size_t buf_size;
12687 int fd;
12688 int cpu;
12689 int map_key;
12690 };
12691
12692 struct perf_buffer {
12693 perf_buffer_event_fn event_cb;
12694 perf_buffer_sample_fn sample_cb;
12695 perf_buffer_lost_fn lost_cb;
12696 void *ctx; /* passed into callbacks */
12697
12698 size_t page_size;
12699 size_t mmap_size;
12700 struct perf_cpu_buf **cpu_bufs;
12701 struct epoll_event *events;
12702 int cpu_cnt; /* number of allocated CPU buffers */
12703 int epoll_fd; /* epoll instance FD */
12704 int map_fd; /* BPF_MAP_TYPE_PERF_EVENT_ARRAY BPF map FD */
12705 };
12706
12707 static void perf_buffer__free_cpu_buf(struct perf_buffer *pb,
12708 struct perf_cpu_buf *cpu_buf)
12709 {
12710 if (!cpu_buf)
12711 return;
12712 if (cpu_buf->base &&
12713 munmap(cpu_buf->base, pb->mmap_size + pb->page_size))
12714 pr_warn("failed to munmap cpu_buf #%d\n", cpu_buf->cpu);
12715 if (cpu_buf->fd >= 0) {
12716 ioctl(cpu_buf->fd, PERF_EVENT_IOC_DISABLE, 0);
12717 close(cpu_buf->fd);
12718 }
12719 free(cpu_buf->buf);
12720 free(cpu_buf);
12721 }
12722
12723 void perf_buffer__free(struct perf_buffer *pb)
12724 {
12725 int i;
12726
12727 if (IS_ERR_OR_NULL(pb))
12728 return;
12729 if (pb->cpu_bufs) {
12730 for (i = 0; i < pb->cpu_cnt; i++) {
12731 struct perf_cpu_buf *cpu_buf = pb->cpu_bufs[i];
12732
12733 if (!cpu_buf)
12734 continue;
12735
12736 bpf_map_delete_elem(pb->map_fd, &cpu_buf->map_key);
12737 perf_buffer__free_cpu_buf(pb, cpu_buf);
12738 }
12739 free(pb->cpu_bufs);
12740 }
12741 if (pb->epoll_fd >= 0)
12742 close(pb->epoll_fd);
12743 free(pb->events);
12744 free(pb);
12745 }
12746
12747 static struct perf_cpu_buf *
12748 perf_buffer__open_cpu_buf(struct perf_buffer *pb, struct perf_event_attr *attr,
12749 int cpu, int map_key)
12750 {
12751 struct perf_cpu_buf *cpu_buf;
12752 char msg[STRERR_BUFSIZE];
12753 int err;
12754
12755 cpu_buf = calloc(1, sizeof(*cpu_buf));
12756 if (!cpu_buf)
12757 return ERR_PTR(-ENOMEM);
12758
12759 cpu_buf->pb = pb;
12760 cpu_buf->cpu = cpu;
12761 cpu_buf->map_key = map_key;
12762
12763 cpu_buf->fd = syscall(__NR_perf_event_open, attr, -1 /* pid */, cpu,
12764 -1, PERF_FLAG_FD_CLOEXEC);
12765 if (cpu_buf->fd < 0) {
12766 err = -errno;
12767 pr_warn("failed to open perf buffer event on cpu #%d: %s\n",
12768 cpu, libbpf_strerror_r(err, msg, sizeof(msg)));
12769 goto error;
12770 }
12771
12772 cpu_buf->base = mmap(NULL, pb->mmap_size + pb->page_size,
12773 PROT_READ | PROT_WRITE, MAP_SHARED,
12774 cpu_buf->fd, 0);
12775 if (cpu_buf->base == MAP_FAILED) {
12776 cpu_buf->base = NULL;
12777 err = -errno;
12778 pr_warn("failed to mmap perf buffer on cpu #%d: %s\n",
12779 cpu, libbpf_strerror_r(err, msg, sizeof(msg)));
12780 goto error;
12781 }
12782
12783 if (ioctl(cpu_buf->fd, PERF_EVENT_IOC_ENABLE, 0) < 0) {
12784 err = -errno;
12785 pr_warn("failed to enable perf buffer event on cpu #%d: %s\n",
12786 cpu, libbpf_strerror_r(err, msg, sizeof(msg)));
12787 goto error;
12788 }
12789
12790 return cpu_buf;
12791
12792 error:
12793 perf_buffer__free_cpu_buf(pb, cpu_buf);
12794 return (struct perf_cpu_buf *)ERR_PTR(err);
12795 }
12796
12797 static struct perf_buffer *__perf_buffer__new(int map_fd, size_t page_cnt,
12798 struct perf_buffer_params *p);
12799
12800 struct perf_buffer *perf_buffer__new(int map_fd, size_t page_cnt,
12801 perf_buffer_sample_fn sample_cb,
12802 perf_buffer_lost_fn lost_cb,
12803 void *ctx,
12804 const struct perf_buffer_opts *opts)
12805 {
12806 const size_t attr_sz = sizeof(struct perf_event_attr);
12807 struct perf_buffer_params p = {};
12808 struct perf_event_attr attr;
12809 __u32 sample_period;
12810
12811 if (!OPTS_VALID(opts, perf_buffer_opts))
12812 return libbpf_err_ptr(-EINVAL);
12813
12814 sample_period = OPTS_GET(opts, sample_period, 1);
12815 if (!sample_period)
12816 sample_period = 1;
12817
12818 memset(&attr, 0, attr_sz);
12819 attr.size = attr_sz;
12820 attr.config = PERF_COUNT_SW_BPF_OUTPUT;
12821 attr.type = PERF_TYPE_SOFTWARE;
12822 attr.sample_type = PERF_SAMPLE_RAW;
12823 attr.sample_period = sample_period;
12824 attr.wakeup_events = sample_period;
12825
12826 p.attr = &attr;
12827 p.sample_cb = sample_cb;
12828 p.lost_cb = lost_cb;
12829 p.ctx = ctx;
12830
12831 return libbpf_ptr(__perf_buffer__new(map_fd, page_cnt, &p));
12832 }
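/* Minimal caller-side usage sketch (hypothetical names: "events_map_fd" is
 * assumed to be the FD of a BPF_MAP_TYPE_PERF_EVENT_ARRAY map and
 * "handle_sample" to match perf_buffer_sample_fn):
 *
 *	pb = perf_buffer__new(events_map_fd, 64, handle_sample, NULL, NULL, NULL);
 *	if (!pb)
 *		return -errno;
 *	while (!stop)
 *		perf_buffer__poll(pb, 100);
 *	perf_buffer__free(pb);
 */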
12833
12834 struct perf_buffer *perf_buffer__new_raw(int map_fd, size_t page_cnt,
12835 struct perf_event_attr *attr,
12836 perf_buffer_event_fn event_cb, void *ctx,
12837 const struct perf_buffer_raw_opts *opts)
12838 {
12839 struct perf_buffer_params p = {};
12840
12841 if (!attr)
12842 return libbpf_err_ptr(-EINVAL);
12843
12844 if (!OPTS_VALID(opts, perf_buffer_raw_opts))
12845 return libbpf_err_ptr(-EINVAL);
12846
12847 p.attr = attr;
12848 p.event_cb = event_cb;
12849 p.ctx = ctx;
12850 p.cpu_cnt = OPTS_GET(opts, cpu_cnt, 0);
12851 p.cpus = OPTS_GET(opts, cpus, NULL);
12852 p.map_keys = OPTS_GET(opts, map_keys, NULL);
12853
12854 return libbpf_ptr(__perf_buffer__new(map_fd, page_cnt, &p));
12855 }
12856
12857 static struct perf_buffer *__perf_buffer__new(int map_fd, size_t page_cnt,
12858 struct perf_buffer_params *p)
12859 {
12860 const char *online_cpus_file = "/sys/devices/system/cpu/online";
12861 struct bpf_map_info map;
12862 char msg[STRERR_BUFSIZE];
12863 struct perf_buffer *pb;
12864 bool *online = NULL;
12865 __u32 map_info_len;
12866 int err, i, j, n;
12867
12868 if (page_cnt == 0 || (page_cnt & (page_cnt - 1))) {
12869 pr_warn("page count should be power of two, but is %zu\n",
12870 page_cnt);
12871 return ERR_PTR(-EINVAL);
12872 }
12873
12874 /* best-effort sanity checks */
12875 memset(&map, 0, sizeof(map));
12876 map_info_len = sizeof(map);
12877 err = bpf_map_get_info_by_fd(map_fd, &map, &map_info_len);
12878 if (err) {
12879 err = -errno;
12880 /* if BPF_OBJ_GET_INFO_BY_FD is supported, it will return
12881 * -EBADFD, -EFAULT, or -E2BIG on a real error
12882 */
12883 if (err != -EINVAL) {
12884 pr_warn("failed to get map info for map FD %d: %s\n",
12885 map_fd, libbpf_strerror_r(err, msg, sizeof(msg)));
12886 return ERR_PTR(err);
12887 }
12888 pr_debug("failed to get map info for FD %d; API not supported? Ignoring...\n",
12889 map_fd);
12890 } else {
12891 if (map.type != BPF_MAP_TYPE_PERF_EVENT_ARRAY) {
12892 pr_warn("map '%s' should be BPF_MAP_TYPE_PERF_EVENT_ARRAY\n",
12893 map.name);
12894 return ERR_PTR(-EINVAL);
12895 }
12896 }
12897
12898 pb = calloc(1, sizeof(*pb));
12899 if (!pb)
12900 return ERR_PTR(-ENOMEM);
12901
12902 pb->event_cb = p->event_cb;
12903 pb->sample_cb = p->sample_cb;
12904 pb->lost_cb = p->lost_cb;
12905 pb->ctx = p->ctx;
12906
12907 pb->page_size = getpagesize();
12908 pb->mmap_size = pb->page_size * page_cnt;
12909 pb->map_fd = map_fd;
12910
12911 pb->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
12912 if (pb->epoll_fd < 0) {
12913 err = -errno;
12914 pr_warn("failed to create epoll instance: %s\n",
12915 libbpf_strerror_r(err, msg, sizeof(msg)));
12916 goto error;
12917 }
12918
12919 if (p->cpu_cnt > 0) {
12920 pb->cpu_cnt = p->cpu_cnt;
12921 } else {
12922 pb->cpu_cnt = libbpf_num_possible_cpus();
12923 if (pb->cpu_cnt < 0) {
12924 err = pb->cpu_cnt;
12925 goto error;
12926 }
12927 if (map.max_entries && map.max_entries < pb->cpu_cnt)
12928 pb->cpu_cnt = map.max_entries;
12929 }
12930
12931 pb->events = calloc(pb->cpu_cnt, sizeof(*pb->events));
12932 if (!pb->events) {
12933 err = -ENOMEM;
12934 pr_warn("failed to allocate events: out of memory\n");
12935 goto error;
12936 }
12937 pb->cpu_bufs = calloc(pb->cpu_cnt, sizeof(*pb->cpu_bufs));
12938 if (!pb->cpu_bufs) {
12939 err = -ENOMEM;
12940 pr_warn("failed to allocate buffers: out of memory\n");
12941 goto error;
12942 }
12943
12944 err = parse_cpu_mask_file(online_cpus_file, &online, &n);
12945 if (err) {
12946 pr_warn("failed to get online CPU mask: %d\n", err);
12947 goto error;
12948 }
12949
12950 for (i = 0, j = 0; i < pb->cpu_cnt; i++) {
12951 struct perf_cpu_buf *cpu_buf;
12952 int cpu, map_key;
12953
12954 cpu = p->cpu_cnt > 0 ? p->cpus[i] : i;
12955 map_key = p->cpu_cnt > 0 ? p->map_keys[i] : i;
12956
12957 /* in case the user didn't explicitly request particular CPUs to
12958 * be attached to, skip offline/not-present CPUs
12959 */
12960 if (p->cpu_cnt <= 0 && (cpu >= n || !online[cpu]))
12961 continue;
12962
12963 cpu_buf = perf_buffer__open_cpu_buf(pb, p->attr, cpu, map_key);
12964 if (IS_ERR(cpu_buf)) {
12965 err = PTR_ERR(cpu_buf);
12966 goto error;
12967 }
12968
12969 pb->cpu_bufs[j] = cpu_buf;
12970
12971 err = bpf_map_update_elem(pb->map_fd, &map_key,
12972 &cpu_buf->fd, 0);
12973 if (err) {
12974 err = -errno;
12975 pr_warn("failed to set cpu #%d, key %d -> perf FD %d: %s\n",
12976 cpu, map_key, cpu_buf->fd,
12977 libbpf_strerror_r(err, msg, sizeof(msg)));
12978 goto error;
12979 }
12980
12981 pb->events[j].events = EPOLLIN;
12982 pb->events[j].data.ptr = cpu_buf;
12983 if (epoll_ctl(pb->epoll_fd, EPOLL_CTL_ADD, cpu_buf->fd,
12984 &pb->events[j]) < 0) {
12985 err = -errno;
12986 pr_warn("failed to epoll_ctl cpu #%d perf FD %d: %s\n",
12987 cpu, cpu_buf->fd,
12988 libbpf_strerror_r(err, msg, sizeof(msg)));
12989 goto error;
12990 }
12991 j++;
12992 }
12993 pb->cpu_cnt = j;
12994 free(online);
12995
12996 return pb;
12997
12998 error:
12999 free(online);
13000 if (pb)
13001 perf_buffer__free(pb);
13002 return ERR_PTR(err);
13003 }
13004
13005 struct perf_sample_raw {
13006 struct perf_event_header header;
13007 uint32_t size;
13008 char data[];
13009 };
13010
13011 struct perf_sample_lost {
13012 struct perf_event_header header;
13013 uint64_t id;
13014 uint64_t lost;
13015 uint64_t sample_id;
13016 };
13017
13018 static enum bpf_perf_event_ret
13019 perf_buffer__process_record(struct perf_event_header *e, void *ctx)
13020 {
13021 struct perf_cpu_buf *cpu_buf = ctx;
13022 struct perf_buffer *pb = cpu_buf->pb;
13023 void *data = e;
13024
13025 /* user wants full control over parsing perf event */
13026 if (pb->event_cb)
13027 return pb->event_cb(pb->ctx, cpu_buf->cpu, e);
13028
13029 switch (e->type) {
13030 case PERF_RECORD_SAMPLE: {
13031 struct perf_sample_raw *s = data;
13032
13033 if (pb->sample_cb)
13034 pb->sample_cb(pb->ctx, cpu_buf->cpu, s->data, s->size);
13035 break;
13036 }
13037 case PERF_RECORD_LOST: {
13038 struct perf_sample_lost *s = data;
13039
13040 if (pb->lost_cb)
13041 pb->lost_cb(pb->ctx, cpu_buf->cpu, s->lost);
13042 break;
13043 }
13044 default:
13045 pr_warn("unknown perf sample type %d\n", e->type);
13046 return LIBBPF_PERF_EVENT_ERROR;
13047 }
13048 return LIBBPF_PERF_EVENT_CONT;
13049 }
13050
13051 static int perf_buffer__process_records(struct perf_buffer *pb,
13052 struct perf_cpu_buf *cpu_buf)
13053 {
13054 enum bpf_perf_event_ret ret;
13055
13056 ret = perf_event_read_simple(cpu_buf->base, pb->mmap_size,
13057 pb->page_size, &cpu_buf->buf,
13058 &cpu_buf->buf_size,
13059 perf_buffer__process_record, cpu_buf);
13060 if (ret != LIBBPF_PERF_EVENT_CONT)
13061 return ret;
13062 return 0;
13063 }
13064
13065 int perf_buffer__epoll_fd(const struct perf_buffer *pb)
13066 {
13067 return pb->epoll_fd;
13068 }
13069
13070 int perf_buffer__poll(struct perf_buffer *pb, int timeout_ms)
13071 {
13072 int i, cnt, err;
13073
13074 cnt = epoll_wait(pb->epoll_fd, pb->events, pb->cpu_cnt, timeout_ms);
13075 if (cnt < 0)
13076 return -errno;
13077
13078 for (i = 0; i < cnt; i++) {
13079 struct perf_cpu_buf *cpu_buf = pb->events[i].data.ptr;
13080
13081 err = perf_buffer__process_records(pb, cpu_buf);
13082 if (err) {
13083 pr_warn("error while processing records: %d\n", err);
13084 return libbpf_err(err);
13085 }
13086 }
13087 return cnt;
13088 }
13089
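/* Illustrative only, not part of libbpf: a caller that already runs its own
 * event loop can register perf_buffer__epoll_fd() with that loop instead of
 * blocking in perf_buffer__poll(), and drain pending data with
 * perf_buffer__consume() once the descriptor becomes readable. Sketch,
 * assuming an existing epoll instance my_epoll_fd:
 *
 *	struct epoll_event ev = {
 *		.events = EPOLLIN,
 *		.data.ptr = pb,
 *	};
 *
 *	epoll_ctl(my_epoll_fd, EPOLL_CTL_ADD, perf_buffer__epoll_fd(pb), &ev);
 *	...
 *	// when the fd is reported readable by the loop:
 *	perf_buffer__consume(pb);
 */
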
13090 /* Return number of PERF_EVENT_ARRAY map slots set up by this perf_buffer
13091 * manager.
13092 */
13093 size_t perf_buffer__buffer_cnt(const struct perf_buffer *pb)
13094 {
13095 return pb->cpu_cnt;
13096 }
13097
13098 /*
13099 * Return perf_event FD of a ring buffer in *buf_idx* slot of
13100 * PERF_EVENT_ARRAY BPF map. This FD can be polled for new data using
13101 * select()/poll()/epoll() Linux syscalls.
13102 */
13103 int perf_buffer__buffer_fd(const struct perf_buffer *pb, size_t buf_idx)
13104 {
13105 struct perf_cpu_buf *cpu_buf;
13106
13107 if (buf_idx >= pb->cpu_cnt)
13108 return libbpf_err(-EINVAL);
13109
13110 cpu_buf = pb->cpu_bufs[buf_idx];
13111 if (!cpu_buf)
13112 return libbpf_err(-ENOENT);
13113
13114 return cpu_buf->fd;
13115 }
13116
13117 int perf_buffer__buffer(struct perf_buffer *pb, int buf_idx, void **buf, size_t *buf_size)
13118 {
13119 struct perf_cpu_buf *cpu_buf;
13120
13121 if (buf_idx >= pb->cpu_cnt)
13122 return libbpf_err(-EINVAL);
13123
13124 cpu_buf = pb->cpu_bufs[buf_idx];
13125 if (!cpu_buf)
13126 return libbpf_err(-ENOENT);
13127
13128 *buf = cpu_buf->base;
13129 *buf_size = pb->mmap_size;
13130 return 0;
13131 }
13132
13133 /*
13134 * Consume data from perf ring buffer corresponding to slot *buf_idx* in
13135 * PERF_EVENT_ARRAY BPF map without waiting/polling. If there is no data to
13136 * consume, do nothing and return success.
13137 * Returns:
13138 * - 0 on success;
13139 * - <0 on failure.
13140 */
13141 int perf_buffer__consume_buffer(struct perf_buffer *pb, size_t buf_idx)
13142 {
13143 struct perf_cpu_buf *cpu_buf;
13144
13145 if (buf_idx >= pb->cpu_cnt)
13146 return libbpf_err(-EINVAL);
13147
13148 cpu_buf = pb->cpu_bufs[buf_idx];
13149 if (!cpu_buf)
13150 return libbpf_err(-ENOENT);
13151
13152 return perf_buffer__process_records(pb, cpu_buf);
13153 }
13154
13155 int perf_buffer__consume(struct perf_buffer *pb)
13156 {
13157 int i, err;
13158
13159 for (i = 0; i < pb->cpu_cnt; i++) {
13160 struct perf_cpu_buf *cpu_buf = pb->cpu_bufs[i];
13161
13162 if (!cpu_buf)
13163 continue;
13164
13165 err = perf_buffer__process_records(pb, cpu_buf);
13166 if (err) {
13167 pr_warn("perf_buffer: failed to process records in buffer #%d: %d\n", i, err);
13168 return libbpf_err(err);
13169 }
13170 }
13171 return 0;
13172 }
13173
13174 int bpf_program__set_attach_target(struct bpf_program *prog,
13175 int attach_prog_fd,
13176 const char *attach_func_name)
13177 {
13178 int btf_obj_fd = 0, btf_id = 0, err;
13179
13180 if (!prog || attach_prog_fd < 0)
13181 return libbpf_err(-EINVAL);
13182
13183 if (prog->obj->loaded)
13184 return libbpf_err(-EINVAL);
13185
13186 if (attach_prog_fd && !attach_func_name) {
13187 /* remember attach_prog_fd and let bpf_program__load() find
13188 * BTF ID during the program load
13189 */
13190 prog->attach_prog_fd = attach_prog_fd;
13191 return 0;
13192 }
13193
13194 if (attach_prog_fd) {
13195 btf_id = libbpf_find_prog_btf_id(attach_func_name,
13196 attach_prog_fd);
13197 if (btf_id < 0)
13198 return libbpf_err(btf_id);
13199 } else {
13200 if (!attach_func_name)
13201 return libbpf_err(-EINVAL);
13202
13203 /* load btf_vmlinux, if not yet */
13204 err = bpf_object__load_vmlinux_btf(prog->obj, true);
13205 if (err)
13206 return libbpf_err(err);
13207 err = find_kernel_btf_id(prog->obj, attach_func_name,
13208 prog->expected_attach_type,
13209 &btf_obj_fd, &btf_id);
13210 if (err)
13211 return libbpf_err(err);
13212 }
13213
13214 prog->attach_btf_id = btf_id;
13215 prog->attach_btf_obj_fd = btf_obj_fd;
13216 prog->attach_prog_fd = attach_prog_fd;
13217 return 0;
13218 }
13219
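/* Illustrative only, not part of libbpf: bpf_program__set_attach_target()
 * lets an application pick the attach target at runtime, after open but
 * before load. Sketch for an fentry program; the object file name and the
 * program name "handler" are assumptions:
 *
 *	struct bpf_object *obj = bpf_object__open_file("prog.bpf.o", NULL);
 *	struct bpf_program *prog;
 *	int err;
 *
 *	prog = bpf_object__find_program_by_name(obj, "handler");
 *	// attach_prog_fd == 0 targets a kernel function by name
 *	err = bpf_program__set_attach_target(prog, 0, "tcp_v4_connect");
 *	if (!err)
 *		err = bpf_object__load(obj);
 */
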
13220 int parse_cpu_mask_str(const char *s, bool **mask, int *mask_sz)
13221 {
13222 int err = 0, n, len, start, end = -1;
13223 bool *tmp;
13224
13225 *mask = NULL;
13226 *mask_sz = 0;
13227
13228 /* Each substring separated by ',' has the format \d+-\d+ or \d+ */
13229 while (*s) {
13230 if (*s == ',' || *s == '\n') {
13231 s++;
13232 continue;
13233 }
13234 n = sscanf(s, "%d%n-%d%n", &start, &len, &end, &len);
13235 if (n <= 0 || n > 2) {
13236 pr_warn("Failed to get CPU range %s: %d\n", s, n);
13237 err = -EINVAL;
13238 goto cleanup;
13239 } else if (n == 1) {
13240 end = start;
13241 }
13242 if (start < 0 || start > end) {
13243 pr_warn("Invalid CPU range [%d,%d] in %s\n",
13244 start, end, s);
13245 err = -EINVAL;
13246 goto cleanup;
13247 }
13248 tmp = realloc(*mask, end + 1);
13249 if (!tmp) {
13250 err = -ENOMEM;
13251 goto cleanup;
13252 }
13253 *mask = tmp;
13254 memset(tmp + *mask_sz, 0, start - *mask_sz);
13255 memset(tmp + start, 1, end - start + 1);
13256 *mask_sz = end + 1;
13257 s += len;
13258 }
13259 if (!*mask_sz) {
13260 pr_warn("Empty CPU range\n");
13261 return -EINVAL;
13262 }
13263 return 0;
13264 cleanup:
13265 free(*mask);
13266 *mask = NULL;
13267 return err;
13268 }
13269
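/* Illustrative only: parse_cpu_mask_str() accepts the kernel's CPU list
 * format. For example, the string "0-2,5" yields *mask_sz == 6 with
 * mask[0..2] == true, mask[3..4] == false, and mask[5] == true:
 *
 *	bool *mask;
 *	int n, err;
 *
 *	err = parse_cpu_mask_str("0-2,5\n", &mask, &n);
 *	if (!err) {
 *		// n == 6 here; the caller owns and must free the mask
 *		free(mask);
 *	}
 */
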
13270 int parse_cpu_mask_file(const char *fcpu, bool **mask, int *mask_sz)
13271 {
13272 int fd, err = 0, len;
13273 char buf[128];
13274
13275 fd = open(fcpu, O_RDONLY | O_CLOEXEC);
13276 if (fd < 0) {
13277 err = -errno;
13278 pr_warn("Failed to open cpu mask file %s: %d\n", fcpu, err);
13279 return err;
13280 }
13281 len = read(fd, buf, sizeof(buf));
13282 close(fd);
13283 if (len <= 0) {
13284 err = len ? -errno : -EINVAL;
13285 pr_warn("Failed to read cpu mask from %s: %d\n", fcpu, err);
13286 return err;
13287 }
13288 if (len >= sizeof(buf)) {
13289 pr_warn("CPU mask is too big in file %s\n", fcpu);
13290 return -E2BIG;
13291 }
13292 buf[len] = '\0';
13293
13294 return parse_cpu_mask_str(buf, mask, mask_sz);
13295 }
13296
13297 int libbpf_num_possible_cpus(void)
13298 {
13299 static const char *fcpu = "/sys/devices/system/cpu/possible";
13300 static int cpus;
13301 int err, n, i, tmp_cpus;
13302 bool *mask;
13303
13304 tmp_cpus = READ_ONCE(cpus);
13305 if (tmp_cpus > 0)
13306 return tmp_cpus;
13307
13308 err = parse_cpu_mask_file(fcpu, &mask, &n);
13309 if (err)
13310 return libbpf_err(err);
13311
13312 tmp_cpus = 0;
13313 for (i = 0; i < n; i++) {
13314 if (mask[i])
13315 tmp_cpus++;
13316 }
13317 free(mask);
13318
13319 WRITE_ONCE(cpus, tmp_cpus);
13320 return tmp_cpus;
13321 }
13322
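/* Illustrative only, not part of libbpf: libbpf_num_possible_cpus() is
 * typically used to size the value buffer when reading per-CPU maps, since
 * the kernel returns one value per possible CPU. Sketch (the map fd, key,
 * and value type are assumptions):
 *
 *	int err, ncpus = libbpf_num_possible_cpus();
 *	if (ncpus < 0)
 *		return ncpus;
 *
 *	__u32 key = 0;
 *	__u64 *values = calloc(ncpus, sizeof(__u64));
 *	err = bpf_map_lookup_elem(percpu_map_fd, &key, values);
 *	// values[i] holds the entry for possible CPU #i
 */
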
13323 static int populate_skeleton_maps(const struct bpf_object *obj,
13324 struct bpf_map_skeleton *maps,
13325 size_t map_cnt)
13326 {
13327 int i;
13328
13329 for (i = 0; i < map_cnt; i++) {
13330 struct bpf_map **map = maps[i].map;
13331 const char *name = maps[i].name;
13332 void **mmaped = maps[i].mmaped;
13333
13334 *map = bpf_object__find_map_by_name(obj, name);
13335 if (!*map) {
13336 pr_warn("failed to find skeleton map '%s'\n", name);
13337 return -ESRCH;
13338 }
13339
13340 /* externs shouldn't be pre-setup from user code */
13341 if (mmaped && (*map)->libbpf_type != LIBBPF_MAP_KCONFIG)
13342 *mmaped = (*map)->mmaped;
13343 }
13344 return 0;
13345 }
13346
13347 static int populate_skeleton_progs(const struct bpf_object *obj,
13348 struct bpf_prog_skeleton *progs,
13349 size_t prog_cnt)
13350 {
13351 int i;
13352
13353 for (i = 0; i < prog_cnt; i++) {
13354 struct bpf_program **prog = progs[i].prog;
13355 const char *name = progs[i].name;
13356
13357 *prog = bpf_object__find_program_by_name(obj, name);
13358 if (!*prog) {
13359 pr_warn("failed to find skeleton program '%s'\n", name);
13360 return -ESRCH;
13361 }
13362 }
13363 return 0;
13364 }
13365
13366 int bpf_object__open_skeleton(struct bpf_object_skeleton *s,
13367 const struct bpf_object_open_opts *opts)
13368 {
13369 DECLARE_LIBBPF_OPTS(bpf_object_open_opts, skel_opts,
13370 .object_name = s->name,
13371 );
13372 struct bpf_object *obj;
13373 int err;
13374
13375 /* Attempt to preserve opts->object_name, unless explicitly overridden
13376 * by the user. Overwriting the object name for skeletons is discouraged,
13377 * as it breaks global data maps: their map names are prefixed with the
13378 * object name, and when the skeleton is generated, bpftool assumes that
13379 * this name will stay the same.
13380 */
13381 if (opts) {
13382 memcpy(&skel_opts, opts, sizeof(*opts));
13383 if (!opts->object_name)
13384 skel_opts.object_name = s->name;
13385 }
13386
13387 obj = bpf_object__open_mem(s->data, s->data_sz, &skel_opts);
13388 err = libbpf_get_error(obj);
13389 if (err) {
13390 pr_warn("failed to initialize skeleton BPF object '%s': %d\n",
13391 s->name, err);
13392 return libbpf_err(err);
13393 }
13394
13395 *s->obj = obj;
13396 err = populate_skeleton_maps(obj, s->maps, s->map_cnt);
13397 if (err) {
13398 pr_warn("failed to populate skeleton maps for '%s': %d\n", s->name, err);
13399 return libbpf_err(err);
13400 }
13401
13402 err = populate_skeleton_progs(obj, s->progs, s->prog_cnt);
13403 if (err) {
13404 pr_warn("failed to populate skeleton progs for '%s': %d\n", s->name, err);
13405 return libbpf_err(err);
13406 }
13407
13408 return 0;
13409 }
13410
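/* Illustrative only, not part of libbpf: applications don't normally call
 * bpf_object__open_skeleton() directly. bpftool generates a <name>.skel.h
 * header whose <name>__open() fills in a struct bpf_object_skeleton and
 * forwards it here. For a hypothetical skeleton named "minimal", the
 * generated wrappers are used roughly like this:
 *
 *	struct minimal_bpf *skel;
 *	int err;
 *
 *	skel = minimal_bpf__open();		// bpf_object__open_skeleton()
 *	if (!skel)
 *		return -errno;
 *	err = minimal_bpf__load(skel);		// bpf_object__load_skeleton()
 *	if (!err)
 *		err = minimal_bpf__attach(skel);	// bpf_object__attach_skeleton()
 *	minimal_bpf__destroy(skel);		// bpf_object__destroy_skeleton()
 */
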
13411 int bpf_object__open_subskeleton(struct bpf_object_subskeleton *s)
13412 {
13413 int err, len, var_idx, i;
13414 const char *var_name;
13415 const struct bpf_map *map;
13416 struct btf *btf;
13417 __u32 map_type_id;
13418 const struct btf_type *map_type, *var_type;
13419 const struct bpf_var_skeleton *var_skel;
13420 struct btf_var_secinfo *var;
13421
13422 if (!s->obj)
13423 return libbpf_err(-EINVAL);
13424
13425 btf = bpf_object__btf(s->obj);
13426 if (!btf) {
13427 pr_warn("subskeletons require BTF at runtime (object %s)\n",
13428 bpf_object__name(s->obj));
13429 return libbpf_err(-errno);
13430 }
13431
13432 err = populate_skeleton_maps(s->obj, s->maps, s->map_cnt);
13433 if (err) {
13434 pr_warn("failed to populate subskeleton maps: %d\n", err);
13435 return libbpf_err(err);
13436 }
13437
13438 err = populate_skeleton_progs(s->obj, s->progs, s->prog_cnt);
13439 if (err) {
13440 pr_warn("failed to populate subskeleton maps: %d\n", err);
13441 return libbpf_err(err);
13442 }
13443
13444 for (var_idx = 0; var_idx < s->var_cnt; var_idx++) {
13445 var_skel = &s->vars[var_idx];
13446 map = *var_skel->map;
13447 map_type_id = bpf_map__btf_value_type_id(map);
13448 map_type = btf__type_by_id(btf, map_type_id);
13449
13450 if (!btf_is_datasec(map_type)) {
13451 pr_warn("type for map '%1$s' is not a datasec: %2$s",
13452 bpf_map__name(map),
13453 __btf_kind_str(btf_kind(map_type)));
13454 return libbpf_err(-EINVAL);
13455 }
13456
13457 len = btf_vlen(map_type);
13458 var = btf_var_secinfos(map_type);
13459 for (i = 0; i < len; i++, var++) {
13460 var_type = btf__type_by_id(btf, var->type);
13461 var_name = btf__name_by_offset(btf, var_type->name_off);
13462 if (strcmp(var_name, var_skel->name) == 0) {
13463 *var_skel->addr = map->mmaped + var->offset;
13464 break;
13465 }
13466 }
13467 }
13468 return 0;
13469 }
13470
13471 void bpf_object__destroy_subskeleton(struct bpf_object_subskeleton *s)
13472 {
13473 if (!s)
13474 return;
13475 free(s->maps);
13476 free(s->progs);
13477 free(s->vars);
13478 free(s);
13479 }
13480
13481 int bpf_object__load_skeleton(struct bpf_object_skeleton *s)
13482 {
13483 int i, err;
13484
13485 err = bpf_object__load(*s->obj);
13486 if (err) {
13487 pr_warn("failed to load BPF skeleton '%s': %d\n", s->name, err);
13488 return libbpf_err(err);
13489 }
13490
13491 for (i = 0; i < s->map_cnt; i++) {
13492 struct bpf_map *map = *s->maps[i].map;
13493 size_t mmap_sz = bpf_map_mmap_sz(map->def.value_size, map->def.max_entries);
13494 int prot, map_fd = map->fd;
13495 void **mmaped = s->maps[i].mmaped;
13496
13497 if (!mmaped)
13498 continue;
13499
13500 if (!(map->def.map_flags & BPF_F_MMAPABLE)) {
13501 *mmaped = NULL;
13502 continue;
13503 }
13504
13505 if (map->def.map_flags & BPF_F_RDONLY_PROG)
13506 prot = PROT_READ;
13507 else
13508 prot = PROT_READ | PROT_WRITE;
13509
13510 /* Remap the anonymous mmap()-ed "map initialization image" as
13511 * BPF map-backed mmap()-ed memory, preserving the same memory
13512 * address. This causes the kernel to change the process's page
13513 * tables to point to a different piece of kernel memory, but from
13514 * the userspace point of view the memory address (and its
13515 * contents, which are identical at this point) stays the same.
13516 * This mapping will be released by bpf_object__close() as part of
13517 * the normal cleanup procedure, so we don't need to worry about it
13518 * from the skeleton's cleanup perspective.
13519 */
13520 *mmaped = mmap(map->mmaped, mmap_sz, prot, MAP_SHARED | MAP_FIXED, map_fd, 0);
13521 if (*mmaped == MAP_FAILED) {
13522 err = -errno;
13523 *mmaped = NULL;
13524 pr_warn("failed to re-mmap() map '%s': %d\n",
13525 bpf_map__name(map), err);
13526 return libbpf_err(err);
13527 }
13528 }
13529
13530 return 0;
13531 }
13532
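/* Illustrative only, not part of libbpf: the remapping done in
 * bpf_object__load_skeleton() is what makes the skeleton's global-data
 * pointers (e.g. skel->bss, skel->rodata) read and write the kernel-side
 * map memory directly once the object is loaded. Sketch with a hypothetical
 * global variable:
 *
 *	// BPF side: a global "int my_counter;" lands in the .bss map
 *	// userspace side, after <name>__load():
 *	skel->bss->my_counter = 0;	// writes straight into the BPF map
 *	int v = skel->bss->my_counter;	// sees updates made by BPF programs
 */
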
13533 int bpf_object__attach_skeleton(struct bpf_object_skeleton *s)
13534 {
13535 int i, err;
13536
13537 for (i = 0; i < s->prog_cnt; i++) {
13538 struct bpf_program *prog = *s->progs[i].prog;
13539 struct bpf_link **link = s->progs[i].link;
13540
13541 if (!prog->autoload || !prog->autoattach)
13542 continue;
13543
13544 /* auto-attaching not supported for this program */
13545 if (!prog->sec_def || !prog->sec_def->prog_attach_fn)
13546 continue;
13547
13548 /* if user already set the link manually, don't attempt auto-attach */
13549 if (*link)
13550 continue;
13551
13552 err = prog->sec_def->prog_attach_fn(prog, prog->sec_def->cookie, link);
13553 if (err) {
13554 pr_warn("prog '%s': failed to auto-attach: %d\n",
13555 bpf_program__name(prog), err);
13556 return libbpf_err(err);
13557 }
13558
13559 /* It's possible that for some SEC() definitions auto-attach
13560 * is supported only in some cases (e.g., when the definition
13561 * completely specifies the target information), but not in others.
13562 * SEC("uprobe") is one such case: if the user specified the target
13563 * binary and function name, such a BPF program can be
13564 * auto-attached, but if not, that shouldn't cause the skeleton's
13565 * attach to fail; the program should just be skipped.
13566 * attach_fn signals such a case by returning 0 (no error) and
13567 * setting the link to NULL.
13568 */
13569 }
13570
13571 return 0;
13572 }
13573
13574 void bpf_object__detach_skeleton(struct bpf_object_skeleton *s)
13575 {
13576 int i;
13577
13578 for (i = 0; i < s->prog_cnt; i++) {
13579 struct bpf_link **link = s->progs[i].link;
13580
13581 bpf_link__destroy(*link);
13582 *link = NULL;
13583 }
13584 }
13585
13586 void bpf_object__destroy_skeleton(struct bpf_object_skeleton *s)
13587 {
13588 if (!s)
13589 return;
13590
13591 if (s->progs)
13592 bpf_object__detach_skeleton(s);
13593 if (s->obj)
13594 bpf_object__close(*s->obj);
13595 free(s->maps);
13596 free(s->progs);
13597 free(s);
13598 }