git.ipfire.org — thirdparty/linux.git: tools/lib/bpf/libbpf.c (blob 672fca94ff534b03baa8ab51dd8dfbc33c529f3b)
1 // SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
2
3 /*
4 * Common eBPF ELF object loading operations.
5 *
6 * Copyright (C) 2013-2015 Alexei Starovoitov <ast@kernel.org>
7 * Copyright (C) 2015 Wang Nan <wangnan0@huawei.com>
8 * Copyright (C) 2015 Huawei Inc.
9 * Copyright (C) 2017 Nicira, Inc.
10 * Copyright (C) 2019 Isovalent, Inc.
11 */
12
13 #ifndef _GNU_SOURCE
14 #define _GNU_SOURCE
15 #endif
16 #include <stdlib.h>
17 #include <stdio.h>
18 #include <stdarg.h>
19 #include <libgen.h>
20 #include <inttypes.h>
21 #include <limits.h>
22 #include <string.h>
23 #include <unistd.h>
24 #include <endian.h>
25 #include <fcntl.h>
26 #include <errno.h>
27 #include <ctype.h>
28 #include <asm/unistd.h>
29 #include <linux/err.h>
30 #include <linux/kernel.h>
31 #include <linux/bpf.h>
32 #include <linux/btf.h>
33 #include <linux/filter.h>
34 #include <linux/limits.h>
35 #include <linux/perf_event.h>
36 #include <linux/bpf_perf_event.h>
37 #include <linux/ring_buffer.h>
38 #include <sys/epoll.h>
39 #include <sys/ioctl.h>
40 #include <sys/mman.h>
41 #include <sys/stat.h>
42 #include <sys/types.h>
43 #include <sys/vfs.h>
44 #include <sys/utsname.h>
45 #include <sys/resource.h>
46 #include <libelf.h>
47 #include <gelf.h>
48 #include <zlib.h>
49
50 #include "libbpf.h"
51 #include "bpf.h"
52 #include "btf.h"
53 #include "str_error.h"
54 #include "libbpf_internal.h"
55 #include "hashmap.h"
56 #include "bpf_gen_internal.h"
57 #include "zip.h"
58
59 #ifndef BPF_FS_MAGIC
60 #define BPF_FS_MAGIC 0xcafe4a11
61 #endif
62
63 #define BPF_FS_DEFAULT_PATH "/sys/fs/bpf"
64
65 #define BPF_INSN_SZ (sizeof(struct bpf_insn))
66
67 /* vfprintf() in __base_pr() uses nonliteral format string. It may break
68 * compilation if user enables corresponding warning. Disable it explicitly.
69 */
70 #pragma GCC diagnostic ignored "-Wformat-nonliteral"
71
72 #define __printf(a, b) __attribute__((format(printf, a, b)))
73
74 static struct bpf_map *bpf_object__add_map(struct bpf_object *obj);
75 static bool prog_is_subprog(const struct bpf_object *obj, const struct bpf_program *prog);
76 static int map_set_def_max_entries(struct bpf_map *map);
77
78 static const char * const attach_type_name[] = {
79 [BPF_CGROUP_INET_INGRESS] = "cgroup_inet_ingress",
80 [BPF_CGROUP_INET_EGRESS] = "cgroup_inet_egress",
81 [BPF_CGROUP_INET_SOCK_CREATE] = "cgroup_inet_sock_create",
82 [BPF_CGROUP_INET_SOCK_RELEASE] = "cgroup_inet_sock_release",
83 [BPF_CGROUP_SOCK_OPS] = "cgroup_sock_ops",
84 [BPF_CGROUP_DEVICE] = "cgroup_device",
85 [BPF_CGROUP_INET4_BIND] = "cgroup_inet4_bind",
86 [BPF_CGROUP_INET6_BIND] = "cgroup_inet6_bind",
87 [BPF_CGROUP_INET4_CONNECT] = "cgroup_inet4_connect",
88 [BPF_CGROUP_INET6_CONNECT] = "cgroup_inet6_connect",
89 [BPF_CGROUP_UNIX_CONNECT] = "cgroup_unix_connect",
90 [BPF_CGROUP_INET4_POST_BIND] = "cgroup_inet4_post_bind",
91 [BPF_CGROUP_INET6_POST_BIND] = "cgroup_inet6_post_bind",
92 [BPF_CGROUP_INET4_GETPEERNAME] = "cgroup_inet4_getpeername",
93 [BPF_CGROUP_INET6_GETPEERNAME] = "cgroup_inet6_getpeername",
94 [BPF_CGROUP_UNIX_GETPEERNAME] = "cgroup_unix_getpeername",
95 [BPF_CGROUP_INET4_GETSOCKNAME] = "cgroup_inet4_getsockname",
96 [BPF_CGROUP_INET6_GETSOCKNAME] = "cgroup_inet6_getsockname",
97 [BPF_CGROUP_UNIX_GETSOCKNAME] = "cgroup_unix_getsockname",
98 [BPF_CGROUP_UDP4_SENDMSG] = "cgroup_udp4_sendmsg",
99 [BPF_CGROUP_UDP6_SENDMSG] = "cgroup_udp6_sendmsg",
100 [BPF_CGROUP_UNIX_SENDMSG] = "cgroup_unix_sendmsg",
101 [BPF_CGROUP_SYSCTL] = "cgroup_sysctl",
102 [BPF_CGROUP_UDP4_RECVMSG] = "cgroup_udp4_recvmsg",
103 [BPF_CGROUP_UDP6_RECVMSG] = "cgroup_udp6_recvmsg",
104 [BPF_CGROUP_UNIX_RECVMSG] = "cgroup_unix_recvmsg",
105 [BPF_CGROUP_GETSOCKOPT] = "cgroup_getsockopt",
106 [BPF_CGROUP_SETSOCKOPT] = "cgroup_setsockopt",
107 [BPF_SK_SKB_STREAM_PARSER] = "sk_skb_stream_parser",
108 [BPF_SK_SKB_STREAM_VERDICT] = "sk_skb_stream_verdict",
109 [BPF_SK_SKB_VERDICT] = "sk_skb_verdict",
110 [BPF_SK_MSG_VERDICT] = "sk_msg_verdict",
111 [BPF_LIRC_MODE2] = "lirc_mode2",
112 [BPF_FLOW_DISSECTOR] = "flow_dissector",
113 [BPF_TRACE_RAW_TP] = "trace_raw_tp",
114 [BPF_TRACE_FENTRY] = "trace_fentry",
115 [BPF_TRACE_FEXIT] = "trace_fexit",
116 [BPF_MODIFY_RETURN] = "modify_return",
117 [BPF_LSM_MAC] = "lsm_mac",
118 [BPF_LSM_CGROUP] = "lsm_cgroup",
119 [BPF_SK_LOOKUP] = "sk_lookup",
120 [BPF_TRACE_ITER] = "trace_iter",
121 [BPF_XDP_DEVMAP] = "xdp_devmap",
122 [BPF_XDP_CPUMAP] = "xdp_cpumap",
123 [BPF_XDP] = "xdp",
124 [BPF_SK_REUSEPORT_SELECT] = "sk_reuseport_select",
125 [BPF_SK_REUSEPORT_SELECT_OR_MIGRATE] = "sk_reuseport_select_or_migrate",
126 [BPF_PERF_EVENT] = "perf_event",
127 [BPF_TRACE_KPROBE_MULTI] = "trace_kprobe_multi",
128 [BPF_STRUCT_OPS] = "struct_ops",
129 [BPF_NETFILTER] = "netfilter",
130 [BPF_TCX_INGRESS] = "tcx_ingress",
131 [BPF_TCX_EGRESS] = "tcx_egress",
132 [BPF_TRACE_UPROBE_MULTI] = "trace_uprobe_multi",
133 [BPF_NETKIT_PRIMARY] = "netkit_primary",
134 [BPF_NETKIT_PEER] = "netkit_peer",
135 };
136
137 static const char * const link_type_name[] = {
138 [BPF_LINK_TYPE_UNSPEC] = "unspec",
139 [BPF_LINK_TYPE_RAW_TRACEPOINT] = "raw_tracepoint",
140 [BPF_LINK_TYPE_TRACING] = "tracing",
141 [BPF_LINK_TYPE_CGROUP] = "cgroup",
142 [BPF_LINK_TYPE_ITER] = "iter",
143 [BPF_LINK_TYPE_NETNS] = "netns",
144 [BPF_LINK_TYPE_XDP] = "xdp",
145 [BPF_LINK_TYPE_PERF_EVENT] = "perf_event",
146 [BPF_LINK_TYPE_KPROBE_MULTI] = "kprobe_multi",
147 [BPF_LINK_TYPE_STRUCT_OPS] = "struct_ops",
148 [BPF_LINK_TYPE_NETFILTER] = "netfilter",
149 [BPF_LINK_TYPE_TCX] = "tcx",
150 [BPF_LINK_TYPE_UPROBE_MULTI] = "uprobe_multi",
151 [BPF_LINK_TYPE_NETKIT] = "netkit",
152 };
153
154 static const char * const map_type_name[] = {
155 [BPF_MAP_TYPE_UNSPEC] = "unspec",
156 [BPF_MAP_TYPE_HASH] = "hash",
157 [BPF_MAP_TYPE_ARRAY] = "array",
158 [BPF_MAP_TYPE_PROG_ARRAY] = "prog_array",
159 [BPF_MAP_TYPE_PERF_EVENT_ARRAY] = "perf_event_array",
160 [BPF_MAP_TYPE_PERCPU_HASH] = "percpu_hash",
161 [BPF_MAP_TYPE_PERCPU_ARRAY] = "percpu_array",
162 [BPF_MAP_TYPE_STACK_TRACE] = "stack_trace",
163 [BPF_MAP_TYPE_CGROUP_ARRAY] = "cgroup_array",
164 [BPF_MAP_TYPE_LRU_HASH] = "lru_hash",
165 [BPF_MAP_TYPE_LRU_PERCPU_HASH] = "lru_percpu_hash",
166 [BPF_MAP_TYPE_LPM_TRIE] = "lpm_trie",
167 [BPF_MAP_TYPE_ARRAY_OF_MAPS] = "array_of_maps",
168 [BPF_MAP_TYPE_HASH_OF_MAPS] = "hash_of_maps",
169 [BPF_MAP_TYPE_DEVMAP] = "devmap",
170 [BPF_MAP_TYPE_DEVMAP_HASH] = "devmap_hash",
171 [BPF_MAP_TYPE_SOCKMAP] = "sockmap",
172 [BPF_MAP_TYPE_CPUMAP] = "cpumap",
173 [BPF_MAP_TYPE_XSKMAP] = "xskmap",
174 [BPF_MAP_TYPE_SOCKHASH] = "sockhash",
175 [BPF_MAP_TYPE_CGROUP_STORAGE] = "cgroup_storage",
176 [BPF_MAP_TYPE_REUSEPORT_SOCKARRAY] = "reuseport_sockarray",
177 [BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE] = "percpu_cgroup_storage",
178 [BPF_MAP_TYPE_QUEUE] = "queue",
179 [BPF_MAP_TYPE_STACK] = "stack",
180 [BPF_MAP_TYPE_SK_STORAGE] = "sk_storage",
181 [BPF_MAP_TYPE_STRUCT_OPS] = "struct_ops",
182 [BPF_MAP_TYPE_RINGBUF] = "ringbuf",
183 [BPF_MAP_TYPE_INODE_STORAGE] = "inode_storage",
184 [BPF_MAP_TYPE_TASK_STORAGE] = "task_storage",
185 [BPF_MAP_TYPE_BLOOM_FILTER] = "bloom_filter",
186 [BPF_MAP_TYPE_USER_RINGBUF] = "user_ringbuf",
187 [BPF_MAP_TYPE_CGRP_STORAGE] = "cgrp_storage",
188 };
189
190 static const char * const prog_type_name[] = {
191 [BPF_PROG_TYPE_UNSPEC] = "unspec",
192 [BPF_PROG_TYPE_SOCKET_FILTER] = "socket_filter",
193 [BPF_PROG_TYPE_KPROBE] = "kprobe",
194 [BPF_PROG_TYPE_SCHED_CLS] = "sched_cls",
195 [BPF_PROG_TYPE_SCHED_ACT] = "sched_act",
196 [BPF_PROG_TYPE_TRACEPOINT] = "tracepoint",
197 [BPF_PROG_TYPE_XDP] = "xdp",
198 [BPF_PROG_TYPE_PERF_EVENT] = "perf_event",
199 [BPF_PROG_TYPE_CGROUP_SKB] = "cgroup_skb",
200 [BPF_PROG_TYPE_CGROUP_SOCK] = "cgroup_sock",
201 [BPF_PROG_TYPE_LWT_IN] = "lwt_in",
202 [BPF_PROG_TYPE_LWT_OUT] = "lwt_out",
203 [BPF_PROG_TYPE_LWT_XMIT] = "lwt_xmit",
204 [BPF_PROG_TYPE_SOCK_OPS] = "sock_ops",
205 [BPF_PROG_TYPE_SK_SKB] = "sk_skb",
206 [BPF_PROG_TYPE_CGROUP_DEVICE] = "cgroup_device",
207 [BPF_PROG_TYPE_SK_MSG] = "sk_msg",
208 [BPF_PROG_TYPE_RAW_TRACEPOINT] = "raw_tracepoint",
209 [BPF_PROG_TYPE_CGROUP_SOCK_ADDR] = "cgroup_sock_addr",
210 [BPF_PROG_TYPE_LWT_SEG6LOCAL] = "lwt_seg6local",
211 [BPF_PROG_TYPE_LIRC_MODE2] = "lirc_mode2",
212 [BPF_PROG_TYPE_SK_REUSEPORT] = "sk_reuseport",
213 [BPF_PROG_TYPE_FLOW_DISSECTOR] = "flow_dissector",
214 [BPF_PROG_TYPE_CGROUP_SYSCTL] = "cgroup_sysctl",
215 [BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE] = "raw_tracepoint_writable",
216 [BPF_PROG_TYPE_CGROUP_SOCKOPT] = "cgroup_sockopt",
217 [BPF_PROG_TYPE_TRACING] = "tracing",
218 [BPF_PROG_TYPE_STRUCT_OPS] = "struct_ops",
219 [BPF_PROG_TYPE_EXT] = "ext",
220 [BPF_PROG_TYPE_LSM] = "lsm",
221 [BPF_PROG_TYPE_SK_LOOKUP] = "sk_lookup",
222 [BPF_PROG_TYPE_SYSCALL] = "syscall",
223 [BPF_PROG_TYPE_NETFILTER] = "netfilter",
224 };
225
226 static int __base_pr(enum libbpf_print_level level, const char *format,
227 va_list args)
228 {
229 if (level == LIBBPF_DEBUG)
230 return 0;
231
232 return vfprintf(stderr, format, args);
233 }
234
235 static libbpf_print_fn_t __libbpf_pr = __base_pr;
236
237 libbpf_print_fn_t libbpf_set_print(libbpf_print_fn_t fn)
238 {
239 libbpf_print_fn_t old_print_fn;
240
241 old_print_fn = __atomic_exchange_n(&__libbpf_pr, fn, __ATOMIC_RELAXED);
242
243 return old_print_fn;
244 }
245
246 __printf(2, 3)
247 void libbpf_print(enum libbpf_print_level level, const char *format, ...)
248 {
249 va_list args;
250 int old_errno;
251 libbpf_print_fn_t print_fn;
252
253 print_fn = __atomic_load_n(&__libbpf_pr, __ATOMIC_RELAXED);
254 if (!print_fn)
255 return;
256
257 old_errno = errno;
258
259 va_start(args, format);
260 print_fn(level, format, args);
261 va_end(args);
262
263 errno = old_errno;
264 }
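/* Illustrative usage sketch (editorial addition, not part of this file):
 * an application can route libbpf's output through its own logger by
 * installing a callback matching libbpf_print_fn_t, e.g.:
 *
 *	static int my_print(enum libbpf_print_level level,
 *			    const char *format, va_list args)
 *	{
 *		if (level == LIBBPF_DEBUG)
 *			return 0;
 *		return vfprintf(stderr, format, args);
 *	}
 *
 *	libbpf_set_print(my_print);
 *
 * libbpf_set_print() returns the previously installed callback; passing NULL
 * silences libbpf output entirely, which the !print_fn check above handles.
 */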
265
266 static void pr_perm_msg(int err)
267 {
268 struct rlimit limit;
269 char buf[100];
270
271 if (err != -EPERM || geteuid() != 0)
272 return;
273
274 err = getrlimit(RLIMIT_MEMLOCK, &limit);
275 if (err)
276 return;
277
278 if (limit.rlim_cur == RLIM_INFINITY)
279 return;
280
281 if (limit.rlim_cur < 1024)
282 snprintf(buf, sizeof(buf), "%zu bytes", (size_t)limit.rlim_cur);
283 else if (limit.rlim_cur < 1024*1024)
284 snprintf(buf, sizeof(buf), "%.1f KiB", (double)limit.rlim_cur / 1024);
285 else
286 snprintf(buf, sizeof(buf), "%.1f MiB", (double)limit.rlim_cur / (1024*1024));
287
288 pr_warn("permission error while running as root; try raising 'ulimit -l'? current value: %s\n",
289 buf);
290 }
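/* Illustrative sketch (editorial addition, not part of this file): this
 * warning matters mostly on pre-5.11 kernels, where BPF memory is charged
 * against RLIMIT_MEMLOCK rather than the memory cgroup. A common workaround
 * is to raise the limit before loading objects:
 *
 *	struct rlimit rl = { RLIM_INFINITY, RLIM_INFINITY };
 *
 *	setrlimit(RLIMIT_MEMLOCK, &rl);
 *
 * or, from a shell, running `ulimit -l unlimited` before starting the loader.
 */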
291
292 #define STRERR_BUFSIZE 128
293
294 /* Copied from tools/perf/util/util.h */
295 #ifndef zfree
296 # define zfree(ptr) ({ free(*ptr); *ptr = NULL; })
297 #endif
298
299 #ifndef zclose
300 # define zclose(fd) ({ \
301 int ___err = 0; \
302 if ((fd) >= 0) \
303 ___err = close((fd)); \
304 fd = -1; \
305 ___err; })
306 #endif
307
308 static inline __u64 ptr_to_u64(const void *ptr)
309 {
310 return (__u64) (unsigned long) ptr;
311 }
312
313 int libbpf_set_strict_mode(enum libbpf_strict_mode mode)
314 {
315 /* as of v1.0 libbpf_set_strict_mode() is a no-op */
316 return 0;
317 }
318
319 __u32 libbpf_major_version(void)
320 {
321 return LIBBPF_MAJOR_VERSION;
322 }
323
324 __u32 libbpf_minor_version(void)
325 {
326 return LIBBPF_MINOR_VERSION;
327 }
328
329 const char *libbpf_version_string(void)
330 {
331 #define __S(X) #X
332 #define _S(X) __S(X)
333 return "v" _S(LIBBPF_MAJOR_VERSION) "." _S(LIBBPF_MINOR_VERSION);
334 #undef _S
335 #undef __S
336 }
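/* Editorial note (not part of this file): the two-level _S()/__S() macros
 * make sure LIBBPF_MAJOR_VERSION/LIBBPF_MINOR_VERSION are macro-expanded
 * before stringification. With LIBBPF_MAJOR_VERSION defined as 1,
 * __S(LIBBPF_MAJOR_VERSION) would produce "LIBBPF_MAJOR_VERSION", while
 * _S(LIBBPF_MAJOR_VERSION) produces "1", so the function returns e.g. "v1.4".
 * A hypothetical runtime check on the caller's side could look like:
 *
 *	if (libbpf_major_version() != LIBBPF_MAJOR_VERSION)
 *		fprintf(stderr, "built against a different libbpf (%s)\n",
 *			libbpf_version_string());
 */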
337
338 enum reloc_type {
339 RELO_LD64,
340 RELO_CALL,
341 RELO_DATA,
342 RELO_EXTERN_LD64,
343 RELO_EXTERN_CALL,
344 RELO_SUBPROG_ADDR,
345 RELO_CORE,
346 };
347
348 struct reloc_desc {
349 enum reloc_type type;
350 int insn_idx;
351 union {
352 const struct bpf_core_relo *core_relo; /* used when type == RELO_CORE */
353 struct {
354 int map_idx;
355 int sym_off;
356 int ext_idx;
357 };
358 };
359 };
360
361 /* stored as sec_def->cookie for all libbpf-supported SEC()s */
362 enum sec_def_flags {
363 SEC_NONE = 0,
364 /* expected_attach_type is optional, if the kernel doesn't support it */
365 SEC_EXP_ATTACH_OPT = 1,
366 /* legacy, only used by libbpf_get_type_names() and
367 * libbpf_attach_type_by_name(), not used by libbpf itself at all.
368 * This used to be associated with cgroup (and a few other) BPF programs
369 * that were attachable through BPF_PROG_ATTACH command. Pretty
370 * meaningless nowadays, though.
371 */
372 SEC_ATTACHABLE = 2,
373 SEC_ATTACHABLE_OPT = SEC_ATTACHABLE | SEC_EXP_ATTACH_OPT,
374 /* attachment target is specified through BTF ID in either kernel or
375 * other BPF program's BTF object
376 */
377 SEC_ATTACH_BTF = 4,
378 /* BPF program type allows sleeping/blocking in kernel */
379 SEC_SLEEPABLE = 8,
380 /* BPF program supports non-linear XDP buffers */
381 SEC_XDP_FRAGS = 16,
382 /* Set up proper attach type for USDT probes. */
383 SEC_USDT = 32,
384 };
385
386 struct bpf_sec_def {
387 char *sec;
388 enum bpf_prog_type prog_type;
389 enum bpf_attach_type expected_attach_type;
390 long cookie;
391 int handler_id;
392
393 libbpf_prog_setup_fn_t prog_setup_fn;
394 libbpf_prog_prepare_load_fn_t prog_prepare_load_fn;
395 libbpf_prog_attach_fn_t prog_attach_fn;
396 };
397
398 /*
399 * bpf_prog should be a better name but it has been used in
400 * linux/filter.h.
401 */
402 struct bpf_program {
403 char *name;
404 char *sec_name;
405 size_t sec_idx;
406 const struct bpf_sec_def *sec_def;
407 /* this program's instruction offset (in number of instructions)
408 * within its containing ELF section
409 */
410 size_t sec_insn_off;
411 /* number of original instructions in ELF section belonging to this
412 * program, not taking into account subprogram instructions possibly
413 * appended later during relocation
414 */
415 size_t sec_insn_cnt;
416 /* Offset (in number of instructions) of the start of instructions
417 * belonging to this BPF program within its containing main BPF
418 * program. For the entry-point (main) BPF program, this is always
419 * zero. For a sub-program, this gets reset before each main BPF
420 * program is processed and relocated, and is used to determine
421 * whether the sub-program was already appended to the main program, and
422 * if yes, at which instruction offset.
423 */
424 size_t sub_insn_off;
425
426 /* instructions that belong to BPF program; insns[0] is located at
427 * sec_insn_off instruction within its ELF section in ELF file, so
428 * when mapping ELF file instruction index to the local instruction,
429 * one needs to subtract sec_insn_off; and vice versa.
430 */
431 struct bpf_insn *insns;
432 /* actual number of instructions in this BPF program's image; for
433 * entry-point BPF programs this includes the size of main program
434 * itself plus all the used sub-programs, appended at the end
435 */
436 size_t insns_cnt;
437
438 struct reloc_desc *reloc_desc;
439 int nr_reloc;
440
441 /* BPF verifier log settings */
442 char *log_buf;
443 size_t log_size;
444 __u32 log_level;
445
446 struct bpf_object *obj;
447
448 int fd;
449 bool autoload;
450 bool autoattach;
451 bool sym_global;
452 bool mark_btf_static;
453 enum bpf_prog_type type;
454 enum bpf_attach_type expected_attach_type;
455 int exception_cb_idx;
456
457 int prog_ifindex;
458 __u32 attach_btf_obj_fd;
459 __u32 attach_btf_id;
460 __u32 attach_prog_fd;
461
462 void *func_info;
463 __u32 func_info_rec_size;
464 __u32 func_info_cnt;
465
466 void *line_info;
467 __u32 line_info_rec_size;
468 __u32 line_info_cnt;
469 __u32 prog_flags;
470 };
471
472 struct bpf_struct_ops {
473 const char *tname;
474 const struct btf_type *type;
475 struct bpf_program **progs;
476 __u32 *kern_func_off;
477 /* e.g. struct tcp_congestion_ops in bpf_prog's btf format */
478 void *data;
479 /* e.g. struct bpf_struct_ops_tcp_congestion_ops in
480 * btf_vmlinux's format.
481 * struct bpf_struct_ops_tcp_congestion_ops {
482 * [... some other kernel fields ...]
483 * struct tcp_congestion_ops data;
484 * }
485 * kern_vdata-size == sizeof(struct bpf_struct_ops_tcp_congestion_ops)
486 * bpf_map__init_kern_struct_ops() will populate the "kern_vdata"
487 * from "data".
488 */
489 void *kern_vdata;
490 __u32 type_id;
491 };
492
493 #define DATA_SEC ".data"
494 #define BSS_SEC ".bss"
495 #define RODATA_SEC ".rodata"
496 #define KCONFIG_SEC ".kconfig"
497 #define KSYMS_SEC ".ksyms"
498 #define STRUCT_OPS_SEC ".struct_ops"
499 #define STRUCT_OPS_LINK_SEC ".struct_ops.link"
500
501 enum libbpf_map_type {
502 LIBBPF_MAP_UNSPEC,
503 LIBBPF_MAP_DATA,
504 LIBBPF_MAP_BSS,
505 LIBBPF_MAP_RODATA,
506 LIBBPF_MAP_KCONFIG,
507 };
508
509 struct bpf_map_def {
510 unsigned int type;
511 unsigned int key_size;
512 unsigned int value_size;
513 unsigned int max_entries;
514 unsigned int map_flags;
515 };
516
517 struct bpf_map {
518 struct bpf_object *obj;
519 char *name;
520 /* real_name is defined for special internal maps (.rodata*,
521 * .data*, .bss, .kconfig) and preserves their original ELF section
522 * name. This is important to be able to find corresponding BTF
523 * DATASEC information.
524 */
525 char *real_name;
526 int fd;
527 int sec_idx;
528 size_t sec_offset;
529 int map_ifindex;
530 int inner_map_fd;
531 struct bpf_map_def def;
532 __u32 numa_node;
533 __u32 btf_var_idx;
534 int mod_btf_fd;
535 __u32 btf_key_type_id;
536 __u32 btf_value_type_id;
537 __u32 btf_vmlinux_value_type_id;
538 enum libbpf_map_type libbpf_type;
539 void *mmaped;
540 struct bpf_struct_ops *st_ops;
541 struct bpf_map *inner_map;
542 void **init_slots;
543 int init_slots_sz;
544 char *pin_path;
545 bool pinned;
546 bool reused;
547 bool autocreate;
548 __u64 map_extra;
549 };
550
551 enum extern_type {
552 EXT_UNKNOWN,
553 EXT_KCFG,
554 EXT_KSYM,
555 };
556
557 enum kcfg_type {
558 KCFG_UNKNOWN,
559 KCFG_CHAR,
560 KCFG_BOOL,
561 KCFG_INT,
562 KCFG_TRISTATE,
563 KCFG_CHAR_ARR,
564 };
565
566 struct extern_desc {
567 enum extern_type type;
568 int sym_idx;
569 int btf_id;
570 int sec_btf_id;
571 const char *name;
572 char *essent_name;
573 bool is_set;
574 bool is_weak;
575 union {
576 struct {
577 enum kcfg_type type;
578 int sz;
579 int align;
580 int data_off;
581 bool is_signed;
582 } kcfg;
583 struct {
584 unsigned long long addr;
585
586 /* target btf_id of the corresponding kernel var. */
587 int kernel_btf_obj_fd;
588 int kernel_btf_id;
589
590 /* local btf_id of the ksym extern's type. */
591 __u32 type_id;
592 /* BTF fd index to be patched in for insn->off, this is
593 * 0 for vmlinux BTF, index in obj->fd_array for module
594 * BTF
595 */
596 __s16 btf_fd_idx;
597 } ksym;
598 };
599 };
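/* Illustrative sketch (editorial addition, not part of this file): on the
 * BPF program side these externs are typically declared with the __kconfig
 * and __ksym macros from bpf_helpers.h, which place symbols into the
 * KCONFIG_SEC/KSYMS_SEC sections defined above, e.g.:
 *
 *	extern unsigned int CONFIG_HZ __kconfig;		(EXT_KCFG)
 *	extern const int bpf_prog_active __ksym;		(EXT_KSYM variable)
 *	extern void bpf_rcu_read_lock(void) __ksym;		(EXT_KSYM kfunc)
 *
 * KCFG externs are backed by the internal .kconfig map, while KSYM externs
 * are resolved against vmlinux or kernel module BTF at load time.
 */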
600
601 struct module_btf {
602 struct btf *btf;
603 char *name;
604 __u32 id;
605 int fd;
606 int fd_array_idx;
607 };
608
609 enum sec_type {
610 SEC_UNUSED = 0,
611 SEC_RELO,
612 SEC_BSS,
613 SEC_DATA,
614 SEC_RODATA,
615 SEC_ST_OPS,
616 };
617
618 struct elf_sec_desc {
619 enum sec_type sec_type;
620 Elf64_Shdr *shdr;
621 Elf_Data *data;
622 };
623
624 struct elf_state {
625 int fd;
626 const void *obj_buf;
627 size_t obj_buf_sz;
628 Elf *elf;
629 Elf64_Ehdr *ehdr;
630 Elf_Data *symbols;
631 size_t shstrndx; /* section index for section name strings */
632 size_t strtabidx;
633 struct elf_sec_desc *secs;
634 size_t sec_cnt;
635 int btf_maps_shndx;
636 __u32 btf_maps_sec_btf_id;
637 int text_shndx;
638 int symbols_shndx;
639 bool has_st_ops;
640 };
641
642 struct usdt_manager;
643
644 struct bpf_object {
645 char name[BPF_OBJ_NAME_LEN];
646 char license[64];
647 __u32 kern_version;
648
649 struct bpf_program *programs;
650 size_t nr_programs;
651 struct bpf_map *maps;
652 size_t nr_maps;
653 size_t maps_cap;
654
655 char *kconfig;
656 struct extern_desc *externs;
657 int nr_extern;
658 int kconfig_map_idx;
659
660 bool loaded;
661 bool has_subcalls;
662 bool has_rodata;
663
664 struct bpf_gen *gen_loader;
665
666 /* Information when doing ELF related work. Only valid if efile.elf is not NULL */
667 struct elf_state efile;
668
669 struct btf *btf;
670 struct btf_ext *btf_ext;
671
672 /* Parse and load BTF vmlinux if any of the programs in the object need
673 * it at load time.
674 */
675 struct btf *btf_vmlinux;
676 /* Path to the custom BTF to be used for BPF CO-RE relocations as an
677 * override for vmlinux BTF.
678 */
679 char *btf_custom_path;
680 /* vmlinux BTF override for CO-RE relocations */
681 struct btf *btf_vmlinux_override;
682 /* Lazily initialized kernel module BTFs */
683 struct module_btf *btf_modules;
684 bool btf_modules_loaded;
685 size_t btf_module_cnt;
686 size_t btf_module_cap;
687
688 /* optional log settings passed to BPF_BTF_LOAD and BPF_PROG_LOAD commands */
689 char *log_buf;
690 size_t log_size;
691 __u32 log_level;
692
693 int *fd_array;
694 size_t fd_array_cap;
695 size_t fd_array_cnt;
696
697 struct usdt_manager *usdt_man;
698
699 struct kern_feature_cache *feat_cache;
700 char *token_path;
701 int token_fd;
702
703 char path[];
704 };
705
706 static const char *elf_sym_str(const struct bpf_object *obj, size_t off);
707 static const char *elf_sec_str(const struct bpf_object *obj, size_t off);
708 static Elf_Scn *elf_sec_by_idx(const struct bpf_object *obj, size_t idx);
709 static Elf_Scn *elf_sec_by_name(const struct bpf_object *obj, const char *name);
710 static Elf64_Shdr *elf_sec_hdr(const struct bpf_object *obj, Elf_Scn *scn);
711 static const char *elf_sec_name(const struct bpf_object *obj, Elf_Scn *scn);
712 static Elf_Data *elf_sec_data(const struct bpf_object *obj, Elf_Scn *scn);
713 static Elf64_Sym *elf_sym_by_idx(const struct bpf_object *obj, size_t idx);
714 static Elf64_Rel *elf_rel_by_idx(Elf_Data *data, size_t idx);
715
716 void bpf_program__unload(struct bpf_program *prog)
717 {
718 if (!prog)
719 return;
720
721 zclose(prog->fd);
722
723 zfree(&prog->func_info);
724 zfree(&prog->line_info);
725 }
726
727 static void bpf_program__exit(struct bpf_program *prog)
728 {
729 if (!prog)
730 return;
731
732 bpf_program__unload(prog);
733 zfree(&prog->name);
734 zfree(&prog->sec_name);
735 zfree(&prog->insns);
736 zfree(&prog->reloc_desc);
737
738 prog->nr_reloc = 0;
739 prog->insns_cnt = 0;
740 prog->sec_idx = -1;
741 }
742
743 static bool insn_is_subprog_call(const struct bpf_insn *insn)
744 {
745 return BPF_CLASS(insn->code) == BPF_JMP &&
746 BPF_OP(insn->code) == BPF_CALL &&
747 BPF_SRC(insn->code) == BPF_K &&
748 insn->src_reg == BPF_PSEUDO_CALL &&
749 insn->dst_reg == 0 &&
750 insn->off == 0;
751 }
752
753 static bool is_call_insn(const struct bpf_insn *insn)
754 {
755 return insn->code == (BPF_JMP | BPF_CALL);
756 }
757
758 static bool insn_is_pseudo_func(struct bpf_insn *insn)
759 {
760 return is_ldimm64_insn(insn) && insn->src_reg == BPF_PSEUDO_FUNC;
761 }
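/* Editorial note (not part of this file): roughly, a compiler-emitted
 * subprog call detected by insn_is_subprog_call() looks like
 *
 *	insn.code    = BPF_JMP | BPF_CALL;
 *	insn.src_reg = BPF_PSEUDO_CALL;
 *	insn.imm     = <relative offset of the callee, in instructions>;
 *
 * whereas taking the address of a subprog (e.g. a callback passed to
 * bpf_for_each_map_elem()) is a ldimm64 instruction with
 * src_reg == BPF_PSEUDO_FUNC, which insn_is_pseudo_func() detects.
 */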
762
763 static int
764 bpf_object__init_prog(struct bpf_object *obj, struct bpf_program *prog,
765 const char *name, size_t sec_idx, const char *sec_name,
766 size_t sec_off, void *insn_data, size_t insn_data_sz)
767 {
768 if (insn_data_sz == 0 || insn_data_sz % BPF_INSN_SZ || sec_off % BPF_INSN_SZ) {
769 pr_warn("sec '%s': corrupted program '%s', offset %zu, size %zu\n",
770 sec_name, name, sec_off, insn_data_sz);
771 return -EINVAL;
772 }
773
774 memset(prog, 0, sizeof(*prog));
775 prog->obj = obj;
776
777 prog->sec_idx = sec_idx;
778 prog->sec_insn_off = sec_off / BPF_INSN_SZ;
779 prog->sec_insn_cnt = insn_data_sz / BPF_INSN_SZ;
780 /* insns_cnt can later be increased by appending used subprograms */
781 prog->insns_cnt = prog->sec_insn_cnt;
782
783 prog->type = BPF_PROG_TYPE_UNSPEC;
784 prog->fd = -1;
785 prog->exception_cb_idx = -1;
786
787 /* libbpf's convention for SEC("?abc...") is that it's just like
788 * SEC("abc...") but the corresponding bpf_program starts out with
789 * autoload set to false.
790 */
791 if (sec_name[0] == '?') {
792 prog->autoload = false;
793 /* from now on forget there was ? in section name */
794 sec_name++;
795 } else {
796 prog->autoload = true;
797 }
798
799 prog->autoattach = true;
800
801 /* inherit object's log_level */
802 prog->log_level = obj->log_level;
803
804 prog->sec_name = strdup(sec_name);
805 if (!prog->sec_name)
806 goto errout;
807
808 prog->name = strdup(name);
809 if (!prog->name)
810 goto errout;
811
812 prog->insns = malloc(insn_data_sz);
813 if (!prog->insns)
814 goto errout;
815 memcpy(prog->insns, insn_data, insn_data_sz);
816
817 return 0;
818 errout:
819 pr_warn("sec '%s': failed to allocate memory for prog '%s'\n", sec_name, name);
820 bpf_program__exit(prog);
821 return -ENOMEM;
822 }
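/* Illustrative sketch (editorial addition, not part of this file): the '?'
 * convention above means that a program defined as
 *
 *	SEC("?kprobe/do_unlinkat")
 *	int handle_unlinkat(struct pt_regs *ctx)
 *	{
 *		return 0;
 *	}
 *
 * is parsed and relocated normally but starts with autoload == false, so it
 * is skipped at load time unless userspace opts it back in, e.g. with
 * bpf_program__set_autoload(prog, true) before bpf_object__load().
 */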
823
824 static int
825 bpf_object__add_programs(struct bpf_object *obj, Elf_Data *sec_data,
826 const char *sec_name, int sec_idx)
827 {
828 Elf_Data *symbols = obj->efile.symbols;
829 struct bpf_program *prog, *progs;
830 void *data = sec_data->d_buf;
831 size_t sec_sz = sec_data->d_size, sec_off, prog_sz, nr_syms;
832 int nr_progs, err, i;
833 const char *name;
834 Elf64_Sym *sym;
835
836 progs = obj->programs;
837 nr_progs = obj->nr_programs;
838 nr_syms = symbols->d_size / sizeof(Elf64_Sym);
839
840 for (i = 0; i < nr_syms; i++) {
841 sym = elf_sym_by_idx(obj, i);
842
843 if (sym->st_shndx != sec_idx)
844 continue;
845 if (ELF64_ST_TYPE(sym->st_info) != STT_FUNC)
846 continue;
847
848 prog_sz = sym->st_size;
849 sec_off = sym->st_value;
850
851 name = elf_sym_str(obj, sym->st_name);
852 if (!name) {
853 pr_warn("sec '%s': failed to get symbol name for offset %zu\n",
854 sec_name, sec_off);
855 return -LIBBPF_ERRNO__FORMAT;
856 }
857
858 if (sec_off + prog_sz > sec_sz) {
859 pr_warn("sec '%s': program at offset %zu crosses section boundary\n",
860 sec_name, sec_off);
861 return -LIBBPF_ERRNO__FORMAT;
862 }
863
864 if (sec_idx != obj->efile.text_shndx && ELF64_ST_BIND(sym->st_info) == STB_LOCAL) {
865 pr_warn("sec '%s': program '%s' is static and not supported\n", sec_name, name);
866 return -ENOTSUP;
867 }
868
869 pr_debug("sec '%s': found program '%s' at insn offset %zu (%zu bytes), code size %zu insns (%zu bytes)\n",
870 sec_name, name, sec_off / BPF_INSN_SZ, sec_off, prog_sz / BPF_INSN_SZ, prog_sz);
871
872 progs = libbpf_reallocarray(progs, nr_progs + 1, sizeof(*progs));
873 if (!progs) {
874 /*
875 * In this case the original obj->programs
876 * is still valid, so we don't need special treatment in
877 * bpf_object__close().
878 */
879 pr_warn("sec '%s': failed to alloc memory for new program '%s'\n",
880 sec_name, name);
881 return -ENOMEM;
882 }
883 obj->programs = progs;
884
885 prog = &progs[nr_progs];
886
887 err = bpf_object__init_prog(obj, prog, name, sec_idx, sec_name,
888 sec_off, data + sec_off, prog_sz);
889 if (err)
890 return err;
891
892 if (ELF64_ST_BIND(sym->st_info) != STB_LOCAL)
893 prog->sym_global = true;
894
895 /* if function is a global/weak symbol, but has restricted
896 * (STV_HIDDEN or STV_INTERNAL) visibility, mark its BTF FUNC
897 * as static to enable more permissive BPF verification mode
898 * with more outside context available to BPF verifier
899 */
900 if (prog->sym_global && (ELF64_ST_VISIBILITY(sym->st_other) == STV_HIDDEN
901 || ELF64_ST_VISIBILITY(sym->st_other) == STV_INTERNAL))
902 prog->mark_btf_static = true;
903
904 nr_progs++;
905 obj->nr_programs = nr_progs;
906 }
907
908 return 0;
909 }
910
911 static const struct btf_member *
912 find_member_by_offset(const struct btf_type *t, __u32 bit_offset)
913 {
914 struct btf_member *m;
915 int i;
916
917 for (i = 0, m = btf_members(t); i < btf_vlen(t); i++, m++) {
918 if (btf_member_bit_offset(t, i) == bit_offset)
919 return m;
920 }
921
922 return NULL;
923 }
924
925 static const struct btf_member *
926 find_member_by_name(const struct btf *btf, const struct btf_type *t,
927 const char *name)
928 {
929 struct btf_member *m;
930 int i;
931
932 for (i = 0, m = btf_members(t); i < btf_vlen(t); i++, m++) {
933 if (!strcmp(btf__name_by_offset(btf, m->name_off), name))
934 return m;
935 }
936
937 return NULL;
938 }
939
940 static int find_ksym_btf_id(struct bpf_object *obj, const char *ksym_name,
941 __u16 kind, struct btf **res_btf,
942 struct module_btf **res_mod_btf);
943
944 #define STRUCT_OPS_VALUE_PREFIX "bpf_struct_ops_"
945 static int find_btf_by_prefix_kind(const struct btf *btf, const char *prefix,
946 const char *name, __u32 kind);
947
948 static int
949 find_struct_ops_kern_types(struct bpf_object *obj, const char *tname_raw,
950 struct module_btf **mod_btf,
951 const struct btf_type **type, __u32 *type_id,
952 const struct btf_type **vtype, __u32 *vtype_id,
953 const struct btf_member **data_member)
954 {
955 const struct btf_type *kern_type, *kern_vtype;
956 const struct btf_member *kern_data_member;
957 struct btf *btf;
958 __s32 kern_vtype_id, kern_type_id;
959 char tname[256];
960 __u32 i;
961
962 snprintf(tname, sizeof(tname), "%.*s",
963 (int)bpf_core_essential_name_len(tname_raw), tname_raw);
964
965 kern_type_id = find_ksym_btf_id(obj, tname, BTF_KIND_STRUCT,
966 &btf, mod_btf);
967 if (kern_type_id < 0) {
968 pr_warn("struct_ops init_kern: struct %s is not found in kernel BTF\n",
969 tname);
970 return kern_type_id;
971 }
972 kern_type = btf__type_by_id(btf, kern_type_id);
973
974 /* Find the corresponding "map_value" type that will be used
975 * in map_update(BPF_MAP_TYPE_STRUCT_OPS). For example,
976 * find "struct bpf_struct_ops_tcp_congestion_ops" from the
977 * btf_vmlinux.
978 */
979 kern_vtype_id = find_btf_by_prefix_kind(btf, STRUCT_OPS_VALUE_PREFIX,
980 tname, BTF_KIND_STRUCT);
981 if (kern_vtype_id < 0) {
982 pr_warn("struct_ops init_kern: struct %s%s is not found in kernel BTF\n",
983 STRUCT_OPS_VALUE_PREFIX, tname);
984 return kern_vtype_id;
985 }
986 kern_vtype = btf__type_by_id(btf, kern_vtype_id);
987
988 /* Find "struct tcp_congestion_ops" from
989 * struct bpf_struct_ops_tcp_congestion_ops {
990 * [ ... ]
991 * struct tcp_congestion_ops data;
992 * }
993 */
994 kern_data_member = btf_members(kern_vtype);
995 for (i = 0; i < btf_vlen(kern_vtype); i++, kern_data_member++) {
996 if (kern_data_member->type == kern_type_id)
997 break;
998 }
999 if (i == btf_vlen(kern_vtype)) {
1000 pr_warn("struct_ops init_kern: struct %s data is not found in struct %s%s\n",
1001 tname, STRUCT_OPS_VALUE_PREFIX, tname);
1002 return -EINVAL;
1003 }
1004
1005 *type = kern_type;
1006 *type_id = kern_type_id;
1007 *vtype = kern_vtype;
1008 *vtype_id = kern_vtype_id;
1009 *data_member = kern_data_member;
1010
1011 return 0;
1012 }
1013
1014 static bool bpf_map__is_struct_ops(const struct bpf_map *map)
1015 {
1016 return map->def.type == BPF_MAP_TYPE_STRUCT_OPS;
1017 }
1018
1019 static bool is_valid_st_ops_program(struct bpf_object *obj,
1020 const struct bpf_program *prog)
1021 {
1022 int i;
1023
1024 for (i = 0; i < obj->nr_programs; i++) {
1025 if (&obj->programs[i] == prog)
1026 return prog->type == BPF_PROG_TYPE_STRUCT_OPS;
1027 }
1028
1029 return false;
1030 }
1031
1032 /* For each struct_ops program P, referenced from some struct_ops map M,
1033 * enable P.autoload if there are Ms for which M.autocreate is true,
1034 * disable P.autoload if for all Ms M.autocreate is false.
1035 * Don't change P.autoload for programs that are not referenced from any maps.
1036 */
1037 static int bpf_object_adjust_struct_ops_autoload(struct bpf_object *obj)
1038 {
1039 struct bpf_program *prog, *slot_prog;
1040 struct bpf_map *map;
1041 int i, j, k, vlen;
1042
1043 for (i = 0; i < obj->nr_programs; ++i) {
1044 int should_load = false;
1045 int use_cnt = 0;
1046
1047 prog = &obj->programs[i];
1048 if (prog->type != BPF_PROG_TYPE_STRUCT_OPS)
1049 continue;
1050
1051 for (j = 0; j < obj->nr_maps; ++j) {
1052 map = &obj->maps[j];
1053 if (!bpf_map__is_struct_ops(map))
1054 continue;
1055
1056 vlen = btf_vlen(map->st_ops->type);
1057 for (k = 0; k < vlen; ++k) {
1058 slot_prog = map->st_ops->progs[k];
1059 if (prog != slot_prog)
1060 continue;
1061
1062 use_cnt++;
1063 if (map->autocreate)
1064 should_load = true;
1065 }
1066 }
1067 if (use_cnt)
1068 prog->autoload = should_load;
1069 }
1070
1071 return 0;
1072 }
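/* Illustrative sketch (editorial addition, not part of this file): the pass
 * above ties program loading to map creation, so with a hypothetical
 * skeleton 'skel' containing a single struct_ops map 'my_ops':
 *
 *	bpf_map__set_autocreate(skel->maps.my_ops, false);
 *	bpf_object__load(skel->obj);
 *
 * will also skip loading every program referenced only from 'my_ops',
 * because its autoload flag is flipped to false here.
 */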
1073
1074 /* Init the map's fields that depend on kern_btf */
1075 static int bpf_map__init_kern_struct_ops(struct bpf_map *map)
1076 {
1077 const struct btf_member *member, *kern_member, *kern_data_member;
1078 const struct btf_type *type, *kern_type, *kern_vtype;
1079 __u32 i, kern_type_id, kern_vtype_id, kern_data_off;
1080 struct bpf_object *obj = map->obj;
1081 const struct btf *btf = obj->btf;
1082 struct bpf_struct_ops *st_ops;
1083 const struct btf *kern_btf;
1084 struct module_btf *mod_btf;
1085 void *data, *kern_data;
1086 const char *tname;
1087 int err;
1088
1089 st_ops = map->st_ops;
1090 type = st_ops->type;
1091 tname = st_ops->tname;
1092 err = find_struct_ops_kern_types(obj, tname, &mod_btf,
1093 &kern_type, &kern_type_id,
1094 &kern_vtype, &kern_vtype_id,
1095 &kern_data_member);
1096 if (err)
1097 return err;
1098
1099 kern_btf = mod_btf ? mod_btf->btf : obj->btf_vmlinux;
1100
1101 pr_debug("struct_ops init_kern %s: type_id:%u kern_type_id:%u kern_vtype_id:%u\n",
1102 map->name, st_ops->type_id, kern_type_id, kern_vtype_id);
1103
1104 map->mod_btf_fd = mod_btf ? mod_btf->fd : -1;
1105 map->def.value_size = kern_vtype->size;
1106 map->btf_vmlinux_value_type_id = kern_vtype_id;
1107
1108 st_ops->kern_vdata = calloc(1, kern_vtype->size);
1109 if (!st_ops->kern_vdata)
1110 return -ENOMEM;
1111
1112 data = st_ops->data;
1113 kern_data_off = kern_data_member->offset / 8;
1114 kern_data = st_ops->kern_vdata + kern_data_off;
1115
1116 member = btf_members(type);
1117 for (i = 0; i < btf_vlen(type); i++, member++) {
1118 const struct btf_type *mtype, *kern_mtype;
1119 __u32 mtype_id, kern_mtype_id;
1120 void *mdata, *kern_mdata;
1121 __s64 msize, kern_msize;
1122 __u32 moff, kern_moff;
1123 __u32 kern_member_idx;
1124 const char *mname;
1125
1126 mname = btf__name_by_offset(btf, member->name_off);
1127 kern_member = find_member_by_name(kern_btf, kern_type, mname);
1128 if (!kern_member) {
1129 pr_warn("struct_ops init_kern %s: Cannot find member %s in kernel BTF\n",
1130 map->name, mname);
1131 return -ENOTSUP;
1132 }
1133
1134 kern_member_idx = kern_member - btf_members(kern_type);
1135 if (btf_member_bitfield_size(type, i) ||
1136 btf_member_bitfield_size(kern_type, kern_member_idx)) {
1137 pr_warn("struct_ops init_kern %s: bitfield %s is not supported\n",
1138 map->name, mname);
1139 return -ENOTSUP;
1140 }
1141
1142 moff = member->offset / 8;
1143 kern_moff = kern_member->offset / 8;
1144
1145 mdata = data + moff;
1146 kern_mdata = kern_data + kern_moff;
1147
1148 mtype = skip_mods_and_typedefs(btf, member->type, &mtype_id);
1149 kern_mtype = skip_mods_and_typedefs(kern_btf, kern_member->type,
1150 &kern_mtype_id);
1151 if (BTF_INFO_KIND(mtype->info) !=
1152 BTF_INFO_KIND(kern_mtype->info)) {
1153 pr_warn("struct_ops init_kern %s: Unmatched member type %s %u != %u(kernel)\n",
1154 map->name, mname, BTF_INFO_KIND(mtype->info),
1155 BTF_INFO_KIND(kern_mtype->info));
1156 return -ENOTSUP;
1157 }
1158
1159 if (btf_is_ptr(mtype)) {
1160 struct bpf_program *prog;
1161
1162 /* Update the value from the shadow type */
1163 prog = *(void **)mdata;
1164 st_ops->progs[i] = prog;
1165 if (!prog)
1166 continue;
1167 if (!is_valid_st_ops_program(obj, prog)) {
1168 pr_warn("struct_ops init_kern %s: member %s is not a struct_ops program\n",
1169 map->name, mname);
1170 return -ENOTSUP;
1171 }
1172
1173 kern_mtype = skip_mods_and_typedefs(kern_btf,
1174 kern_mtype->type,
1175 &kern_mtype_id);
1176
1177 /* mtype->type must be a func_proto which was
1178 * guaranteed in bpf_object__collect_st_ops_relos(),
1179 * so only check kern_mtype for func_proto here.
1180 */
1181 if (!btf_is_func_proto(kern_mtype)) {
1182 pr_warn("struct_ops init_kern %s: kernel member %s is not a func ptr\n",
1183 map->name, mname);
1184 return -ENOTSUP;
1185 }
1186
1187 if (mod_btf)
1188 prog->attach_btf_obj_fd = mod_btf->fd;
1189
1190 /* if we haven't yet processed this BPF program, record proper
1191 * attach_btf_id and member_idx
1192 */
1193 if (!prog->attach_btf_id) {
1194 prog->attach_btf_id = kern_type_id;
1195 prog->expected_attach_type = kern_member_idx;
1196 }
1197
1198 /* struct_ops BPF prog can be re-used between multiple
1199 * .struct_ops & .struct_ops.link as long as it's the
1200 * same struct_ops struct definition and the same
1201 * function pointer field
1202 */
1203 if (prog->attach_btf_id != kern_type_id) {
1204 pr_warn("struct_ops init_kern %s func ptr %s: invalid reuse of prog %s in sec %s with type %u: attach_btf_id %u != kern_type_id %u\n",
1205 map->name, mname, prog->name, prog->sec_name, prog->type,
1206 prog->attach_btf_id, kern_type_id);
1207 return -EINVAL;
1208 }
1209 if (prog->expected_attach_type != kern_member_idx) {
1210 pr_warn("struct_ops init_kern %s func ptr %s: invalid reuse of prog %s in sec %s with type %u: expected_attach_type %u != kern_member_idx %u\n",
1211 map->name, mname, prog->name, prog->sec_name, prog->type,
1212 prog->expected_attach_type, kern_member_idx);
1213 return -EINVAL;
1214 }
1215
1216 st_ops->kern_func_off[i] = kern_data_off + kern_moff;
1217
1218 pr_debug("struct_ops init_kern %s: func ptr %s is set to prog %s from data(+%u) to kern_data(+%u)\n",
1219 map->name, mname, prog->name, moff,
1220 kern_moff);
1221
1222 continue;
1223 }
1224
1225 msize = btf__resolve_size(btf, mtype_id);
1226 kern_msize = btf__resolve_size(kern_btf, kern_mtype_id);
1227 if (msize < 0 || kern_msize < 0 || msize != kern_msize) {
1228 pr_warn("struct_ops init_kern %s: Error in size of member %s: %zd != %zd(kernel)\n",
1229 map->name, mname, (ssize_t)msize,
1230 (ssize_t)kern_msize);
1231 return -ENOTSUP;
1232 }
1233
1234 pr_debug("struct_ops init_kern %s: copy %s %u bytes from data(+%u) to kern_data(+%u)\n",
1235 map->name, mname, (unsigned int)msize,
1236 moff, kern_moff);
1237 memcpy(kern_mdata, mdata, msize);
1238 }
1239
1240 return 0;
1241 }
1242
1243 static int bpf_object__init_kern_struct_ops_maps(struct bpf_object *obj)
1244 {
1245 struct bpf_map *map;
1246 size_t i;
1247 int err;
1248
1249 for (i = 0; i < obj->nr_maps; i++) {
1250 map = &obj->maps[i];
1251
1252 if (!bpf_map__is_struct_ops(map))
1253 continue;
1254
1255 if (!map->autocreate)
1256 continue;
1257
1258 err = bpf_map__init_kern_struct_ops(map);
1259 if (err)
1260 return err;
1261 }
1262
1263 return 0;
1264 }
1265
1266 static int init_struct_ops_maps(struct bpf_object *obj, const char *sec_name,
1267 int shndx, Elf_Data *data)
1268 {
1269 const struct btf_type *type, *datasec;
1270 const struct btf_var_secinfo *vsi;
1271 struct bpf_struct_ops *st_ops;
1272 const char *tname, *var_name;
1273 __s32 type_id, datasec_id;
1274 const struct btf *btf;
1275 struct bpf_map *map;
1276 __u32 i;
1277
1278 if (shndx == -1)
1279 return 0;
1280
1281 btf = obj->btf;
1282 datasec_id = btf__find_by_name_kind(btf, sec_name,
1283 BTF_KIND_DATASEC);
1284 if (datasec_id < 0) {
1285 pr_warn("struct_ops init: DATASEC %s not found\n",
1286 sec_name);
1287 return -EINVAL;
1288 }
1289
1290 datasec = btf__type_by_id(btf, datasec_id);
1291 vsi = btf_var_secinfos(datasec);
1292 for (i = 0; i < btf_vlen(datasec); i++, vsi++) {
1293 type = btf__type_by_id(obj->btf, vsi->type);
1294 var_name = btf__name_by_offset(obj->btf, type->name_off);
1295
1296 type_id = btf__resolve_type(obj->btf, vsi->type);
1297 if (type_id < 0) {
1298 pr_warn("struct_ops init: Cannot resolve var type_id %u in DATASEC %s\n",
1299 vsi->type, sec_name);
1300 return -EINVAL;
1301 }
1302
1303 type = btf__type_by_id(obj->btf, type_id);
1304 tname = btf__name_by_offset(obj->btf, type->name_off);
1305 if (!tname[0]) {
1306 pr_warn("struct_ops init: anonymous type is not supported\n");
1307 return -ENOTSUP;
1308 }
1309 if (!btf_is_struct(type)) {
1310 pr_warn("struct_ops init: %s is not a struct\n", tname);
1311 return -EINVAL;
1312 }
1313
1314 map = bpf_object__add_map(obj);
1315 if (IS_ERR(map))
1316 return PTR_ERR(map);
1317
1318 map->sec_idx = shndx;
1319 map->sec_offset = vsi->offset;
1320 map->name = strdup(var_name);
1321 if (!map->name)
1322 return -ENOMEM;
1323 map->btf_value_type_id = type_id;
1324
1325 /* Follow same convention as for programs autoload:
1326 * SEC("?.struct_ops") means map is not created by default.
1327 */
1328 if (sec_name[0] == '?') {
1329 map->autocreate = false;
1330 /* from now on forget there was ? in section name */
1331 sec_name++;
1332 }
1333
1334 map->def.type = BPF_MAP_TYPE_STRUCT_OPS;
1335 map->def.key_size = sizeof(int);
1336 map->def.value_size = type->size;
1337 map->def.max_entries = 1;
1338 map->def.map_flags = strcmp(sec_name, STRUCT_OPS_LINK_SEC) == 0 ? BPF_F_LINK : 0;
1339
1340 map->st_ops = calloc(1, sizeof(*map->st_ops));
1341 if (!map->st_ops)
1342 return -ENOMEM;
1343 st_ops = map->st_ops;
1344 st_ops->data = malloc(type->size);
1345 st_ops->progs = calloc(btf_vlen(type), sizeof(*st_ops->progs));
1346 st_ops->kern_func_off = malloc(btf_vlen(type) *
1347 sizeof(*st_ops->kern_func_off));
1348 if (!st_ops->data || !st_ops->progs || !st_ops->kern_func_off)
1349 return -ENOMEM;
1350
1351 if (vsi->offset + type->size > data->d_size) {
1352 pr_warn("struct_ops init: var %s is beyond the end of DATASEC %s\n",
1353 var_name, sec_name);
1354 return -EINVAL;
1355 }
1356
1357 memcpy(st_ops->data,
1358 data->d_buf + vsi->offset,
1359 type->size);
1360 st_ops->tname = tname;
1361 st_ops->type = type;
1362 st_ops->type_id = type_id;
1363
1364 pr_debug("struct_ops init: struct %s(type_id=%u) %s found at offset %u\n",
1365 tname, type_id, var_name, vsi->offset);
1366 }
1367
1368 return 0;
1369 }
1370
1371 static int bpf_object_init_struct_ops(struct bpf_object *obj)
1372 {
1373 const char *sec_name;
1374 int sec_idx, err;
1375
1376 for (sec_idx = 0; sec_idx < obj->efile.sec_cnt; ++sec_idx) {
1377 struct elf_sec_desc *desc = &obj->efile.secs[sec_idx];
1378
1379 if (desc->sec_type != SEC_ST_OPS)
1380 continue;
1381
1382 sec_name = elf_sec_name(obj, elf_sec_by_idx(obj, sec_idx));
1383 if (!sec_name)
1384 return -LIBBPF_ERRNO__FORMAT;
1385
1386 err = init_struct_ops_maps(obj, sec_name, sec_idx, desc->data);
1387 if (err)
1388 return err;
1389 }
1390
1391 return 0;
1392 }
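/* Illustrative sketch (editorial addition, not part of this file): on the
 * BPF side, each struct_ops map corresponds to a global variable placed in
 * one of the sections scanned above, e.g. (dctcp_init/dctcp_ssthresh being
 * SEC("struct_ops") programs defined elsewhere):
 *
 *	SEC(".struct_ops.link")
 *	struct tcp_congestion_ops dctcp = {
 *		.init		= (void *)dctcp_init,
 *		.ssthresh	= (void *)dctcp_ssthresh,
 *		.name		= "bpf_dctcp",
 *	};
 *
 * The variable becomes a BPF_MAP_TYPE_STRUCT_OPS map named "dctcp", with
 * BPF_F_LINK set because it lives in STRUCT_OPS_LINK_SEC.
 */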
1393
1394 static struct bpf_object *bpf_object__new(const char *path,
1395 const void *obj_buf,
1396 size_t obj_buf_sz,
1397 const char *obj_name)
1398 {
1399 struct bpf_object *obj;
1400 char *end;
1401
1402 obj = calloc(1, sizeof(struct bpf_object) + strlen(path) + 1);
1403 if (!obj) {
1404 pr_warn("alloc memory failed for %s\n", path);
1405 return ERR_PTR(-ENOMEM);
1406 }
1407
1408 strcpy(obj->path, path);
1409 if (obj_name) {
1410 libbpf_strlcpy(obj->name, obj_name, sizeof(obj->name));
1411 } else {
1412 /* Using basename() GNU version which doesn't modify arg. */
1413 libbpf_strlcpy(obj->name, basename((void *)path), sizeof(obj->name));
1414 end = strchr(obj->name, '.');
1415 if (end)
1416 *end = 0;
1417 }
1418
1419 obj->efile.fd = -1;
1420 /*
1421 * Caller of this function should also call
1422 * bpf_object__elf_finish() after data collection to return
1423 * obj_buf to the user. Otherwise we would have to duplicate the buffer
1424 * to avoid the user freeing it before ELF processing finishes.
1425 */
1426 obj->efile.obj_buf = obj_buf;
1427 obj->efile.obj_buf_sz = obj_buf_sz;
1428 obj->efile.btf_maps_shndx = -1;
1429 obj->kconfig_map_idx = -1;
1430
1431 obj->kern_version = get_kernel_version();
1432 obj->loaded = false;
1433
1434 return obj;
1435 }
1436
1437 static void bpf_object__elf_finish(struct bpf_object *obj)
1438 {
1439 if (!obj->efile.elf)
1440 return;
1441
1442 elf_end(obj->efile.elf);
1443 obj->efile.elf = NULL;
1444 obj->efile.symbols = NULL;
1445
1446 zfree(&obj->efile.secs);
1447 obj->efile.sec_cnt = 0;
1448 zclose(obj->efile.fd);
1449 obj->efile.obj_buf = NULL;
1450 obj->efile.obj_buf_sz = 0;
1451 }
1452
1453 static int bpf_object__elf_init(struct bpf_object *obj)
1454 {
1455 Elf64_Ehdr *ehdr;
1456 int err = 0;
1457 Elf *elf;
1458
1459 if (obj->efile.elf) {
1460 pr_warn("elf: init internal error\n");
1461 return -LIBBPF_ERRNO__LIBELF;
1462 }
1463
1464 if (obj->efile.obj_buf_sz > 0) {
1465 /* obj_buf should have been validated by bpf_object__open_mem(). */
1466 elf = elf_memory((char *)obj->efile.obj_buf, obj->efile.obj_buf_sz);
1467 } else {
1468 obj->efile.fd = open(obj->path, O_RDONLY | O_CLOEXEC);
1469 if (obj->efile.fd < 0) {
1470 char errmsg[STRERR_BUFSIZE], *cp;
1471
1472 err = -errno;
1473 cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg));
1474 pr_warn("elf: failed to open %s: %s\n", obj->path, cp);
1475 return err;
1476 }
1477
1478 elf = elf_begin(obj->efile.fd, ELF_C_READ_MMAP, NULL);
1479 }
1480
1481 if (!elf) {
1482 pr_warn("elf: failed to open %s as ELF file: %s\n", obj->path, elf_errmsg(-1));
1483 err = -LIBBPF_ERRNO__LIBELF;
1484 goto errout;
1485 }
1486
1487 obj->efile.elf = elf;
1488
1489 if (elf_kind(elf) != ELF_K_ELF) {
1490 err = -LIBBPF_ERRNO__FORMAT;
1491 pr_warn("elf: '%s' is not a proper ELF object\n", obj->path);
1492 goto errout;
1493 }
1494
1495 if (gelf_getclass(elf) != ELFCLASS64) {
1496 err = -LIBBPF_ERRNO__FORMAT;
1497 pr_warn("elf: '%s' is not a 64-bit ELF object\n", obj->path);
1498 goto errout;
1499 }
1500
1501 obj->efile.ehdr = ehdr = elf64_getehdr(elf);
1502 if (!obj->efile.ehdr) {
1503 pr_warn("elf: failed to get ELF header from %s: %s\n", obj->path, elf_errmsg(-1));
1504 err = -LIBBPF_ERRNO__FORMAT;
1505 goto errout;
1506 }
1507
1508 if (elf_getshdrstrndx(elf, &obj->efile.shstrndx)) {
1509 pr_warn("elf: failed to get section names section index for %s: %s\n",
1510 obj->path, elf_errmsg(-1));
1511 err = -LIBBPF_ERRNO__FORMAT;
1512 goto errout;
1513 }
1514
1515 /* ELF is corrupted/truncated, avoid calling elf_strptr. */
1516 if (!elf_rawdata(elf_getscn(elf, obj->efile.shstrndx), NULL)) {
1517 pr_warn("elf: failed to get section names strings from %s: %s\n",
1518 obj->path, elf_errmsg(-1));
1519 err = -LIBBPF_ERRNO__FORMAT;
1520 goto errout;
1521 }
1522
1523 /* Old LLVM set e_machine to EM_NONE */
1524 if (ehdr->e_type != ET_REL || (ehdr->e_machine && ehdr->e_machine != EM_BPF)) {
1525 pr_warn("elf: %s is not a valid eBPF object file\n", obj->path);
1526 err = -LIBBPF_ERRNO__FORMAT;
1527 goto errout;
1528 }
1529
1530 return 0;
1531 errout:
1532 bpf_object__elf_finish(obj);
1533 return err;
1534 }
1535
1536 static int bpf_object__check_endianness(struct bpf_object *obj)
1537 {
1538 #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
1539 if (obj->efile.ehdr->e_ident[EI_DATA] == ELFDATA2LSB)
1540 return 0;
1541 #elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
1542 if (obj->efile.ehdr->e_ident[EI_DATA] == ELFDATA2MSB)
1543 return 0;
1544 #else
1545 # error "Unrecognized __BYTE_ORDER__"
1546 #endif
1547 pr_warn("elf: endianness mismatch in %s.\n", obj->path);
1548 return -LIBBPF_ERRNO__ENDIAN;
1549 }
1550
1551 static int
1552 bpf_object__init_license(struct bpf_object *obj, void *data, size_t size)
1553 {
1554 if (!data) {
1555 pr_warn("invalid license section in %s\n", obj->path);
1556 return -LIBBPF_ERRNO__FORMAT;
1557 }
1558 /* libbpf_strlcpy() only copies first N - 1 bytes, so size + 1 won't
1559 * go over allowed ELF data section buffer
1560 */
1561 libbpf_strlcpy(obj->license, data, min(size + 1, sizeof(obj->license)));
1562 pr_debug("license of %s is %s\n", obj->path, obj->license);
1563 return 0;
1564 }
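/* Illustrative sketch (editorial addition, not part of this file): the
 * license section parsed above comes from a declaration like
 *
 *	char LICENSE[] SEC("license") = "Dual BSD/GPL";
 *
 * in the BPF program source; the string is later passed to the kernel in
 * the license field of the BPF_PROG_LOAD command and gates GPL-only helpers.
 */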
1565
1566 static int
1567 bpf_object__init_kversion(struct bpf_object *obj, void *data, size_t size)
1568 {
1569 __u32 kver;
1570
1571 if (!data || size != sizeof(kver)) {
1572 pr_warn("invalid kver section in %s\n", obj->path);
1573 return -LIBBPF_ERRNO__FORMAT;
1574 }
1575 memcpy(&kver, data, sizeof(kver));
1576 obj->kern_version = kver;
1577 pr_debug("kernel version of %s is %x\n", obj->path, obj->kern_version);
1578 return 0;
1579 }
1580
1581 static bool bpf_map_type__is_map_in_map(enum bpf_map_type type)
1582 {
1583 if (type == BPF_MAP_TYPE_ARRAY_OF_MAPS ||
1584 type == BPF_MAP_TYPE_HASH_OF_MAPS)
1585 return true;
1586 return false;
1587 }
1588
1589 static int find_elf_sec_sz(const struct bpf_object *obj, const char *name, __u32 *size)
1590 {
1591 Elf_Data *data;
1592 Elf_Scn *scn;
1593
1594 if (!name)
1595 return -EINVAL;
1596
1597 scn = elf_sec_by_name(obj, name);
1598 data = elf_sec_data(obj, scn);
1599 if (data) {
1600 *size = data->d_size;
1601 return 0; /* found it */
1602 }
1603
1604 return -ENOENT;
1605 }
1606
1607 static Elf64_Sym *find_elf_var_sym(const struct bpf_object *obj, const char *name)
1608 {
1609 Elf_Data *symbols = obj->efile.symbols;
1610 const char *sname;
1611 size_t si;
1612
1613 for (si = 0; si < symbols->d_size / sizeof(Elf64_Sym); si++) {
1614 Elf64_Sym *sym = elf_sym_by_idx(obj, si);
1615
1616 if (ELF64_ST_TYPE(sym->st_info) != STT_OBJECT)
1617 continue;
1618
1619 if (ELF64_ST_BIND(sym->st_info) != STB_GLOBAL &&
1620 ELF64_ST_BIND(sym->st_info) != STB_WEAK)
1621 continue;
1622
1623 sname = elf_sym_str(obj, sym->st_name);
1624 if (!sname) {
1625 pr_warn("failed to get sym name string for var %s\n", name);
1626 return ERR_PTR(-EIO);
1627 }
1628 if (strcmp(name, sname) == 0)
1629 return sym;
1630 }
1631
1632 return ERR_PTR(-ENOENT);
1633 }
1634
1635 /* Some versions of Android don't provide memfd_create() in their libc
1636 * implementation, so avoid complications and just go straight to Linux
1637 * syscall.
1638 */
1639 static int sys_memfd_create(const char *name, unsigned flags)
1640 {
1641 return syscall(__NR_memfd_create, name, flags);
1642 }
1643
1644 static int create_placeholder_fd(void)
1645 {
1646 int fd;
1647
1648 fd = ensure_good_fd(sys_memfd_create("libbpf-placeholder-fd", MFD_CLOEXEC));
1649 if (fd < 0)
1650 return -errno;
1651 return fd;
1652 }
1653
1654 static struct bpf_map *bpf_object__add_map(struct bpf_object *obj)
1655 {
1656 struct bpf_map *map;
1657 int err;
1658
1659 err = libbpf_ensure_mem((void **)&obj->maps, &obj->maps_cap,
1660 sizeof(*obj->maps), obj->nr_maps + 1);
1661 if (err)
1662 return ERR_PTR(err);
1663
1664 map = &obj->maps[obj->nr_maps++];
1665 map->obj = obj;
1666 /* Preallocate map FD without actually creating BPF map just yet.
1667 * These map FD "placeholders" will be reused later without changing
1668 * FD value when map is actually created in the kernel.
1669 *
1670 * This is useful to be able to perform BPF program relocations
1671 * without having to create BPF maps before that step. This allows us
1672 * to finalize and load BTF very late in BPF object's loading phase,
1673 * right before BPF maps have to be created and BPF programs have to
1674 * be loaded. By having these map FD placeholders we can perform all
1675 * the sanitizations, relocations, and any other adjustments before we
1676 * start creating actual BPF kernel objects (BTF, maps, progs).
1677 */
1678 map->fd = create_placeholder_fd();
1679 if (map->fd < 0)
1680 return ERR_PTR(map->fd);
1681 map->inner_map_fd = -1;
1682 map->autocreate = true;
1683
1684 return map;
1685 }
1686
1687 static size_t bpf_map_mmap_sz(unsigned int value_sz, unsigned int max_entries)
1688 {
1689 const long page_sz = sysconf(_SC_PAGE_SIZE);
1690 size_t map_sz;
1691
1692 map_sz = (size_t)roundup(value_sz, 8) * max_entries;
1693 map_sz = roundup(map_sz, page_sz);
1694 return map_sz;
1695 }
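/* Editorial example (not part of this file): with a 4096-byte page,
 * value_sz = 7 and max_entries = 3 gives roundup(7, 8) * 3 = 24 bytes of
 * data, rounded up to one 4096-byte page; value_sz = 4096 and
 * max_entries = 3 maps exactly three pages (12288 bytes).
 */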
1696
1697 static int bpf_map_mmap_resize(struct bpf_map *map, size_t old_sz, size_t new_sz)
1698 {
1699 void *mmaped;
1700
1701 if (!map->mmaped)
1702 return -EINVAL;
1703
1704 if (old_sz == new_sz)
1705 return 0;
1706
1707 mmaped = mmap(NULL, new_sz, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, -1, 0);
1708 if (mmaped == MAP_FAILED)
1709 return -errno;
1710
1711 memcpy(mmaped, map->mmaped, min(old_sz, new_sz));
1712 munmap(map->mmaped, old_sz);
1713 map->mmaped = mmaped;
1714 return 0;
1715 }
1716
1717 static char *internal_map_name(struct bpf_object *obj, const char *real_name)
1718 {
1719 char map_name[BPF_OBJ_NAME_LEN], *p;
1720 int pfx_len, sfx_len = max((size_t)7, strlen(real_name));
1721
1722 /* This is one of the more confusing parts of libbpf for various
1723 * reasons, some of which are historical. The original idea for naming
1724 * internal names was to include as much of BPF object name prefix as
1725 * possible, so that it can be distinguished from similar internal
1726 * maps of a different BPF object.
1727 * As an example, let's say we have bpf_object named 'my_object_name'
1728 * and internal map corresponding to '.rodata' ELF section. The final
1729 * map name advertised to user and to the kernel will be
1730 * 'my_objec.rodata', taking first 8 characters of object name and
1731 * entire 7 characters of '.rodata'.
1732 * Somewhat confusingly, if internal map ELF section name is shorter
1733 * than 7 characters, e.g., '.bss', we still reserve 7 characters
1734 * for the suffix, even though we only have 4 actual characters, and
1735 * resulting map will be called 'my_objec.bss', not even using all 15
1736 * characters allowed by the kernel. Oh well, at least the truncated
1737 * object name is somewhat consistent in this case. But if the map
1738 * name is '.kconfig', we'll still have entirety of '.kconfig' added
1739 * (8 chars) and thus will be left with only first 7 characters of the
1740 * object name ('my_obje'). Happy guessing, user, that the final map
1741 * name will be "my_obje.kconfig".
1742 * Now, with libbpf starting to support arbitrarily named .rodata.*
1743 * and .data.* data sections, it's possible that ELF section name is
1744 * longer than allowed 15 chars, so we now need to be careful to take
1745 * only up to 15 first characters of ELF name, taking no BPF object
1746 * name characters at all. So '.rodata.abracadabra' will result in
1747 * '.rodata.abracad' kernel and user-visible name.
1748 * We need to keep this convoluted logic intact for .data, .bss and
1749 * .rodata maps, but for new custom .data.custom and .rodata.custom
1750 * maps we use their ELF names as is, not prepending bpf_object name
1751 * in front. We still need to truncate them to 15 characters for the
1752 * kernel. Full name can be recovered for such maps by using DATASEC
1753 * BTF type associated with such map's value type, though.
1754 */
1755 if (sfx_len >= BPF_OBJ_NAME_LEN)
1756 sfx_len = BPF_OBJ_NAME_LEN - 1;
1757
1758 /* if there are two or more dots in map name, it's a custom dot map */
1759 if (strchr(real_name + 1, '.') != NULL)
1760 pfx_len = 0;
1761 else
1762 pfx_len = min((size_t)BPF_OBJ_NAME_LEN - sfx_len - 1, strlen(obj->name));
1763
1764 snprintf(map_name, sizeof(map_name), "%.*s%.*s", pfx_len, obj->name,
1765 sfx_len, real_name);
1766
1767 /* sanitise map name to characters allowed by kernel */
1768 for (p = map_name; *p && p < map_name + sizeof(map_name); p++)
1769 if (!isalnum(*p) && *p != '_' && *p != '.')
1770 *p = '_';
1771
1772 return strdup(map_name);
1773 }
1774
1775 static int
1776 map_fill_btf_type_info(struct bpf_object *obj, struct bpf_map *map);
1777
1778 /* Internal BPF map is mmap()'able only if at least one of the corresponding
1779 * DATASEC's VARs is to be exposed through BPF skeleton. I.e., it's a GLOBAL
1780 * variable and it's not marked as __hidden (which turns it into, effectively,
1781 * a STATIC variable).
1782 */
1783 static bool map_is_mmapable(struct bpf_object *obj, struct bpf_map *map)
1784 {
1785 const struct btf_type *t, *vt;
1786 struct btf_var_secinfo *vsi;
1787 int i, n;
1788
1789 if (!map->btf_value_type_id)
1790 return false;
1791
1792 t = btf__type_by_id(obj->btf, map->btf_value_type_id);
1793 if (!btf_is_datasec(t))
1794 return false;
1795
1796 vsi = btf_var_secinfos(t);
1797 for (i = 0, n = btf_vlen(t); i < n; i++, vsi++) {
1798 vt = btf__type_by_id(obj->btf, vsi->type);
1799 if (!btf_is_var(vt))
1800 continue;
1801
1802 if (btf_var(vt)->linkage != BTF_VAR_STATIC)
1803 return true;
1804 }
1805
1806 return false;
1807 }
1808
1809 static int
1810 bpf_object__init_internal_map(struct bpf_object *obj, enum libbpf_map_type type,
1811 const char *real_name, int sec_idx, void *data, size_t data_sz)
1812 {
1813 struct bpf_map_def *def;
1814 struct bpf_map *map;
1815 size_t mmap_sz;
1816 int err;
1817
1818 map = bpf_object__add_map(obj);
1819 if (IS_ERR(map))
1820 return PTR_ERR(map);
1821
1822 map->libbpf_type = type;
1823 map->sec_idx = sec_idx;
1824 map->sec_offset = 0;
1825 map->real_name = strdup(real_name);
1826 map->name = internal_map_name(obj, real_name);
1827 if (!map->real_name || !map->name) {
1828 zfree(&map->real_name);
1829 zfree(&map->name);
1830 return -ENOMEM;
1831 }
1832
1833 def = &map->def;
1834 def->type = BPF_MAP_TYPE_ARRAY;
1835 def->key_size = sizeof(int);
1836 def->value_size = data_sz;
1837 def->max_entries = 1;
1838 def->map_flags = type == LIBBPF_MAP_RODATA || type == LIBBPF_MAP_KCONFIG
1839 ? BPF_F_RDONLY_PROG : 0;
1840
1841 /* failures are fine because of maps like .rodata.str1.1 */
1842 (void) map_fill_btf_type_info(obj, map);
1843
1844 if (map_is_mmapable(obj, map))
1845 def->map_flags |= BPF_F_MMAPABLE;
1846
1847 pr_debug("map '%s' (global data): at sec_idx %d, offset %zu, flags %x.\n",
1848 map->name, map->sec_idx, map->sec_offset, def->map_flags);
1849
1850 mmap_sz = bpf_map_mmap_sz(map->def.value_size, map->def.max_entries);
1851 map->mmaped = mmap(NULL, mmap_sz, PROT_READ | PROT_WRITE,
1852 MAP_SHARED | MAP_ANONYMOUS, -1, 0);
1853 if (map->mmaped == MAP_FAILED) {
1854 err = -errno;
1855 map->mmaped = NULL;
1856 pr_warn("failed to alloc map '%s' content buffer: %d\n",
1857 map->name, err);
1858 zfree(&map->real_name);
1859 zfree(&map->name);
1860 return err;
1861 }
1862
1863 if (data)
1864 memcpy(map->mmaped, data, data_sz);
1865
1866 pr_debug("map %td is \"%s\"\n", map - obj->maps, map->name);
1867 return 0;
1868 }
1869
1870 static int bpf_object__init_global_data_maps(struct bpf_object *obj)
1871 {
1872 struct elf_sec_desc *sec_desc;
1873 const char *sec_name;
1874 int err = 0, sec_idx;
1875
1876 /*
1877 * Populate obj->maps with libbpf internal maps.
1878 */
1879 for (sec_idx = 1; sec_idx < obj->efile.sec_cnt; sec_idx++) {
1880 sec_desc = &obj->efile.secs[sec_idx];
1881
1882 /* Skip recognized sections with size 0. */
1883 if (!sec_desc->data || sec_desc->data->d_size == 0)
1884 continue;
1885
1886 switch (sec_desc->sec_type) {
1887 case SEC_DATA:
1888 sec_name = elf_sec_name(obj, elf_sec_by_idx(obj, sec_idx));
1889 err = bpf_object__init_internal_map(obj, LIBBPF_MAP_DATA,
1890 sec_name, sec_idx,
1891 sec_desc->data->d_buf,
1892 sec_desc->data->d_size);
1893 break;
1894 case SEC_RODATA:
1895 obj->has_rodata = true;
1896 sec_name = elf_sec_name(obj, elf_sec_by_idx(obj, sec_idx));
1897 err = bpf_object__init_internal_map(obj, LIBBPF_MAP_RODATA,
1898 sec_name, sec_idx,
1899 sec_desc->data->d_buf,
1900 sec_desc->data->d_size);
1901 break;
1902 case SEC_BSS:
1903 sec_name = elf_sec_name(obj, elf_sec_by_idx(obj, sec_idx));
1904 err = bpf_object__init_internal_map(obj, LIBBPF_MAP_BSS,
1905 sec_name, sec_idx,
1906 NULL,
1907 sec_desc->data->d_size);
1908 break;
1909 default:
1910 /* skip */
1911 break;
1912 }
1913 if (err)
1914 return err;
1915 }
1916 return 0;
1917 }
1918
1919
1920 static struct extern_desc *find_extern_by_name(const struct bpf_object *obj,
1921 const void *name)
1922 {
1923 int i;
1924
1925 for (i = 0; i < obj->nr_extern; i++) {
1926 if (strcmp(obj->externs[i].name, name) == 0)
1927 return &obj->externs[i];
1928 }
1929 return NULL;
1930 }
1931
1932 static int set_kcfg_value_tri(struct extern_desc *ext, void *ext_val,
1933 char value)
1934 {
1935 switch (ext->kcfg.type) {
1936 case KCFG_BOOL:
1937 if (value == 'm') {
1938 pr_warn("extern (kcfg) '%s': value '%c' implies tristate or char type\n",
1939 ext->name, value);
1940 return -EINVAL;
1941 }
1942 *(bool *)ext_val = value == 'y' ? true : false;
1943 break;
1944 case KCFG_TRISTATE:
1945 if (value == 'y')
1946 *(enum libbpf_tristate *)ext_val = TRI_YES;
1947 else if (value == 'm')
1948 *(enum libbpf_tristate *)ext_val = TRI_MODULE;
1949 else /* value == 'n' */
1950 *(enum libbpf_tristate *)ext_val = TRI_NO;
1951 break;
1952 case KCFG_CHAR:
1953 *(char *)ext_val = value;
1954 break;
1955 case KCFG_UNKNOWN:
1956 case KCFG_INT:
1957 case KCFG_CHAR_ARR:
1958 default:
1959 pr_warn("extern (kcfg) '%s': value '%c' implies bool, tristate, or char type\n",
1960 ext->name, value);
1961 return -EINVAL;
1962 }
1963 ext->is_set = true;
1964 return 0;
1965 }
1966
1967 static int set_kcfg_value_str(struct extern_desc *ext, char *ext_val,
1968 const char *value)
1969 {
1970 size_t len;
1971
1972 if (ext->kcfg.type != KCFG_CHAR_ARR) {
1973 pr_warn("extern (kcfg) '%s': value '%s' implies char array type\n",
1974 ext->name, value);
1975 return -EINVAL;
1976 }
1977
1978 len = strlen(value);
1979 if (value[len - 1] != '"') {
1980 pr_warn("extern (kcfg) '%s': invalid string config '%s'\n",
1981 ext->name, value);
1982 return -EINVAL;
1983 }
1984
1985 /* strip quotes */
1986 len -= 2;
1987 if (len >= ext->kcfg.sz) {
1988 pr_warn("extern (kcfg) '%s': long string '%s' of (%zu bytes) truncated to %d bytes\n",
1989 ext->name, value, len, ext->kcfg.sz - 1);
1990 len = ext->kcfg.sz - 1;
1991 }
1992 memcpy(ext_val, value + 1, len);
1993 ext_val[len] = '\0';
1994 ext->is_set = true;
1995 return 0;
1996 }
1997
1998 static int parse_u64(const char *value, __u64 *res)
1999 {
2000 char *value_end;
2001 int err;
2002
2003 errno = 0;
2004 *res = strtoull(value, &value_end, 0);
2005 if (errno) {
2006 err = -errno;
2007 pr_warn("failed to parse '%s' as integer: %d\n", value, err);
2008 return err;
2009 }
2010 if (*value_end) {
2011 pr_warn("failed to parse '%s' as integer completely\n", value);
2012 return -EINVAL;
2013 }
2014 return 0;
2015 }
2016
2017 static bool is_kcfg_value_in_range(const struct extern_desc *ext, __u64 v)
2018 {
2019 int bit_sz = ext->kcfg.sz * 8;
2020
2021 if (ext->kcfg.sz == 8)
2022 return true;
2023
2024 * Validate that the value stored in u64 fits in an integer of
2025 * `ext->sz` bytes without any loss of information. If the target
2026 * integer is signed, we rely on the following limits of a Y-bit
2027 * integer type and the subsequent transformation:
2028 *
2029 * -2^(Y-1) <= X <= 2^(Y-1) - 1
2030 * 0 <= X + 2^(Y-1) <= 2^Y - 1
2031 * 0 <= X + 2^(Y-1) < 2^Y
2032 *
2033 * For unsigned target integer, check that all the (64 - Y) bits are
2034 * zero.
2035 */
2036 if (ext->kcfg.is_signed)
2037 return v + (1ULL << (bit_sz - 1)) < (1ULL << bit_sz);
2038 else
2039 return (v >> bit_sz) == 0;
2040 }
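/* Editor's worked example for the signed branch above, with a 1-byte
 * target (bit_sz == 8, valid range -128..127):
 *
 *   v = (__u64)-5   -> 0xff..fb + 0x80 wraps to 0x7b (123)  < 256: accepted
 *   v = 127         -> 127 + 128 = 255                      < 256: accepted
 *   v = 200         -> 200 + 128 = 328                     >= 256: rejected
 *   v = (__u64)-129 -> 0xff..7f + 0x80 = 0xff..ff          >= 256: rejected
 *
 * For the unsigned branch, e.g. bit_sz == 8: 255 passes ((255 >> 8) == 0)
 * while 256 is rejected.
 */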
2041
2042 static int set_kcfg_value_num(struct extern_desc *ext, void *ext_val,
2043 __u64 value)
2044 {
2045 if (ext->kcfg.type != KCFG_INT && ext->kcfg.type != KCFG_CHAR &&
2046 ext->kcfg.type != KCFG_BOOL) {
2047 pr_warn("extern (kcfg) '%s': value '%llu' implies integer, char, or boolean type\n",
2048 ext->name, (unsigned long long)value);
2049 return -EINVAL;
2050 }
2051 if (ext->kcfg.type == KCFG_BOOL && value > 1) {
2052 pr_warn("extern (kcfg) '%s': value '%llu' isn't boolean compatible\n",
2053 ext->name, (unsigned long long)value);
2054 return -EINVAL;
2055
2056 }
2057 if (!is_kcfg_value_in_range(ext, value)) {
2058 pr_warn("extern (kcfg) '%s': value '%llu' doesn't fit in %d bytes\n",
2059 ext->name, (unsigned long long)value, ext->kcfg.sz);
2060 return -ERANGE;
2061 }
2062 switch (ext->kcfg.sz) {
2063 case 1:
2064 *(__u8 *)ext_val = value;
2065 break;
2066 case 2:
2067 *(__u16 *)ext_val = value;
2068 break;
2069 case 4:
2070 *(__u32 *)ext_val = value;
2071 break;
2072 case 8:
2073 *(__u64 *)ext_val = value;
2074 break;
2075 default:
2076 return -EINVAL;
2077 }
2078 ext->is_set = true;
2079 return 0;
2080 }
2081
2082 static int bpf_object__process_kconfig_line(struct bpf_object *obj,
2083 char *buf, void *data)
2084 {
2085 struct extern_desc *ext;
2086 char *sep, *value;
2087 int len, err = 0;
2088 void *ext_val;
2089 __u64 num;
2090
2091 if (!str_has_pfx(buf, "CONFIG_"))
2092 return 0;
2093
2094 sep = strchr(buf, '=');
2095 if (!sep) {
2096 pr_warn("failed to parse '%s': no separator\n", buf);
2097 return -EINVAL;
2098 }
2099
2100 /* Trim ending '\n' */
2101 len = strlen(buf);
2102 if (buf[len - 1] == '\n')
2103 buf[len - 1] = '\0';
2104 /* Split on '=' and ensure that a value is present. */
2105 *sep = '\0';
2106 if (!sep[1]) {
2107 *sep = '=';
2108 pr_warn("failed to parse '%s': no value\n", buf);
2109 return -EINVAL;
2110 }
2111
2112 ext = find_extern_by_name(obj, buf);
2113 if (!ext || ext->is_set)
2114 return 0;
2115
2116 ext_val = data + ext->kcfg.data_off;
2117 value = sep + 1;
2118
2119 switch (*value) {
2120 case 'y': case 'n': case 'm':
2121 err = set_kcfg_value_tri(ext, ext_val, *value);
2122 break;
2123 case '"':
2124 err = set_kcfg_value_str(ext, ext_val, value);
2125 break;
2126 default:
2127 /* assume integer */
2128 err = parse_u64(value, &num);
2129 if (err) {
2130 pr_warn("extern (kcfg) '%s': value '%s' isn't a valid integer\n", ext->name, value);
2131 return err;
2132 }
2133 if (ext->kcfg.type != KCFG_INT && ext->kcfg.type != KCFG_CHAR) {
2134 pr_warn("extern (kcfg) '%s': value '%s' implies integer type\n", ext->name, value);
2135 return -EINVAL;
2136 }
2137 err = set_kcfg_value_num(ext, ext_val, num);
2138 break;
2139 }
2140 if (err)
2141 return err;
2142 pr_debug("extern (kcfg) '%s': set to %s\n", ext->name, value);
2143 return 0;
2144 }
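/* Editor's sketch of how Kconfig lines map to kcfg externs. On the BPF
 * side, externs are declared with the __kconfig attribute from
 * bpf_helpers.h (names below are just examples):
 *
 *   Kconfig line                       matching extern declaration
 *   CONFIG_BPF_SYSCALL=y               extern bool CONFIG_BPF_SYSCALL __kconfig;
 *   CONFIG_MODULES=m                   extern enum libbpf_tristate CONFIG_MODULES __kconfig;
 *   CONFIG_HZ=250                      extern int CONFIG_HZ __kconfig;
 *   CONFIG_DEFAULT_HOSTNAME="(none)"   extern char CONFIG_DEFAULT_HOSTNAME[8] __kconfig;
 *
 * Lines not starting with "CONFIG_", or not matching any declared extern,
 * are silently skipped.
 */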
2145
2146 static int bpf_object__read_kconfig_file(struct bpf_object *obj, void *data)
2147 {
2148 char buf[PATH_MAX];
2149 struct utsname uts;
2150 int len, err = 0;
2151 gzFile file;
2152
2153 uname(&uts);
2154 len = snprintf(buf, PATH_MAX, "/boot/config-%s", uts.release);
2155 if (len < 0)
2156 return -EINVAL;
2157 else if (len >= PATH_MAX)
2158 return -ENAMETOOLONG;
2159
2160 /* gzopen also accepts uncompressed files. */
2161 file = gzopen(buf, "re");
2162 if (!file)
2163 file = gzopen("/proc/config.gz", "re");
2164
2165 if (!file) {
2166 pr_warn("failed to open system Kconfig\n");
2167 return -ENOENT;
2168 }
2169
2170 while (gzgets(file, buf, sizeof(buf))) {
2171 err = bpf_object__process_kconfig_line(obj, buf, data);
2172 if (err) {
2173 pr_warn("error parsing system Kconfig line '%s': %d\n",
2174 buf, err);
2175 goto out;
2176 }
2177 }
2178
2179 out:
2180 gzclose(file);
2181 return err;
2182 }
2183
2184 static int bpf_object__read_kconfig_mem(struct bpf_object *obj,
2185 const char *config, void *data)
2186 {
2187 char buf[PATH_MAX];
2188 int err = 0;
2189 FILE *file;
2190
2191 file = fmemopen((void *)config, strlen(config), "r");
2192 if (!file) {
2193 err = -errno;
2194 pr_warn("failed to open in-memory Kconfig: %d\n", err);
2195 return err;
2196 }
2197
2198 while (fgets(buf, sizeof(buf), file)) {
2199 err = bpf_object__process_kconfig_line(obj, buf, data);
2200 if (err) {
2201 pr_warn("error parsing in-memory Kconfig line '%s': %d\n",
2202 buf, err);
2203 break;
2204 }
2205 }
2206
2207 fclose(file);
2208 return err;
2209 }
2210
2211 static int bpf_object__init_kconfig_map(struct bpf_object *obj)
2212 {
2213 struct extern_desc *last_ext = NULL, *ext;
2214 size_t map_sz;
2215 int i, err;
2216
2217 for (i = 0; i < obj->nr_extern; i++) {
2218 ext = &obj->externs[i];
2219 if (ext->type == EXT_KCFG)
2220 last_ext = ext;
2221 }
2222
2223 if (!last_ext)
2224 return 0;
2225
2226 map_sz = last_ext->kcfg.data_off + last_ext->kcfg.sz;
2227 err = bpf_object__init_internal_map(obj, LIBBPF_MAP_KCONFIG,
2228 ".kconfig", obj->efile.symbols_shndx,
2229 NULL, map_sz);
2230 if (err)
2231 return err;
2232
2233 obj->kconfig_map_idx = obj->nr_maps - 1;
2234
2235 return 0;
2236 }
2237
2238 const struct btf_type *
2239 skip_mods_and_typedefs(const struct btf *btf, __u32 id, __u32 *res_id)
2240 {
2241 const struct btf_type *t = btf__type_by_id(btf, id);
2242
2243 if (res_id)
2244 *res_id = id;
2245
2246 while (btf_is_mod(t) || btf_is_typedef(t)) {
2247 if (res_id)
2248 *res_id = t->type;
2249 t = btf__type_by_id(btf, t->type);
2250 }
2251
2252 return t;
2253 }
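/* Editor's illustration: for BPF-side C such as
 *
 *   typedef const volatile struct foo foo_cv_t;
 *   foo_cv_t *p;
 *
 * the BTF chain behind foo_cv_t is TYPEDEF -> CONST -> VOLATILE -> STRUCT
 * (exact modifier order may vary); skip_mods_and_typedefs() walks that
 * chain, returns the STRUCT type, and (if res_id is non-NULL) reports the
 * STRUCT's type ID.
 */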
2254
2255 static const struct btf_type *
2256 resolve_func_ptr(const struct btf *btf, __u32 id, __u32 *res_id)
2257 {
2258 const struct btf_type *t;
2259
2260 t = skip_mods_and_typedefs(btf, id, NULL);
2261 if (!btf_is_ptr(t))
2262 return NULL;
2263
2264 t = skip_mods_and_typedefs(btf, t->type, res_id);
2265
2266 return btf_is_func_proto(t) ? t : NULL;
2267 }
2268
2269 static const char *__btf_kind_str(__u16 kind)
2270 {
2271 switch (kind) {
2272 case BTF_KIND_UNKN: return "void";
2273 case BTF_KIND_INT: return "int";
2274 case BTF_KIND_PTR: return "ptr";
2275 case BTF_KIND_ARRAY: return "array";
2276 case BTF_KIND_STRUCT: return "struct";
2277 case BTF_KIND_UNION: return "union";
2278 case BTF_KIND_ENUM: return "enum";
2279 case BTF_KIND_FWD: return "fwd";
2280 case BTF_KIND_TYPEDEF: return "typedef";
2281 case BTF_KIND_VOLATILE: return "volatile";
2282 case BTF_KIND_CONST: return "const";
2283 case BTF_KIND_RESTRICT: return "restrict";
2284 case BTF_KIND_FUNC: return "func";
2285 case BTF_KIND_FUNC_PROTO: return "func_proto";
2286 case BTF_KIND_VAR: return "var";
2287 case BTF_KIND_DATASEC: return "datasec";
2288 case BTF_KIND_FLOAT: return "float";
2289 case BTF_KIND_DECL_TAG: return "decl_tag";
2290 case BTF_KIND_TYPE_TAG: return "type_tag";
2291 case BTF_KIND_ENUM64: return "enum64";
2292 default: return "unknown";
2293 }
2294 }
2295
2296 const char *btf_kind_str(const struct btf_type *t)
2297 {
2298 return __btf_kind_str(btf_kind(t));
2299 }
2300
2301 /*
2302 * Fetch integer attribute of BTF map definition. Such attributes are
2303 * represented using a pointer to an array, in which dimensionality of array
2304 * encodes specified integer value. E.g., int (*type)[BPF_MAP_TYPE_ARRAY];
2305 * encodes `type => BPF_MAP_TYPE_ARRAY` key/value pair completely using BTF
2306 * type definition, while using only sizeof(void *) space in ELF data section.
2307 */
2308 static bool get_map_field_int(const char *map_name, const struct btf *btf,
2309 const struct btf_member *m, __u32 *res)
2310 {
2311 const struct btf_type *t = skip_mods_and_typedefs(btf, m->type, NULL);
2312 const char *name = btf__name_by_offset(btf, m->name_off);
2313 const struct btf_array *arr_info;
2314 const struct btf_type *arr_t;
2315
2316 if (!btf_is_ptr(t)) {
2317 pr_warn("map '%s': attr '%s': expected PTR, got %s.\n",
2318 map_name, name, btf_kind_str(t));
2319 return false;
2320 }
2321
2322 arr_t = btf__type_by_id(btf, t->type);
2323 if (!arr_t) {
2324 pr_warn("map '%s': attr '%s': type [%u] not found.\n",
2325 map_name, name, t->type);
2326 return false;
2327 }
2328 if (!btf_is_array(arr_t)) {
2329 pr_warn("map '%s': attr '%s': expected ARRAY, got %s.\n",
2330 map_name, name, btf_kind_str(arr_t));
2331 return false;
2332 }
2333 arr_info = btf_array(arr_t);
2334 *res = arr_info->nelems;
2335 return true;
2336 }
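/* Editor's sketch of where this encoding typically comes from: the
 * __uint()/__type() convenience macros in bpf_helpers.h expand to
 * pointer-to-array and pointer members, e.g.
 *
 *   struct {
 *           __uint(type, BPF_MAP_TYPE_HASH);  // int (*type)[BPF_MAP_TYPE_HASH]
 *           __uint(max_entries, 4096);        // int (*max_entries)[4096]
 *           __type(key, __u32);               // __u32 *key
 *           __type(value, __u64);             // __u64 *value
 *   } my_hash SEC(".maps");                   // hypothetical map name
 *
 * get_map_field_int() recovers 'type' and 'max_entries' from the array
 * dimension, while 'key'/'value' carry full types via their pointee.
 */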
2337
2338 static int pathname_concat(char *buf, size_t buf_sz, const char *path, const char *name)
2339 {
2340 int len;
2341
2342 len = snprintf(buf, buf_sz, "%s/%s", path, name);
2343 if (len < 0)
2344 return -EINVAL;
2345 if (len >= buf_sz)
2346 return -ENAMETOOLONG;
2347
2348 return 0;
2349 }
2350
2351 static int build_map_pin_path(struct bpf_map *map, const char *path)
2352 {
2353 char buf[PATH_MAX];
2354 int err;
2355
2356 if (!path)
2357 path = BPF_FS_DEFAULT_PATH;
2358
2359 err = pathname_concat(buf, sizeof(buf), path, bpf_map__name(map));
2360 if (err)
2361 return err;
2362
2363 return bpf_map__set_pin_path(map, buf);
2364 }
2365
2366 /* should match definition in bpf_helpers.h */
2367 enum libbpf_pin_type {
2368 LIBBPF_PIN_NONE,
2369 /* PIN_BY_NAME: pin maps by name (in /sys/fs/bpf by default) */
2370 LIBBPF_PIN_BY_NAME,
2371 };
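/* Editor's sketch of requesting pin-by-name from the BPF side (again using
 * the __uint() macro from bpf_helpers.h; the map name is hypothetical):
 *
 *   struct {
 *           __uint(type, BPF_MAP_TYPE_HASH);
 *           __uint(max_entries, 16);
 *           __type(key, __u32);
 *           __type(value, __u64);
 *           __uint(pinning, LIBBPF_PIN_BY_NAME);
 *   } pinned_map SEC(".maps");
 *
 * Without a pin_root_path override in bpf_object_open_opts, such a map is
 * pinned at BPF_FS_DEFAULT_PATH "/" + map name, i.e. /sys/fs/bpf/pinned_map.
 */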
2372
2373 int parse_btf_map_def(const char *map_name, struct btf *btf,
2374 const struct btf_type *def_t, bool strict,
2375 struct btf_map_def *map_def, struct btf_map_def *inner_def)
2376 {
2377 const struct btf_type *t;
2378 const struct btf_member *m;
2379 bool is_inner = inner_def == NULL;
2380 int vlen, i;
2381
2382 vlen = btf_vlen(def_t);
2383 m = btf_members(def_t);
2384 for (i = 0; i < vlen; i++, m++) {
2385 const char *name = btf__name_by_offset(btf, m->name_off);
2386
2387 if (!name) {
2388 pr_warn("map '%s': invalid field #%d.\n", map_name, i);
2389 return -EINVAL;
2390 }
2391 if (strcmp(name, "type") == 0) {
2392 if (!get_map_field_int(map_name, btf, m, &map_def->map_type))
2393 return -EINVAL;
2394 map_def->parts |= MAP_DEF_MAP_TYPE;
2395 } else if (strcmp(name, "max_entries") == 0) {
2396 if (!get_map_field_int(map_name, btf, m, &map_def->max_entries))
2397 return -EINVAL;
2398 map_def->parts |= MAP_DEF_MAX_ENTRIES;
2399 } else if (strcmp(name, "map_flags") == 0) {
2400 if (!get_map_field_int(map_name, btf, m, &map_def->map_flags))
2401 return -EINVAL;
2402 map_def->parts |= MAP_DEF_MAP_FLAGS;
2403 } else if (strcmp(name, "numa_node") == 0) {
2404 if (!get_map_field_int(map_name, btf, m, &map_def->numa_node))
2405 return -EINVAL;
2406 map_def->parts |= MAP_DEF_NUMA_NODE;
2407 } else if (strcmp(name, "key_size") == 0) {
2408 __u32 sz;
2409
2410 if (!get_map_field_int(map_name, btf, m, &sz))
2411 return -EINVAL;
2412 if (map_def->key_size && map_def->key_size != sz) {
2413 pr_warn("map '%s': conflicting key size %u != %u.\n",
2414 map_name, map_def->key_size, sz);
2415 return -EINVAL;
2416 }
2417 map_def->key_size = sz;
2418 map_def->parts |= MAP_DEF_KEY_SIZE;
2419 } else if (strcmp(name, "key") == 0) {
2420 __s64 sz;
2421
2422 t = btf__type_by_id(btf, m->type);
2423 if (!t) {
2424 pr_warn("map '%s': key type [%d] not found.\n",
2425 map_name, m->type);
2426 return -EINVAL;
2427 }
2428 if (!btf_is_ptr(t)) {
2429 pr_warn("map '%s': key spec is not PTR: %s.\n",
2430 map_name, btf_kind_str(t));
2431 return -EINVAL;
2432 }
2433 sz = btf__resolve_size(btf, t->type);
2434 if (sz < 0) {
2435 pr_warn("map '%s': can't determine key size for type [%u]: %zd.\n",
2436 map_name, t->type, (ssize_t)sz);
2437 return sz;
2438 }
2439 if (map_def->key_size && map_def->key_size != sz) {
2440 pr_warn("map '%s': conflicting key size %u != %zd.\n",
2441 map_name, map_def->key_size, (ssize_t)sz);
2442 return -EINVAL;
2443 }
2444 map_def->key_size = sz;
2445 map_def->key_type_id = t->type;
2446 map_def->parts |= MAP_DEF_KEY_SIZE | MAP_DEF_KEY_TYPE;
2447 } else if (strcmp(name, "value_size") == 0) {
2448 __u32 sz;
2449
2450 if (!get_map_field_int(map_name, btf, m, &sz))
2451 return -EINVAL;
2452 if (map_def->value_size && map_def->value_size != sz) {
2453 pr_warn("map '%s': conflicting value size %u != %u.\n",
2454 map_name, map_def->value_size, sz);
2455 return -EINVAL;
2456 }
2457 map_def->value_size = sz;
2458 map_def->parts |= MAP_DEF_VALUE_SIZE;
2459 } else if (strcmp(name, "value") == 0) {
2460 __s64 sz;
2461
2462 t = btf__type_by_id(btf, m->type);
2463 if (!t) {
2464 pr_warn("map '%s': value type [%d] not found.\n",
2465 map_name, m->type);
2466 return -EINVAL;
2467 }
2468 if (!btf_is_ptr(t)) {
2469 pr_warn("map '%s': value spec is not PTR: %s.\n",
2470 map_name, btf_kind_str(t));
2471 return -EINVAL;
2472 }
2473 sz = btf__resolve_size(btf, t->type);
2474 if (sz < 0) {
2475 pr_warn("map '%s': can't determine value size for type [%u]: %zd.\n",
2476 map_name, t->type, (ssize_t)sz);
2477 return sz;
2478 }
2479 if (map_def->value_size && map_def->value_size != sz) {
2480 pr_warn("map '%s': conflicting value size %u != %zd.\n",
2481 map_name, map_def->value_size, (ssize_t)sz);
2482 return -EINVAL;
2483 }
2484 map_def->value_size = sz;
2485 map_def->value_type_id = t->type;
2486 map_def->parts |= MAP_DEF_VALUE_SIZE | MAP_DEF_VALUE_TYPE;
2487 } else if (strcmp(name, "values") == 0) {
2489 bool is_map_in_map = bpf_map_type__is_map_in_map(map_def->map_type);
2490 bool is_prog_array = map_def->map_type == BPF_MAP_TYPE_PROG_ARRAY;
2491 const char *desc = is_map_in_map ? "map-in-map inner" : "prog-array value";
2492 char inner_map_name[128];
2493 int err;
2494
2495 if (is_inner) {
2496 pr_warn("map '%s': multi-level inner maps not supported.\n",
2497 map_name);
2498 return -ENOTSUP;
2499 }
2500 if (i != vlen - 1) {
2501 pr_warn("map '%s': '%s' member should be last.\n",
2502 map_name, name);
2503 return -EINVAL;
2504 }
2505 if (!is_map_in_map && !is_prog_array) {
2506 pr_warn("map '%s': should be map-in-map or prog-array.\n",
2507 map_name);
2508 return -ENOTSUP;
2509 }
2510 if (map_def->value_size && map_def->value_size != 4) {
2511 pr_warn("map '%s': conflicting value size %u != 4.\n",
2512 map_name, map_def->value_size);
2513 return -EINVAL;
2514 }
2515 map_def->value_size = 4;
2516 t = btf__type_by_id(btf, m->type);
2517 if (!t) {
2518 pr_warn("map '%s': %s type [%d] not found.\n",
2519 map_name, desc, m->type);
2520 return -EINVAL;
2521 }
2522 if (!btf_is_array(t) || btf_array(t)->nelems) {
2523 pr_warn("map '%s': %s spec is not a zero-sized array.\n",
2524 map_name, desc);
2525 return -EINVAL;
2526 }
2527 t = skip_mods_and_typedefs(btf, btf_array(t)->type, NULL);
2528 if (!btf_is_ptr(t)) {
2529 pr_warn("map '%s': %s def is of unexpected kind %s.\n",
2530 map_name, desc, btf_kind_str(t));
2531 return -EINVAL;
2532 }
2533 t = skip_mods_and_typedefs(btf, t->type, NULL);
2534 if (is_prog_array) {
2535 if (!btf_is_func_proto(t)) {
2536 pr_warn("map '%s': prog-array value def is of unexpected kind %s.\n",
2537 map_name, btf_kind_str(t));
2538 return -EINVAL;
2539 }
2540 continue;
2541 }
2542 if (!btf_is_struct(t)) {
2543 pr_warn("map '%s': map-in-map inner def is of unexpected kind %s.\n",
2544 map_name, btf_kind_str(t));
2545 return -EINVAL;
2546 }
2547
2548 snprintf(inner_map_name, sizeof(inner_map_name), "%s.inner", map_name);
2549 err = parse_btf_map_def(inner_map_name, btf, t, strict, inner_def, NULL);
2550 if (err)
2551 return err;
2552
2553 map_def->parts |= MAP_DEF_INNER_MAP;
2554 } else if (strcmp(name, "pinning") == 0) {
2555 __u32 val;
2556
2557 if (is_inner) {
2558 pr_warn("map '%s': inner def can't be pinned.\n", map_name);
2559 return -EINVAL;
2560 }
2561 if (!get_map_field_int(map_name, btf, m, &val))
2562 return -EINVAL;
2563 if (val != LIBBPF_PIN_NONE && val != LIBBPF_PIN_BY_NAME) {
2564 pr_warn("map '%s': invalid pinning value %u.\n",
2565 map_name, val);
2566 return -EINVAL;
2567 }
2568 map_def->pinning = val;
2569 map_def->parts |= MAP_DEF_PINNING;
2570 } else if (strcmp(name, "map_extra") == 0) {
2571 __u32 map_extra;
2572
2573 if (!get_map_field_int(map_name, btf, m, &map_extra))
2574 return -EINVAL;
2575 map_def->map_extra = map_extra;
2576 map_def->parts |= MAP_DEF_MAP_EXTRA;
2577 } else {
2578 if (strict) {
2579 pr_warn("map '%s': unknown field '%s'.\n", map_name, name);
2580 return -ENOTSUP;
2581 }
2582 pr_debug("map '%s': ignoring unknown field '%s'.\n", map_name, name);
2583 }
2584 }
2585
2586 if (map_def->map_type == BPF_MAP_TYPE_UNSPEC) {
2587 pr_warn("map '%s': map type isn't specified.\n", map_name);
2588 return -EINVAL;
2589 }
2590
2591 return 0;
2592 }
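/* Editor's sketch of the 'values' member for the map-in-map case, using the
 * __array() macro from bpf_helpers.h (it expands to a zero-sized array of
 * pointers, matching the checks above; names are hypothetical):
 *
 *   struct {
 *           __uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS);
 *           __uint(max_entries, 8);
 *           __type(key, int);
 *           __array(values, struct {
 *                   __uint(type, BPF_MAP_TYPE_ARRAY);
 *                   __uint(max_entries, 1);
 *                   __type(key, int);
 *                   __type(value, int);
 *           });
 *   } outer_map SEC(".maps");
 *
 * The anonymous inner struct is parsed recursively into *inner_def; for a
 * BPF_MAP_TYPE_PROG_ARRAY, a func_proto pointer is expected there instead.
 */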
2593
2594 static size_t adjust_ringbuf_sz(size_t sz)
2595 {
2596 __u32 page_sz = sysconf(_SC_PAGE_SIZE);
2597 __u32 mul;
2598
2599 /* if user forgot to set any size, make sure they see error */
2600 if (sz == 0)
2601 return 0;
2602 /* Kernel expects BPF_MAP_TYPE_RINGBUF's max_entries to be
2603 * a power-of-2 multiple of kernel's page size. If user diligently
2604 * satisfied these conditions, pass the size through.
2605 */
2606 if ((sz % page_sz) == 0 && is_pow_of_2(sz / page_sz))
2607 return sz;
2608
2609 /* Otherwise find closest (page_sz * power_of_2) product bigger than
2610 * user-set size to satisfy both user size request and kernel
2611 * requirements and substitute correct max_entries for map creation.
2612 */
2613 for (mul = 1; mul <= UINT_MAX / page_sz; mul <<= 1) {
2614 if (mul * page_sz > sz)
2615 return mul * page_sz;
2616 }
2617
2618 /* if it's impossible to satisfy the conditions (i.e., user size is
2619 * very close to UINT_MAX but is not a power-of-2 multiple of
2620 * page_size) then just return original size and let kernel reject it
2621 */
2622 return sz;
2623 }
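/* Editor's worked example, assuming a 4096-byte page size:
 *
 *   sz = 0          -> returned as-is, so map creation fails visibly
 *   sz = 4096 * 8   -> already a power-of-2 multiple, passed through
 *   sz = 4096 * 12  -> 12 isn't a power of 2, rounded up to 4096 * 16
 *   sz = 1000000    -> rounded up to 4096 * 256 = 1048576
 */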
2624
2625 static bool map_is_ringbuf(const struct bpf_map *map)
2626 {
2627 return map->def.type == BPF_MAP_TYPE_RINGBUF ||
2628 map->def.type == BPF_MAP_TYPE_USER_RINGBUF;
2629 }
2630
2631 static void fill_map_from_def(struct bpf_map *map, const struct btf_map_def *def)
2632 {
2633 map->def.type = def->map_type;
2634 map->def.key_size = def->key_size;
2635 map->def.value_size = def->value_size;
2636 map->def.max_entries = def->max_entries;
2637 map->def.map_flags = def->map_flags;
2638 map->map_extra = def->map_extra;
2639
2640 map->numa_node = def->numa_node;
2641 map->btf_key_type_id = def->key_type_id;
2642 map->btf_value_type_id = def->value_type_id;
2643
2644 /* auto-adjust BPF ringbuf map max_entries to be a multiple of page size */
2645 if (map_is_ringbuf(map))
2646 map->def.max_entries = adjust_ringbuf_sz(map->def.max_entries);
2647
2648 if (def->parts & MAP_DEF_MAP_TYPE)
2649 pr_debug("map '%s': found type = %u.\n", map->name, def->map_type);
2650
2651 if (def->parts & MAP_DEF_KEY_TYPE)
2652 pr_debug("map '%s': found key [%u], sz = %u.\n",
2653 map->name, def->key_type_id, def->key_size);
2654 else if (def->parts & MAP_DEF_KEY_SIZE)
2655 pr_debug("map '%s': found key_size = %u.\n", map->name, def->key_size);
2656
2657 if (def->parts & MAP_DEF_VALUE_TYPE)
2658 pr_debug("map '%s': found value [%u], sz = %u.\n",
2659 map->name, def->value_type_id, def->value_size);
2660 else if (def->parts & MAP_DEF_VALUE_SIZE)
2661 pr_debug("map '%s': found value_size = %u.\n", map->name, def->value_size);
2662
2663 if (def->parts & MAP_DEF_MAX_ENTRIES)
2664 pr_debug("map '%s': found max_entries = %u.\n", map->name, def->max_entries);
2665 if (def->parts & MAP_DEF_MAP_FLAGS)
2666 pr_debug("map '%s': found map_flags = 0x%x.\n", map->name, def->map_flags);
2667 if (def->parts & MAP_DEF_MAP_EXTRA)
2668 pr_debug("map '%s': found map_extra = 0x%llx.\n", map->name,
2669 (unsigned long long)def->map_extra);
2670 if (def->parts & MAP_DEF_PINNING)
2671 pr_debug("map '%s': found pinning = %u.\n", map->name, def->pinning);
2672 if (def->parts & MAP_DEF_NUMA_NODE)
2673 pr_debug("map '%s': found numa_node = %u.\n", map->name, def->numa_node);
2674
2675 if (def->parts & MAP_DEF_INNER_MAP)
2676 pr_debug("map '%s': found inner map definition.\n", map->name);
2677 }
2678
2679 static const char *btf_var_linkage_str(__u32 linkage)
2680 {
2681 switch (linkage) {
2682 case BTF_VAR_STATIC: return "static";
2683 case BTF_VAR_GLOBAL_ALLOCATED: return "global";
2684 case BTF_VAR_GLOBAL_EXTERN: return "extern";
2685 default: return "unknown";
2686 }
2687 }
2688
2689 static int bpf_object__init_user_btf_map(struct bpf_object *obj,
2690 const struct btf_type *sec,
2691 int var_idx, int sec_idx,
2692 const Elf_Data *data, bool strict,
2693 const char *pin_root_path)
2694 {
2695 struct btf_map_def map_def = {}, inner_def = {};
2696 const struct btf_type *var, *def;
2697 const struct btf_var_secinfo *vi;
2698 const struct btf_var *var_extra;
2699 const char *map_name;
2700 struct bpf_map *map;
2701 int err;
2702
2703 vi = btf_var_secinfos(sec) + var_idx;
2704 var = btf__type_by_id(obj->btf, vi->type);
2705 var_extra = btf_var(var);
2706 map_name = btf__name_by_offset(obj->btf, var->name_off);
2707
2708 if (map_name == NULL || map_name[0] == '\0') {
2709 pr_warn("map #%d: empty name.\n", var_idx);
2710 return -EINVAL;
2711 }
2712 if ((__u64)vi->offset + vi->size > data->d_size) {
2713 pr_warn("map '%s' BTF data is corrupted.\n", map_name);
2714 return -EINVAL;
2715 }
2716 if (!btf_is_var(var)) {
2717 pr_warn("map '%s': unexpected var kind %s.\n",
2718 map_name, btf_kind_str(var));
2719 return -EINVAL;
2720 }
2721 if (var_extra->linkage != BTF_VAR_GLOBAL_ALLOCATED) {
2722 pr_warn("map '%s': unsupported map linkage %s.\n",
2723 map_name, btf_var_linkage_str(var_extra->linkage));
2724 return -EOPNOTSUPP;
2725 }
2726
2727 def = skip_mods_and_typedefs(obj->btf, var->type, NULL);
2728 if (!btf_is_struct(def)) {
2729 pr_warn("map '%s': unexpected def kind %s.\n",
2730 map_name, btf_kind_str(var));
2731 return -EINVAL;
2732 }
2733 if (def->size > vi->size) {
2734 pr_warn("map '%s': invalid def size.\n", map_name);
2735 return -EINVAL;
2736 }
2737
2738 map = bpf_object__add_map(obj);
2739 if (IS_ERR(map))
2740 return PTR_ERR(map);
2741 map->name = strdup(map_name);
2742 if (!map->name) {
2743 pr_warn("map '%s': failed to alloc map name.\n", map_name);
2744 return -ENOMEM;
2745 }
2746 map->libbpf_type = LIBBPF_MAP_UNSPEC;
2747 map->def.type = BPF_MAP_TYPE_UNSPEC;
2748 map->sec_idx = sec_idx;
2749 map->sec_offset = vi->offset;
2750 map->btf_var_idx = var_idx;
2751 pr_debug("map '%s': at sec_idx %d, offset %zu.\n",
2752 map_name, map->sec_idx, map->sec_offset);
2753
2754 err = parse_btf_map_def(map->name, obj->btf, def, strict, &map_def, &inner_def);
2755 if (err)
2756 return err;
2757
2758 fill_map_from_def(map, &map_def);
2759
2760 if (map_def.pinning == LIBBPF_PIN_BY_NAME) {
2761 err = build_map_pin_path(map, pin_root_path);
2762 if (err) {
2763 pr_warn("map '%s': couldn't build pin path.\n", map->name);
2764 return err;
2765 }
2766 }
2767
2768 if (map_def.parts & MAP_DEF_INNER_MAP) {
2769 map->inner_map = calloc(1, sizeof(*map->inner_map));
2770 if (!map->inner_map)
2771 return -ENOMEM;
2772 map->inner_map->fd = create_placeholder_fd();
2773 if (map->inner_map->fd < 0)
2774 return map->inner_map->fd;
2775 map->inner_map->sec_idx = sec_idx;
2776 map->inner_map->name = malloc(strlen(map_name) + sizeof(".inner") + 1);
2777 if (!map->inner_map->name)
2778 return -ENOMEM;
2779 sprintf(map->inner_map->name, "%s.inner", map_name);
2780
2781 fill_map_from_def(map->inner_map, &inner_def);
2782 }
2783
2784 err = map_fill_btf_type_info(obj, map);
2785 if (err)
2786 return err;
2787
2788 return 0;
2789 }
2790
2791 static int bpf_object__init_user_btf_maps(struct bpf_object *obj, bool strict,
2792 const char *pin_root_path)
2793 {
2794 const struct btf_type *sec = NULL;
2795 int nr_types, i, vlen, err;
2796 const struct btf_type *t;
2797 const char *name;
2798 Elf_Data *data;
2799 Elf_Scn *scn;
2800
2801 if (obj->efile.btf_maps_shndx < 0)
2802 return 0;
2803
2804 scn = elf_sec_by_idx(obj, obj->efile.btf_maps_shndx);
2805 data = elf_sec_data(obj, scn);
2806 if (!scn || !data) {
2807 pr_warn("elf: failed to get %s map definitions for %s\n",
2808 MAPS_ELF_SEC, obj->path);
2809 return -EINVAL;
2810 }
2811
2812 nr_types = btf__type_cnt(obj->btf);
2813 for (i = 1; i < nr_types; i++) {
2814 t = btf__type_by_id(obj->btf, i);
2815 if (!btf_is_datasec(t))
2816 continue;
2817 name = btf__name_by_offset(obj->btf, t->name_off);
2818 if (strcmp(name, MAPS_ELF_SEC) == 0) {
2819 sec = t;
2820 obj->efile.btf_maps_sec_btf_id = i;
2821 break;
2822 }
2823 }
2824
2825 if (!sec) {
2826 pr_warn("DATASEC '%s' not found.\n", MAPS_ELF_SEC);
2827 return -ENOENT;
2828 }
2829
2830 vlen = btf_vlen(sec);
2831 for (i = 0; i < vlen; i++) {
2832 err = bpf_object__init_user_btf_map(obj, sec, i,
2833 obj->efile.btf_maps_shndx,
2834 data, strict,
2835 pin_root_path);
2836 if (err)
2837 return err;
2838 }
2839
2840 return 0;
2841 }
2842
2843 static int bpf_object__init_maps(struct bpf_object *obj,
2844 const struct bpf_object_open_opts *opts)
2845 {
2846 const char *pin_root_path;
2847 bool strict;
2848 int err = 0;
2849
2850 strict = !OPTS_GET(opts, relaxed_maps, false);
2851 pin_root_path = OPTS_GET(opts, pin_root_path, NULL);
2852
2853 err = bpf_object__init_user_btf_maps(obj, strict, pin_root_path);
2854 err = err ?: bpf_object__init_global_data_maps(obj);
2855 err = err ?: bpf_object__init_kconfig_map(obj);
2856 err = err ?: bpf_object_init_struct_ops(obj);
2857
2858 return err;
2859 }
2860
2861 static bool section_have_execinstr(struct bpf_object *obj, int idx)
2862 {
2863 Elf64_Shdr *sh;
2864
2865 sh = elf_sec_hdr(obj, elf_sec_by_idx(obj, idx));
2866 if (!sh)
2867 return false;
2868
2869 return sh->sh_flags & SHF_EXECINSTR;
2870 }
2871
2872 static bool starts_with_qmark(const char *s)
2873 {
2874 return s && s[0] == '?';
2875 }
2876
2877 static bool btf_needs_sanitization(struct bpf_object *obj)
2878 {
2879 bool has_func_global = kernel_supports(obj, FEAT_BTF_GLOBAL_FUNC);
2880 bool has_datasec = kernel_supports(obj, FEAT_BTF_DATASEC);
2881 bool has_float = kernel_supports(obj, FEAT_BTF_FLOAT);
2882 bool has_func = kernel_supports(obj, FEAT_BTF_FUNC);
2883 bool has_decl_tag = kernel_supports(obj, FEAT_BTF_DECL_TAG);
2884 bool has_type_tag = kernel_supports(obj, FEAT_BTF_TYPE_TAG);
2885 bool has_enum64 = kernel_supports(obj, FEAT_BTF_ENUM64);
2886 bool has_qmark_datasec = kernel_supports(obj, FEAT_BTF_QMARK_DATASEC);
2887
2888 return !has_func || !has_datasec || !has_func_global || !has_float ||
2889 !has_decl_tag || !has_type_tag || !has_enum64 || !has_qmark_datasec;
2890 }
2891
2892 static int bpf_object__sanitize_btf(struct bpf_object *obj, struct btf *btf)
2893 {
2894 bool has_func_global = kernel_supports(obj, FEAT_BTF_GLOBAL_FUNC);
2895 bool has_datasec = kernel_supports(obj, FEAT_BTF_DATASEC);
2896 bool has_float = kernel_supports(obj, FEAT_BTF_FLOAT);
2897 bool has_func = kernel_supports(obj, FEAT_BTF_FUNC);
2898 bool has_decl_tag = kernel_supports(obj, FEAT_BTF_DECL_TAG);
2899 bool has_type_tag = kernel_supports(obj, FEAT_BTF_TYPE_TAG);
2900 bool has_enum64 = kernel_supports(obj, FEAT_BTF_ENUM64);
2901 bool has_qmark_datasec = kernel_supports(obj, FEAT_BTF_QMARK_DATASEC);
2902 int enum64_placeholder_id = 0;
2903 struct btf_type *t;
2904 int i, j, vlen;
2905
2906 for (i = 1; i < btf__type_cnt(btf); i++) {
2907 t = (struct btf_type *)btf__type_by_id(btf, i);
2908
2909 if ((!has_datasec && btf_is_var(t)) || (!has_decl_tag && btf_is_decl_tag(t))) {
2910 /* replace VAR/DECL_TAG with INT */
2911 t->info = BTF_INFO_ENC(BTF_KIND_INT, 0, 0);
2912 /*
2913 * using size = 1 is the safest choice, 4 will be too
2914 * big and cause kernel BTF validation failure if
2915 * original variable took less than 4 bytes
2916 */
2917 t->size = 1;
2918 *(int *)(t + 1) = BTF_INT_ENC(0, 0, 8);
2919 } else if (!has_datasec && btf_is_datasec(t)) {
2920 /* replace DATASEC with STRUCT */
2921 const struct btf_var_secinfo *v = btf_var_secinfos(t);
2922 struct btf_member *m = btf_members(t);
2923 struct btf_type *vt;
2924 char *name;
2925
2926 name = (char *)btf__name_by_offset(btf, t->name_off);
2927 while (*name) {
2928 if (*name == '.' || *name == '?')
2929 *name = '_';
2930 name++;
2931 }
2932
2933 vlen = btf_vlen(t);
2934 t->info = BTF_INFO_ENC(BTF_KIND_STRUCT, 0, vlen);
2935 for (j = 0; j < vlen; j++, v++, m++) {
2936 /* order of field assignments is important */
2937 m->offset = v->offset * 8;
2938 m->type = v->type;
2939 /* preserve variable name as member name */
2940 vt = (void *)btf__type_by_id(btf, v->type);
2941 m->name_off = vt->name_off;
2942 }
2943 } else if (!has_qmark_datasec && btf_is_datasec(t) &&
2944 starts_with_qmark(btf__name_by_offset(btf, t->name_off))) {
2945 /* replace '?' prefix with '_' for DATASEC names */
2946 char *name;
2947
2948 name = (char *)btf__name_by_offset(btf, t->name_off);
2949 if (name[0] == '?')
2950 name[0] = '_';
2951 } else if (!has_func && btf_is_func_proto(t)) {
2952 /* replace FUNC_PROTO with ENUM */
2953 vlen = btf_vlen(t);
2954 t->info = BTF_INFO_ENC(BTF_KIND_ENUM, 0, vlen);
2955 t->size = sizeof(__u32); /* kernel enforced */
2956 } else if (!has_func && btf_is_func(t)) {
2957 /* replace FUNC with TYPEDEF */
2958 t->info = BTF_INFO_ENC(BTF_KIND_TYPEDEF, 0, 0);
2959 } else if (!has_func_global && btf_is_func(t)) {
2960 /* replace BTF_FUNC_GLOBAL with BTF_FUNC_STATIC */
2961 t->info = BTF_INFO_ENC(BTF_KIND_FUNC, 0, 0);
2962 } else if (!has_float && btf_is_float(t)) {
2963 /* replace FLOAT with an equally-sized empty STRUCT;
2964 * since C compilers do not accept e.g. "float" as a
2965 * valid struct name, make it anonymous
2966 */
2967 t->name_off = 0;
2968 t->info = BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 0);
2969 } else if (!has_type_tag && btf_is_type_tag(t)) {
2970 /* replace TYPE_TAG with a CONST */
2971 t->name_off = 0;
2972 t->info = BTF_INFO_ENC(BTF_KIND_CONST, 0, 0);
2973 } else if (!has_enum64 && btf_is_enum(t)) {
2974 /* clear the kflag */
2975 t->info = btf_type_info(btf_kind(t), btf_vlen(t), false);
2976 } else if (!has_enum64 && btf_is_enum64(t)) {
2977 /* replace ENUM64 with a union */
2978 struct btf_member *m;
2979
2980 if (enum64_placeholder_id == 0) {
2981 enum64_placeholder_id = btf__add_int(btf, "enum64_placeholder", 1, 0);
2982 if (enum64_placeholder_id < 0)
2983 return enum64_placeholder_id;
2984
2985 t = (struct btf_type *)btf__type_by_id(btf, i);
2986 }
2987
2988 m = btf_members(t);
2989 vlen = btf_vlen(t);
2990 t->info = BTF_INFO_ENC(BTF_KIND_UNION, 0, vlen);
2991 for (j = 0; j < vlen; j++, m++) {
2992 m->type = enum64_placeholder_id;
2993 m->offset = 0;
2994 }
2995 }
2996 }
2997
2998 return 0;
2999 }
3000
3001 static bool libbpf_needs_btf(const struct bpf_object *obj)
3002 {
3003 return obj->efile.btf_maps_shndx >= 0 ||
3004 obj->efile.has_st_ops ||
3005 obj->nr_extern > 0;
3006 }
3007
3008 static bool kernel_needs_btf(const struct bpf_object *obj)
3009 {
3010 return obj->efile.has_st_ops;
3011 }
3012
3013 static int bpf_object__init_btf(struct bpf_object *obj,
3014 Elf_Data *btf_data,
3015 Elf_Data *btf_ext_data)
3016 {
3017 int err = -ENOENT;
3018
3019 if (btf_data) {
3020 obj->btf = btf__new(btf_data->d_buf, btf_data->d_size);
3021 err = libbpf_get_error(obj->btf);
3022 if (err) {
3023 obj->btf = NULL;
3024 pr_warn("Error loading ELF section %s: %d.\n", BTF_ELF_SEC, err);
3025 goto out;
3026 }
3027 /* enforce 8-byte pointers for BPF-targeted BTFs */
3028 btf__set_pointer_size(obj->btf, 8);
3029 }
3030 if (btf_ext_data) {
3031 struct btf_ext_info *ext_segs[3];
3032 int seg_num, sec_num;
3033
3034 if (!obj->btf) {
3035 pr_debug("Ignore ELF section %s because its depending ELF section %s is not found.\n",
3036 BTF_EXT_ELF_SEC, BTF_ELF_SEC);
3037 goto out;
3038 }
3039 obj->btf_ext = btf_ext__new(btf_ext_data->d_buf, btf_ext_data->d_size);
3040 err = libbpf_get_error(obj->btf_ext);
3041 if (err) {
3042 pr_warn("Error loading ELF section %s: %d. Ignored and continue.\n",
3043 BTF_EXT_ELF_SEC, err);
3044 obj->btf_ext = NULL;
3045 goto out;
3046 }
3047
3048 /* setup .BTF.ext to ELF section mapping */
3049 ext_segs[0] = &obj->btf_ext->func_info;
3050 ext_segs[1] = &obj->btf_ext->line_info;
3051 ext_segs[2] = &obj->btf_ext->core_relo_info;
3052 for (seg_num = 0; seg_num < ARRAY_SIZE(ext_segs); seg_num++) {
3053 struct btf_ext_info *seg = ext_segs[seg_num];
3054 const struct btf_ext_info_sec *sec;
3055 const char *sec_name;
3056 Elf_Scn *scn;
3057
3058 if (seg->sec_cnt == 0)
3059 continue;
3060
3061 seg->sec_idxs = calloc(seg->sec_cnt, sizeof(*seg->sec_idxs));
3062 if (!seg->sec_idxs) {
3063 err = -ENOMEM;
3064 goto out;
3065 }
3066
3067 sec_num = 0;
3068 for_each_btf_ext_sec(seg, sec) {
3069 /* preventively increment index to avoid doing
3070 * this before every continue below
3071 */
3072 sec_num++;
3073
3074 sec_name = btf__name_by_offset(obj->btf, sec->sec_name_off);
3075 if (str_is_empty(sec_name))
3076 continue;
3077 scn = elf_sec_by_name(obj, sec_name);
3078 if (!scn)
3079 continue;
3080
3081 seg->sec_idxs[sec_num - 1] = elf_ndxscn(scn);
3082 }
3083 }
3084 }
3085 out:
3086 if (err && libbpf_needs_btf(obj)) {
3087 pr_warn("BTF is required, but is missing or corrupted.\n");
3088 return err;
3089 }
3090 return 0;
3091 }
3092
3093 static int compare_vsi_off(const void *_a, const void *_b)
3094 {
3095 const struct btf_var_secinfo *a = _a;
3096 const struct btf_var_secinfo *b = _b;
3097
3098 return a->offset - b->offset;
3099 }
3100
3101 static int btf_fixup_datasec(struct bpf_object *obj, struct btf *btf,
3102 struct btf_type *t)
3103 {
3104 __u32 size = 0, i, vars = btf_vlen(t);
3105 const char *sec_name = btf__name_by_offset(btf, t->name_off);
3106 struct btf_var_secinfo *vsi;
3107 bool fixup_offsets = false;
3108 int err;
3109
3110 if (!sec_name) {
3111 pr_debug("No name found in string section for DATASEC kind.\n");
3112 return -ENOENT;
3113 }
3114
3115 /* Extern-backing datasecs (.ksyms, .kconfig) have their size and
3116 * variable offsets set at the previous step. Further, not every
3117 * extern BTF VAR has corresponding ELF symbol preserved, so we skip
3118 * all fixups altogether for such sections and go straight to sorting
3119 * VARs within their DATASEC.
3120 */
3121 if (strcmp(sec_name, KCONFIG_SEC) == 0 || strcmp(sec_name, KSYMS_SEC) == 0)
3122 goto sort_vars;
3123
3124 /* Clang leaves DATASEC size and VAR offsets as zeroes, so we need to
3125 * fix this up. But BPF static linker already fixes this up and fills
3126 * all the sizes and offsets during static linking. So this step has
3127 * to be optional. But the STV_HIDDEN handling is non-optional for any
3128 * non-extern DATASEC, so the variable fixup loop below handles both
3129 * fixups at the same time, paying the cost of BTF VAR <-> ELF
3130 * symbol matching just once.
3131 */
3132 if (t->size == 0) {
3133 err = find_elf_sec_sz(obj, sec_name, &size);
3134 if (err || !size) {
3135 pr_debug("sec '%s': failed to determine size from ELF: size %u, err %d\n",
3136 sec_name, size, err);
3137 return -ENOENT;
3138 }
3139
3140 t->size = size;
3141 fixup_offsets = true;
3142 }
3143
3144 for (i = 0, vsi = btf_var_secinfos(t); i < vars; i++, vsi++) {
3145 const struct btf_type *t_var;
3146 struct btf_var *var;
3147 const char *var_name;
3148 Elf64_Sym *sym;
3149
3150 t_var = btf__type_by_id(btf, vsi->type);
3151 if (!t_var || !btf_is_var(t_var)) {
3152 pr_debug("sec '%s': unexpected non-VAR type found\n", sec_name);
3153 return -EINVAL;
3154 }
3155
3156 var = btf_var(t_var);
3157 if (var->linkage == BTF_VAR_STATIC || var->linkage == BTF_VAR_GLOBAL_EXTERN)
3158 continue;
3159
3160 var_name = btf__name_by_offset(btf, t_var->name_off);
3161 if (!var_name) {
3162 pr_debug("sec '%s': failed to find name of DATASEC's member #%d\n",
3163 sec_name, i);
3164 return -ENOENT;
3165 }
3166
3167 sym = find_elf_var_sym(obj, var_name);
3168 if (IS_ERR(sym)) {
3169 pr_debug("sec '%s': failed to find ELF symbol for VAR '%s'\n",
3170 sec_name, var_name);
3171 return -ENOENT;
3172 }
3173
3174 if (fixup_offsets)
3175 vsi->offset = sym->st_value;
3176
3177 /* if variable is a global/weak symbol, but has restricted
3178 * (STV_HIDDEN or STV_INTERNAL) visibility, mark its BTF VAR
3179 * as static. This follows similar logic for functions (BPF
3180 * subprogs) and influences libbpf's further decisions about
3181 * whether to make global data BPF array maps as
3182 * BPF_F_MMAPABLE.
3183 */
3184 if (ELF64_ST_VISIBILITY(sym->st_other) == STV_HIDDEN
3185 || ELF64_ST_VISIBILITY(sym->st_other) == STV_INTERNAL)
3186 var->linkage = BTF_VAR_STATIC;
3187 }
3188
3189 sort_vars:
3190 qsort(btf_var_secinfos(t), vars, sizeof(*vsi), compare_vsi_off);
3191 return 0;
3192 }
3193
3194 static int bpf_object_fixup_btf(struct bpf_object *obj)
3195 {
3196 int i, n, err = 0;
3197
3198 if (!obj->btf)
3199 return 0;
3200
3201 n = btf__type_cnt(obj->btf);
3202 for (i = 1; i < n; i++) {
3203 struct btf_type *t = btf_type_by_id(obj->btf, i);
3204
3205 /* Loader needs to fix up some of the things compiler
3206 * couldn't get its hands on while emitting BTF. This
3207 * is section size and global variable offset. We use
3208 * the info from the ELF itself for this purpose.
3209 */
3210 if (btf_is_datasec(t)) {
3211 err = btf_fixup_datasec(obj, obj->btf, t);
3212 if (err)
3213 return err;
3214 }
3215 }
3216
3217 return 0;
3218 }
3219
3220 static bool prog_needs_vmlinux_btf(struct bpf_program *prog)
3221 {
3222 if (prog->type == BPF_PROG_TYPE_STRUCT_OPS ||
3223 prog->type == BPF_PROG_TYPE_LSM)
3224 return true;
3225
3226 /* BPF_PROG_TYPE_TRACING programs which do not attach to other programs
3227 * also need vmlinux BTF
3228 */
3229 if (prog->type == BPF_PROG_TYPE_TRACING && !prog->attach_prog_fd)
3230 return true;
3231
3232 return false;
3233 }
3234
3235 static bool map_needs_vmlinux_btf(struct bpf_map *map)
3236 {
3237 return bpf_map__is_struct_ops(map);
3238 }
3239
3240 static bool obj_needs_vmlinux_btf(const struct bpf_object *obj)
3241 {
3242 struct bpf_program *prog;
3243 struct bpf_map *map;
3244 int i;
3245
3246 /* CO-RE relocations need kernel BTF, but only when btf_custom_path
3247 * is not specified
3248 */
3249 if (obj->btf_ext && obj->btf_ext->core_relo_info.len && !obj->btf_custom_path)
3250 return true;
3251
3252 /* Support for typed ksyms needs kernel BTF */
3253 for (i = 0; i < obj->nr_extern; i++) {
3254 const struct extern_desc *ext;
3255
3256 ext = &obj->externs[i];
3257 if (ext->type == EXT_KSYM && ext->ksym.type_id)
3258 return true;
3259 }
3260
3261 bpf_object__for_each_program(prog, obj) {
3262 if (!prog->autoload)
3263 continue;
3264 if (prog_needs_vmlinux_btf(prog))
3265 return true;
3266 }
3267
3268 bpf_object__for_each_map(map, obj) {
3269 if (map_needs_vmlinux_btf(map))
3270 return true;
3271 }
3272
3273 return false;
3274 }
3275
3276 static int bpf_object__load_vmlinux_btf(struct bpf_object *obj, bool force)
3277 {
3278 int err;
3279
3280 /* btf_vmlinux could be loaded earlier */
3281 if (obj->btf_vmlinux || obj->gen_loader)
3282 return 0;
3283
3284 if (!force && !obj_needs_vmlinux_btf(obj))
3285 return 0;
3286
3287 obj->btf_vmlinux = btf__load_vmlinux_btf();
3288 err = libbpf_get_error(obj->btf_vmlinux);
3289 if (err) {
3290 pr_warn("Error loading vmlinux BTF: %d\n", err);
3291 obj->btf_vmlinux = NULL;
3292 return err;
3293 }
3294 return 0;
3295 }
3296
3297 static int bpf_object__sanitize_and_load_btf(struct bpf_object *obj)
3298 {
3299 struct btf *kern_btf = obj->btf;
3300 bool btf_mandatory, sanitize;
3301 int i, err = 0;
3302
3303 if (!obj->btf)
3304 return 0;
3305
3306 if (!kernel_supports(obj, FEAT_BTF)) {
3307 if (kernel_needs_btf(obj)) {
3308 err = -EOPNOTSUPP;
3309 goto report;
3310 }
3311 pr_debug("Kernel doesn't support BTF, skipping uploading it.\n");
3312 return 0;
3313 }
3314
3315 /* Even though some subprogs are global/weak, user might prefer more
3316 * permissive BPF verification process that BPF verifier performs for
3317 * static functions, taking into account more context from the caller
3318 * functions. In such case, they need to mark such subprogs with
3319 * __attribute__((visibility("hidden"))) and libbpf will adjust
3320 * corresponding FUNC BTF type to be marked as static and trigger more
3321 * involved BPF verification process.
3322 */
3323 for (i = 0; i < obj->nr_programs; i++) {
3324 struct bpf_program *prog = &obj->programs[i];
3325 struct btf_type *t;
3326 const char *name;
3327 int j, n;
3328
3329 if (!prog->mark_btf_static || !prog_is_subprog(obj, prog))
3330 continue;
3331
3332 n = btf__type_cnt(obj->btf);
3333 for (j = 1; j < n; j++) {
3334 t = btf_type_by_id(obj->btf, j);
3335 if (!btf_is_func(t) || btf_func_linkage(t) != BTF_FUNC_GLOBAL)
3336 continue;
3337
3338 name = btf__str_by_offset(obj->btf, t->name_off);
3339 if (strcmp(name, prog->name) != 0)
3340 continue;
3341
3342 t->info = btf_type_info(BTF_KIND_FUNC, BTF_FUNC_STATIC, 0);
3343 break;
3344 }
3345 }
3346
3347 sanitize = btf_needs_sanitization(obj);
3348 if (sanitize) {
3349 const void *raw_data;
3350 __u32 sz;
3351
3352 /* clone BTF to sanitize a copy and leave the original intact */
3353 raw_data = btf__raw_data(obj->btf, &sz);
3354 kern_btf = btf__new(raw_data, sz);
3355 err = libbpf_get_error(kern_btf);
3356 if (err)
3357 return err;
3358
3359 /* enforce 8-byte pointers for BPF-targeted BTFs */
3360 btf__set_pointer_size(obj->btf, 8);
3361 err = bpf_object__sanitize_btf(obj, kern_btf);
3362 if (err)
3363 return err;
3364 }
3365
3366 if (obj->gen_loader) {
3367 __u32 raw_size = 0;
3368 const void *raw_data = btf__raw_data(kern_btf, &raw_size);
3369
3370 if (!raw_data)
3371 return -ENOMEM;
3372 bpf_gen__load_btf(obj->gen_loader, raw_data, raw_size);
3373 /* Pretend to have valid FD to pass various fd >= 0 checks.
3374 * This fd == 0 will not be used with any syscall and will be reset to -1 eventually.
3375 */
3376 btf__set_fd(kern_btf, 0);
3377 } else {
3378 /* currently BPF_BTF_LOAD only supports log_level 1 */
3379 err = btf_load_into_kernel(kern_btf, obj->log_buf, obj->log_size,
3380 obj->log_level ? 1 : 0, obj->token_fd);
3381 }
3382 if (sanitize) {
3383 if (!err) {
3384 /* move fd to libbpf's BTF */
3385 btf__set_fd(obj->btf, btf__fd(kern_btf));
3386 btf__set_fd(kern_btf, -1);
3387 }
3388 btf__free(kern_btf);
3389 }
3390 report:
3391 if (err) {
3392 btf_mandatory = kernel_needs_btf(obj);
3393 pr_warn("Error loading .BTF into kernel: %d. %s\n", err,
3394 btf_mandatory ? "BTF is mandatory, can't proceed."
3395 : "BTF is optional, ignoring.");
3396 if (!btf_mandatory)
3397 err = 0;
3398 }
3399 return err;
3400 }
3401
3402 static const char *elf_sym_str(const struct bpf_object *obj, size_t off)
3403 {
3404 const char *name;
3405
3406 name = elf_strptr(obj->efile.elf, obj->efile.strtabidx, off);
3407 if (!name) {
3408 pr_warn("elf: failed to get section name string at offset %zu from %s: %s\n",
3409 off, obj->path, elf_errmsg(-1));
3410 return NULL;
3411 }
3412
3413 return name;
3414 }
3415
3416 static const char *elf_sec_str(const struct bpf_object *obj, size_t off)
3417 {
3418 const char *name;
3419
3420 name = elf_strptr(obj->efile.elf, obj->efile.shstrndx, off);
3421 if (!name) {
3422 pr_warn("elf: failed to get section name string at offset %zu from %s: %s\n",
3423 off, obj->path, elf_errmsg(-1));
3424 return NULL;
3425 }
3426
3427 return name;
3428 }
3429
3430 static Elf_Scn *elf_sec_by_idx(const struct bpf_object *obj, size_t idx)
3431 {
3432 Elf_Scn *scn;
3433
3434 scn = elf_getscn(obj->efile.elf, idx);
3435 if (!scn) {
3436 pr_warn("elf: failed to get section(%zu) from %s: %s\n",
3437 idx, obj->path, elf_errmsg(-1));
3438 return NULL;
3439 }
3440 return scn;
3441 }
3442
3443 static Elf_Scn *elf_sec_by_name(const struct bpf_object *obj, const char *name)
3444 {
3445 Elf_Scn *scn = NULL;
3446 Elf *elf = obj->efile.elf;
3447 const char *sec_name;
3448
3449 while ((scn = elf_nextscn(elf, scn)) != NULL) {
3450 sec_name = elf_sec_name(obj, scn);
3451 if (!sec_name)
3452 return NULL;
3453
3454 if (strcmp(sec_name, name) != 0)
3455 continue;
3456
3457 return scn;
3458 }
3459 return NULL;
3460 }
3461
3462 static Elf64_Shdr *elf_sec_hdr(const struct bpf_object *obj, Elf_Scn *scn)
3463 {
3464 Elf64_Shdr *shdr;
3465
3466 if (!scn)
3467 return NULL;
3468
3469 shdr = elf64_getshdr(scn);
3470 if (!shdr) {
3471 pr_warn("elf: failed to get section(%zu) header from %s: %s\n",
3472 elf_ndxscn(scn), obj->path, elf_errmsg(-1));
3473 return NULL;
3474 }
3475
3476 return shdr;
3477 }
3478
3479 static const char *elf_sec_name(const struct bpf_object *obj, Elf_Scn *scn)
3480 {
3481 const char *name;
3482 Elf64_Shdr *sh;
3483
3484 if (!scn)
3485 return NULL;
3486
3487 sh = elf_sec_hdr(obj, scn);
3488 if (!sh)
3489 return NULL;
3490
3491 name = elf_sec_str(obj, sh->sh_name);
3492 if (!name) {
3493 pr_warn("elf: failed to get section(%zu) name from %s: %s\n",
3494 elf_ndxscn(scn), obj->path, elf_errmsg(-1));
3495 return NULL;
3496 }
3497
3498 return name;
3499 }
3500
3501 static Elf_Data *elf_sec_data(const struct bpf_object *obj, Elf_Scn *scn)
3502 {
3503 Elf_Data *data;
3504
3505 if (!scn)
3506 return NULL;
3507
3508 data = elf_getdata(scn, 0);
3509 if (!data) {
3510 pr_warn("elf: failed to get section(%zu) %s data from %s: %s\n",
3511 elf_ndxscn(scn), elf_sec_name(obj, scn) ?: "<?>",
3512 obj->path, elf_errmsg(-1));
3513 return NULL;
3514 }
3515
3516 return data;
3517 }
3518
3519 static Elf64_Sym *elf_sym_by_idx(const struct bpf_object *obj, size_t idx)
3520 {
3521 if (idx >= obj->efile.symbols->d_size / sizeof(Elf64_Sym))
3522 return NULL;
3523
3524 return (Elf64_Sym *)obj->efile.symbols->d_buf + idx;
3525 }
3526
3527 static Elf64_Rel *elf_rel_by_idx(Elf_Data *data, size_t idx)
3528 {
3529 if (idx >= data->d_size / sizeof(Elf64_Rel))
3530 return NULL;
3531
3532 return (Elf64_Rel *)data->d_buf + idx;
3533 }
3534
3535 static bool is_sec_name_dwarf(const char *name)
3536 {
3537 /* approximation, but the actual list is too long */
3538 return str_has_pfx(name, ".debug_");
3539 }
3540
3541 static bool ignore_elf_section(Elf64_Shdr *hdr, const char *name)
3542 {
3543 /* no special handling of .strtab */
3544 if (hdr->sh_type == SHT_STRTAB)
3545 return true;
3546
3547 /* ignore .llvm_addrsig section as well */
3548 if (hdr->sh_type == SHT_LLVM_ADDRSIG)
3549 return true;
3550
3551 /* having no subprograms leads to an empty .text section, ignore it */
3552 if (hdr->sh_type == SHT_PROGBITS && hdr->sh_size == 0 &&
3553 strcmp(name, ".text") == 0)
3554 return true;
3555
3556 /* DWARF sections */
3557 if (is_sec_name_dwarf(name))
3558 return true;
3559
3560 if (str_has_pfx(name, ".rel")) {
3561 name += sizeof(".rel") - 1;
3562 /* DWARF section relocations */
3563 if (is_sec_name_dwarf(name))
3564 return true;
3565
3566 /* .BTF and .BTF.ext don't need relocations */
3567 if (strcmp(name, BTF_ELF_SEC) == 0 ||
3568 strcmp(name, BTF_EXT_ELF_SEC) == 0)
3569 return true;
3570 }
3571
3572 return false;
3573 }
3574
3575 static int cmp_progs(const void *_a, const void *_b)
3576 {
3577 const struct bpf_program *a = _a;
3578 const struct bpf_program *b = _b;
3579
3580 if (a->sec_idx != b->sec_idx)
3581 return a->sec_idx < b->sec_idx ? -1 : 1;
3582
3583 /* sec_insn_off can't be the same within the section */
3584 return a->sec_insn_off < b->sec_insn_off ? -1 : 1;
3585 }
3586
3587 static int bpf_object__elf_collect(struct bpf_object *obj)
3588 {
3589 struct elf_sec_desc *sec_desc;
3590 Elf *elf = obj->efile.elf;
3591 Elf_Data *btf_ext_data = NULL;
3592 Elf_Data *btf_data = NULL;
3593 int idx = 0, err = 0;
3594 const char *name;
3595 Elf_Data *data;
3596 Elf_Scn *scn;
3597 Elf64_Shdr *sh;
3598
3599 /* ELF section indices are 0-based, but sec #0 is special "invalid"
3600 * section. Since section count retrieved by elf_getshdrnum() does
3601 * include sec #0, it is already the necessary size of an array to keep
3602 * all the sections.
3603 */
3604 if (elf_getshdrnum(obj->efile.elf, &obj->efile.sec_cnt)) {
3605 pr_warn("elf: failed to get the number of sections for %s: %s\n",
3606 obj->path, elf_errmsg(-1));
3607 return -LIBBPF_ERRNO__FORMAT;
3608 }
3609 obj->efile.secs = calloc(obj->efile.sec_cnt, sizeof(*obj->efile.secs));
3610 if (!obj->efile.secs)
3611 return -ENOMEM;
3612
3613 /* a bunch of ELF parsing functionality depends on processing symbols,
3614 * so do the first pass and find the symbol table
3615 */
3616 scn = NULL;
3617 while ((scn = elf_nextscn(elf, scn)) != NULL) {
3618 sh = elf_sec_hdr(obj, scn);
3619 if (!sh)
3620 return -LIBBPF_ERRNO__FORMAT;
3621
3622 if (sh->sh_type == SHT_SYMTAB) {
3623 if (obj->efile.symbols) {
3624 pr_warn("elf: multiple symbol tables in %s\n", obj->path);
3625 return -LIBBPF_ERRNO__FORMAT;
3626 }
3627
3628 data = elf_sec_data(obj, scn);
3629 if (!data)
3630 return -LIBBPF_ERRNO__FORMAT;
3631
3632 idx = elf_ndxscn(scn);
3633
3634 obj->efile.symbols = data;
3635 obj->efile.symbols_shndx = idx;
3636 obj->efile.strtabidx = sh->sh_link;
3637 }
3638 }
3639
3640 if (!obj->efile.symbols) {
3641 pr_warn("elf: couldn't find symbol table in %s, stripped object file?\n",
3642 obj->path);
3643 return -ENOENT;
3644 }
3645
3646 scn = NULL;
3647 while ((scn = elf_nextscn(elf, scn)) != NULL) {
3648 idx = elf_ndxscn(scn);
3649 sec_desc = &obj->efile.secs[idx];
3650
3651 sh = elf_sec_hdr(obj, scn);
3652 if (!sh)
3653 return -LIBBPF_ERRNO__FORMAT;
3654
3655 name = elf_sec_str(obj, sh->sh_name);
3656 if (!name)
3657 return -LIBBPF_ERRNO__FORMAT;
3658
3659 if (ignore_elf_section(sh, name))
3660 continue;
3661
3662 data = elf_sec_data(obj, scn);
3663 if (!data)
3664 return -LIBBPF_ERRNO__FORMAT;
3665
3666 pr_debug("elf: section(%d) %s, size %ld, link %d, flags %lx, type=%d\n",
3667 idx, name, (unsigned long)data->d_size,
3668 (int)sh->sh_link, (unsigned long)sh->sh_flags,
3669 (int)sh->sh_type);
3670
3671 if (strcmp(name, "license") == 0) {
3672 err = bpf_object__init_license(obj, data->d_buf, data->d_size);
3673 if (err)
3674 return err;
3675 } else if (strcmp(name, "version") == 0) {
3676 err = bpf_object__init_kversion(obj, data->d_buf, data->d_size);
3677 if (err)
3678 return err;
3679 } else if (strcmp(name, "maps") == 0) {
3680 pr_warn("elf: legacy map definitions in 'maps' section are not supported by libbpf v1.0+\n");
3681 return -ENOTSUP;
3682 } else if (strcmp(name, MAPS_ELF_SEC) == 0) {
3683 obj->efile.btf_maps_shndx = idx;
3684 } else if (strcmp(name, BTF_ELF_SEC) == 0) {
3685 if (sh->sh_type != SHT_PROGBITS)
3686 return -LIBBPF_ERRNO__FORMAT;
3687 btf_data = data;
3688 } else if (strcmp(name, BTF_EXT_ELF_SEC) == 0) {
3689 if (sh->sh_type != SHT_PROGBITS)
3690 return -LIBBPF_ERRNO__FORMAT;
3691 btf_ext_data = data;
3692 } else if (sh->sh_type == SHT_SYMTAB) {
3693 /* already processed during the first pass above */
3694 } else if (sh->sh_type == SHT_PROGBITS && data->d_size > 0) {
3695 if (sh->sh_flags & SHF_EXECINSTR) {
3696 if (strcmp(name, ".text") == 0)
3697 obj->efile.text_shndx = idx;
3698 err = bpf_object__add_programs(obj, data, name, idx);
3699 if (err)
3700 return err;
3701 } else if (strcmp(name, DATA_SEC) == 0 ||
3702 str_has_pfx(name, DATA_SEC ".")) {
3703 sec_desc->sec_type = SEC_DATA;
3704 sec_desc->shdr = sh;
3705 sec_desc->data = data;
3706 } else if (strcmp(name, RODATA_SEC) == 0 ||
3707 str_has_pfx(name, RODATA_SEC ".")) {
3708 sec_desc->sec_type = SEC_RODATA;
3709 sec_desc->shdr = sh;
3710 sec_desc->data = data;
3711 } else if (strcmp(name, STRUCT_OPS_SEC) == 0 ||
3712 strcmp(name, STRUCT_OPS_LINK_SEC) == 0 ||
3713 strcmp(name, "?" STRUCT_OPS_SEC) == 0 ||
3714 strcmp(name, "?" STRUCT_OPS_LINK_SEC) == 0) {
3715 sec_desc->sec_type = SEC_ST_OPS;
3716 sec_desc->shdr = sh;
3717 sec_desc->data = data;
3718 obj->efile.has_st_ops = true;
3719 } else {
3720 pr_info("elf: skipping unrecognized data section(%d) %s\n",
3721 idx, name);
3722 }
3723 } else if (sh->sh_type == SHT_REL) {
3724 int targ_sec_idx = sh->sh_info; /* points to other section */
3725
3726 if (sh->sh_entsize != sizeof(Elf64_Rel) ||
3727 targ_sec_idx >= obj->efile.sec_cnt)
3728 return -LIBBPF_ERRNO__FORMAT;
3729
3730 /* Only do relo for section with exec instructions */
3731 if (!section_have_execinstr(obj, targ_sec_idx) &&
3732 strcmp(name, ".rel" STRUCT_OPS_SEC) &&
3733 strcmp(name, ".rel" STRUCT_OPS_LINK_SEC) &&
3734 strcmp(name, ".rel?" STRUCT_OPS_SEC) &&
3735 strcmp(name, ".rel?" STRUCT_OPS_LINK_SEC) &&
3736 strcmp(name, ".rel" MAPS_ELF_SEC)) {
3737 pr_info("elf: skipping relo section(%d) %s for section(%d) %s\n",
3738 idx, name, targ_sec_idx,
3739 elf_sec_name(obj, elf_sec_by_idx(obj, targ_sec_idx)) ?: "<?>");
3740 continue;
3741 }
3742
3743 sec_desc->sec_type = SEC_RELO;
3744 sec_desc->shdr = sh;
3745 sec_desc->data = data;
3746 } else if (sh->sh_type == SHT_NOBITS && (strcmp(name, BSS_SEC) == 0 ||
3747 str_has_pfx(name, BSS_SEC "."))) {
3748 sec_desc->sec_type = SEC_BSS;
3749 sec_desc->shdr = sh;
3750 sec_desc->data = data;
3751 } else {
3752 pr_info("elf: skipping section(%d) %s (size %zu)\n", idx, name,
3753 (size_t)sh->sh_size);
3754 }
3755 }
3756
3757 if (!obj->efile.strtabidx || obj->efile.strtabidx > idx) {
3758 pr_warn("elf: symbol strings section missing or invalid in %s\n", obj->path);
3759 return -LIBBPF_ERRNO__FORMAT;
3760 }
3761
3762 /* sort BPF programs by section name and in-section instruction offset
3763 * for faster search
3764 */
3765 if (obj->nr_programs)
3766 qsort(obj->programs, obj->nr_programs, sizeof(*obj->programs), cmp_progs);
3767
3768 return bpf_object__init_btf(obj, btf_data, btf_ext_data);
3769 }
3770
3771 static bool sym_is_extern(const Elf64_Sym *sym)
3772 {
3773 int bind = ELF64_ST_BIND(sym->st_info);
3774 /* externs are symbols w/ type=NOTYPE, bind=GLOBAL|WEAK, section=UND */
3775 return sym->st_shndx == SHN_UNDEF &&
3776 (bind == STB_GLOBAL || bind == STB_WEAK) &&
3777 ELF64_ST_TYPE(sym->st_info) == STT_NOTYPE;
3778 }
3779
3780 static bool sym_is_subprog(const Elf64_Sym *sym, int text_shndx)
3781 {
3782 int bind = ELF64_ST_BIND(sym->st_info);
3783 int type = ELF64_ST_TYPE(sym->st_info);
3784
3785 /* in .text section */
3786 if (sym->st_shndx != text_shndx)
3787 return false;
3788
3789 /* local function */
3790 if (bind == STB_LOCAL && type == STT_SECTION)
3791 return true;
3792
3793 /* global function */
3794 return bind == STB_GLOBAL && type == STT_FUNC;
3795 }
3796
3797 static int find_extern_btf_id(const struct btf *btf, const char *ext_name)
3798 {
3799 const struct btf_type *t;
3800 const char *tname;
3801 int i, n;
3802
3803 if (!btf)
3804 return -ESRCH;
3805
3806 n = btf__type_cnt(btf);
3807 for (i = 1; i < n; i++) {
3808 t = btf__type_by_id(btf, i);
3809
3810 if (!btf_is_var(t) && !btf_is_func(t))
3811 continue;
3812
3813 tname = btf__name_by_offset(btf, t->name_off);
3814 if (strcmp(tname, ext_name))
3815 continue;
3816
3817 if (btf_is_var(t) &&
3818 btf_var(t)->linkage != BTF_VAR_GLOBAL_EXTERN)
3819 return -EINVAL;
3820
3821 if (btf_is_func(t) && btf_func_linkage(t) != BTF_FUNC_EXTERN)
3822 return -EINVAL;
3823
3824 return i;
3825 }
3826
3827 return -ENOENT;
3828 }
3829
3830 static int find_extern_sec_btf_id(struct btf *btf, int ext_btf_id) {
3831 const struct btf_var_secinfo *vs;
3832 const struct btf_type *t;
3833 int i, j, n;
3834
3835 if (!btf)
3836 return -ESRCH;
3837
3838 n = btf__type_cnt(btf);
3839 for (i = 1; i < n; i++) {
3840 t = btf__type_by_id(btf, i);
3841
3842 if (!btf_is_datasec(t))
3843 continue;
3844
3845 vs = btf_var_secinfos(t);
3846 for (j = 0; j < btf_vlen(t); j++, vs++) {
3847 if (vs->type == ext_btf_id)
3848 return i;
3849 }
3850 }
3851
3852 return -ENOENT;
3853 }
3854
3855 static enum kcfg_type find_kcfg_type(const struct btf *btf, int id,
3856 bool *is_signed)
3857 {
3858 const struct btf_type *t;
3859 const char *name;
3860
3861 t = skip_mods_and_typedefs(btf, id, NULL);
3862 name = btf__name_by_offset(btf, t->name_off);
3863
3864 if (is_signed)
3865 *is_signed = false;
3866 switch (btf_kind(t)) {
3867 case BTF_KIND_INT: {
3868 int enc = btf_int_encoding(t);
3869
3870 if (enc & BTF_INT_BOOL)
3871 return t->size == 1 ? KCFG_BOOL : KCFG_UNKNOWN;
3872 if (is_signed)
3873 *is_signed = enc & BTF_INT_SIGNED;
3874 if (t->size == 1)
3875 return KCFG_CHAR;
3876 if (t->size < 1 || t->size > 8 || (t->size & (t->size - 1)))
3877 return KCFG_UNKNOWN;
3878 return KCFG_INT;
3879 }
3880 case BTF_KIND_ENUM:
3881 if (t->size != 4)
3882 return KCFG_UNKNOWN;
3883 if (strcmp(name, "libbpf_tristate"))
3884 return KCFG_UNKNOWN;
3885 return KCFG_TRISTATE;
3886 case BTF_KIND_ENUM64:
3887 if (strcmp(name, "libbpf_tristate"))
3888 return KCFG_UNKNOWN;
3889 return KCFG_TRISTATE;
3890 case BTF_KIND_ARRAY:
3891 if (btf_array(t)->nelems == 0)
3892 return KCFG_UNKNOWN;
3893 if (find_kcfg_type(btf, btf_array(t)->type, NULL) != KCFG_CHAR)
3894 return KCFG_UNKNOWN;
3895 return KCFG_CHAR_ARR;
3896 default:
3897 return KCFG_UNKNOWN;
3898 }
3899 }
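
/* A minimal BPF-program-side sketch of the Kconfig externs classified above
 * (CONFIG_* names and sizes are illustrative; __kconfig and libbpf_tristate
 * come from bpf_helpers.h):
 *
 *	extern int CONFIG_HZ __kconfig;					// KCFG_INT
 *	extern bool CONFIG_BPF_SYSCALL __kconfig __weak;		// KCFG_BOOL
 *	extern enum libbpf_tristate CONFIG_BLK_DEV_LOOP __kconfig __weak; // KCFG_TRISTATE
 *	extern char CONFIG_CC_VERSION_TEXT[64] __kconfig __weak;	// KCFG_CHAR_ARR
 */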
3900
3901 static int cmp_externs(const void *_a, const void *_b)
3902 {
3903 const struct extern_desc *a = _a;
3904 const struct extern_desc *b = _b;
3905
3906 if (a->type != b->type)
3907 return a->type < b->type ? -1 : 1;
3908
3909 if (a->type == EXT_KCFG) {
3910 /* descending order by alignment requirements */
3911 if (a->kcfg.align != b->kcfg.align)
3912 return a->kcfg.align > b->kcfg.align ? -1 : 1;
3913 /* ascending order by size, within same alignment class */
3914 if (a->kcfg.sz != b->kcfg.sz)
3915 return a->kcfg.sz < b->kcfg.sz ? -1 : 1;
3916 }
3917
3918 /* resolve ties by name */
3919 return strcmp(a->name, b->name);
3920 }
3921
3922 static int find_int_btf_id(const struct btf *btf)
3923 {
3924 const struct btf_type *t;
3925 int i, n;
3926
3927 n = btf__type_cnt(btf);
3928 for (i = 1; i < n; i++) {
3929 t = btf__type_by_id(btf, i);
3930
3931 if (btf_is_int(t) && btf_int_bits(t) == 32)
3932 return i;
3933 }
3934
3935 return 0;
3936 }
3937
3938 static int add_dummy_ksym_var(struct btf *btf)
3939 {
3940 int i, int_btf_id, sec_btf_id, dummy_var_btf_id;
3941 const struct btf_var_secinfo *vs;
3942 const struct btf_type *sec;
3943
3944 if (!btf)
3945 return 0;
3946
3947 sec_btf_id = btf__find_by_name_kind(btf, KSYMS_SEC,
3948 BTF_KIND_DATASEC);
3949 if (sec_btf_id < 0)
3950 return 0;
3951
3952 sec = btf__type_by_id(btf, sec_btf_id);
3953 vs = btf_var_secinfos(sec);
3954 for (i = 0; i < btf_vlen(sec); i++, vs++) {
3955 const struct btf_type *vt;
3956
3957 vt = btf__type_by_id(btf, vs->type);
3958 if (btf_is_func(vt))
3959 break;
3960 }
3961
3962 /* No func in ksyms sec. No need to add dummy var. */
3963 if (i == btf_vlen(sec))
3964 return 0;
3965
3966 int_btf_id = find_int_btf_id(btf);
3967 dummy_var_btf_id = btf__add_var(btf,
3968 "dummy_ksym",
3969 BTF_VAR_GLOBAL_ALLOCATED,
3970 int_btf_id);
3971 if (dummy_var_btf_id < 0)
3972 pr_warn("cannot create a dummy_ksym var\n");
3973
3974 return dummy_var_btf_id;
3975 }
3976
3977 static int bpf_object__collect_externs(struct bpf_object *obj)
3978 {
3979 struct btf_type *sec, *kcfg_sec = NULL, *ksym_sec = NULL;
3980 const struct btf_type *t;
3981 struct extern_desc *ext;
3982 int i, n, off, dummy_var_btf_id;
3983 const char *ext_name, *sec_name;
3984 size_t ext_essent_len;
3985 Elf_Scn *scn;
3986 Elf64_Shdr *sh;
3987
3988 if (!obj->efile.symbols)
3989 return 0;
3990
3991 scn = elf_sec_by_idx(obj, obj->efile.symbols_shndx);
3992 sh = elf_sec_hdr(obj, scn);
3993 if (!sh || sh->sh_entsize != sizeof(Elf64_Sym))
3994 return -LIBBPF_ERRNO__FORMAT;
3995
3996 dummy_var_btf_id = add_dummy_ksym_var(obj->btf);
3997 if (dummy_var_btf_id < 0)
3998 return dummy_var_btf_id;
3999
4000 n = sh->sh_size / sh->sh_entsize;
4001 pr_debug("looking for externs among %d symbols...\n", n);
4002
4003 for (i = 0; i < n; i++) {
4004 Elf64_Sym *sym = elf_sym_by_idx(obj, i);
4005
4006 if (!sym)
4007 return -LIBBPF_ERRNO__FORMAT;
4008 if (!sym_is_extern(sym))
4009 continue;
4010 ext_name = elf_sym_str(obj, sym->st_name);
4011 if (!ext_name || !ext_name[0])
4012 continue;
4013
4014 ext = obj->externs;
4015 ext = libbpf_reallocarray(ext, obj->nr_extern + 1, sizeof(*ext));
4016 if (!ext)
4017 return -ENOMEM;
4018 obj->externs = ext;
4019 ext = &ext[obj->nr_extern];
4020 memset(ext, 0, sizeof(*ext));
4021 obj->nr_extern++;
4022
4023 ext->btf_id = find_extern_btf_id(obj->btf, ext_name);
4024 if (ext->btf_id <= 0) {
4025 pr_warn("failed to find BTF for extern '%s': %d\n",
4026 ext_name, ext->btf_id);
4027 return ext->btf_id;
4028 }
4029 t = btf__type_by_id(obj->btf, ext->btf_id);
4030 ext->name = btf__name_by_offset(obj->btf, t->name_off);
4031 ext->sym_idx = i;
4032 ext->is_weak = ELF64_ST_BIND(sym->st_info) == STB_WEAK;
4033
4034 ext_essent_len = bpf_core_essential_name_len(ext->name);
4035 ext->essent_name = NULL;
4036 if (ext_essent_len != strlen(ext->name)) {
4037 ext->essent_name = strndup(ext->name, ext_essent_len);
4038 if (!ext->essent_name)
4039 return -ENOMEM;
4040 }
4041
4042 ext->sec_btf_id = find_extern_sec_btf_id(obj->btf, ext->btf_id);
4043 if (ext->sec_btf_id <= 0) {
4044 pr_warn("failed to find BTF for extern '%s' [%d] section: %d\n",
4045 ext_name, ext->btf_id, ext->sec_btf_id);
4046 return ext->sec_btf_id;
4047 }
4048 sec = (void *)btf__type_by_id(obj->btf, ext->sec_btf_id);
4049 sec_name = btf__name_by_offset(obj->btf, sec->name_off);
4050
4051 if (strcmp(sec_name, KCONFIG_SEC) == 0) {
4052 if (btf_is_func(t)) {
4053 pr_warn("extern function %s is unsupported under %s section\n",
4054 ext->name, KCONFIG_SEC);
4055 return -ENOTSUP;
4056 }
4057 kcfg_sec = sec;
4058 ext->type = EXT_KCFG;
4059 ext->kcfg.sz = btf__resolve_size(obj->btf, t->type);
4060 if (ext->kcfg.sz <= 0) {
4061 pr_warn("failed to resolve size of extern (kcfg) '%s': %d\n",
4062 ext_name, ext->kcfg.sz);
4063 return ext->kcfg.sz;
4064 }
4065 ext->kcfg.align = btf__align_of(obj->btf, t->type);
4066 if (ext->kcfg.align <= 0) {
4067 pr_warn("failed to determine alignment of extern (kcfg) '%s': %d\n",
4068 ext_name, ext->kcfg.align);
4069 return -EINVAL;
4070 }
4071 ext->kcfg.type = find_kcfg_type(obj->btf, t->type,
4072 &ext->kcfg.is_signed);
4073 if (ext->kcfg.type == KCFG_UNKNOWN) {
4074 pr_warn("extern (kcfg) '%s': type is unsupported\n", ext_name);
4075 return -ENOTSUP;
4076 }
4077 } else if (strcmp(sec_name, KSYMS_SEC) == 0) {
4078 ksym_sec = sec;
4079 ext->type = EXT_KSYM;
4080 skip_mods_and_typedefs(obj->btf, t->type,
4081 &ext->ksym.type_id);
4082 } else {
4083 pr_warn("unrecognized extern section '%s'\n", sec_name);
4084 return -ENOTSUP;
4085 }
4086 }
4087 pr_debug("collected %d externs total\n", obj->nr_extern);
4088
4089 if (!obj->nr_extern)
4090 return 0;
4091
4092 /* sort externs by type, for kcfg ones also by (align, size, name) */
4093 qsort(obj->externs, obj->nr_extern, sizeof(*ext), cmp_externs);
4094
4095 /* for .ksyms section, we need to turn all externs into allocated
4096 * variables in BTF to pass kernel verification; we do this by
4097 * pretending that each extern is an int-sized (4-byte) variable
4098 */
4099 if (ksym_sec) {
4100 /* find existing 4-byte integer type in BTF to use for fake
4101 * extern variables in DATASEC
4102 */
4103 int int_btf_id = find_int_btf_id(obj->btf);
4104 /* For extern functions, the dummy_var added earlier
4105 * will be used to replace vs->type, and
4106 * its name string will be used to fill in
4107 * any missing param names.
4108 */
4109 const struct btf_type *dummy_var;
4110
4111 dummy_var = btf__type_by_id(obj->btf, dummy_var_btf_id);
4112 for (i = 0; i < obj->nr_extern; i++) {
4113 ext = &obj->externs[i];
4114 if (ext->type != EXT_KSYM)
4115 continue;
4116 pr_debug("extern (ksym) #%d: symbol %d, name %s\n",
4117 i, ext->sym_idx, ext->name);
4118 }
4119
4120 sec = ksym_sec;
4121 n = btf_vlen(sec);
4122 for (i = 0, off = 0; i < n; i++, off += sizeof(int)) {
4123 struct btf_var_secinfo *vs = btf_var_secinfos(sec) + i;
4124 struct btf_type *vt;
4125
4126 vt = (void *)btf__type_by_id(obj->btf, vs->type);
4127 ext_name = btf__name_by_offset(obj->btf, vt->name_off);
4128 ext = find_extern_by_name(obj, ext_name);
4129 if (!ext) {
4130 pr_warn("failed to find extern definition for BTF %s '%s'\n",
4131 btf_kind_str(vt), ext_name);
4132 return -ESRCH;
4133 }
4134 if (btf_is_func(vt)) {
4135 const struct btf_type *func_proto;
4136 struct btf_param *param;
4137 int j;
4138
4139 func_proto = btf__type_by_id(obj->btf,
4140 vt->type);
4141 param = btf_params(func_proto);
4142 /* Reuse the dummy_var string if the
4143 * func proto does not have a param name.
4144 */
4145 for (j = 0; j < btf_vlen(func_proto); j++)
4146 if (param[j].type && !param[j].name_off)
4147 param[j].name_off =
4148 dummy_var->name_off;
4149 vs->type = dummy_var_btf_id;
4150 vt->info &= ~0xffff;
4151 vt->info |= BTF_FUNC_GLOBAL;
4152 } else {
4153 btf_var(vt)->linkage = BTF_VAR_GLOBAL_ALLOCATED;
4154 vt->type = int_btf_id;
4155 }
4156 vs->offset = off;
4157 vs->size = sizeof(int);
4158 }
4159 sec->size = off;
4160 }
4161
4162 if (kcfg_sec) {
4163 sec = kcfg_sec;
4164 /* for kcfg externs calculate their offsets within a .kconfig map */
4165 off = 0;
4166 for (i = 0; i < obj->nr_extern; i++) {
4167 ext = &obj->externs[i];
4168 if (ext->type != EXT_KCFG)
4169 continue;
4170
4171 ext->kcfg.data_off = roundup(off, ext->kcfg.align);
4172 off = ext->kcfg.data_off + ext->kcfg.sz;
4173 pr_debug("extern (kcfg) #%d: symbol %d, off %u, name %s\n",
4174 i, ext->sym_idx, ext->kcfg.data_off, ext->name);
4175 }
4176 sec->size = off;
4177 n = btf_vlen(sec);
4178 for (i = 0; i < n; i++) {
4179 struct btf_var_secinfo *vs = btf_var_secinfos(sec) + i;
4180
4181 t = btf__type_by_id(obj->btf, vs->type);
4182 ext_name = btf__name_by_offset(obj->btf, t->name_off);
4183 ext = find_extern_by_name(obj, ext_name);
4184 if (!ext) {
4185 pr_warn("failed to find extern definition for BTF var '%s'\n",
4186 ext_name);
4187 return -ESRCH;
4188 }
4189 btf_var(t)->linkage = BTF_VAR_GLOBAL_ALLOCATED;
4190 vs->offset = ext->kcfg.data_off;
4191 }
4192 }
4193 return 0;
4194 }
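
/* BPF-program-side sketch of the .ksyms externs collected above (the symbol
 * names are illustrative; __ksym comes from bpf_helpers.h):
 *
 *	extern const void bpf_prog_active __ksym;	// ksym variable
 *	extern void bpf_rcu_read_lock(void) __ksym;	// ksym function (kfunc)
 */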
4195
4196 static bool prog_is_subprog(const struct bpf_object *obj, const struct bpf_program *prog)
4197 {
4198 return prog->sec_idx == obj->efile.text_shndx && obj->nr_programs > 1;
4199 }
4200
4201 struct bpf_program *
4202 bpf_object__find_program_by_name(const struct bpf_object *obj,
4203 const char *name)
4204 {
4205 struct bpf_program *prog;
4206
4207 bpf_object__for_each_program(prog, obj) {
4208 if (prog_is_subprog(obj, prog))
4209 continue;
4210 if (!strcmp(prog->name, name))
4211 return prog;
4212 }
4213 return errno = ENOENT, NULL;
4214 }
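
/* Caller-side sketch (object file and program names are hypothetical):
 *
 *	struct bpf_object *obj = bpf_object__open_file("prog.bpf.o", NULL);
 *	struct bpf_program *prog;
 *
 *	if (!obj)
 *		return -errno;
 *	prog = bpf_object__find_program_by_name(obj, "handle_exec");
 *	if (!prog)
 *		return -errno;	// errno is ENOENT if no such (non-subprog) program
 */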
4215
4216 static bool bpf_object__shndx_is_data(const struct bpf_object *obj,
4217 int shndx)
4218 {
4219 switch (obj->efile.secs[shndx].sec_type) {
4220 case SEC_BSS:
4221 case SEC_DATA:
4222 case SEC_RODATA:
4223 return true;
4224 default:
4225 return false;
4226 }
4227 }
4228
4229 static bool bpf_object__shndx_is_maps(const struct bpf_object *obj,
4230 int shndx)
4231 {
4232 return shndx == obj->efile.btf_maps_shndx;
4233 }
4234
4235 static enum libbpf_map_type
4236 bpf_object__section_to_libbpf_map_type(const struct bpf_object *obj, int shndx)
4237 {
4238 if (shndx == obj->efile.symbols_shndx)
4239 return LIBBPF_MAP_KCONFIG;
4240
4241 switch (obj->efile.secs[shndx].sec_type) {
4242 case SEC_BSS:
4243 return LIBBPF_MAP_BSS;
4244 case SEC_DATA:
4245 return LIBBPF_MAP_DATA;
4246 case SEC_RODATA:
4247 return LIBBPF_MAP_RODATA;
4248 default:
4249 return LIBBPF_MAP_UNSPEC;
4250 }
4251 }
4252
4253 static int bpf_program__record_reloc(struct bpf_program *prog,
4254 struct reloc_desc *reloc_desc,
4255 __u32 insn_idx, const char *sym_name,
4256 const Elf64_Sym *sym, const Elf64_Rel *rel)
4257 {
4258 struct bpf_insn *insn = &prog->insns[insn_idx];
4259 size_t map_idx, nr_maps = prog->obj->nr_maps;
4260 struct bpf_object *obj = prog->obj;
4261 __u32 shdr_idx = sym->st_shndx;
4262 enum libbpf_map_type type;
4263 const char *sym_sec_name;
4264 struct bpf_map *map;
4265
4266 if (!is_call_insn(insn) && !is_ldimm64_insn(insn)) {
4267 pr_warn("prog '%s': invalid relo against '%s' for insns[%d].code 0x%x\n",
4268 prog->name, sym_name, insn_idx, insn->code);
4269 return -LIBBPF_ERRNO__RELOC;
4270 }
4271
4272 if (sym_is_extern(sym)) {
4273 int sym_idx = ELF64_R_SYM(rel->r_info);
4274 int i, n = obj->nr_extern;
4275 struct extern_desc *ext;
4276
4277 for (i = 0; i < n; i++) {
4278 ext = &obj->externs[i];
4279 if (ext->sym_idx == sym_idx)
4280 break;
4281 }
4282 if (i >= n) {
4283 pr_warn("prog '%s': extern relo failed to find extern for '%s' (%d)\n",
4284 prog->name, sym_name, sym_idx);
4285 return -LIBBPF_ERRNO__RELOC;
4286 }
4287 pr_debug("prog '%s': found extern #%d '%s' (sym %d) for insn #%u\n",
4288 prog->name, i, ext->name, ext->sym_idx, insn_idx);
4289 if (insn->code == (BPF_JMP | BPF_CALL))
4290 reloc_desc->type = RELO_EXTERN_CALL;
4291 else
4292 reloc_desc->type = RELO_EXTERN_LD64;
4293 reloc_desc->insn_idx = insn_idx;
4294 reloc_desc->ext_idx = i;
4295 return 0;
4296 }
4297
4298 /* sub-program call relocation */
4299 if (is_call_insn(insn)) {
4300 if (insn->src_reg != BPF_PSEUDO_CALL) {
4301 pr_warn("prog '%s': incorrect bpf_call opcode\n", prog->name);
4302 return -LIBBPF_ERRNO__RELOC;
4303 }
4304 /* text_shndx can be 0, if no default "main" program exists */
4305 if (!shdr_idx || shdr_idx != obj->efile.text_shndx) {
4306 sym_sec_name = elf_sec_name(obj, elf_sec_by_idx(obj, shdr_idx));
4307 pr_warn("prog '%s': bad call relo against '%s' in section '%s'\n",
4308 prog->name, sym_name, sym_sec_name);
4309 return -LIBBPF_ERRNO__RELOC;
4310 }
4311 if (sym->st_value % BPF_INSN_SZ) {
4312 pr_warn("prog '%s': bad call relo against '%s' at offset %zu\n",
4313 prog->name, sym_name, (size_t)sym->st_value);
4314 return -LIBBPF_ERRNO__RELOC;
4315 }
4316 reloc_desc->type = RELO_CALL;
4317 reloc_desc->insn_idx = insn_idx;
4318 reloc_desc->sym_off = sym->st_value;
4319 return 0;
4320 }
4321
4322 if (!shdr_idx || shdr_idx >= SHN_LORESERVE) {
4323 pr_warn("prog '%s': invalid relo against '%s' in special section 0x%x; forgot to initialize global var?..\n",
4324 prog->name, sym_name, shdr_idx);
4325 return -LIBBPF_ERRNO__RELOC;
4326 }
4327
4328 /* loading subprog addresses */
4329 if (sym_is_subprog(sym, obj->efile.text_shndx)) {
4330 /* global_func: sym->st_value = offset in the section, insn->imm = 0.
4331 * local_func: sym->st_value = 0, insn->imm = offset in the section.
4332 */
4333 if ((sym->st_value % BPF_INSN_SZ) || (insn->imm % BPF_INSN_SZ)) {
4334 pr_warn("prog '%s': bad subprog addr relo against '%s' at offset %zu+%d\n",
4335 prog->name, sym_name, (size_t)sym->st_value, insn->imm);
4336 return -LIBBPF_ERRNO__RELOC;
4337 }
4338
4339 reloc_desc->type = RELO_SUBPROG_ADDR;
4340 reloc_desc->insn_idx = insn_idx;
4341 reloc_desc->sym_off = sym->st_value;
4342 return 0;
4343 }
4344
4345 type = bpf_object__section_to_libbpf_map_type(obj, shdr_idx);
4346 sym_sec_name = elf_sec_name(obj, elf_sec_by_idx(obj, shdr_idx));
4347
4348 /* generic map reference relocation */
4349 if (type == LIBBPF_MAP_UNSPEC) {
4350 if (!bpf_object__shndx_is_maps(obj, shdr_idx)) {
4351 pr_warn("prog '%s': bad map relo against '%s' in section '%s'\n",
4352 prog->name, sym_name, sym_sec_name);
4353 return -LIBBPF_ERRNO__RELOC;
4354 }
4355 for (map_idx = 0; map_idx < nr_maps; map_idx++) {
4356 map = &obj->maps[map_idx];
4357 if (map->libbpf_type != type ||
4358 map->sec_idx != sym->st_shndx ||
4359 map->sec_offset != sym->st_value)
4360 continue;
4361 pr_debug("prog '%s': found map %zd (%s, sec %d, off %zu) for insn #%u\n",
4362 prog->name, map_idx, map->name, map->sec_idx,
4363 map->sec_offset, insn_idx);
4364 break;
4365 }
4366 if (map_idx >= nr_maps) {
4367 pr_warn("prog '%s': map relo failed to find map for section '%s', off %zu\n",
4368 prog->name, sym_sec_name, (size_t)sym->st_value);
4369 return -LIBBPF_ERRNO__RELOC;
4370 }
4371 reloc_desc->type = RELO_LD64;
4372 reloc_desc->insn_idx = insn_idx;
4373 reloc_desc->map_idx = map_idx;
4374 reloc_desc->sym_off = 0; /* sym->st_value determines map_idx */
4375 return 0;
4376 }
4377
4378 /* global data map relocation */
4379 if (!bpf_object__shndx_is_data(obj, shdr_idx)) {
4380 pr_warn("prog '%s': bad data relo against section '%s'\n",
4381 prog->name, sym_sec_name);
4382 return -LIBBPF_ERRNO__RELOC;
4383 }
4384 for (map_idx = 0; map_idx < nr_maps; map_idx++) {
4385 map = &obj->maps[map_idx];
4386 if (map->libbpf_type != type || map->sec_idx != sym->st_shndx)
4387 continue;
4388 pr_debug("prog '%s': found data map %zd (%s, sec %d, off %zu) for insn %u\n",
4389 prog->name, map_idx, map->name, map->sec_idx,
4390 map->sec_offset, insn_idx);
4391 break;
4392 }
4393 if (map_idx >= nr_maps) {
4394 pr_warn("prog '%s': data relo failed to find map for section '%s'\n",
4395 prog->name, sym_sec_name);
4396 return -LIBBPF_ERRNO__RELOC;
4397 }
4398
4399 reloc_desc->type = RELO_DATA;
4400 reloc_desc->insn_idx = insn_idx;
4401 reloc_desc->map_idx = map_idx;
4402 reloc_desc->sym_off = sym->st_value;
4403 return 0;
4404 }
4405
4406 static bool prog_contains_insn(const struct bpf_program *prog, size_t insn_idx)
4407 {
4408 return insn_idx >= prog->sec_insn_off &&
4409 insn_idx < prog->sec_insn_off + prog->sec_insn_cnt;
4410 }
4411
4412 static struct bpf_program *find_prog_by_sec_insn(const struct bpf_object *obj,
4413 size_t sec_idx, size_t insn_idx)
4414 {
4415 int l = 0, r = obj->nr_programs - 1, m;
4416 struct bpf_program *prog;
4417
4418 if (!obj->nr_programs)
4419 return NULL;
4420
4421 while (l < r) {
4422 m = l + (r - l + 1) / 2;
4423 prog = &obj->programs[m];
4424
4425 if (prog->sec_idx < sec_idx ||
4426 (prog->sec_idx == sec_idx && prog->sec_insn_off <= insn_idx))
4427 l = m;
4428 else
4429 r = m - 1;
4430 }
4431 /* matching program could be at index l, but it still might be the
4432 * wrong one, so we need to double-check the conditions one last time
4433 */
4434 prog = &obj->programs[l];
4435 if (prog->sec_idx == sec_idx && prog_contains_insn(prog, insn_idx))
4436 return prog;
4437 return NULL;
4438 }
4439
4440 static int
4441 bpf_object__collect_prog_relos(struct bpf_object *obj, Elf64_Shdr *shdr, Elf_Data *data)
4442 {
4443 const char *relo_sec_name, *sec_name;
4444 size_t sec_idx = shdr->sh_info, sym_idx;
4445 struct bpf_program *prog;
4446 struct reloc_desc *relos;
4447 int err, i, nrels;
4448 const char *sym_name;
4449 __u32 insn_idx;
4450 Elf_Scn *scn;
4451 Elf_Data *scn_data;
4452 Elf64_Sym *sym;
4453 Elf64_Rel *rel;
4454
4455 if (sec_idx >= obj->efile.sec_cnt)
4456 return -EINVAL;
4457
4458 scn = elf_sec_by_idx(obj, sec_idx);
4459 scn_data = elf_sec_data(obj, scn);
4460 if (!scn_data)
4461 return -LIBBPF_ERRNO__FORMAT;
4462
4463 relo_sec_name = elf_sec_str(obj, shdr->sh_name);
4464 sec_name = elf_sec_name(obj, scn);
4465 if (!relo_sec_name || !sec_name)
4466 return -EINVAL;
4467
4468 pr_debug("sec '%s': collecting relocation for section(%zu) '%s'\n",
4469 relo_sec_name, sec_idx, sec_name);
4470 nrels = shdr->sh_size / shdr->sh_entsize;
4471
4472 for (i = 0; i < nrels; i++) {
4473 rel = elf_rel_by_idx(data, i);
4474 if (!rel) {
4475 pr_warn("sec '%s': failed to get relo #%d\n", relo_sec_name, i);
4476 return -LIBBPF_ERRNO__FORMAT;
4477 }
4478
4479 sym_idx = ELF64_R_SYM(rel->r_info);
4480 sym = elf_sym_by_idx(obj, sym_idx);
4481 if (!sym) {
4482 pr_warn("sec '%s': symbol #%zu not found for relo #%d\n",
4483 relo_sec_name, sym_idx, i);
4484 return -LIBBPF_ERRNO__FORMAT;
4485 }
4486
4487 if (sym->st_shndx >= obj->efile.sec_cnt) {
4488 pr_warn("sec '%s': corrupted symbol #%zu pointing to invalid section #%zu for relo #%d\n",
4489 relo_sec_name, sym_idx, (size_t)sym->st_shndx, i);
4490 return -LIBBPF_ERRNO__FORMAT;
4491 }
4492
4493 if (rel->r_offset % BPF_INSN_SZ || rel->r_offset >= scn_data->d_size) {
4494 pr_warn("sec '%s': invalid offset 0x%zx for relo #%d\n",
4495 relo_sec_name, (size_t)rel->r_offset, i);
4496 return -LIBBPF_ERRNO__FORMAT;
4497 }
4498
4499 insn_idx = rel->r_offset / BPF_INSN_SZ;
4500 /* relocations against static functions are recorded as
4501 * relocations against the section that contains a function;
4502 * in such a case, the symbol will be STT_SECTION and sym.st_name
4503 * will point to an empty string (0), so fetch the section name
4504 * instead
4505 */
4506 if (ELF64_ST_TYPE(sym->st_info) == STT_SECTION && sym->st_name == 0)
4507 sym_name = elf_sec_name(obj, elf_sec_by_idx(obj, sym->st_shndx));
4508 else
4509 sym_name = elf_sym_str(obj, sym->st_name);
4510 sym_name = sym_name ?: "<?";
4511
4512 pr_debug("sec '%s': relo #%d: insn #%u against '%s'\n",
4513 relo_sec_name, i, insn_idx, sym_name);
4514
4515 prog = find_prog_by_sec_insn(obj, sec_idx, insn_idx);
4516 if (!prog) {
4517 pr_debug("sec '%s': relo #%d: couldn't find program in section '%s' for insn #%u, probably overridden weak function, skipping...\n",
4518 relo_sec_name, i, sec_name, insn_idx);
4519 continue;
4520 }
4521
4522 relos = libbpf_reallocarray(prog->reloc_desc,
4523 prog->nr_reloc + 1, sizeof(*relos));
4524 if (!relos)
4525 return -ENOMEM;
4526 prog->reloc_desc = relos;
4527
4528 /* adjust insn_idx to local BPF program frame of reference */
4529 insn_idx -= prog->sec_insn_off;
4530 err = bpf_program__record_reloc(prog, &relos[prog->nr_reloc],
4531 insn_idx, sym_name, sym, rel);
4532 if (err)
4533 return err;
4534
4535 prog->nr_reloc++;
4536 }
4537 return 0;
4538 }
4539
4540 static int map_fill_btf_type_info(struct bpf_object *obj, struct bpf_map *map)
4541 {
4542 int id;
4543
4544 if (!obj->btf)
4545 return -ENOENT;
4546
4547 /* if it's a BTF-defined map, we don't need to search for type IDs.
4548 * A struct_ops map does not need btf_key_type_id and
4549 * btf_value_type_id.
4550 */
4551 if (map->sec_idx == obj->efile.btf_maps_shndx || bpf_map__is_struct_ops(map))
4552 return 0;
4553
4554 /*
4555 * LLVM annotates global data differently in BTF, that is,
4556 * only as '.data', '.bss' or '.rodata'.
4557 */
4558 if (!bpf_map__is_internal(map))
4559 return -ENOENT;
4560
4561 id = btf__find_by_name(obj->btf, map->real_name);
4562 if (id < 0)
4563 return id;
4564
4565 map->btf_key_type_id = 0;
4566 map->btf_value_type_id = id;
4567 return 0;
4568 }
4569
4570 static int bpf_get_map_info_from_fdinfo(int fd, struct bpf_map_info *info)
4571 {
4572 char file[PATH_MAX], buff[4096];
4573 FILE *fp;
4574 __u32 val;
4575 int err;
4576
4577 snprintf(file, sizeof(file), "/proc/%d/fdinfo/%d", getpid(), fd);
4578 memset(info, 0, sizeof(*info));
4579
4580 fp = fopen(file, "re");
4581 if (!fp) {
4582 err = -errno;
4583 pr_warn("failed to open %s: %d. No procfs support?\n", file,
4584 err);
4585 return err;
4586 }
4587
4588 while (fgets(buff, sizeof(buff), fp)) {
4589 if (sscanf(buff, "map_type:\t%u", &val) == 1)
4590 info->type = val;
4591 else if (sscanf(buff, "key_size:\t%u", &val) == 1)
4592 info->key_size = val;
4593 else if (sscanf(buff, "value_size:\t%u", &val) == 1)
4594 info->value_size = val;
4595 else if (sscanf(buff, "max_entries:\t%u", &val) == 1)
4596 info->max_entries = val;
4597 else if (sscanf(buff, "map_flags:\t%i", &val) == 1)
4598 info->map_flags = val;
4599 }
4600
4601 fclose(fp);
4602
4603 return 0;
4604 }
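
/* The fdinfo text parsed above looks roughly like this for a BPF map FD
 * (values are illustrative; only the fields matched by sscanf() are shown):
 *
 *	map_type:	1
 *	key_size:	4
 *	value_size:	8
 *	max_entries:	1024
 *	map_flags:	0x0
 */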
4605
4606 bool bpf_map__autocreate(const struct bpf_map *map)
4607 {
4608 return map->autocreate;
4609 }
4610
4611 int bpf_map__set_autocreate(struct bpf_map *map, bool autocreate)
4612 {
4613 if (map->obj->loaded)
4614 return libbpf_err(-EBUSY);
4615
4616 map->autocreate = autocreate;
4617 return 0;
4618 }
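
/* Caller-side sketch: skip creation of an optional map (the map name is
 * hypothetical):
 *
 *	struct bpf_map *m = bpf_object__find_map_by_name(obj, "optional_stats");
 *
 *	err = bpf_map__set_autocreate(m, false);	// must happen before bpf_object__load()
 */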
4619
4620 int bpf_map__reuse_fd(struct bpf_map *map, int fd)
4621 {
4622 struct bpf_map_info info;
4623 __u32 len = sizeof(info), name_len;
4624 int new_fd, err;
4625 char *new_name;
4626
4627 memset(&info, 0, len);
4628 err = bpf_map_get_info_by_fd(fd, &info, &len);
4629 if (err && errno == EINVAL)
4630 err = bpf_get_map_info_from_fdinfo(fd, &info);
4631 if (err)
4632 return libbpf_err(err);
4633
4634 name_len = strlen(info.name);
4635 if (name_len == BPF_OBJ_NAME_LEN - 1 && strncmp(map->name, info.name, name_len) == 0)
4636 new_name = strdup(map->name);
4637 else
4638 new_name = strdup(info.name);
4639
4640 if (!new_name)
4641 return libbpf_err(-errno);
4642
4643 /*
4644 * Like dup(), but make sure new FD is >= 3 and has O_CLOEXEC set.
4645 * This is similar to what we do in ensure_good_fd(), but without
4646 * closing original FD.
4647 */
4648 new_fd = fcntl(fd, F_DUPFD_CLOEXEC, 3);
4649 if (new_fd < 0) {
4650 err = -errno;
4651 goto err_free_new_name;
4652 }
4653
4654 err = reuse_fd(map->fd, new_fd);
4655 if (err)
4656 goto err_free_new_name;
4657
4658 free(map->name);
4659
4660 map->name = new_name;
4661 map->def.type = info.type;
4662 map->def.key_size = info.key_size;
4663 map->def.value_size = info.value_size;
4664 map->def.max_entries = info.max_entries;
4665 map->def.map_flags = info.map_flags;
4666 map->btf_key_type_id = info.btf_key_type_id;
4667 map->btf_value_type_id = info.btf_value_type_id;
4668 map->reused = true;
4669 map->map_extra = info.map_extra;
4670
4671 return 0;
4672
4673 err_free_new_name:
4674 free(new_name);
4675 return libbpf_err(err);
4676 }
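
/* Caller-side sketch for sharing one pinned map across objects (the pin path
 * and map name are hypothetical):
 *
 *	int pin_fd = bpf_obj_get("/sys/fs/bpf/shared_counts");
 *
 *	if (pin_fd < 0)
 *		return pin_fd;
 *	err = bpf_map__reuse_fd(bpf_object__find_map_by_name(obj, "counts"), pin_fd);
 *	close(pin_fd);	// bpf_map__reuse_fd() dup'ed it, the original FD can be closed
 */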
4677
4678 __u32 bpf_map__max_entries(const struct bpf_map *map)
4679 {
4680 return map->def.max_entries;
4681 }
4682
4683 struct bpf_map *bpf_map__inner_map(struct bpf_map *map)
4684 {
4685 if (!bpf_map_type__is_map_in_map(map->def.type))
4686 return errno = EINVAL, NULL;
4687
4688 return map->inner_map;
4689 }
4690
4691 int bpf_map__set_max_entries(struct bpf_map *map, __u32 max_entries)
4692 {
4693 if (map->obj->loaded)
4694 return libbpf_err(-EBUSY);
4695
4696 map->def.max_entries = max_entries;
4697
4698 /* auto-adjust BPF ringbuf map max_entries to be a multiple of page size */
4699 if (map_is_ringbuf(map))
4700 map->def.max_entries = adjust_ringbuf_sz(map->def.max_entries);
4701
4702 return 0;
4703 }
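
/* Caller-side sketch: resize a ring buffer map before load (the map name is
 * hypothetical); for BPF_MAP_TYPE_RINGBUF the requested size is auto-adjusted
 * above to a page-size multiple suitable for ring buffers:
 *
 *	struct bpf_map *rb = bpf_object__find_map_by_name(obj, "events");
 *
 *	err = bpf_map__set_max_entries(rb, 512 * 1024);
 */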
4704
4705 static int bpf_object_prepare_token(struct bpf_object *obj)
4706 {
4707 const char *bpffs_path;
4708 int bpffs_fd = -1, token_fd, err;
4709 bool mandatory;
4710 enum libbpf_print_level level;
4711
4712 /* token is explicitly prevented */
4713 if (obj->token_path && obj->token_path[0] == '\0') {
4714 pr_debug("object '%s': token is prevented, skipping...\n", obj->name);
4715 return 0;
4716 }
4717
4718 mandatory = obj->token_path != NULL;
4719 level = mandatory ? LIBBPF_WARN : LIBBPF_DEBUG;
4720
4721 bpffs_path = obj->token_path ?: BPF_FS_DEFAULT_PATH;
4722 bpffs_fd = open(bpffs_path, O_DIRECTORY, O_RDWR);
4723 if (bpffs_fd < 0) {
4724 err = -errno;
4725 __pr(level, "object '%s': failed (%d) to open BPF FS mount at '%s'%s\n",
4726 obj->name, err, bpffs_path,
4727 mandatory ? "" : ", skipping optional step...");
4728 return mandatory ? err : 0;
4729 }
4730
4731 token_fd = bpf_token_create(bpffs_fd, 0);
4732 close(bpffs_fd);
4733 if (token_fd < 0) {
4734 if (!mandatory && token_fd == -ENOENT) {
4735 pr_debug("object '%s': BPF FS at '%s' doesn't have BPF token delegation set up, skipping...\n",
4736 obj->name, bpffs_path);
4737 return 0;
4738 }
4739 __pr(level, "object '%s': failed (%d) to create BPF token from '%s'%s\n",
4740 obj->name, token_fd, bpffs_path,
4741 mandatory ? "" : ", skipping optional step...");
4742 return mandatory ? token_fd : 0;
4743 }
4744
4745 obj->feat_cache = calloc(1, sizeof(*obj->feat_cache));
4746 if (!obj->feat_cache) {
4747 close(token_fd);
4748 return -ENOMEM;
4749 }
4750
4751 obj->token_fd = token_fd;
4752 obj->feat_cache->token_fd = token_fd;
4753
4754 return 0;
4755 }
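
/* Caller-side sketch for requesting a BPF token explicitly at open time
 * (assuming the bpf_token_path field of bpf_object_open_opts, and a bpffs
 * mount at /sys/fs/bpf with delegation options configured):
 *
 *	LIBBPF_OPTS(bpf_object_open_opts, opts, .bpf_token_path = "/sys/fs/bpf");
 *	struct bpf_object *obj = bpf_object__open_file("prog.bpf.o", &opts);
 */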
4756
4757 static int
4758 bpf_object__probe_loading(struct bpf_object *obj)
4759 {
4760 char *cp, errmsg[STRERR_BUFSIZE];
4761 struct bpf_insn insns[] = {
4762 BPF_MOV64_IMM(BPF_REG_0, 0),
4763 BPF_EXIT_INSN(),
4764 };
4765 int ret, insn_cnt = ARRAY_SIZE(insns);
4766 LIBBPF_OPTS(bpf_prog_load_opts, opts,
4767 .token_fd = obj->token_fd,
4768 .prog_flags = obj->token_fd ? BPF_F_TOKEN_FD : 0,
4769 );
4770
4771 if (obj->gen_loader)
4772 return 0;
4773
4774 ret = bump_rlimit_memlock();
4775 if (ret)
4776 pr_warn("Failed to bump RLIMIT_MEMLOCK (err = %d), you might need to do it explicitly!\n", ret);
4777
4778 /* make sure basic loading works */
4779 ret = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, NULL, "GPL", insns, insn_cnt, &opts);
4780 if (ret < 0)
4781 ret = bpf_prog_load(BPF_PROG_TYPE_TRACEPOINT, NULL, "GPL", insns, insn_cnt, &opts);
4782 if (ret < 0) {
4783 ret = errno;
4784 cp = libbpf_strerror_r(ret, errmsg, sizeof(errmsg));
4785 pr_warn("Error in %s():%s(%d). Couldn't load trivial BPF "
4786 "program. Make sure your kernel supports BPF "
4787 "(CONFIG_BPF_SYSCALL=y) and/or that RLIMIT_MEMLOCK is "
4788 "set to big enough value.\n", __func__, cp, ret);
4789 return -ret;
4790 }
4791 close(ret);
4792
4793 return 0;
4794 }
4795
4796 bool kernel_supports(const struct bpf_object *obj, enum kern_feature_id feat_id)
4797 {
4798 if (obj->gen_loader)
4799 /* To generate a loader program, assume the latest kernel
4800 * to avoid doing extra prog_load, map_create syscalls.
4801 */
4802 return true;
4803
4804 if (obj->token_fd)
4805 return feat_supported(obj->feat_cache, feat_id);
4806
4807 return feat_supported(NULL, feat_id);
4808 }
4809
4810 static bool map_is_reuse_compat(const struct bpf_map *map, int map_fd)
4811 {
4812 struct bpf_map_info map_info;
4813 char msg[STRERR_BUFSIZE];
4814 __u32 map_info_len = sizeof(map_info);
4815 int err;
4816
4817 memset(&map_info, 0, map_info_len);
4818 err = bpf_map_get_info_by_fd(map_fd, &map_info, &map_info_len);
4819 if (err && errno == EINVAL)
4820 err = bpf_get_map_info_from_fdinfo(map_fd, &map_info);
4821 if (err) {
4822 pr_warn("failed to get map info for map FD %d: %s\n", map_fd,
4823 libbpf_strerror_r(errno, msg, sizeof(msg)));
4824 return false;
4825 }
4826
4827 return (map_info.type == map->def.type &&
4828 map_info.key_size == map->def.key_size &&
4829 map_info.value_size == map->def.value_size &&
4830 map_info.max_entries == map->def.max_entries &&
4831 map_info.map_flags == map->def.map_flags &&
4832 map_info.map_extra == map->map_extra);
4833 }
4834
4835 static int
4836 bpf_object__reuse_map(struct bpf_map *map)
4837 {
4838 char *cp, errmsg[STRERR_BUFSIZE];
4839 int err, pin_fd;
4840
4841 pin_fd = bpf_obj_get(map->pin_path);
4842 if (pin_fd < 0) {
4843 err = -errno;
4844 if (err == -ENOENT) {
4845 pr_debug("found no pinned map to reuse at '%s'\n",
4846 map->pin_path);
4847 return 0;
4848 }
4849
4850 cp = libbpf_strerror_r(-err, errmsg, sizeof(errmsg));
4851 pr_warn("couldn't retrieve pinned map '%s': %s\n",
4852 map->pin_path, cp);
4853 return err;
4854 }
4855
4856 if (!map_is_reuse_compat(map, pin_fd)) {
4857 pr_warn("couldn't reuse pinned map at '%s': parameter mismatch\n",
4858 map->pin_path);
4859 close(pin_fd);
4860 return -EINVAL;
4861 }
4862
4863 err = bpf_map__reuse_fd(map, pin_fd);
4864 close(pin_fd);
4865 if (err)
4866 return err;
4867
4868 map->pinned = true;
4869 pr_debug("reused pinned map at '%s'\n", map->pin_path);
4870
4871 return 0;
4872 }
4873
4874 static int
4875 bpf_object__populate_internal_map(struct bpf_object *obj, struct bpf_map *map)
4876 {
4877 enum libbpf_map_type map_type = map->libbpf_type;
4878 char *cp, errmsg[STRERR_BUFSIZE];
4879 int err, zero = 0;
4880
4881 if (obj->gen_loader) {
4882 bpf_gen__map_update_elem(obj->gen_loader, map - obj->maps,
4883 map->mmaped, map->def.value_size);
4884 if (map_type == LIBBPF_MAP_RODATA || map_type == LIBBPF_MAP_KCONFIG)
4885 bpf_gen__map_freeze(obj->gen_loader, map - obj->maps);
4886 return 0;
4887 }
4888 err = bpf_map_update_elem(map->fd, &zero, map->mmaped, 0);
4889 if (err) {
4890 err = -errno;
4891 cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg));
4892 pr_warn("Error setting initial map(%s) contents: %s\n",
4893 map->name, cp);
4894 return err;
4895 }
4896
4897 /* Freeze .rodata and .kconfig map as read-only from syscall side. */
4898 if (map_type == LIBBPF_MAP_RODATA || map_type == LIBBPF_MAP_KCONFIG) {
4899 err = bpf_map_freeze(map->fd);
4900 if (err) {
4901 err = -errno;
4902 cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg));
4903 pr_warn("Error freezing map(%s) as read-only: %s\n",
4904 map->name, cp);
4905 return err;
4906 }
4907 }
4908 return 0;
4909 }
4910
4911 static void bpf_map__destroy(struct bpf_map *map);
4912
4913 static bool map_is_created(const struct bpf_map *map)
4914 {
4915 return map->obj->loaded || map->reused;
4916 }
4917
4918 static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map, bool is_inner)
4919 {
4920 LIBBPF_OPTS(bpf_map_create_opts, create_attr);
4921 struct bpf_map_def *def = &map->def;
4922 const char *map_name = NULL;
4923 int err = 0, map_fd;
4924
4925 if (kernel_supports(obj, FEAT_PROG_NAME))
4926 map_name = map->name;
4927 create_attr.map_ifindex = map->map_ifindex;
4928 create_attr.map_flags = def->map_flags;
4929 create_attr.numa_node = map->numa_node;
4930 create_attr.map_extra = map->map_extra;
4931 create_attr.token_fd = obj->token_fd;
4932 if (obj->token_fd)
4933 create_attr.map_flags |= BPF_F_TOKEN_FD;
4934
4935 if (bpf_map__is_struct_ops(map)) {
4936 create_attr.btf_vmlinux_value_type_id = map->btf_vmlinux_value_type_id;
4937 if (map->mod_btf_fd >= 0) {
4938 create_attr.value_type_btf_obj_fd = map->mod_btf_fd;
4939 create_attr.map_flags |= BPF_F_VTYPE_BTF_OBJ_FD;
4940 }
4941 }
4942
4943 if (obj->btf && btf__fd(obj->btf) >= 0) {
4944 create_attr.btf_fd = btf__fd(obj->btf);
4945 create_attr.btf_key_type_id = map->btf_key_type_id;
4946 create_attr.btf_value_type_id = map->btf_value_type_id;
4947 }
4948
4949 if (bpf_map_type__is_map_in_map(def->type)) {
4950 if (map->inner_map) {
4951 err = map_set_def_max_entries(map->inner_map);
4952 if (err)
4953 return err;
4954 err = bpf_object__create_map(obj, map->inner_map, true);
4955 if (err) {
4956 pr_warn("map '%s': failed to create inner map: %d\n",
4957 map->name, err);
4958 return err;
4959 }
4960 map->inner_map_fd = map->inner_map->fd;
4961 }
4962 if (map->inner_map_fd >= 0)
4963 create_attr.inner_map_fd = map->inner_map_fd;
4964 }
4965
4966 switch (def->type) {
4967 case BPF_MAP_TYPE_PERF_EVENT_ARRAY:
4968 case BPF_MAP_TYPE_CGROUP_ARRAY:
4969 case BPF_MAP_TYPE_STACK_TRACE:
4970 case BPF_MAP_TYPE_ARRAY_OF_MAPS:
4971 case BPF_MAP_TYPE_HASH_OF_MAPS:
4972 case BPF_MAP_TYPE_DEVMAP:
4973 case BPF_MAP_TYPE_DEVMAP_HASH:
4974 case BPF_MAP_TYPE_CPUMAP:
4975 case BPF_MAP_TYPE_XSKMAP:
4976 case BPF_MAP_TYPE_SOCKMAP:
4977 case BPF_MAP_TYPE_SOCKHASH:
4978 case BPF_MAP_TYPE_QUEUE:
4979 case BPF_MAP_TYPE_STACK:
4980 create_attr.btf_fd = 0;
4981 create_attr.btf_key_type_id = 0;
4982 create_attr.btf_value_type_id = 0;
4983 map->btf_key_type_id = 0;
4984 map->btf_value_type_id = 0;
4985 break;
4986 case BPF_MAP_TYPE_STRUCT_OPS:
4987 create_attr.btf_value_type_id = 0;
4988 break;
4989 default:
4990 break;
4991 }
4992
4993 if (obj->gen_loader) {
4994 bpf_gen__map_create(obj->gen_loader, def->type, map_name,
4995 def->key_size, def->value_size, def->max_entries,
4996 &create_attr, is_inner ? -1 : map - obj->maps);
4997 /* We keep pretending we have a valid FD to pass various fd >= 0
4998 * checks by just keeping original placeholder FDs in place.
4999 * See bpf_object__add_map() comment.
5000 * This placeholder fd will not be used with any syscall and
5001 * will be reset to -1 eventually.
5002 */
5003 map_fd = map->fd;
5004 } else {
5005 map_fd = bpf_map_create(def->type, map_name,
5006 def->key_size, def->value_size,
5007 def->max_entries, &create_attr);
5008 }
5009 if (map_fd < 0 && (create_attr.btf_key_type_id || create_attr.btf_value_type_id)) {
5010 char *cp, errmsg[STRERR_BUFSIZE];
5011
5012 err = -errno;
5013 cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg));
5014 pr_warn("Error in bpf_create_map_xattr(%s):%s(%d). Retrying without BTF.\n",
5015 map->name, cp, err);
5016 create_attr.btf_fd = 0;
5017 create_attr.btf_key_type_id = 0;
5018 create_attr.btf_value_type_id = 0;
5019 map->btf_key_type_id = 0;
5020 map->btf_value_type_id = 0;
5021 map_fd = bpf_map_create(def->type, map_name,
5022 def->key_size, def->value_size,
5023 def->max_entries, &create_attr);
5024 }
5025
5026 if (bpf_map_type__is_map_in_map(def->type) && map->inner_map) {
5027 if (obj->gen_loader)
5028 map->inner_map->fd = -1;
5029 bpf_map__destroy(map->inner_map);
5030 zfree(&map->inner_map);
5031 }
5032
5033 if (map_fd < 0)
5034 return map_fd;
5035
5036 /* obj->gen_loader case, prevent reuse_fd() from closing map_fd */
5037 if (map->fd == map_fd)
5038 return 0;
5039
5040 /* Keep placeholder FD value but now point it to the BPF map object.
5041 * This way everything that relied on this map's FD (e.g., relocated
5042 * ldimm64 instructions) will stay valid and won't need adjustments.
5043 * map->fd stays valid but now points to what map_fd points to.
5044 */
5045 return reuse_fd(map->fd, map_fd);
5046 }
5047
5048 static int init_map_in_map_slots(struct bpf_object *obj, struct bpf_map *map)
5049 {
5050 const struct bpf_map *targ_map;
5051 unsigned int i;
5052 int fd, err = 0;
5053
5054 for (i = 0; i < map->init_slots_sz; i++) {
5055 if (!map->init_slots[i])
5056 continue;
5057
5058 targ_map = map->init_slots[i];
5059 fd = targ_map->fd;
5060
5061 if (obj->gen_loader) {
5062 bpf_gen__populate_outer_map(obj->gen_loader,
5063 map - obj->maps, i,
5064 targ_map - obj->maps);
5065 } else {
5066 err = bpf_map_update_elem(map->fd, &i, &fd, 0);
5067 }
5068 if (err) {
5069 err = -errno;
5070 pr_warn("map '%s': failed to initialize slot [%d] to map '%s' fd=%d: %d\n",
5071 map->name, i, targ_map->name, fd, err);
5072 return err;
5073 }
5074 pr_debug("map '%s': slot [%d] set to map '%s' fd=%d\n",
5075 map->name, i, targ_map->name, fd);
5076 }
5077
5078 zfree(&map->init_slots);
5079 map->init_slots_sz = 0;
5080
5081 return 0;
5082 }
5083
5084 static int init_prog_array_slots(struct bpf_object *obj, struct bpf_map *map)
5085 {
5086 const struct bpf_program *targ_prog;
5087 unsigned int i;
5088 int fd, err;
5089
5090 if (obj->gen_loader)
5091 return -ENOTSUP;
5092
5093 for (i = 0; i < map->init_slots_sz; i++) {
5094 if (!map->init_slots[i])
5095 continue;
5096
5097 targ_prog = map->init_slots[i];
5098 fd = bpf_program__fd(targ_prog);
5099
5100 err = bpf_map_update_elem(map->fd, &i, &fd, 0);
5101 if (err) {
5102 err = -errno;
5103 pr_warn("map '%s': failed to initialize slot [%d] to prog '%s' fd=%d: %d\n",
5104 map->name, i, targ_prog->name, fd, err);
5105 return err;
5106 }
5107 pr_debug("map '%s': slot [%d] set to prog '%s' fd=%d\n",
5108 map->name, i, targ_prog->name, fd);
5109 }
5110
5111 zfree(&map->init_slots);
5112 map->init_slots_sz = 0;
5113
5114 return 0;
5115 }
5116
5117 static int bpf_object_init_prog_arrays(struct bpf_object *obj)
5118 {
5119 struct bpf_map *map;
5120 int i, err;
5121
5122 for (i = 0; i < obj->nr_maps; i++) {
5123 map = &obj->maps[i];
5124
5125 if (!map->init_slots_sz || map->def.type != BPF_MAP_TYPE_PROG_ARRAY)
5126 continue;
5127
5128 err = init_prog_array_slots(obj, map);
5129 if (err < 0)
5130 return err;
5131 }
5132 return 0;
5133 }
5134
5135 static int map_set_def_max_entries(struct bpf_map *map)
5136 {
5137 if (map->def.type == BPF_MAP_TYPE_PERF_EVENT_ARRAY && !map->def.max_entries) {
5138 int nr_cpus;
5139
5140 nr_cpus = libbpf_num_possible_cpus();
5141 if (nr_cpus < 0) {
5142 pr_warn("map '%s': failed to determine number of system CPUs: %d\n",
5143 map->name, nr_cpus);
5144 return nr_cpus;
5145 }
5146 pr_debug("map '%s': setting size to %d\n", map->name, nr_cpus);
5147 map->def.max_entries = nr_cpus;
5148 }
5149
5150 return 0;
5151 }
5152
5153 static int
5154 bpf_object__create_maps(struct bpf_object *obj)
5155 {
5156 struct bpf_map *map;
5157 char *cp, errmsg[STRERR_BUFSIZE];
5158 unsigned int i, j;
5159 int err;
5160 bool retried;
5161
5162 for (i = 0; i < obj->nr_maps; i++) {
5163 map = &obj->maps[i];
5164
5165 /* To support old kernels, we skip creating global data maps
5166 * (.rodata, .data, .kconfig, etc); later on, during program
5167 * loading, if we detect that at least one of the to-be-loaded
5168 * programs is referencing any global data map, we'll error
5169 * out with program name and relocation index logged.
5170 * This approach allows us to accommodate Clang emitting
5171 * unnecessary .rodata.str1.1 sections for string literals,
5172 * and also allows CO-RE applications to use
5173 * global variables in some BPF programs, but not others.
5174 * If those global variable-using programs are not loaded at
5175 * runtime due to bpf_program__set_autoload(prog, false),
5176 * bpf_object loading will succeed just fine even on old
5177 * kernels.
5178 */
5179 if (bpf_map__is_internal(map) && !kernel_supports(obj, FEAT_GLOBAL_DATA))
5180 map->autocreate = false;
5181
5182 if (!map->autocreate) {
5183 pr_debug("map '%s': skipped auto-creating...\n", map->name);
5184 continue;
5185 }
5186
5187 err = map_set_def_max_entries(map);
5188 if (err)
5189 goto err_out;
5190
5191 retried = false;
5192 retry:
5193 if (map->pin_path) {
5194 err = bpf_object__reuse_map(map);
5195 if (err) {
5196 pr_warn("map '%s': error reusing pinned map\n",
5197 map->name);
5198 goto err_out;
5199 }
5200 if (retried && map->fd < 0) {
5201 pr_warn("map '%s': cannot find pinned map\n",
5202 map->name);
5203 err = -ENOENT;
5204 goto err_out;
5205 }
5206 }
5207
5208 if (map->reused) {
5209 pr_debug("map '%s': skipping creation (preset fd=%d)\n",
5210 map->name, map->fd);
5211 } else {
5212 err = bpf_object__create_map(obj, map, false);
5213 if (err)
5214 goto err_out;
5215
5216 pr_debug("map '%s': created successfully, fd=%d\n",
5217 map->name, map->fd);
5218
5219 if (bpf_map__is_internal(map)) {
5220 err = bpf_object__populate_internal_map(obj, map);
5221 if (err < 0)
5222 goto err_out;
5223 }
5224
5225 if (map->init_slots_sz && map->def.type != BPF_MAP_TYPE_PROG_ARRAY) {
5226 err = init_map_in_map_slots(obj, map);
5227 if (err < 0)
5228 goto err_out;
5229 }
5230 }
5231
5232 if (map->pin_path && !map->pinned) {
5233 err = bpf_map__pin(map, NULL);
5234 if (err) {
5235 if (!retried && err == -EEXIST) {
5236 retried = true;
5237 goto retry;
5238 }
5239 pr_warn("map '%s': failed to auto-pin at '%s': %d\n",
5240 map->name, map->pin_path, err);
5241 goto err_out;
5242 }
5243 }
5244 }
5245
5246 return 0;
5247
5248 err_out:
5249 cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg));
5250 pr_warn("map '%s': failed to create: %s(%d)\n", map->name, cp, err);
5251 pr_perm_msg(err);
5252 for (j = 0; j < i; j++)
5253 zclose(obj->maps[j].fd);
5254 return err;
5255 }
5256
5257 static bool bpf_core_is_flavor_sep(const char *s)
5258 {
5259 /* check X___Y name pattern, where X and Y are not underscores */
5260 return s[0] != '_' && /* X */
5261 s[1] == '_' && s[2] == '_' && s[3] == '_' && /* ___ */
5262 s[4] != '_'; /* Y */
5263 }
5264
5265 /* Given 'some_struct_name___with_flavor' return the length of a name prefix
5266 * before the last triple underscore. The struct name part after the last
5267 * triple underscore is ignored during BPF CO-RE relocation matching.
5268 */
5269 size_t bpf_core_essential_name_len(const char *name)
5270 {
5271 size_t n = strlen(name);
5272 int i;
5273
5274 for (i = n - 5; i >= 0; i--) {
5275 if (bpf_core_is_flavor_sep(name + i))
5276 return i + 1;
5277 }
5278 return n;
5279 }
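
/* Example: both of these local "flavors" have the essential name
 * "task_struct" (the prefix before the last '___'), so CO-RE candidate
 * matching considers plain 'struct task_struct' in the target BTF for
 * either one (field layouts are illustrative):
 *
 *	struct task_struct___old { long state; } __attribute__((preserve_access_index));
 *	struct task_struct___new { unsigned int __state; } __attribute__((preserve_access_index));
 */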
5280
5281 void bpf_core_free_cands(struct bpf_core_cand_list *cands)
5282 {
5283 if (!cands)
5284 return;
5285
5286 free(cands->cands);
5287 free(cands);
5288 }
5289
5290 int bpf_core_add_cands(struct bpf_core_cand *local_cand,
5291 size_t local_essent_len,
5292 const struct btf *targ_btf,
5293 const char *targ_btf_name,
5294 int targ_start_id,
5295 struct bpf_core_cand_list *cands)
5296 {
5297 struct bpf_core_cand *new_cands, *cand;
5298 const struct btf_type *t, *local_t;
5299 const char *targ_name, *local_name;
5300 size_t targ_essent_len;
5301 int n, i;
5302
5303 local_t = btf__type_by_id(local_cand->btf, local_cand->id);
5304 local_name = btf__str_by_offset(local_cand->btf, local_t->name_off);
5305
5306 n = btf__type_cnt(targ_btf);
5307 for (i = targ_start_id; i < n; i++) {
5308 t = btf__type_by_id(targ_btf, i);
5309 if (!btf_kind_core_compat(t, local_t))
5310 continue;
5311
5312 targ_name = btf__name_by_offset(targ_btf, t->name_off);
5313 if (str_is_empty(targ_name))
5314 continue;
5315
5316 targ_essent_len = bpf_core_essential_name_len(targ_name);
5317 if (targ_essent_len != local_essent_len)
5318 continue;
5319
5320 if (strncmp(local_name, targ_name, local_essent_len) != 0)
5321 continue;
5322
5323 pr_debug("CO-RE relocating [%d] %s %s: found target candidate [%d] %s %s in [%s]\n",
5324 local_cand->id, btf_kind_str(local_t),
5325 local_name, i, btf_kind_str(t), targ_name,
5326 targ_btf_name);
5327 new_cands = libbpf_reallocarray(cands->cands, cands->len + 1,
5328 sizeof(*cands->cands));
5329 if (!new_cands)
5330 return -ENOMEM;
5331
5332 cand = &new_cands[cands->len];
5333 cand->btf = targ_btf;
5334 cand->id = i;
5335
5336 cands->cands = new_cands;
5337 cands->len++;
5338 }
5339 return 0;
5340 }
5341
5342 static int load_module_btfs(struct bpf_object *obj)
5343 {
5344 struct bpf_btf_info info;
5345 struct module_btf *mod_btf;
5346 struct btf *btf;
5347 char name[64];
5348 __u32 id = 0, len;
5349 int err, fd;
5350
5351 if (obj->btf_modules_loaded)
5352 return 0;
5353
5354 if (obj->gen_loader)
5355 return 0;
5356
5357 /* don't do this again, even if we find no module BTFs */
5358 obj->btf_modules_loaded = true;
5359
5360 /* kernel too old to support module BTFs */
5361 if (!kernel_supports(obj, FEAT_MODULE_BTF))
5362 return 0;
5363
5364 while (true) {
5365 err = bpf_btf_get_next_id(id, &id);
5366 if (err && errno == ENOENT)
5367 return 0;
5368 if (err && errno == EPERM) {
5369 pr_debug("skipping module BTFs loading, missing privileges\n");
5370 return 0;
5371 }
5372 if (err) {
5373 err = -errno;
5374 pr_warn("failed to iterate BTF objects: %d\n", err);
5375 return err;
5376 }
5377
5378 fd = bpf_btf_get_fd_by_id(id);
5379 if (fd < 0) {
5380 if (errno == ENOENT)
5381 continue; /* expected race: BTF was unloaded */
5382 err = -errno;
5383 pr_warn("failed to get BTF object #%d FD: %d\n", id, err);
5384 return err;
5385 }
5386
5387 len = sizeof(info);
5388 memset(&info, 0, sizeof(info));
5389 info.name = ptr_to_u64(name);
5390 info.name_len = sizeof(name);
5391
5392 err = bpf_btf_get_info_by_fd(fd, &info, &len);
5393 if (err) {
5394 err = -errno;
5395 pr_warn("failed to get BTF object #%d info: %d\n", id, err);
5396 goto err_out;
5397 }
5398
5399 /* ignore non-module BTFs */
5400 if (!info.kernel_btf || strcmp(name, "vmlinux") == 0) {
5401 close(fd);
5402 continue;
5403 }
5404
5405 btf = btf_get_from_fd(fd, obj->btf_vmlinux);
5406 err = libbpf_get_error(btf);
5407 if (err) {
5408 pr_warn("failed to load module [%s]'s BTF object #%d: %d\n",
5409 name, id, err);
5410 goto err_out;
5411 }
5412
5413 err = libbpf_ensure_mem((void **)&obj->btf_modules, &obj->btf_module_cap,
5414 sizeof(*obj->btf_modules), obj->btf_module_cnt + 1);
5415 if (err)
5416 goto err_out;
5417
5418 mod_btf = &obj->btf_modules[obj->btf_module_cnt++];
5419
5420 mod_btf->btf = btf;
5421 mod_btf->id = id;
5422 mod_btf->fd = fd;
5423 mod_btf->name = strdup(name);
5424 if (!mod_btf->name) {
5425 err = -ENOMEM;
5426 goto err_out;
5427 }
5428 continue;
5429
5430 err_out:
5431 close(fd);
5432 return err;
5433 }
5434
5435 return 0;
5436 }
5437
5438 static struct bpf_core_cand_list *
5439 bpf_core_find_cands(struct bpf_object *obj, const struct btf *local_btf, __u32 local_type_id)
5440 {
5441 struct bpf_core_cand local_cand = {};
5442 struct bpf_core_cand_list *cands;
5443 const struct btf *main_btf;
5444 const struct btf_type *local_t;
5445 const char *local_name;
5446 size_t local_essent_len;
5447 int err, i;
5448
5449 local_cand.btf = local_btf;
5450 local_cand.id = local_type_id;
5451 local_t = btf__type_by_id(local_btf, local_type_id);
5452 if (!local_t)
5453 return ERR_PTR(-EINVAL);
5454
5455 local_name = btf__name_by_offset(local_btf, local_t->name_off);
5456 if (str_is_empty(local_name))
5457 return ERR_PTR(-EINVAL);
5458 local_essent_len = bpf_core_essential_name_len(local_name);
5459
5460 cands = calloc(1, sizeof(*cands));
5461 if (!cands)
5462 return ERR_PTR(-ENOMEM);
5463
5464 /* Attempt to find target candidates in vmlinux BTF first */
5465 main_btf = obj->btf_vmlinux_override ?: obj->btf_vmlinux;
5466 err = bpf_core_add_cands(&local_cand, local_essent_len, main_btf, "vmlinux", 1, cands);
5467 if (err)
5468 goto err_out;
5469
5470 /* if vmlinux BTF has any candidate, don't go for module BTFs */
5471 if (cands->len)
5472 return cands;
5473
5474 /* if vmlinux BTF was overridden, don't attempt to load module BTFs */
5475 if (obj->btf_vmlinux_override)
5476 return cands;
5477
5478 /* now look through module BTFs, trying to still find candidates */
5479 err = load_module_btfs(obj);
5480 if (err)
5481 goto err_out;
5482
5483 for (i = 0; i < obj->btf_module_cnt; i++) {
5484 err = bpf_core_add_cands(&local_cand, local_essent_len,
5485 obj->btf_modules[i].btf,
5486 obj->btf_modules[i].name,
5487 btf__type_cnt(obj->btf_vmlinux),
5488 cands);
5489 if (err)
5490 goto err_out;
5491 }
5492
5493 return cands;
5494 err_out:
5495 bpf_core_free_cands(cands);
5496 return ERR_PTR(err);
5497 }
5498
5499 /* Check local and target types for compatibility. This check is used for
5500 * type-based CO-RE relocations and follows slightly different rules than
5501 * field-based relocations. This function assumes that root types were already
5502 * checked for name match. Beyond that initial root-level name check, names
5503 * are completely ignored. Compatibility rules are as follows:
5504 * - any two STRUCTs/UNIONs/FWDs/ENUMs/INTs are considered compatible, but
5505 * kind should match for local and target types (i.e., STRUCT is not
5506 * compatible with UNION);
5507 * - for ENUMs, the size is ignored;
5508 * - for INT, size and signedness are ignored;
5509 * - for ARRAY, dimensionality is ignored, element types are checked for
5510 * compatibility recursively;
5511 * - CONST/VOLATILE/RESTRICT modifiers are ignored;
5512 * - TYPEDEFs/PTRs are compatible if the types they point to are compatible;
5513 * - FUNC_PROTOs are compatible if they have compatible signature: same
5514 * number of input args and compatible return and argument types.
5515 * These rules are not set in stone and probably will be adjusted as we get
5516 * more experience with using BPF CO-RE relocations.
5517 */
5518 int bpf_core_types_are_compat(const struct btf *local_btf, __u32 local_id,
5519 const struct btf *targ_btf, __u32 targ_id)
5520 {
5521 return __bpf_core_types_are_compat(local_btf, local_id, targ_btf, targ_id, 32);
5522 }
5523
5524 int bpf_core_types_match(const struct btf *local_btf, __u32 local_id,
5525 const struct btf *targ_btf, __u32 targ_id)
5526 {
5527 return __bpf_core_types_match(local_btf, local_id, targ_btf, targ_id, false, 32);
5528 }
5529
5530 static size_t bpf_core_hash_fn(const long key, void *ctx)
5531 {
5532 return key;
5533 }
5534
5535 static bool bpf_core_equal_fn(const long k1, const long k2, void *ctx)
5536 {
5537 return k1 == k2;
5538 }
5539
5540 static int record_relo_core(struct bpf_program *prog,
5541 const struct bpf_core_relo *core_relo, int insn_idx)
5542 {
5543 struct reloc_desc *relos, *relo;
5544
5545 relos = libbpf_reallocarray(prog->reloc_desc,
5546 prog->nr_reloc + 1, sizeof(*relos));
5547 if (!relos)
5548 return -ENOMEM;
5549 relo = &relos[prog->nr_reloc];
5550 relo->type = RELO_CORE;
5551 relo->insn_idx = insn_idx;
5552 relo->core_relo = core_relo;
5553 prog->reloc_desc = relos;
5554 prog->nr_reloc++;
5555 return 0;
5556 }
5557
5558 static const struct bpf_core_relo *find_relo_core(struct bpf_program *prog, int insn_idx)
5559 {
5560 struct reloc_desc *relo;
5561 int i;
5562
5563 for (i = 0; i < prog->nr_reloc; i++) {
5564 relo = &prog->reloc_desc[i];
5565 if (relo->type != RELO_CORE || relo->insn_idx != insn_idx)
5566 continue;
5567
5568 return relo->core_relo;
5569 }
5570
5571 return NULL;
5572 }
5573
5574 static int bpf_core_resolve_relo(struct bpf_program *prog,
5575 const struct bpf_core_relo *relo,
5576 int relo_idx,
5577 const struct btf *local_btf,
5578 struct hashmap *cand_cache,
5579 struct bpf_core_relo_res *targ_res)
5580 {
5581 struct bpf_core_spec specs_scratch[3] = {};
5582 struct bpf_core_cand_list *cands = NULL;
5583 const char *prog_name = prog->name;
5584 const struct btf_type *local_type;
5585 const char *local_name;
5586 __u32 local_id = relo->type_id;
5587 int err;
5588
5589 local_type = btf__type_by_id(local_btf, local_id);
5590 if (!local_type)
5591 return -EINVAL;
5592
5593 local_name = btf__name_by_offset(local_btf, local_type->name_off);
5594 if (!local_name)
5595 return -EINVAL;
5596
5597 if (relo->kind != BPF_CORE_TYPE_ID_LOCAL &&
5598 !hashmap__find(cand_cache, local_id, &cands)) {
5599 cands = bpf_core_find_cands(prog->obj, local_btf, local_id);
5600 if (IS_ERR(cands)) {
5601 pr_warn("prog '%s': relo #%d: target candidate search failed for [%d] %s %s: %ld\n",
5602 prog_name, relo_idx, local_id, btf_kind_str(local_type),
5603 local_name, PTR_ERR(cands));
5604 return PTR_ERR(cands);
5605 }
5606 err = hashmap__set(cand_cache, local_id, cands, NULL, NULL);
5607 if (err) {
5608 bpf_core_free_cands(cands);
5609 return err;
5610 }
5611 }
5612
5613 return bpf_core_calc_relo_insn(prog_name, relo, relo_idx, local_btf, cands, specs_scratch,
5614 targ_res);
5615 }
5616
5617 static int
5618 bpf_object__relocate_core(struct bpf_object *obj, const char *targ_btf_path)
5619 {
5620 const struct btf_ext_info_sec *sec;
5621 struct bpf_core_relo_res targ_res;
5622 const struct bpf_core_relo *rec;
5623 const struct btf_ext_info *seg;
5624 struct hashmap_entry *entry;
5625 struct hashmap *cand_cache = NULL;
5626 struct bpf_program *prog;
5627 struct bpf_insn *insn;
5628 const char *sec_name;
5629 int i, err = 0, insn_idx, sec_idx, sec_num;
5630
5631 if (obj->btf_ext->core_relo_info.len == 0)
5632 return 0;
5633
5634 if (targ_btf_path) {
5635 obj->btf_vmlinux_override = btf__parse(targ_btf_path, NULL);
5636 err = libbpf_get_error(obj->btf_vmlinux_override);
5637 if (err) {
5638 pr_warn("failed to parse target BTF: %d\n", err);
5639 return err;
5640 }
5641 }
5642
5643 cand_cache = hashmap__new(bpf_core_hash_fn, bpf_core_equal_fn, NULL);
5644 if (IS_ERR(cand_cache)) {
5645 err = PTR_ERR(cand_cache);
5646 goto out;
5647 }
5648
5649 seg = &obj->btf_ext->core_relo_info;
5650 sec_num = 0;
5651 for_each_btf_ext_sec(seg, sec) {
5652 sec_idx = seg->sec_idxs[sec_num];
5653 sec_num++;
5654
5655 sec_name = btf__name_by_offset(obj->btf, sec->sec_name_off);
5656 if (str_is_empty(sec_name)) {
5657 err = -EINVAL;
5658 goto out;
5659 }
5660
5661 pr_debug("sec '%s': found %d CO-RE relocations\n", sec_name, sec->num_info);
5662
5663 for_each_btf_ext_rec(seg, sec, i, rec) {
5664 if (rec->insn_off % BPF_INSN_SZ)
5665 return -EINVAL;
5666 insn_idx = rec->insn_off / BPF_INSN_SZ;
5667 prog = find_prog_by_sec_insn(obj, sec_idx, insn_idx);
5668 if (!prog) {
5669 /* When __weak subprog is "overridden" by another instance
5670 * of the subprog from a different object file, linker still
5671 * appends all the .BTF.ext info that used to belong to that
5672 * eliminated subprogram.
5673 * This is similar to what x86-64 linker does for relocations.
5674 * So ignore such relocations, just like we ignore
5675 * subprog instructions when discovering subprograms.
5676 */
5677 pr_debug("sec '%s': skipping CO-RE relocation #%d for insn #%d belonging to eliminated weak subprogram\n",
5678 sec_name, i, insn_idx);
5679 continue;
5680 }
5681 /* no need to apply CO-RE relocation if the program is
5682 * not going to be loaded
5683 */
5684 if (!prog->autoload)
5685 continue;
5686
5687 /* adjust insn_idx from section frame of reference to the local
5688 * program's frame of reference; (sub-)program code is not yet
5689 * relocated, so it's enough to just subtract in-section offset
5690 */
5691 insn_idx = insn_idx - prog->sec_insn_off;
5692 if (insn_idx >= prog->insns_cnt)
5693 return -EINVAL;
5694 insn = &prog->insns[insn_idx];
5695
5696 err = record_relo_core(prog, rec, insn_idx);
5697 if (err) {
5698 pr_warn("prog '%s': relo #%d: failed to record relocation: %d\n",
5699 prog->name, i, err);
5700 goto out;
5701 }
5702
5703 if (prog->obj->gen_loader)
5704 continue;
5705
5706 err = bpf_core_resolve_relo(prog, rec, i, obj->btf, cand_cache, &targ_res);
5707 if (err) {
5708 pr_warn("prog '%s': relo #%d: failed to relocate: %d\n",
5709 prog->name, i, err);
5710 goto out;
5711 }
5712
5713 err = bpf_core_patch_insn(prog->name, insn, insn_idx, rec, i, &targ_res);
5714 if (err) {
5715 pr_warn("prog '%s': relo #%d: failed to patch insn #%u: %d\n",
5716 prog->name, i, insn_idx, err);
5717 goto out;
5718 }
5719 }
5720 }
5721
5722 out:
5723 /* obj->btf_vmlinux and module BTFs are freed after object load */
5724 btf__free(obj->btf_vmlinux_override);
5725 obj->btf_vmlinux_override = NULL;
5726
5727 if (!IS_ERR_OR_NULL(cand_cache)) {
5728 hashmap__for_each_entry(cand_cache, entry, i) {
5729 bpf_core_free_cands(entry->pvalue);
5730 }
5731 hashmap__free(cand_cache);
5732 }
5733 return err;
5734 }
5735
5736 /* base map load ldimm64 special constant, used also for log fixup logic */
5737 #define POISON_LDIMM64_MAP_BASE 2001000000
5738 #define POISON_LDIMM64_MAP_PFX "200100"
5739
5740 static void poison_map_ldimm64(struct bpf_program *prog, int relo_idx,
5741 int insn_idx, struct bpf_insn *insn,
5742 int map_idx, const struct bpf_map *map)
5743 {
5744 int i;
5745
5746 pr_debug("prog '%s': relo #%d: poisoning insn #%d that loads map #%d '%s'\n",
5747 prog->name, relo_idx, insn_idx, map_idx, map->name);
5748
5749 /* we turn single ldimm64 into two identical invalid calls */
5750 for (i = 0; i < 2; i++) {
5751 insn->code = BPF_JMP | BPF_CALL;
5752 insn->dst_reg = 0;
5753 insn->src_reg = 0;
5754 insn->off = 0;
5755 /* if this instruction is reachable (not dead code),
5756 * verifier will complain with something like:
5757 * invalid func unknown#2001000123
5758 * where the lower 123 is the map index into obj->maps[] array
5759 */
5760 insn->imm = POISON_LDIMM64_MAP_BASE + map_idx;
5761
5762 insn++;
5763 }
5764 }
5765
5766 /* unresolved kfunc call special constant, used also for log fixup logic */
5767 #define POISON_CALL_KFUNC_BASE 2002000000
5768 #define POISON_CALL_KFUNC_PFX "2002"
5769
5770 static void poison_kfunc_call(struct bpf_program *prog, int relo_idx,
5771 int insn_idx, struct bpf_insn *insn,
5772 int ext_idx, const struct extern_desc *ext)
5773 {
5774 pr_debug("prog '%s': relo #%d: poisoning insn #%d that calls kfunc '%s'\n",
5775 prog->name, relo_idx, insn_idx, ext->name);
5776
5777 /* we turn kfunc call into invalid helper call with identifiable constant */
5778 insn->code = BPF_JMP | BPF_CALL;
5779 insn->dst_reg = 0;
5780 insn->src_reg = 0;
5781 insn->off = 0;
5782 /* if this instruction is reachable (not dead code),
5783 * verifier will complain with something like:
5784 * invalid func unknown#2002000123
5785 * where the lower 123 is the extern index into obj->externs[] array
5786 */
5787 insn->imm = POISON_CALL_KFUNC_BASE + ext_idx;
5788 }
5789
5790 /* Relocate data references within program code:
5791 * - map references;
5792 * - global variable references;
5793 * - extern references.
5794 */
5795 static int
5796 bpf_object__relocate_data(struct bpf_object *obj, struct bpf_program *prog)
5797 {
5798 int i;
5799
5800 for (i = 0; i < prog->nr_reloc; i++) {
5801 struct reloc_desc *relo = &prog->reloc_desc[i];
5802 struct bpf_insn *insn = &prog->insns[relo->insn_idx];
5803 const struct bpf_map *map;
5804 struct extern_desc *ext;
5805
5806 switch (relo->type) {
5807 case RELO_LD64:
5808 map = &obj->maps[relo->map_idx];
5809 if (obj->gen_loader) {
5810 insn[0].src_reg = BPF_PSEUDO_MAP_IDX;
5811 insn[0].imm = relo->map_idx;
5812 } else if (map->autocreate) {
5813 insn[0].src_reg = BPF_PSEUDO_MAP_FD;
5814 insn[0].imm = map->fd;
5815 } else {
5816 poison_map_ldimm64(prog, i, relo->insn_idx, insn,
5817 relo->map_idx, map);
5818 }
5819 break;
5820 case RELO_DATA:
5821 map = &obj->maps[relo->map_idx];
5822 insn[1].imm = insn[0].imm + relo->sym_off;
5823 if (obj->gen_loader) {
5824 insn[0].src_reg = BPF_PSEUDO_MAP_IDX_VALUE;
5825 insn[0].imm = relo->map_idx;
5826 } else if (map->autocreate) {
5827 insn[0].src_reg = BPF_PSEUDO_MAP_VALUE;
5828 insn[0].imm = map->fd;
5829 } else {
5830 poison_map_ldimm64(prog, i, relo->insn_idx, insn,
5831 relo->map_idx, map);
5832 }
5833 break;
5834 case RELO_EXTERN_LD64:
5835 ext = &obj->externs[relo->ext_idx];
5836 if (ext->type == EXT_KCFG) {
5837 if (obj->gen_loader) {
5838 insn[0].src_reg = BPF_PSEUDO_MAP_IDX_VALUE;
5839 insn[0].imm = obj->kconfig_map_idx;
5840 } else {
5841 insn[0].src_reg = BPF_PSEUDO_MAP_VALUE;
5842 insn[0].imm = obj->maps[obj->kconfig_map_idx].fd;
5843 }
5844 insn[1].imm = ext->kcfg.data_off;
5845 } else /* EXT_KSYM */ {
5846 if (ext->ksym.type_id && ext->is_set) { /* typed ksyms */
5847 insn[0].src_reg = BPF_PSEUDO_BTF_ID;
5848 insn[0].imm = ext->ksym.kernel_btf_id;
5849 insn[1].imm = ext->ksym.kernel_btf_obj_fd;
5850 } else { /* typeless ksyms or unresolved typed ksyms */
5851 insn[0].imm = (__u32)ext->ksym.addr;
5852 insn[1].imm = ext->ksym.addr >> 32;
5853 }
5854 }
5855 break;
5856 case RELO_EXTERN_CALL:
5857 ext = &obj->externs[relo->ext_idx];
5858 insn[0].src_reg = BPF_PSEUDO_KFUNC_CALL;
5859 if (ext->is_set) {
5860 insn[0].imm = ext->ksym.kernel_btf_id;
5861 insn[0].off = ext->ksym.btf_fd_idx;
5862 } else { /* unresolved weak kfunc call */
5863 poison_kfunc_call(prog, i, relo->insn_idx, insn,
5864 relo->ext_idx, ext);
5865 }
5866 break;
5867 case RELO_SUBPROG_ADDR:
5868 if (insn[0].src_reg != BPF_PSEUDO_FUNC) {
5869 pr_warn("prog '%s': relo #%d: bad insn\n",
5870 prog->name, i);
5871 return -EINVAL;
5872 }
5873 /* handled already */
5874 break;
5875 case RELO_CALL:
5876 /* handled already */
5877 break;
5878 case RELO_CORE:
5879 /* will be handled by bpf_program_record_relos() */
5880 break;
5881 default:
5882 pr_warn("prog '%s': relo #%d: bad relo type %d\n",
5883 prog->name, i, relo->type);
5884 return -EINVAL;
5885 }
5886 }
5887
5888 return 0;
5889 }
5890
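/* Copy the .BTF.ext func/line info records belonging to a given (sub-)program
 * into the main program's accumulated info, converting Clang's byte-based
 * instruction offsets into insn-based offsets relative to the main program.
 */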
5891 static int adjust_prog_btf_ext_info(const struct bpf_object *obj,
5892 const struct bpf_program *prog,
5893 const struct btf_ext_info *ext_info,
5894 void **prog_info, __u32 *prog_rec_cnt,
5895 __u32 *prog_rec_sz)
5896 {
5897 void *copy_start = NULL, *copy_end = NULL;
5898 void *rec, *rec_end, *new_prog_info;
5899 const struct btf_ext_info_sec *sec;
5900 size_t old_sz, new_sz;
5901 int i, sec_num, sec_idx, off_adj;
5902
5903 sec_num = 0;
5904 for_each_btf_ext_sec(ext_info, sec) {
5905 sec_idx = ext_info->sec_idxs[sec_num];
5906 sec_num++;
5907 if (prog->sec_idx != sec_idx)
5908 continue;
5909
5910 for_each_btf_ext_rec(ext_info, sec, i, rec) {
5911 __u32 insn_off = *(__u32 *)rec / BPF_INSN_SZ;
5912
5913 if (insn_off < prog->sec_insn_off)
5914 continue;
5915 if (insn_off >= prog->sec_insn_off + prog->sec_insn_cnt)
5916 break;
5917
5918 if (!copy_start)
5919 copy_start = rec;
5920 copy_end = rec + ext_info->rec_size;
5921 }
5922
5923 if (!copy_start)
5924 return -ENOENT;
5925
5926 /* append func/line info of a given (sub-)program to the main
5927 * program's func/line info
5928 */
5929 old_sz = (size_t)(*prog_rec_cnt) * ext_info->rec_size;
5930 new_sz = old_sz + (copy_end - copy_start);
5931 new_prog_info = realloc(*prog_info, new_sz);
5932 if (!new_prog_info)
5933 return -ENOMEM;
5934 *prog_info = new_prog_info;
5935 *prog_rec_cnt = new_sz / ext_info->rec_size;
5936 memcpy(new_prog_info + old_sz, copy_start, copy_end - copy_start);
5937
5938 /* Kernel instruction offsets are in units of 8-byte
5939 * instructions, while .BTF.ext instruction offsets generated
5940 * by Clang are in units of bytes. So convert Clang offsets
5941 * into kernel offsets and adjust offset according to program
5942 * relocated position.
5943 */
5944 off_adj = prog->sub_insn_off - prog->sec_insn_off;
5945 rec = new_prog_info + old_sz;
5946 rec_end = new_prog_info + new_sz;
5947 for (; rec < rec_end; rec += ext_info->rec_size) {
5948 __u32 *insn_off = rec;
5949
5950 *insn_off = *insn_off / BPF_INSN_SZ + off_adj;
5951 }
5952 *prog_rec_sz = ext_info->rec_size;
5953 return 0;
5954 }
5955
5956 return -ENOENT;
5957 }
5958
5959 static int
5960 reloc_prog_func_and_line_info(const struct bpf_object *obj,
5961 struct bpf_program *main_prog,
5962 const struct bpf_program *prog)
5963 {
5964 int err;
5965
5966 /* no .BTF.ext relocation if .BTF.ext is missing or kernel doesn't
5967 * support func/line info
5968 */
5969 if (!obj->btf_ext || !kernel_supports(obj, FEAT_BTF_FUNC))
5970 return 0;
5971
5972 /* only attempt func info relocation if main program's func_info
5973 * relocation was successful
5974 */
5975 if (main_prog != prog && !main_prog->func_info)
5976 goto line_info;
5977
5978 err = adjust_prog_btf_ext_info(obj, prog, &obj->btf_ext->func_info,
5979 &main_prog->func_info,
5980 &main_prog->func_info_cnt,
5981 &main_prog->func_info_rec_size);
5982 if (err) {
5983 if (err != -ENOENT) {
5984 pr_warn("prog '%s': error relocating .BTF.ext function info: %d\n",
5985 prog->name, err);
5986 return err;
5987 }
5988 if (main_prog->func_info) {
5989 /*
5990 * Some info has already been found, but the last btf_ext
5991 * reloc had a problem. We must error out.
5992 */
5993 pr_warn("prog '%s': missing .BTF.ext function info.\n", prog->name);
5994 return err;
5995 }
5996 /* There was a problem loading the very first info. Ignore the rest. */
5997 pr_warn("prog '%s': missing .BTF.ext function info for the main program, skipping all of .BTF.ext func info.\n",
5998 prog->name);
5999 }
6000
6001 line_info:
6002 /* don't relocate line info if main program's relocation failed */
6003 if (main_prog != prog && !main_prog->line_info)
6004 return 0;
6005
6006 err = adjust_prog_btf_ext_info(obj, prog, &obj->btf_ext->line_info,
6007 &main_prog->line_info,
6008 &main_prog->line_info_cnt,
6009 &main_prog->line_info_rec_size);
6010 if (err) {
6011 if (err != -ENOENT) {
6012 pr_warn("prog '%s': error relocating .BTF.ext line info: %d\n",
6013 prog->name, err);
6014 return err;
6015 }
6016 if (main_prog->line_info) {
6017 /*
6018 * Some info has already been found, but the last btf_ext
6019 * reloc had a problem. We must error out.
6020 */
6021 pr_warn("prog '%s': missing .BTF.ext line info.\n", prog->name);
6022 return err;
6023 }
6024 /* There was a problem loading the very first info. Ignore the rest. */
6025 pr_warn("prog '%s': missing .BTF.ext line info for the main program, skipping all of .BTF.ext line info.\n",
6026 prog->name);
6027 }
6028 return 0;
6029 }
6030
6031 static int cmp_relo_by_insn_idx(const void *key, const void *elem)
6032 {
6033 size_t insn_idx = *(const size_t *)key;
6034 const struct reloc_desc *relo = elem;
6035
6036 if (insn_idx == relo->insn_idx)
6037 return 0;
6038 return insn_idx < relo->insn_idx ? -1 : 1;
6039 }
6040
6041 static struct reloc_desc *find_prog_insn_relo(const struct bpf_program *prog, size_t insn_idx)
6042 {
6043 if (!prog->nr_reloc)
6044 return NULL;
6045 return bsearch(&insn_idx, prog->reloc_desc, prog->nr_reloc,
6046 sizeof(*prog->reloc_desc), cmp_relo_by_insn_idx);
6047 }
6048
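/* Append subprog's relocation descriptors to the main program's list,
 * shifting their instruction indices by the offset at which the subprog's
 * code was appended.
 */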
6049 static int append_subprog_relos(struct bpf_program *main_prog, struct bpf_program *subprog)
6050 {
6051 int new_cnt = main_prog->nr_reloc + subprog->nr_reloc;
6052 struct reloc_desc *relos;
6053 int i;
6054
6055 if (main_prog == subprog)
6056 return 0;
6057 relos = libbpf_reallocarray(main_prog->reloc_desc, new_cnt, sizeof(*relos));
6058 /* if new count is zero, reallocarray can return a valid NULL result;
6059 * in this case the previous pointer will be freed, so we *have to*
6060 * reassign old pointer to the new value (even if it's NULL)
6061 */
6062 if (!relos && new_cnt)
6063 return -ENOMEM;
6064 if (subprog->nr_reloc)
6065 memcpy(relos + main_prog->nr_reloc, subprog->reloc_desc,
6066 sizeof(*relos) * subprog->nr_reloc);
6067
6068 for (i = main_prog->nr_reloc; i < new_cnt; i++)
6069 relos[i].insn_idx += subprog->sub_insn_off;
6070 /* After insn_idx adjustment the 'relos' array is still sorted
6071 * by insn_idx and doesn't break bsearch.
6072 */
6073 main_prog->reloc_desc = relos;
6074 main_prog->nr_reloc = new_cnt;
6075 return 0;
6076 }
6077
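/* Append subprog's instructions at the end of the main program's instruction
 * array, record the resulting offset in subprog->sub_insn_off, and carry over
 * the subprog's relocations.
 */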
6078 static int
6079 bpf_object__append_subprog_code(struct bpf_object *obj, struct bpf_program *main_prog,
6080 struct bpf_program *subprog)
6081 {
6082 struct bpf_insn *insns;
6083 size_t new_cnt;
6084 int err;
6085
6086 subprog->sub_insn_off = main_prog->insns_cnt;
6087
6088 new_cnt = main_prog->insns_cnt + subprog->insns_cnt;
6089 insns = libbpf_reallocarray(main_prog->insns, new_cnt, sizeof(*insns));
6090 if (!insns) {
6091 pr_warn("prog '%s': failed to realloc prog code\n", main_prog->name);
6092 return -ENOMEM;
6093 }
6094 main_prog->insns = insns;
6095 main_prog->insns_cnt = new_cnt;
6096
6097 memcpy(main_prog->insns + subprog->sub_insn_off, subprog->insns,
6098 subprog->insns_cnt * sizeof(*insns));
6099
6100 pr_debug("prog '%s': added %zu insns from sub-prog '%s'\n",
6101 main_prog->name, subprog->insns_cnt, subprog->name);
6102
6103 /* The subprog insns are now appended. Append its relos too. */
6104 err = append_subprog_relos(main_prog, subprog);
6105 if (err)
6106 return err;
6107 return 0;
6108 }
6109
6110 static int
6111 bpf_object__reloc_code(struct bpf_object *obj, struct bpf_program *main_prog,
6112 struct bpf_program *prog)
6113 {
6114 size_t sub_insn_idx, insn_idx;
6115 struct bpf_program *subprog;
6116 struct reloc_desc *relo;
6117 struct bpf_insn *insn;
6118 int err;
6119
6120 err = reloc_prog_func_and_line_info(obj, main_prog, prog);
6121 if (err)
6122 return err;
6123
6124 for (insn_idx = 0; insn_idx < prog->sec_insn_cnt; insn_idx++) {
6125 insn = &main_prog->insns[prog->sub_insn_off + insn_idx];
6126 if (!insn_is_subprog_call(insn) && !insn_is_pseudo_func(insn))
6127 continue;
6128
6129 relo = find_prog_insn_relo(prog, insn_idx);
6130 if (relo && relo->type == RELO_EXTERN_CALL)
6131 /* kfunc relocations will be handled later
6132 * in bpf_object__relocate_data()
6133 */
6134 continue;
6135 if (relo && relo->type != RELO_CALL && relo->type != RELO_SUBPROG_ADDR) {
6136 pr_warn("prog '%s': unexpected relo for insn #%zu, type %d\n",
6137 prog->name, insn_idx, relo->type);
6138 return -LIBBPF_ERRNO__RELOC;
6139 }
6140 if (relo) {
6141 /* sub-program instruction index is a combination of
6142 * an offset of a symbol pointed to by relocation and
6143 * call instruction's imm field; for global functions,
6144 * call always has imm = -1, but for static functions
6145 * relocation is against STT_SECTION and insn->imm
6146 * points to a start of a static function
6147 *
6148 * for subprog addr relocation, the relo->sym_off + insn->imm is
6149 * the byte offset in the corresponding section.
6150 */
6151 if (relo->type == RELO_CALL)
6152 sub_insn_idx = relo->sym_off / BPF_INSN_SZ + insn->imm + 1;
6153 else
6154 sub_insn_idx = (relo->sym_off + insn->imm) / BPF_INSN_SZ;
6155 } else if (insn_is_pseudo_func(insn)) {
6156 /*
6157 * RELO_SUBPROG_ADDR relo is always emitted even if both
6158 * functions are in the same section, so it shouldn't reach here.
6159 */
6160 pr_warn("prog '%s': missing subprog addr relo for insn #%zu\n",
6161 prog->name, insn_idx);
6162 return -LIBBPF_ERRNO__RELOC;
6163 } else {
6164 /* if subprogram call is to a static function within
6165 * the same ELF section, there won't be any relocation
6166 * emitted, but it also means there is no additional
6167 * offset necessary, insns->imm is relative to
6168 * instruction's original position within the section
6169 */
6170 sub_insn_idx = prog->sec_insn_off + insn_idx + insn->imm + 1;
6171 }
6172
6173 /* we enforce that sub-programs should be in .text section */
6174 subprog = find_prog_by_sec_insn(obj, obj->efile.text_shndx, sub_insn_idx);
6175 if (!subprog) {
6176 pr_warn("prog '%s': no .text section found yet sub-program call exists\n",
6177 prog->name);
6178 return -LIBBPF_ERRNO__RELOC;
6179 }
6180
6181 /* if it's the first call instruction calling into this
6182 * subprogram (meaning this subprog hasn't been processed
6183 * yet) within the context of current main program:
6184 * - append it at the end of main program's instruction block;
6185 * - process it recursively, while current program is put on hold;
6186 * - if that subprogram calls some other not-yet-processed
6187 * subprogram, the same thing will happen recursively until
6188 * there are no more unprocessed subprograms left to append
6189 * and relocate.
6190 */
6191 if (subprog->sub_insn_off == 0) {
6192 err = bpf_object__append_subprog_code(obj, main_prog, subprog);
6193 if (err)
6194 return err;
6195 err = bpf_object__reloc_code(obj, main_prog, subprog);
6196 if (err)
6197 return err;
6198 }
6199
6200 /* main_prog->insns memory could have been re-allocated, so
6201 * calculate pointer again
6202 */
6203 insn = &main_prog->insns[prog->sub_insn_off + insn_idx];
6204 /* calculate correct instruction position within current main
6205 * prog; each main prog can have a different set of
6206 * subprograms appended (potentially in different order as
6207 * well), so position of any subprog can be different for
6208 * different main programs
6209 */
6210 insn->imm = subprog->sub_insn_off - (prog->sub_insn_off + insn_idx) - 1;
6211
6212 pr_debug("prog '%s': insn #%zu relocated, imm %d points to subprog '%s' (now at %zu offset)\n",
6213 prog->name, insn_idx, insn->imm, subprog->name, subprog->sub_insn_off);
6214 }
6215
6216 return 0;
6217 }
6218
6219 /*
6220 * Relocate sub-program calls.
6221 *
6222 * Algorithm operates as follows. Each entry-point BPF program (referred to as
6223 * main prog) is processed separately. Each subprog (a non-entry function
6224 * that can be called from either entry progs or other subprogs) gets its
6225 * sub_insn_off reset to zero. This serves as an indicator that this subprogram
6226 * hasn't yet been appended and relocated within the current main prog. Once it's
6227 * relocated, sub_insn_off will point at the position within the current main prog
6228 * where the given subprog was appended. This will further be used to relocate all
6229 * the call instructions jumping into this subprog.
6230 *
6231 * We start with main program and process all call instructions. If the call
6232 * is into a subprog that hasn't been processed (i.e., subprog->sub_insn_off
6233 * is zero), subprog instructions are appended at the end of main program's
6234 * instruction array. Then main program is "put on hold" while we recursively
6235 * process newly appended subprogram. If that subprogram calls into another
6236 * subprogram that hasn't been appended, new subprogram is appended again to
6237 * the *main* prog's instructions (subprog's instructions are always left
6238 * untouched, as they need to be in unmodified state for subsequent main progs
6239 * and subprog instructions are always sent only as part of a main prog) and
6240 * the process continues recursively. Once all the subprogs called from a main
6241 * prog or any of its subprogs are appended (and relocated), all their
6242 * positions within finalized instructions array are known, so it's easy to
6243 * rewrite call instructions with correct relative offsets, corresponding to
6244 * desired target subprog.
6245 *
6246 * It's important to realize that some subprogs might not be called from a given
6247 * main prog or any of its called/used subprogs. Those will keep their
6248 * subprog->sub_insn_off as zero at all times, won't be appended to the current
6249 * main prog, and won't be relocated within the context of the current main prog.
6250 * They might still be used from other main progs later.
6251 *
6252 * Visually this process can be shown as below. Suppose we have two main
6253 * programs mainA and mainB and BPF object contains three subprogs: subA,
6254 * subB, and subC. mainA calls only subA, mainB calls only subC, but subA and
6255 * subC both call subB:
6256 *
6257 * +--------+ +-------+
6258 * | v v |
6259 * +--+---+ +--+-+-+ +---+--+
6260 * | subA | | subB | | subC |
6261 * +--+---+ +------+ +---+--+
6262 * ^ ^
6263 * | |
6264 * +---+-------+ +------+----+
6265 * | mainA | | mainB |
6266 * +-----------+ +-----------+
6267 *
6268 * We'll start relocating mainA, find subA, append it, and start
6269 * processing subA recursively:
6270 *
6271 * +-----------+------+
6272 * | mainA | subA |
6273 * +-----------+------+
6274 *
6275 * At this point we notice that subB is used from subA, so we append it and
6276 * relocate (there are no further subcalls from subB):
6277 *
6278 * +-----------+------+------+
6279 * | mainA | subA | subB |
6280 * +-----------+------+------+
6281 *
6282 * At this point, we relocate subA's calls, then go one level up and finish with
6283 * relocating mainA's calls. mainA is done.
6284 *
6285 * For mainB, the process is similar but results in a different order. We start with
6286 * mainB and skip subA and subB, as mainB never calls them (at least
6287 * directly), but we see subC is needed, so we append and start processing it:
6288 *
6289 * +-----------+------+
6290 * | mainB | subC |
6291 * +-----------+------+
6292 * Now we see subC needs subB, so we go back to it, append and relocate it:
6293 *
6294 * +-----------+------+------+
6295 * | mainB | subC | subB |
6296 * +-----------+------+------+
6297 *
6298 * At this point we unwind recursion, relocate calls in subC, then in mainB.
6299 */
6300 static int
6301 bpf_object__relocate_calls(struct bpf_object *obj, struct bpf_program *prog)
6302 {
6303 struct bpf_program *subprog;
6304 int i, err;
6305
6306 /* mark all subprogs as not relocated (yet) within the context of
6307 * current main program
6308 */
6309 for (i = 0; i < obj->nr_programs; i++) {
6310 subprog = &obj->programs[i];
6311 if (!prog_is_subprog(obj, subprog))
6312 continue;
6313
6314 subprog->sub_insn_off = 0;
6315 }
6316
6317 err = bpf_object__reloc_code(obj, prog, prog);
6318 if (err)
6319 return err;
6320
6321 return 0;
6322 }
6323
6324 static void
6325 bpf_object__free_relocs(struct bpf_object *obj)
6326 {
6327 struct bpf_program *prog;
6328 int i;
6329
6330 /* free up relocation descriptors */
6331 for (i = 0; i < obj->nr_programs; i++) {
6332 prog = &obj->programs[i];
6333 zfree(&prog->reloc_desc);
6334 prog->nr_reloc = 0;
6335 }
6336 }
6337
6338 static int cmp_relocs(const void *_a, const void *_b)
6339 {
6340 const struct reloc_desc *a = _a;
6341 const struct reloc_desc *b = _b;
6342
6343 if (a->insn_idx != b->insn_idx)
6344 return a->insn_idx < b->insn_idx ? -1 : 1;
6345
6346 /* no two relocations should have the same insn_idx, but ... */
6347 if (a->type != b->type)
6348 return a->type < b->type ? -1 : 1;
6349
6350 return 0;
6351 }
6352
6353 static void bpf_object__sort_relos(struct bpf_object *obj)
6354 {
6355 int i;
6356
6357 for (i = 0; i < obj->nr_programs; i++) {
6358 struct bpf_program *p = &obj->programs[i];
6359
6360 if (!p->nr_reloc)
6361 continue;
6362
6363 qsort(p->reloc_desc, p->nr_reloc, sizeof(*p->reloc_desc), cmp_relocs);
6364 }
6365 }
6366
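/* Find the subprog designated as this program's exception callback via the
 * "exception_callback:<name>" BTF decl tag attached to the main program, and
 * remember its index so it can be appended during relocation.
 */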
6367 static int bpf_prog_assign_exc_cb(struct bpf_object *obj, struct bpf_program *prog)
6368 {
6369 const char *str = "exception_callback:";
6370 size_t pfx_len = strlen(str);
6371 int i, j, n;
6372
6373 if (!obj->btf || !kernel_supports(obj, FEAT_BTF_DECL_TAG))
6374 return 0;
6375
6376 n = btf__type_cnt(obj->btf);
6377 for (i = 1; i < n; i++) {
6378 const char *name;
6379 struct btf_type *t;
6380
6381 t = btf_type_by_id(obj->btf, i);
6382 if (!btf_is_decl_tag(t) || btf_decl_tag(t)->component_idx != -1)
6383 continue;
6384
6385 name = btf__str_by_offset(obj->btf, t->name_off);
6386 if (strncmp(name, str, pfx_len) != 0)
6387 continue;
6388
6389 t = btf_type_by_id(obj->btf, t->type);
6390 if (!btf_is_func(t) || btf_func_linkage(t) != BTF_FUNC_GLOBAL) {
6391 pr_warn("prog '%s': exception_callback:<value> decl tag not applied to the main program\n",
6392 prog->name);
6393 return -EINVAL;
6394 }
6395 if (strcmp(prog->name, btf__str_by_offset(obj->btf, t->name_off)) != 0)
6396 continue;
6397 /* If multiple callbacks are specified for the same prog,
6398 * the verifier will eventually return an error for this
6399 * case, so simply skip appending a subprog.
6400 */
6401 if (prog->exception_cb_idx >= 0) {
6402 prog->exception_cb_idx = -1;
6403 break;
6404 }
6405
6406 name += pfx_len;
6407 if (str_is_empty(name)) {
6408 pr_warn("prog '%s': exception_callback:<value> decl tag contains empty value\n",
6409 prog->name);
6410 return -EINVAL;
6411 }
6412
6413 for (j = 0; j < obj->nr_programs; j++) {
6414 struct bpf_program *subprog = &obj->programs[j];
6415
6416 if (!prog_is_subprog(obj, subprog))
6417 continue;
6418 if (strcmp(name, subprog->name) != 0)
6419 continue;
6420 /* Enforce that the callback is non-hidden: the
6421 * verifier expects a global function, whereas
6422 * mark_btf_static() fixes up its linkage as static.
6423 */
6424 if (!subprog->sym_global || subprog->mark_btf_static) {
6425 pr_warn("prog '%s': exception callback %s must be a global non-hidden function\n",
6426 prog->name, subprog->name);
6427 return -EINVAL;
6428 }
6429 /* Let's see if we already saw a static exception callback with the same name */
6430 if (prog->exception_cb_idx >= 0) {
6431 pr_warn("prog '%s': multiple subprogs with same name as exception callback '%s'\n",
6432 prog->name, subprog->name);
6433 return -EINVAL;
6434 }
6435 prog->exception_cb_idx = j;
6436 break;
6437 }
6438
6439 if (prog->exception_cb_idx >= 0)
6440 continue;
6441
6442 pr_warn("prog '%s': cannot find exception callback '%s'\n", prog->name, name);
6443 return -ENOENT;
6444 }
6445
6446 return 0;
6447 }
6448
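/* Mapping from BPF program type to the name of the kernel context struct the
 * verifier expects for it; used by the __arg_ctx fixup fallback below.
 */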
6449 static struct {
6450 enum bpf_prog_type prog_type;
6451 const char *ctx_name;
6452 } global_ctx_map[] = {
6453 { BPF_PROG_TYPE_CGROUP_DEVICE, "bpf_cgroup_dev_ctx" },
6454 { BPF_PROG_TYPE_CGROUP_SKB, "__sk_buff" },
6455 { BPF_PROG_TYPE_CGROUP_SOCK, "bpf_sock" },
6456 { BPF_PROG_TYPE_CGROUP_SOCK_ADDR, "bpf_sock_addr" },
6457 { BPF_PROG_TYPE_CGROUP_SOCKOPT, "bpf_sockopt" },
6458 { BPF_PROG_TYPE_CGROUP_SYSCTL, "bpf_sysctl" },
6459 { BPF_PROG_TYPE_FLOW_DISSECTOR, "__sk_buff" },
6460 { BPF_PROG_TYPE_KPROBE, "bpf_user_pt_regs_t" },
6461 { BPF_PROG_TYPE_LWT_IN, "__sk_buff" },
6462 { BPF_PROG_TYPE_LWT_OUT, "__sk_buff" },
6463 { BPF_PROG_TYPE_LWT_SEG6LOCAL, "__sk_buff" },
6464 { BPF_PROG_TYPE_LWT_XMIT, "__sk_buff" },
6465 { BPF_PROG_TYPE_NETFILTER, "bpf_nf_ctx" },
6466 { BPF_PROG_TYPE_PERF_EVENT, "bpf_perf_event_data" },
6467 { BPF_PROG_TYPE_RAW_TRACEPOINT, "bpf_raw_tracepoint_args" },
6468 { BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE, "bpf_raw_tracepoint_args" },
6469 { BPF_PROG_TYPE_SCHED_ACT, "__sk_buff" },
6470 { BPF_PROG_TYPE_SCHED_CLS, "__sk_buff" },
6471 { BPF_PROG_TYPE_SK_LOOKUP, "bpf_sk_lookup" },
6472 { BPF_PROG_TYPE_SK_MSG, "sk_msg_md" },
6473 { BPF_PROG_TYPE_SK_REUSEPORT, "sk_reuseport_md" },
6474 { BPF_PROG_TYPE_SK_SKB, "__sk_buff" },
6475 { BPF_PROG_TYPE_SOCK_OPS, "bpf_sock_ops" },
6476 { BPF_PROG_TYPE_SOCKET_FILTER, "__sk_buff" },
6477 { BPF_PROG_TYPE_XDP, "xdp_md" },
6478 /* all other program types don't have "named" context structs */
6479 };
6480
6481 /* forward declarations for arch-specific underlying types of bpf_user_pt_regs_t typedef,
6482 * for below __builtin_types_compatible_p() checks;
6483 * with this approach we don't need any extra arch-specific #ifdef guards
6484 */
6485 struct pt_regs;
6486 struct user_pt_regs;
6487 struct user_regs_struct;
6488
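/* Decide whether a global subprog's context argument BTF needs to be rewritten
 * into a pointer to the canonical context struct so that the kernel recognizes
 * it as PTR_TO_CTX.
 */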
6489 static bool need_func_arg_type_fixup(const struct btf *btf, const struct bpf_program *prog,
6490 const char *subprog_name, int arg_idx,
6491 int arg_type_id, const char *ctx_name)
6492 {
6493 const struct btf_type *t;
6494 const char *tname;
6495
6496 /* check if existing parameter already matches verifier expectations */
6497 t = skip_mods_and_typedefs(btf, arg_type_id, NULL);
6498 if (!btf_is_ptr(t))
6499 goto out_warn;
6500
6501 /* typedef bpf_user_pt_regs_t is a special PITA case, valid for kprobe
6502 * and perf_event programs, so check this case early on and forget
6503 * about it for subsequent checks
6504 */
6505 while (btf_is_mod(t))
6506 t = btf__type_by_id(btf, t->type);
6507 if (btf_is_typedef(t) &&
6508 (prog->type == BPF_PROG_TYPE_KPROBE || prog->type == BPF_PROG_TYPE_PERF_EVENT)) {
6509 tname = btf__str_by_offset(btf, t->name_off) ?: "<anon>";
6510 if (strcmp(tname, "bpf_user_pt_regs_t") == 0)
6511 return false; /* canonical type for kprobe/perf_event */
6512 }
6513
6514 /* now we can ignore typedefs moving forward */
6515 t = skip_mods_and_typedefs(btf, t->type, NULL);
6516
6517 /* if it's `void *`, definitely fix up BTF info */
6518 if (btf_is_void(t))
6519 return true;
6520
6521 /* if it's already proper canonical type, no need to fix up */
6522 tname = btf__str_by_offset(btf, t->name_off) ?: "<anon>";
6523 if (btf_is_struct(t) && strcmp(tname, ctx_name) == 0)
6524 return false;
6525
6526 /* special cases */
6527 switch (prog->type) {
6528 case BPF_PROG_TYPE_KPROBE:
6529 /* `struct pt_regs *` is expected, but we need to fix up */
6530 if (btf_is_struct(t) && strcmp(tname, "pt_regs") == 0)
6531 return true;
6532 break;
6533 case BPF_PROG_TYPE_PERF_EVENT:
6534 if (__builtin_types_compatible_p(bpf_user_pt_regs_t, struct pt_regs) &&
6535 btf_is_struct(t) && strcmp(tname, "pt_regs") == 0)
6536 return true;
6537 if (__builtin_types_compatible_p(bpf_user_pt_regs_t, struct user_pt_regs) &&
6538 btf_is_struct(t) && strcmp(tname, "user_pt_regs") == 0)
6539 return true;
6540 if (__builtin_types_compatible_p(bpf_user_pt_regs_t, struct user_regs_struct) &&
6541 btf_is_struct(t) && strcmp(tname, "user_regs_struct") == 0)
6542 return true;
6543 break;
6544 case BPF_PROG_TYPE_RAW_TRACEPOINT:
6545 case BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE:
6546 /* allow u64* as ctx */
6547 if (btf_is_int(t) && t->size == 8)
6548 return true;
6549 break;
6550 default:
6551 break;
6552 }
6553
6554 out_warn:
6555 pr_warn("prog '%s': subprog '%s' arg#%d is expected to be of `struct %s *` type\n",
6556 prog->name, subprog_name, arg_idx, ctx_name);
6557 return false;
6558 }
6559
6560 static int clone_func_btf_info(struct btf *btf, int orig_fn_id, struct bpf_program *prog)
6561 {
6562 int fn_id, fn_proto_id, ret_type_id, orig_proto_id;
6563 int i, err, arg_cnt, fn_name_off, linkage;
6564 struct btf_type *fn_t, *fn_proto_t, *t;
6565 struct btf_param *p;
6566
6567 /* caller already validated FUNC -> FUNC_PROTO validity */
6568 fn_t = btf_type_by_id(btf, orig_fn_id);
6569 fn_proto_t = btf_type_by_id(btf, fn_t->type);
6570
6571 /* Note that each btf__add_xxx() operation invalidates
6572 * all btf_type and string pointers, so we need to be
6573 * very careful when cloning BTF types. BTF type
6574 * pointers have to be always refetched. And to avoid
6575 * problems with invalidated string pointers, we
6576 * add empty strings initially, then just fix up
6577 * name_off offsets in place. Offsets are stable for
6578 * existing strings, so that works out.
6579 */
6580 fn_name_off = fn_t->name_off; /* we are about to invalidate fn_t */
6581 linkage = btf_func_linkage(fn_t);
6582 orig_proto_id = fn_t->type; /* original FUNC_PROTO ID */
6583 ret_type_id = fn_proto_t->type; /* fn_proto_t will be invalidated */
6584 arg_cnt = btf_vlen(fn_proto_t);
6585
6586 /* clone FUNC_PROTO and its params */
6587 fn_proto_id = btf__add_func_proto(btf, ret_type_id);
6588 if (fn_proto_id < 0)
6589 return -EINVAL;
6590
6591 for (i = 0; i < arg_cnt; i++) {
6592 int name_off;
6593
6594 /* copy original parameter data */
6595 t = btf_type_by_id(btf, orig_proto_id);
6596 p = &btf_params(t)[i];
6597 name_off = p->name_off;
6598
6599 err = btf__add_func_param(btf, "", p->type);
6600 if (err)
6601 return err;
6602
6603 fn_proto_t = btf_type_by_id(btf, fn_proto_id);
6604 p = &btf_params(fn_proto_t)[i];
6605 p->name_off = name_off; /* use remembered str offset */
6606 }
6607
6608 /* clone FUNC now, btf__add_func() enforces non-empty name, so use
6609 * entry program's name as a placeholder, which we replace immediately
6610 * with original name_off
6611 */
6612 fn_id = btf__add_func(btf, prog->name, linkage, fn_proto_id);
6613 if (fn_id < 0)
6614 return -EINVAL;
6615
6616 fn_t = btf_type_by_id(btf, fn_id);
6617 fn_t->name_off = fn_name_off; /* reuse original string */
6618
6619 return fn_id;
6620 }
6621
6622 /* Check if main program or global subprog's function prototype has `arg:ctx`
6623 * argument tags and, if necessary, substitute the correct type to match what the
6624 * BPF verifier would expect, taking into account the specific program type. This
6625 * allows supporting the __arg_ctx tag transparently on old kernels that don't yet
6626 * have native support for it in the verifier, making the user's life much
6627 * easier.
6628 */
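/* An illustrative sketch (assuming the __arg_ctx convenience macro, which
 * expands to the "arg:ctx" decl tag) of a global subprog relying on this
 * fallback:
 *
 *   __noinline int handle_ctx(void *ctx __arg_ctx) { ... }
 *
 * On kernels without native __arg_ctx support, the logic below rewrites the
 * `void *` argument's BTF into a pointer to the program type's canonical
 * context struct, so the verifier treats it as PTR_TO_CTX.
 */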
6629 static int bpf_program_fixup_func_info(struct bpf_object *obj, struct bpf_program *prog)
6630 {
6631 const char *ctx_name = NULL, *ctx_tag = "arg:ctx", *fn_name;
6632 struct bpf_func_info_min *func_rec;
6633 struct btf_type *fn_t, *fn_proto_t;
6634 struct btf *btf = obj->btf;
6635 const struct btf_type *t;
6636 struct btf_param *p;
6637 int ptr_id = 0, struct_id, tag_id, orig_fn_id;
6638 int i, n, arg_idx, arg_cnt, err, rec_idx;
6639 int *orig_ids;
6640
6641 /* no .BTF.ext, no problem */
6642 if (!obj->btf_ext || !prog->func_info)
6643 return 0;
6644
6645 /* don't do any fix ups if kernel natively supports __arg_ctx */
6646 if (kernel_supports(obj, FEAT_ARG_CTX_TAG))
6647 return 0;
6648
6649 /* some BPF program types just don't have named context structs, so
6650 * this fallback mechanism doesn't work for them
6651 */
6652 for (i = 0; i < ARRAY_SIZE(global_ctx_map); i++) {
6653 if (global_ctx_map[i].prog_type != prog->type)
6654 continue;
6655 ctx_name = global_ctx_map[i].ctx_name;
6656 break;
6657 }
6658 if (!ctx_name)
6659 return 0;
6660
6661 /* remember original func BTF IDs to detect if we already cloned them */
6662 orig_ids = calloc(prog->func_info_cnt, sizeof(*orig_ids));
6663 if (!orig_ids)
6664 return -ENOMEM;
6665 for (i = 0; i < prog->func_info_cnt; i++) {
6666 func_rec = prog->func_info + prog->func_info_rec_size * i;
6667 orig_ids[i] = func_rec->type_id;
6668 }
6669
6670 /* go through each DECL_TAG with "arg:ctx" and see if it points to one
6671 * of our subprogs; if yes and subprog is global and needs adjustment,
6672 * clone and adjust FUNC -> FUNC_PROTO combo
6673 */
6674 for (i = 1, n = btf__type_cnt(btf); i < n; i++) {
6675 /* only DECL_TAG with "arg:ctx" value are interesting */
6676 t = btf__type_by_id(btf, i);
6677 if (!btf_is_decl_tag(t))
6678 continue;
6679 if (strcmp(btf__str_by_offset(btf, t->name_off), ctx_tag) != 0)
6680 continue;
6681
6682 /* only global funcs need adjustment, if at all */
6683 orig_fn_id = t->type;
6684 fn_t = btf_type_by_id(btf, orig_fn_id);
6685 if (!btf_is_func(fn_t) || btf_func_linkage(fn_t) != BTF_FUNC_GLOBAL)
6686 continue;
6687
6688 /* sanity check FUNC -> FUNC_PROTO chain, just in case */
6689 fn_proto_t = btf_type_by_id(btf, fn_t->type);
6690 if (!fn_proto_t || !btf_is_func_proto(fn_proto_t))
6691 continue;
6692
6693 /* find corresponding func_info record */
6694 func_rec = NULL;
6695 for (rec_idx = 0; rec_idx < prog->func_info_cnt; rec_idx++) {
6696 if (orig_ids[rec_idx] == t->type) {
6697 func_rec = prog->func_info + prog->func_info_rec_size * rec_idx;
6698 break;
6699 }
6700 }
6701 /* current main program doesn't call into this subprog */
6702 if (!func_rec)
6703 continue;
6704
6705 /* some more sanity checking of DECL_TAG */
6706 arg_cnt = btf_vlen(fn_proto_t);
6707 arg_idx = btf_decl_tag(t)->component_idx;
6708 if (arg_idx < 0 || arg_idx >= arg_cnt)
6709 continue;
6710
6711 /* check if we should fix up argument type */
6712 p = &btf_params(fn_proto_t)[arg_idx];
6713 fn_name = btf__str_by_offset(btf, fn_t->name_off) ?: "<anon>";
6714 if (!need_func_arg_type_fixup(btf, prog, fn_name, arg_idx, p->type, ctx_name))
6715 continue;
6716
6717 /* clone fn/fn_proto, unless we already did it for another arg */
6718 if (func_rec->type_id == orig_fn_id) {
6719 int fn_id;
6720
6721 fn_id = clone_func_btf_info(btf, orig_fn_id, prog);
6722 if (fn_id < 0) {
6723 err = fn_id;
6724 goto err_out;
6725 }
6726
6727 /* point func_info record to a cloned FUNC type */
6728 func_rec->type_id = fn_id;
6729 }
6730
6731 /* create PTR -> STRUCT type chain to mark PTR_TO_CTX argument;
6732 * we do it just once per main BPF program, as all global
6733 * funcs share the same program type, so we need only one PTR ->
6734 * STRUCT type chain
6735 */
6736 if (ptr_id == 0) {
6737 struct_id = btf__add_struct(btf, ctx_name, 0);
6738 ptr_id = btf__add_ptr(btf, struct_id);
6739 if (ptr_id < 0 || struct_id < 0) {
6740 err = -EINVAL;
6741 goto err_out;
6742 }
6743 }
6744
6745 /* for completeness, clone DECL_TAG and point it to cloned param */
6746 tag_id = btf__add_decl_tag(btf, ctx_tag, func_rec->type_id, arg_idx);
6747 if (tag_id < 0) {
6748 err = -EINVAL;
6749 goto err_out;
6750 }
6751
6752 /* all the BTF manipulations invalidated pointers, refetch them */
6753 fn_t = btf_type_by_id(btf, func_rec->type_id);
6754 fn_proto_t = btf_type_by_id(btf, fn_t->type);
6755
6756 /* fix up type ID pointed to by param */
6757 p = &btf_params(fn_proto_t)[arg_idx];
6758 p->type = ptr_id;
6759 }
6760
6761 free(orig_ids);
6762 return 0;
6763 err_out:
6764 free(orig_ids);
6765 return err;
6766 }
6767
6768 static int bpf_object__relocate(struct bpf_object *obj, const char *targ_btf_path)
6769 {
6770 struct bpf_program *prog;
6771 size_t i, j;
6772 int err;
6773
6774 if (obj->btf_ext) {
6775 err = bpf_object__relocate_core(obj, targ_btf_path);
6776 if (err) {
6777 pr_warn("failed to perform CO-RE relocations: %d\n",
6778 err);
6779 return err;
6780 }
6781 bpf_object__sort_relos(obj);
6782 }
6783
6784 /* Before relocating calls, pre-process relocations and mark
6785 * the few ld_imm64 instructions that point to subprogs.
6786 * Otherwise bpf_object__reloc_code() would later have to consider
6787 * all ld_imm64 insns as relocation candidates. That would
6788 * slow down relocation, since the number of find_prog_insn_relo()
6789 * calls would increase and most of them would fail to find a relo.
6790 */
6791 for (i = 0; i < obj->nr_programs; i++) {
6792 prog = &obj->programs[i];
6793 for (j = 0; j < prog->nr_reloc; j++) {
6794 struct reloc_desc *relo = &prog->reloc_desc[j];
6795 struct bpf_insn *insn = &prog->insns[relo->insn_idx];
6796
6797 /* mark the insn, so it's recognized by insn_is_pseudo_func() */
6798 if (relo->type == RELO_SUBPROG_ADDR)
6799 insn[0].src_reg = BPF_PSEUDO_FUNC;
6800 }
6801 }
6802
6803 /* relocate subprogram calls and append used subprograms to main
6804 * programs; each copy of subprogram code needs to be relocated
6805 * differently for each main program, because its code location might
6806 * have changed.
6807 * Append subprog relos to main programs to allow data relos to be
6808 * processed after text is completely relocated.
6809 */
6810 for (i = 0; i < obj->nr_programs; i++) {
6811 prog = &obj->programs[i];
6812 /* sub-program's sub-calls are relocated within the context of
6813 * its main program only
6814 */
6815 if (prog_is_subprog(obj, prog))
6816 continue;
6817 if (!prog->autoload)
6818 continue;
6819
6820 err = bpf_object__relocate_calls(obj, prog);
6821 if (err) {
6822 pr_warn("prog '%s': failed to relocate calls: %d\n",
6823 prog->name, err);
6824 return err;
6825 }
6826
6827 err = bpf_prog_assign_exc_cb(obj, prog);
6828 if (err)
6829 return err;
6830 /* Now, also append exception callback if it has not been done already. */
6831 if (prog->exception_cb_idx >= 0) {
6832 struct bpf_program *subprog = &obj->programs[prog->exception_cb_idx];
6833
6834 /* Calling the exception callback directly is disallowed, and the
6835 * verifier will reject it later. If it was processed already,
6836 * we can skip this step; otherwise, for all other valid cases, we
6837 * have to append the exception callback now.
6838 */
6839 if (subprog->sub_insn_off == 0) {
6840 err = bpf_object__append_subprog_code(obj, prog, subprog);
6841 if (err)
6842 return err;
6843 err = bpf_object__reloc_code(obj, prog, subprog);
6844 if (err)
6845 return err;
6846 }
6847 }
6848 }
6849 for (i = 0; i < obj->nr_programs; i++) {
6850 prog = &obj->programs[i];
6851 if (prog_is_subprog(obj, prog))
6852 continue;
6853 if (!prog->autoload)
6854 continue;
6855
6856 /* Process data relos for main programs */
6857 err = bpf_object__relocate_data(obj, prog);
6858 if (err) {
6859 pr_warn("prog '%s': failed to relocate data references: %d\n",
6860 prog->name, err);
6861 return err;
6862 }
6863
6864 /* Fix up .BTF.ext information, if necessary */
6865 err = bpf_program_fixup_func_info(obj, prog);
6866 if (err) {
6867 pr_warn("prog '%s': failed to perform .BTF.ext fix ups: %d\n",
6868 prog->name, err);
6869 return err;
6870 }
6871 }
6872
6873 return 0;
6874 }
6875
6876 static int bpf_object__collect_st_ops_relos(struct bpf_object *obj,
6877 Elf64_Shdr *shdr, Elf_Data *data);
6878
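/* Collect relocations within the .maps section: each relocation initializes a
 * slot of a map-in-map's or prog-array's "values" array with a reference to
 * another BTF-defined map or an entry-point program, respectively.
 */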
6879 static int bpf_object__collect_map_relos(struct bpf_object *obj,
6880 Elf64_Shdr *shdr, Elf_Data *data)
6881 {
6882 const int bpf_ptr_sz = 8, host_ptr_sz = sizeof(void *);
6883 int i, j, nrels, new_sz;
6884 const struct btf_var_secinfo *vi = NULL;
6885 const struct btf_type *sec, *var, *def;
6886 struct bpf_map *map = NULL, *targ_map = NULL;
6887 struct bpf_program *targ_prog = NULL;
6888 bool is_prog_array, is_map_in_map;
6889 const struct btf_member *member;
6890 const char *name, *mname, *type;
6891 unsigned int moff;
6892 Elf64_Sym *sym;
6893 Elf64_Rel *rel;
6894 void *tmp;
6895
6896 if (!obj->efile.btf_maps_sec_btf_id || !obj->btf)
6897 return -EINVAL;
6898 sec = btf__type_by_id(obj->btf, obj->efile.btf_maps_sec_btf_id);
6899 if (!sec)
6900 return -EINVAL;
6901
6902 nrels = shdr->sh_size / shdr->sh_entsize;
6903 for (i = 0; i < nrels; i++) {
6904 rel = elf_rel_by_idx(data, i);
6905 if (!rel) {
6906 pr_warn(".maps relo #%d: failed to get ELF relo\n", i);
6907 return -LIBBPF_ERRNO__FORMAT;
6908 }
6909
6910 sym = elf_sym_by_idx(obj, ELF64_R_SYM(rel->r_info));
6911 if (!sym) {
6912 pr_warn(".maps relo #%d: symbol %zx not found\n",
6913 i, (size_t)ELF64_R_SYM(rel->r_info));
6914 return -LIBBPF_ERRNO__FORMAT;
6915 }
6916 name = elf_sym_str(obj, sym->st_name) ?: "<?>";
6917
6918 pr_debug(".maps relo #%d: for %zd value %zd rel->r_offset %zu name %d ('%s')\n",
6919 i, (ssize_t)(rel->r_info >> 32), (size_t)sym->st_value,
6920 (size_t)rel->r_offset, sym->st_name, name);
6921
6922 for (j = 0; j < obj->nr_maps; j++) {
6923 map = &obj->maps[j];
6924 if (map->sec_idx != obj->efile.btf_maps_shndx)
6925 continue;
6926
6927 vi = btf_var_secinfos(sec) + map->btf_var_idx;
6928 if (vi->offset <= rel->r_offset &&
6929 rel->r_offset + bpf_ptr_sz <= vi->offset + vi->size)
6930 break;
6931 }
6932 if (j == obj->nr_maps) {
6933 pr_warn(".maps relo #%d: cannot find map '%s' at rel->r_offset %zu\n",
6934 i, name, (size_t)rel->r_offset);
6935 return -EINVAL;
6936 }
6937
6938 is_map_in_map = bpf_map_type__is_map_in_map(map->def.type);
6939 is_prog_array = map->def.type == BPF_MAP_TYPE_PROG_ARRAY;
6940 type = is_map_in_map ? "map" : "prog";
6941 if (is_map_in_map) {
6942 if (sym->st_shndx != obj->efile.btf_maps_shndx) {
6943 pr_warn(".maps relo #%d: '%s' isn't a BTF-defined map\n",
6944 i, name);
6945 return -LIBBPF_ERRNO__RELOC;
6946 }
6947 if (map->def.type == BPF_MAP_TYPE_HASH_OF_MAPS &&
6948 map->def.key_size != sizeof(int)) {
6949 pr_warn(".maps relo #%d: hash-of-maps '%s' should have key size %zu.\n",
6950 i, map->name, sizeof(int));
6951 return -EINVAL;
6952 }
6953 targ_map = bpf_object__find_map_by_name(obj, name);
6954 if (!targ_map) {
6955 pr_warn(".maps relo #%d: '%s' isn't a valid map reference\n",
6956 i, name);
6957 return -ESRCH;
6958 }
6959 } else if (is_prog_array) {
6960 targ_prog = bpf_object__find_program_by_name(obj, name);
6961 if (!targ_prog) {
6962 pr_warn(".maps relo #%d: '%s' isn't a valid program reference\n",
6963 i, name);
6964 return -ESRCH;
6965 }
6966 if (targ_prog->sec_idx != sym->st_shndx ||
6967 targ_prog->sec_insn_off * 8 != sym->st_value ||
6968 prog_is_subprog(obj, targ_prog)) {
6969 pr_warn(".maps relo #%d: '%s' isn't an entry-point program\n",
6970 i, name);
6971 return -LIBBPF_ERRNO__RELOC;
6972 }
6973 } else {
6974 return -EINVAL;
6975 }
6976
6977 var = btf__type_by_id(obj->btf, vi->type);
6978 def = skip_mods_and_typedefs(obj->btf, var->type, NULL);
6979 if (btf_vlen(def) == 0)
6980 return -EINVAL;
6981 member = btf_members(def) + btf_vlen(def) - 1;
6982 mname = btf__name_by_offset(obj->btf, member->name_off);
6983 if (strcmp(mname, "values"))
6984 return -EINVAL;
6985
6986 moff = btf_member_bit_offset(def, btf_vlen(def) - 1) / 8;
6987 if (rel->r_offset - vi->offset < moff)
6988 return -EINVAL;
6989
6990 moff = rel->r_offset - vi->offset - moff;
6991 /* here we use the BPF pointer size, which is always 64-bit, as we
6992 * are parsing an ELF that was built for the BPF target
6993 */
6994 if (moff % bpf_ptr_sz)
6995 return -EINVAL;
6996 moff /= bpf_ptr_sz;
6997 if (moff >= map->init_slots_sz) {
6998 new_sz = moff + 1;
6999 tmp = libbpf_reallocarray(map->init_slots, new_sz, host_ptr_sz);
7000 if (!tmp)
7001 return -ENOMEM;
7002 map->init_slots = tmp;
7003 memset(map->init_slots + map->init_slots_sz, 0,
7004 (new_sz - map->init_slots_sz) * host_ptr_sz);
7005 map->init_slots_sz = new_sz;
7006 }
7007 map->init_slots[moff] = is_map_in_map ? (void *)targ_map : (void *)targ_prog;
7008
7009 pr_debug(".maps relo #%d: map '%s' slot [%d] points to %s '%s'\n",
7010 i, map->name, moff, type, name);
7011 }
7012
7013 return 0;
7014 }
7015
7016 static int bpf_object__collect_relos(struct bpf_object *obj)
7017 {
7018 int i, err;
7019
7020 for (i = 0; i < obj->efile.sec_cnt; i++) {
7021 struct elf_sec_desc *sec_desc = &obj->efile.secs[i];
7022 Elf64_Shdr *shdr;
7023 Elf_Data *data;
7024 int idx;
7025
7026 if (sec_desc->sec_type != SEC_RELO)
7027 continue;
7028
7029 shdr = sec_desc->shdr;
7030 data = sec_desc->data;
7031 idx = shdr->sh_info;
7032
7033 if (shdr->sh_type != SHT_REL || idx < 0 || idx >= obj->efile.sec_cnt) {
7034 pr_warn("internal error at %d\n", __LINE__);
7035 return -LIBBPF_ERRNO__INTERNAL;
7036 }
7037
7038 if (obj->efile.secs[idx].sec_type == SEC_ST_OPS)
7039 err = bpf_object__collect_st_ops_relos(obj, shdr, data);
7040 else if (idx == obj->efile.btf_maps_shndx)
7041 err = bpf_object__collect_map_relos(obj, shdr, data);
7042 else
7043 err = bpf_object__collect_prog_relos(obj, shdr, data);
7044 if (err)
7045 return err;
7046 }
7047
7048 bpf_object__sort_relos(obj);
7049 return 0;
7050 }
7051
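/* Check whether the instruction is a call to a BPF helper (src_reg == 0, as
 * opposed to a subprog or kfunc pseudo call) and, if so, return the helper ID.
 */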
7052 static bool insn_is_helper_call(struct bpf_insn *insn, enum bpf_func_id *func_id)
7053 {
7054 if (BPF_CLASS(insn->code) == BPF_JMP &&
7055 BPF_OP(insn->code) == BPF_CALL &&
7056 BPF_SRC(insn->code) == BPF_K &&
7057 insn->src_reg == 0 &&
7058 insn->dst_reg == 0) {
7059 *func_id = insn->imm;
7060 return true;
7061 }
7062 return false;
7063 }
7064
7065 static int bpf_object__sanitize_prog(struct bpf_object *obj, struct bpf_program *prog)
7066 {
7067 struct bpf_insn *insn = prog->insns;
7068 enum bpf_func_id func_id;
7069 int i;
7070
7071 if (obj->gen_loader)
7072 return 0;
7073
7074 for (i = 0; i < prog->insns_cnt; i++, insn++) {
7075 if (!insn_is_helper_call(insn, &func_id))
7076 continue;
7077
7078 /* on kernels that don't yet support
7079 * bpf_probe_read_{kernel,user}[_str] helpers, fall back
7080 * to bpf_probe_read() which works well for old kernels
7081 */
7082 switch (func_id) {
7083 case BPF_FUNC_probe_read_kernel:
7084 case BPF_FUNC_probe_read_user:
7085 if (!kernel_supports(obj, FEAT_PROBE_READ_KERN))
7086 insn->imm = BPF_FUNC_probe_read;
7087 break;
7088 case BPF_FUNC_probe_read_kernel_str:
7089 case BPF_FUNC_probe_read_user_str:
7090 if (!kernel_supports(obj, FEAT_PROBE_READ_KERN))
7091 insn->imm = BPF_FUNC_probe_read_str;
7092 break;
7093 default:
7094 break;
7095 }
7096 }
7097 return 0;
7098 }
7099
7100 static int libbpf_find_attach_btf_id(struct bpf_program *prog, const char *attach_name,
7101 int *btf_obj_fd, int *btf_type_id);
7102
7103 /* this is called as prog->sec_def->prog_prepare_load_fn for libbpf-supported sec_defs */
7104 static int libbpf_prepare_prog_load(struct bpf_program *prog,
7105 struct bpf_prog_load_opts *opts, long cookie)
7106 {
7107 enum sec_def_flags def = cookie;
7108
7109 /* old kernels might not support specifying expected_attach_type */
7110 if ((def & SEC_EXP_ATTACH_OPT) && !kernel_supports(prog->obj, FEAT_EXP_ATTACH_TYPE))
7111 opts->expected_attach_type = 0;
7112
7113 if (def & SEC_SLEEPABLE)
7114 opts->prog_flags |= BPF_F_SLEEPABLE;
7115
7116 if (prog->type == BPF_PROG_TYPE_XDP && (def & SEC_XDP_FRAGS))
7117 opts->prog_flags |= BPF_F_XDP_HAS_FRAGS;
7118
7119 /* special check for usdt to use uprobe_multi link */
7120 if ((def & SEC_USDT) && kernel_supports(prog->obj, FEAT_UPROBE_MULTI_LINK))
7121 prog->expected_attach_type = BPF_TRACE_UPROBE_MULTI;
7122
7123 if ((def & SEC_ATTACH_BTF) && !prog->attach_btf_id) {
7124 int btf_obj_fd = 0, btf_type_id = 0, err;
7125 const char *attach_name;
7126
7127 attach_name = strchr(prog->sec_name, '/');
7128 if (!attach_name) {
7129 /* if BPF program is annotated with just SEC("fentry")
7130 * (or similar) without declaratively specifying
7131 * target, then it is expected that target will be
7132 * specified with bpf_program__set_attach_target() at
7133 * runtime before BPF object load step. If not, then
7134 * there is nothing to load into the kernel as the BPF
7135 * verifier won't be able to validate BPF program
7136 * correctness anyway.
7137 */
7138 pr_warn("prog '%s': no BTF-based attach target is specified, use bpf_program__set_attach_target()\n",
7139 prog->name);
7140 return -EINVAL;
7141 }
7142 attach_name++; /* skip over / */
7143
7144 err = libbpf_find_attach_btf_id(prog, attach_name, &btf_obj_fd, &btf_type_id);
7145 if (err)
7146 return err;
7147
7148 /* cache resolved BTF FD and BTF type ID in the prog */
7149 prog->attach_btf_obj_fd = btf_obj_fd;
7150 prog->attach_btf_id = btf_type_id;
7151
7152 /* but by now libbpf common logic is not utilizing
7153 * prog->attach_btf_obj_fd/prog->attach_btf_id anymore because
7154 * this callback is called after opts were populated by
7155 * libbpf, so this callback has to update opts explicitly here
7156 */
7157 opts->attach_btf_obj_fd = btf_obj_fd;
7158 opts->attach_btf_id = btf_type_id;
7159 }
7160 return 0;
7161 }
7162
7163 static void fixup_verifier_log(struct bpf_program *prog, char *buf, size_t buf_sz);
7164
7165 static int bpf_object_load_prog(struct bpf_object *obj, struct bpf_program *prog,
7166 struct bpf_insn *insns, int insns_cnt,
7167 const char *license, __u32 kern_version, int *prog_fd)
7168 {
7169 LIBBPF_OPTS(bpf_prog_load_opts, load_attr);
7170 const char *prog_name = NULL;
7171 char *cp, errmsg[STRERR_BUFSIZE];
7172 size_t log_buf_size = 0;
7173 char *log_buf = NULL, *tmp;
7174 int btf_fd, ret, err;
7175 bool own_log_buf = true;
7176 __u32 log_level = prog->log_level;
7177
7178 if (prog->type == BPF_PROG_TYPE_UNSPEC) {
7179 /*
7180 * The program type must be set. Most likely we couldn't find a proper
7181 * section definition at load time, and thus we didn't infer the type.
7182 */
7183 pr_warn("prog '%s': missing BPF prog type, check ELF section name '%s'\n",
7184 prog->name, prog->sec_name);
7185 return -EINVAL;
7186 }
7187
7188 if (!insns || !insns_cnt)
7189 return -EINVAL;
7190
7191 if (kernel_supports(obj, FEAT_PROG_NAME))
7192 prog_name = prog->name;
7193 load_attr.attach_prog_fd = prog->attach_prog_fd;
7194 load_attr.attach_btf_obj_fd = prog->attach_btf_obj_fd;
7195 load_attr.attach_btf_id = prog->attach_btf_id;
7196 load_attr.kern_version = kern_version;
7197 load_attr.prog_ifindex = prog->prog_ifindex;
7198
7199 /* specify func_info/line_info only if kernel supports them */
7200 btf_fd = btf__fd(obj->btf);
7201 if (btf_fd >= 0 && kernel_supports(obj, FEAT_BTF_FUNC)) {
7202 load_attr.prog_btf_fd = btf_fd;
7203 load_attr.func_info = prog->func_info;
7204 load_attr.func_info_rec_size = prog->func_info_rec_size;
7205 load_attr.func_info_cnt = prog->func_info_cnt;
7206 load_attr.line_info = prog->line_info;
7207 load_attr.line_info_rec_size = prog->line_info_rec_size;
7208 load_attr.line_info_cnt = prog->line_info_cnt;
7209 }
7210 load_attr.log_level = log_level;
7211 load_attr.prog_flags = prog->prog_flags;
7212 load_attr.fd_array = obj->fd_array;
7213
7214 load_attr.token_fd = obj->token_fd;
7215 if (obj->token_fd)
7216 load_attr.prog_flags |= BPF_F_TOKEN_FD;
7217
7218 /* adjust load_attr if sec_def provides custom preload callback */
7219 if (prog->sec_def && prog->sec_def->prog_prepare_load_fn) {
7220 err = prog->sec_def->prog_prepare_load_fn(prog, &load_attr, prog->sec_def->cookie);
7221 if (err < 0) {
7222 pr_warn("prog '%s': failed to prepare load attributes: %d\n",
7223 prog->name, err);
7224 return err;
7225 }
7226 insns = prog->insns;
7227 insns_cnt = prog->insns_cnt;
7228 }
7229
7230 /* allow prog_prepare_load_fn to change expected_attach_type */
7231 load_attr.expected_attach_type = prog->expected_attach_type;
7232
7233 if (obj->gen_loader) {
7234 bpf_gen__prog_load(obj->gen_loader, prog->type, prog->name,
7235 license, insns, insns_cnt, &load_attr,
7236 prog - obj->programs);
7237 *prog_fd = -1;
7238 return 0;
7239 }
7240
7241 retry_load:
7242 /* if log_level is zero, we don't request logs initially even if
7243 * custom log_buf is specified; if the program load fails, then we'll
7244 * bump log_level to 1 and use either custom log_buf or we'll allocate
7245 * our own and retry the load to get details on what failed
7246 */
7247 if (log_level) {
7248 if (prog->log_buf) {
7249 log_buf = prog->log_buf;
7250 log_buf_size = prog->log_size;
7251 own_log_buf = false;
7252 } else if (obj->log_buf) {
7253 log_buf = obj->log_buf;
7254 log_buf_size = obj->log_size;
7255 own_log_buf = false;
7256 } else {
7257 log_buf_size = max((size_t)BPF_LOG_BUF_SIZE, log_buf_size * 2);
7258 tmp = realloc(log_buf, log_buf_size);
7259 if (!tmp) {
7260 ret = -ENOMEM;
7261 goto out;
7262 }
7263 log_buf = tmp;
7264 log_buf[0] = '\0';
7265 own_log_buf = true;
7266 }
7267 }
7268
7269 load_attr.log_buf = log_buf;
7270 load_attr.log_size = log_buf_size;
7271 load_attr.log_level = log_level;
7272
7273 ret = bpf_prog_load(prog->type, prog_name, license, insns, insns_cnt, &load_attr);
7274 if (ret >= 0) {
7275 if (log_level && own_log_buf) {
7276 pr_debug("prog '%s': -- BEGIN PROG LOAD LOG --\n%s-- END PROG LOAD LOG --\n",
7277 prog->name, log_buf);
7278 }
7279
7280 if (obj->has_rodata && kernel_supports(obj, FEAT_PROG_BIND_MAP)) {
7281 struct bpf_map *map;
7282 int i;
7283
7284 for (i = 0; i < obj->nr_maps; i++) {
7285 map = &prog->obj->maps[i];
7286 if (map->libbpf_type != LIBBPF_MAP_RODATA)
7287 continue;
7288
7289 if (bpf_prog_bind_map(ret, map->fd, NULL)) {
7290 cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg));
7291 pr_warn("prog '%s': failed to bind map '%s': %s\n",
7292 prog->name, map->real_name, cp);
7293 /* Don't fail hard if can't bind rodata. */
7294 }
7295 }
7296 }
7297
7298 *prog_fd = ret;
7299 ret = 0;
7300 goto out;
7301 }
7302
7303 if (log_level == 0) {
7304 log_level = 1;
7305 goto retry_load;
7306 }
7307 /* On ENOSPC, increase log buffer size and retry, unless custom
7308 * log_buf is specified.
7309 * Be careful to not overflow u32, though. Kernel's log buf size limit
7310 * isn't part of UAPI so it can always be bumped to full 4GB. So don't
7311 * multiply by 2 unless we are sure we'll fit within 32 bits.
7312 * Currently, we'll get -EINVAL when we reach (UINT_MAX >> 2).
7313 */
7314 if (own_log_buf && errno == ENOSPC && log_buf_size <= UINT_MAX / 2)
7315 goto retry_load;
7316
7317 ret = -errno;
7318
7319 /* post-process verifier log to improve error descriptions */
7320 fixup_verifier_log(prog, log_buf, log_buf_size);
7321
7322 cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg));
7323 pr_warn("prog '%s': BPF program load failed: %s\n", prog->name, cp);
7324 pr_perm_msg(ret);
7325
7326 if (own_log_buf && log_buf && log_buf[0] != '\0') {
7327 pr_warn("prog '%s': -- BEGIN PROG LOAD LOG --\n%s-- END PROG LOAD LOG --\n",
7328 prog->name, log_buf);
7329 }
7330
7331 out:
7332 if (own_log_buf)
7333 free(log_buf);
7334 return ret;
7335 }
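
/* Usage sketch (illustrative, not part of libbpf itself): instead of relying
 * on the allocate-and-retry path above, a caller can pre-set a per-program
 * log buffer and level before load; the object path and program name below
 * are hypothetical.
 *
 *	static char vlog[1024 * 1024];
 *	struct bpf_object *obj = bpf_object__open_file("prog.bpf.o", NULL);
 *	struct bpf_program *p = bpf_object__find_program_by_name(obj, "handler");
 *
 *	bpf_program__set_log_buf(p, vlog, sizeof(vlog));
 *	bpf_program__set_log_level(p, 1);
 *	if (bpf_object__load(obj))
 *		fprintf(stderr, "%s\n", vlog);
 */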
7336
7337 static char *find_prev_line(char *buf, char *cur)
7338 {
7339 char *p;
7340
7341 if (cur == buf) /* start of the log buf, no previous line */
7342 return NULL;
7343
7344 p = cur - 1;
7345 while (p - 1 >= buf && *(p - 1) != '\n')
7346 p--;
7347
7348 return p;
7349 }
7350
7351 static void patch_log(char *buf, size_t buf_sz, size_t log_sz,
7352 char *orig, size_t orig_sz, const char *patch)
7353 {
7354 /* size of the remaining log content to the right from the to-be-replaced part */
7355 size_t rem_sz = (buf + log_sz) - (orig + orig_sz);
7356 size_t patch_sz = strlen(patch);
7357
7358 if (patch_sz != orig_sz) {
7359 /* If patch line(s) are longer than original piece of verifier log,
7360 * shift log contents by (patch_sz - orig_sz) bytes to the right
7361 * starting from after to-be-replaced part of the log.
7362 *
7363 * If patch line(s) are shorter than original piece of verifier log,
7364 * shift log contents by (orig_sz - patch_sz) bytes to the left
7365 * starting from after to-be-replaced part of the log
7366 *
7367 * We need to be careful about not overflowing available
7368 * buf_sz capacity. If that's the case, we'll truncate the end
7369 * of the original log, as necessary.
7370 */
7371 if (patch_sz > orig_sz) {
7372 if (orig + patch_sz >= buf + buf_sz) {
7373 /* patch is big enough to cover remaining space completely */
7374 patch_sz -= (orig + patch_sz) - (buf + buf_sz) + 1;
7375 rem_sz = 0;
7376 } else if (patch_sz - orig_sz > buf_sz - log_sz) {
7377 /* patch causes part of remaining log to be truncated */
7378 rem_sz -= (patch_sz - orig_sz) - (buf_sz - log_sz);
7379 }
7380 }
7381 /* shift remaining log to the right by calculated amount */
7382 memmove(orig + patch_sz, orig + orig_sz, rem_sz);
7383 }
7384
7385 memcpy(orig, patch, patch_sz);
7386 }
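
/* Worked example (hedged, for illustration only): with buf_sz = 16,
 * log_sz = 12 and log contents "AAAA\nBBB\nCC", patching the "BBB" piece
 * (orig_sz = 3) with "XXXXX" (patch_sz = 5) shifts the trailing "\nCC\0"
 * right by 2 bytes, producing "AAAA\nXXXXX\nCC" with the log effectively
 * growing to 14 bytes, still within buf_sz. If the shifted tail would not
 * fit into buf_sz, the end of the original log is truncated instead.
 */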
7387
7388 static void fixup_log_failed_core_relo(struct bpf_program *prog,
7389 char *buf, size_t buf_sz, size_t log_sz,
7390 char *line1, char *line2, char *line3)
7391 {
7392 /* Expected log for failed and not properly guarded CO-RE relocation:
7393 * line1 -> 123: (85) call unknown#195896080
7394 * line2 -> invalid func unknown#195896080
7395 * line3 -> <anything else or end of buffer>
7396 *
7397 * "123" is the index of the instruction that was poisoned. We extract
7398 * instruction index to find corresponding CO-RE relocation and
7399 * replace this part of the log with more relevant information about
7400 * failed CO-RE relocation.
7401 */
7402 const struct bpf_core_relo *relo;
7403 struct bpf_core_spec spec;
7404 char patch[512], spec_buf[256];
7405 int insn_idx, err, spec_len;
7406
7407 if (sscanf(line1, "%d: (%*d) call unknown#195896080\n", &insn_idx) != 1)
7408 return;
7409
7410 relo = find_relo_core(prog, insn_idx);
7411 if (!relo)
7412 return;
7413
7414 err = bpf_core_parse_spec(prog->name, prog->obj->btf, relo, &spec);
7415 if (err)
7416 return;
7417
7418 spec_len = bpf_core_format_spec(spec_buf, sizeof(spec_buf), &spec);
7419 snprintf(patch, sizeof(patch),
7420 "%d: <invalid CO-RE relocation>\n"
7421 "failed to resolve CO-RE relocation %s%s\n",
7422 insn_idx, spec_buf, spec_len >= sizeof(spec_buf) ? "..." : "");
7423
7424 patch_log(buf, buf_sz, log_sz, line1, line3 - line1, patch);
7425 }
7426
7427 static void fixup_log_missing_map_load(struct bpf_program *prog,
7428 char *buf, size_t buf_sz, size_t log_sz,
7429 char *line1, char *line2, char *line3)
7430 {
7431 /* Expected log for failed and not properly guarded map reference:
7432 * line1 -> 123: (85) call unknown#2001000345
7433 * line2 -> invalid func unknown#2001000345
7434 * line3 -> <anything else or end of buffer>
7435 *
7436 * "123" is the index of the instruction that was poisoned.
7437 * "345" in "2001000345" is a map index in obj->maps to fetch map name.
7438 */
7439 struct bpf_object *obj = prog->obj;
7440 const struct bpf_map *map;
7441 int insn_idx, map_idx;
7442 char patch[128];
7443
7444 if (sscanf(line1, "%d: (%*d) call unknown#%d\n", &insn_idx, &map_idx) != 2)
7445 return;
7446
7447 map_idx -= POISON_LDIMM64_MAP_BASE;
7448 if (map_idx < 0 || map_idx >= obj->nr_maps)
7449 return;
7450 map = &obj->maps[map_idx];
7451
7452 snprintf(patch, sizeof(patch),
7453 "%d: <invalid BPF map reference>\n"
7454 "BPF map '%s' is referenced but wasn't created\n",
7455 insn_idx, map->name);
7456
7457 patch_log(buf, buf_sz, log_sz, line1, line3 - line1, patch);
7458 }
7459
7460 static void fixup_log_missing_kfunc_call(struct bpf_program *prog,
7461 char *buf, size_t buf_sz, size_t log_sz,
7462 char *line1, char *line2, char *line3)
7463 {
7464 /* Expected log for failed and not properly guarded kfunc call:
7465 * line1 -> 123: (85) call unknown#2002000345
7466 * line2 -> invalid func unknown#2002000345
7467 * line3 -> <anything else or end of buffer>
7468 *
7469 * "123" is the index of the instruction that was poisoned.
7470 * "345" in "2002000345" is an extern index in obj->externs to fetch kfunc name.
7471 */
7472 struct bpf_object *obj = prog->obj;
7473 const struct extern_desc *ext;
7474 int insn_idx, ext_idx;
7475 char patch[128];
7476
7477 if (sscanf(line1, "%d: (%*d) call unknown#%d\n", &insn_idx, &ext_idx) != 2)
7478 return;
7479
7480 ext_idx -= POISON_CALL_KFUNC_BASE;
7481 if (ext_idx < 0 || ext_idx >= obj->nr_extern)
7482 return;
7483 ext = &obj->externs[ext_idx];
7484
7485 snprintf(patch, sizeof(patch),
7486 "%d: <invalid kfunc call>\n"
7487 "kfunc '%s' is referenced but wasn't resolved\n",
7488 insn_idx, ext->name);
7489
7490 patch_log(buf, buf_sz, log_sz, line1, line3 - line1, patch);
7491 }
7492
7493 static void fixup_verifier_log(struct bpf_program *prog, char *buf, size_t buf_sz)
7494 {
7495 /* look for familiar error patterns in last N lines of the log */
7496 const size_t max_last_line_cnt = 10;
7497 char *prev_line, *cur_line, *next_line;
7498 size_t log_sz;
7499 int i;
7500
7501 if (!buf)
7502 return;
7503
7504 log_sz = strlen(buf) + 1;
7505 next_line = buf + log_sz - 1;
7506
7507 for (i = 0; i < max_last_line_cnt; i++, next_line = cur_line) {
7508 cur_line = find_prev_line(buf, next_line);
7509 if (!cur_line)
7510 return;
7511
7512 if (str_has_pfx(cur_line, "invalid func unknown#195896080\n")) {
7513 prev_line = find_prev_line(buf, cur_line);
7514 if (!prev_line)
7515 continue;
7516
7517 /* failed CO-RE relocation case */
7518 fixup_log_failed_core_relo(prog, buf, buf_sz, log_sz,
7519 prev_line, cur_line, next_line);
7520 return;
7521 } else if (str_has_pfx(cur_line, "invalid func unknown#"POISON_LDIMM64_MAP_PFX)) {
7522 prev_line = find_prev_line(buf, cur_line);
7523 if (!prev_line)
7524 continue;
7525
7526 /* reference to uncreated BPF map */
7527 fixup_log_missing_map_load(prog, buf, buf_sz, log_sz,
7528 prev_line, cur_line, next_line);
7529 return;
7530 } else if (str_has_pfx(cur_line, "invalid func unknown#"POISON_CALL_KFUNC_PFX)) {
7531 prev_line = find_prev_line(buf, cur_line);
7532 if (!prev_line)
7533 continue;
7534
7535 /* reference to unresolved kfunc */
7536 fixup_log_missing_kfunc_call(prog, buf, buf_sz, log_sz,
7537 prev_line, cur_line, next_line);
7538 return;
7539 }
7540 }
7541 }
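
/* For example (hedged illustration), a failed and unguarded CO-RE relocation
 * that the verifier reports as:
 *	123: (85) call unknown#195896080
 *	invalid func unknown#195896080
 * gets rewritten in place into something along the lines of:
 *	123: <invalid CO-RE relocation>
 *	failed to resolve CO-RE relocation <byte_off> [7] struct task_struct.pid (0:24)
 * with the spec string produced by bpf_core_format_spec(); the exact spec
 * text depends on the relocation kind and target type.
 */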
7542
7543 static int bpf_program_record_relos(struct bpf_program *prog)
7544 {
7545 struct bpf_object *obj = prog->obj;
7546 int i;
7547
7548 for (i = 0; i < prog->nr_reloc; i++) {
7549 struct reloc_desc *relo = &prog->reloc_desc[i];
7550 struct extern_desc *ext = &obj->externs[relo->ext_idx];
7551 int kind;
7552
7553 switch (relo->type) {
7554 case RELO_EXTERN_LD64:
7555 if (ext->type != EXT_KSYM)
7556 continue;
7557 kind = btf_is_var(btf__type_by_id(obj->btf, ext->btf_id)) ?
7558 BTF_KIND_VAR : BTF_KIND_FUNC;
7559 bpf_gen__record_extern(obj->gen_loader, ext->name,
7560 ext->is_weak, !ext->ksym.type_id,
7561 true, kind, relo->insn_idx);
7562 break;
7563 case RELO_EXTERN_CALL:
7564 bpf_gen__record_extern(obj->gen_loader, ext->name,
7565 ext->is_weak, false, false, BTF_KIND_FUNC,
7566 relo->insn_idx);
7567 break;
7568 case RELO_CORE: {
7569 struct bpf_core_relo cr = {
7570 .insn_off = relo->insn_idx * 8,
7571 .type_id = relo->core_relo->type_id,
7572 .access_str_off = relo->core_relo->access_str_off,
7573 .kind = relo->core_relo->kind,
7574 };
7575
7576 bpf_gen__record_relo_core(obj->gen_loader, &cr);
7577 break;
7578 }
7579 default:
7580 continue;
7581 }
7582 }
7583 return 0;
7584 }
7585
7586 static int
7587 bpf_object__load_progs(struct bpf_object *obj, int log_level)
7588 {
7589 struct bpf_program *prog;
7590 size_t i;
7591 int err;
7592
7593 for (i = 0; i < obj->nr_programs; i++) {
7594 prog = &obj->programs[i];
7595 err = bpf_object__sanitize_prog(obj, prog);
7596 if (err)
7597 return err;
7598 }
7599
7600 for (i = 0; i < obj->nr_programs; i++) {
7601 prog = &obj->programs[i];
7602 if (prog_is_subprog(obj, prog))
7603 continue;
7604 if (!prog->autoload) {
7605 pr_debug("prog '%s': skipped loading\n", prog->name);
7606 continue;
7607 }
7608 prog->log_level |= log_level;
7609
7610 if (obj->gen_loader)
7611 bpf_program_record_relos(prog);
7612
7613 err = bpf_object_load_prog(obj, prog, prog->insns, prog->insns_cnt,
7614 obj->license, obj->kern_version, &prog->fd);
7615 if (err) {
7616 pr_warn("prog '%s': failed to load: %d\n", prog->name, err);
7617 return err;
7618 }
7619 }
7620
7621 bpf_object__free_relocs(obj);
7622 return 0;
7623 }
7624
7625 static const struct bpf_sec_def *find_sec_def(const char *sec_name);
7626
7627 static int bpf_object_init_progs(struct bpf_object *obj, const struct bpf_object_open_opts *opts)
7628 {
7629 struct bpf_program *prog;
7630 int err;
7631
7632 bpf_object__for_each_program(prog, obj) {
7633 prog->sec_def = find_sec_def(prog->sec_name);
7634 if (!prog->sec_def) {
7635 /* couldn't guess, but user might manually specify */
7636 pr_debug("prog '%s': unrecognized ELF section name '%s'\n",
7637 prog->name, prog->sec_name);
7638 continue;
7639 }
7640
7641 prog->type = prog->sec_def->prog_type;
7642 prog->expected_attach_type = prog->sec_def->expected_attach_type;
7643
7644 /* sec_def can have custom callback which should be called
7645 * after bpf_program is initialized to adjust its properties
7646 */
7647 if (prog->sec_def->prog_setup_fn) {
7648 err = prog->sec_def->prog_setup_fn(prog, prog->sec_def->cookie);
7649 if (err < 0) {
7650 pr_warn("prog '%s': failed to initialize: %d\n",
7651 prog->name, err);
7652 return err;
7653 }
7654 }
7655 }
7656
7657 return 0;
7658 }
7659
7660 static struct bpf_object *bpf_object_open(const char *path, const void *obj_buf, size_t obj_buf_sz,
7661 const struct bpf_object_open_opts *opts)
7662 {
7663 const char *obj_name, *kconfig, *btf_tmp_path, *token_path;
7664 struct bpf_object *obj;
7665 char tmp_name[64];
7666 int err;
7667 char *log_buf;
7668 size_t log_size;
7669 __u32 log_level;
7670
7671 if (elf_version(EV_CURRENT) == EV_NONE) {
7672 pr_warn("failed to init libelf for %s\n",
7673 path ? : "(mem buf)");
7674 return ERR_PTR(-LIBBPF_ERRNO__LIBELF);
7675 }
7676
7677 if (!OPTS_VALID(opts, bpf_object_open_opts))
7678 return ERR_PTR(-EINVAL);
7679
7680 obj_name = OPTS_GET(opts, object_name, NULL);
7681 if (obj_buf) {
7682 if (!obj_name) {
7683 snprintf(tmp_name, sizeof(tmp_name), "%lx-%lx",
7684 (unsigned long)obj_buf,
7685 (unsigned long)obj_buf_sz);
7686 obj_name = tmp_name;
7687 }
7688 path = obj_name;
7689 pr_debug("loading object '%s' from buffer\n", obj_name);
7690 }
7691
7692 log_buf = OPTS_GET(opts, kernel_log_buf, NULL);
7693 log_size = OPTS_GET(opts, kernel_log_size, 0);
7694 log_level = OPTS_GET(opts, kernel_log_level, 0);
7695 if (log_size > UINT_MAX)
7696 return ERR_PTR(-EINVAL);
7697 if (log_size && !log_buf)
7698 return ERR_PTR(-EINVAL);
7699
7700 token_path = OPTS_GET(opts, bpf_token_path, NULL);
7701 /* if user didn't specify bpf_token_path explicitly, check if
7702 * LIBBPF_BPF_TOKEN_PATH envvar was set and treat it as bpf_token_path
7703 * option
7704 */
7705 if (!token_path)
7706 token_path = getenv("LIBBPF_BPF_TOKEN_PATH");
7707 if (token_path && strlen(token_path) >= PATH_MAX)
7708 return ERR_PTR(-ENAMETOOLONG);
7709
7710 obj = bpf_object__new(path, obj_buf, obj_buf_sz, obj_name);
7711 if (IS_ERR(obj))
7712 return obj;
7713
7714 obj->log_buf = log_buf;
7715 obj->log_size = log_size;
7716 obj->log_level = log_level;
7717
7718 if (token_path) {
7719 obj->token_path = strdup(token_path);
7720 if (!obj->token_path) {
7721 err = -ENOMEM;
7722 goto out;
7723 }
7724 }
7725
7726 btf_tmp_path = OPTS_GET(opts, btf_custom_path, NULL);
7727 if (btf_tmp_path) {
7728 if (strlen(btf_tmp_path) >= PATH_MAX) {
7729 err = -ENAMETOOLONG;
7730 goto out;
7731 }
7732 obj->btf_custom_path = strdup(btf_tmp_path);
7733 if (!obj->btf_custom_path) {
7734 err = -ENOMEM;
7735 goto out;
7736 }
7737 }
7738
7739 kconfig = OPTS_GET(opts, kconfig, NULL);
7740 if (kconfig) {
7741 obj->kconfig = strdup(kconfig);
7742 if (!obj->kconfig) {
7743 err = -ENOMEM;
7744 goto out;
7745 }
7746 }
7747
7748 err = bpf_object__elf_init(obj);
7749 err = err ? : bpf_object__check_endianness(obj);
7750 err = err ? : bpf_object__elf_collect(obj);
7751 err = err ? : bpf_object__collect_externs(obj);
7752 err = err ? : bpf_object_fixup_btf(obj);
7753 err = err ? : bpf_object__init_maps(obj, opts);
7754 err = err ? : bpf_object_init_progs(obj, opts);
7755 err = err ? : bpf_object__collect_relos(obj);
7756 if (err)
7757 goto out;
7758
7759 bpf_object__elf_finish(obj);
7760
7761 return obj;
7762 out:
7763 bpf_object__close(obj);
7764 return ERR_PTR(err);
7765 }
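
/* Usage sketch (illustrative): callers typically fill these options through
 * the LIBBPF_OPTS() helper; the file name, buffer and token path below are
 * just examples.
 *
 *	static char klog[64 * 1024];
 *	LIBBPF_OPTS(bpf_object_open_opts, opts,
 *		.kernel_log_buf = klog,
 *		.kernel_log_size = sizeof(klog),
 *		.kernel_log_level = 1,
 *		.bpf_token_path = "/sys/fs/bpf",
 *	);
 *	struct bpf_object *obj = bpf_object__open_file("prog.bpf.o", &opts);
 */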
7766
7767 struct bpf_object *
7768 bpf_object__open_file(const char *path, const struct bpf_object_open_opts *opts)
7769 {
7770 if (!path)
7771 return libbpf_err_ptr(-EINVAL);
7772
7773 pr_debug("loading %s\n", path);
7774
7775 return libbpf_ptr(bpf_object_open(path, NULL, 0, opts));
7776 }
7777
7778 struct bpf_object *bpf_object__open(const char *path)
7779 {
7780 return bpf_object__open_file(path, NULL);
7781 }
7782
7783 struct bpf_object *
7784 bpf_object__open_mem(const void *obj_buf, size_t obj_buf_sz,
7785 const struct bpf_object_open_opts *opts)
7786 {
7787 if (!obj_buf || obj_buf_sz == 0)
7788 return libbpf_err_ptr(-EINVAL);
7789
7790 return libbpf_ptr(bpf_object_open(NULL, obj_buf, obj_buf_sz, opts));
7791 }
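
/* Usage sketch (illustrative): bpf_object__open_mem() is what generated
 * skeletons use under the hood; a hand-rolled equivalent could embed the
 * object file and open it from memory (symbol names below are hypothetical,
 * e.g. produced by objcopy or `ld -r -b binary`):
 *
 *	extern const char _binary_prog_bpf_o_start[];
 *	extern const char _binary_prog_bpf_o_end[];
 *	size_t sz = _binary_prog_bpf_o_end - _binary_prog_bpf_o_start;
 *	struct bpf_object *obj =
 *		bpf_object__open_mem(_binary_prog_bpf_o_start, sz, NULL);
 */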
7792
7793 static int bpf_object_unload(struct bpf_object *obj)
7794 {
7795 size_t i;
7796
7797 if (!obj)
7798 return libbpf_err(-EINVAL);
7799
7800 for (i = 0; i < obj->nr_maps; i++) {
7801 zclose(obj->maps[i].fd);
7802 if (obj->maps[i].st_ops)
7803 zfree(&obj->maps[i].st_ops->kern_vdata);
7804 }
7805
7806 for (i = 0; i < obj->nr_programs; i++)
7807 bpf_program__unload(&obj->programs[i]);
7808
7809 return 0;
7810 }
7811
7812 static int bpf_object__sanitize_maps(struct bpf_object *obj)
7813 {
7814 struct bpf_map *m;
7815
7816 bpf_object__for_each_map(m, obj) {
7817 if (!bpf_map__is_internal(m))
7818 continue;
7819 if (!kernel_supports(obj, FEAT_ARRAY_MMAP))
7820 m->def.map_flags &= ~BPF_F_MMAPABLE;
7821 }
7822
7823 return 0;
7824 }
7825
7826 int libbpf_kallsyms_parse(kallsyms_cb_t cb, void *ctx)
7827 {
7828 char sym_type, sym_name[500];
7829 unsigned long long sym_addr;
7830 int ret, err = 0;
7831 FILE *f;
7832
7833 f = fopen("/proc/kallsyms", "re");
7834 if (!f) {
7835 err = -errno;
7836 pr_warn("failed to open /proc/kallsyms: %d\n", err);
7837 return err;
7838 }
7839
7840 while (true) {
7841 ret = fscanf(f, "%llx %c %499s%*[^\n]\n",
7842 &sym_addr, &sym_type, sym_name);
7843 if (ret == EOF && feof(f))
7844 break;
7845 if (ret != 3) {
7846 pr_warn("failed to read kallsyms entry: %d\n", ret);
7847 err = -EINVAL;
7848 break;
7849 }
7850
7851 err = cb(sym_addr, sym_type, sym_name, ctx);
7852 if (err)
7853 break;
7854 }
7855
7856 fclose(f);
7857 return err;
7858 }
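
/* Usage sketch (illustrative): a callback that records the address of one
 * symbol; the symbol name is just an example.
 *
 *	static int find_sym(unsigned long long addr, char type,
 *			    const char *name, void *ctx)
 *	{
 *		if (strcmp(name, "bpf_prog_load") == 0)
 *			*(unsigned long long *)ctx = addr;
 *		return 0;
 *	}
 *
 *	unsigned long long addr = 0;
 *	libbpf_kallsyms_parse(find_sym, &addr);
 */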
7859
7860 static int kallsyms_cb(unsigned long long sym_addr, char sym_type,
7861 const char *sym_name, void *ctx)
7862 {
7863 struct bpf_object *obj = ctx;
7864 const struct btf_type *t;
7865 struct extern_desc *ext;
7866
7867 ext = find_extern_by_name(obj, sym_name);
7868 if (!ext || ext->type != EXT_KSYM)
7869 return 0;
7870
7871 t = btf__type_by_id(obj->btf, ext->btf_id);
7872 if (!btf_is_var(t))
7873 return 0;
7874
7875 if (ext->is_set && ext->ksym.addr != sym_addr) {
7876 pr_warn("extern (ksym) '%s': resolution is ambiguous: 0x%llx or 0x%llx\n",
7877 sym_name, ext->ksym.addr, sym_addr);
7878 return -EINVAL;
7879 }
7880 if (!ext->is_set) {
7881 ext->is_set = true;
7882 ext->ksym.addr = sym_addr;
7883 pr_debug("extern (ksym) '%s': set to 0x%llx\n", sym_name, sym_addr);
7884 }
7885 return 0;
7886 }
7887
7888 static int bpf_object__read_kallsyms_file(struct bpf_object *obj)
7889 {
7890 return libbpf_kallsyms_parse(kallsyms_cb, obj);
7891 }
7892
7893 static int find_ksym_btf_id(struct bpf_object *obj, const char *ksym_name,
7894 __u16 kind, struct btf **res_btf,
7895 struct module_btf **res_mod_btf)
7896 {
7897 struct module_btf *mod_btf;
7898 struct btf *btf;
7899 int i, id, err;
7900
7901 btf = obj->btf_vmlinux;
7902 mod_btf = NULL;
7903 id = btf__find_by_name_kind(btf, ksym_name, kind);
7904
7905 if (id == -ENOENT) {
7906 err = load_module_btfs(obj);
7907 if (err)
7908 return err;
7909
7910 for (i = 0; i < obj->btf_module_cnt; i++) {
7911 /* we assume module_btf's BTF FD is always >0 */
7912 mod_btf = &obj->btf_modules[i];
7913 btf = mod_btf->btf;
7914 id = btf__find_by_name_kind_own(btf, ksym_name, kind);
7915 if (id != -ENOENT)
7916 break;
7917 }
7918 }
7919 if (id <= 0)
7920 return -ESRCH;
7921
7922 *res_btf = btf;
7923 *res_mod_btf = mod_btf;
7924 return id;
7925 }
7926
7927 static int bpf_object__resolve_ksym_var_btf_id(struct bpf_object *obj,
7928 struct extern_desc *ext)
7929 {
7930 const struct btf_type *targ_var, *targ_type;
7931 __u32 targ_type_id, local_type_id;
7932 struct module_btf *mod_btf = NULL;
7933 const char *targ_var_name;
7934 struct btf *btf = NULL;
7935 int id, err;
7936
7937 id = find_ksym_btf_id(obj, ext->name, BTF_KIND_VAR, &btf, &mod_btf);
7938 if (id < 0) {
7939 if (id == -ESRCH && ext->is_weak)
7940 return 0;
7941 pr_warn("extern (var ksym) '%s': not found in kernel BTF\n",
7942 ext->name);
7943 return id;
7944 }
7945
7946 /* find local type_id */
7947 local_type_id = ext->ksym.type_id;
7948
7949 /* find target type_id */
7950 targ_var = btf__type_by_id(btf, id);
7951 targ_var_name = btf__name_by_offset(btf, targ_var->name_off);
7952 targ_type = skip_mods_and_typedefs(btf, targ_var->type, &targ_type_id);
7953
7954 err = bpf_core_types_are_compat(obj->btf, local_type_id,
7955 btf, targ_type_id);
7956 if (err <= 0) {
7957 const struct btf_type *local_type;
7958 const char *targ_name, *local_name;
7959
7960 local_type = btf__type_by_id(obj->btf, local_type_id);
7961 local_name = btf__name_by_offset(obj->btf, local_type->name_off);
7962 targ_name = btf__name_by_offset(btf, targ_type->name_off);
7963
7964 pr_warn("extern (var ksym) '%s': incompatible types, expected [%d] %s %s, but kernel has [%d] %s %s\n",
7965 ext->name, local_type_id,
7966 btf_kind_str(local_type), local_name, targ_type_id,
7967 btf_kind_str(targ_type), targ_name);
7968 return -EINVAL;
7969 }
7970
7971 ext->is_set = true;
7972 ext->ksym.kernel_btf_obj_fd = mod_btf ? mod_btf->fd : 0;
7973 ext->ksym.kernel_btf_id = id;
7974 pr_debug("extern (var ksym) '%s': resolved to [%d] %s %s\n",
7975 ext->name, id, btf_kind_str(targ_var), targ_var_name);
7976
7977 return 0;
7978 }
7979
7980 static int bpf_object__resolve_ksym_func_btf_id(struct bpf_object *obj,
7981 struct extern_desc *ext)
7982 {
7983 int local_func_proto_id, kfunc_proto_id, kfunc_id;
7984 struct module_btf *mod_btf = NULL;
7985 const struct btf_type *kern_func;
7986 struct btf *kern_btf = NULL;
7987 int ret;
7988
7989 local_func_proto_id = ext->ksym.type_id;
7990
7991 kfunc_id = find_ksym_btf_id(obj, ext->essent_name ?: ext->name, BTF_KIND_FUNC, &kern_btf,
7992 &mod_btf);
7993 if (kfunc_id < 0) {
7994 if (kfunc_id == -ESRCH && ext->is_weak)
7995 return 0;
7996 pr_warn("extern (func ksym) '%s': not found in kernel or module BTFs\n",
7997 ext->name);
7998 return kfunc_id;
7999 }
8000
8001 kern_func = btf__type_by_id(kern_btf, kfunc_id);
8002 kfunc_proto_id = kern_func->type;
8003
8004 ret = bpf_core_types_are_compat(obj->btf, local_func_proto_id,
8005 kern_btf, kfunc_proto_id);
8006 if (ret <= 0) {
8007 if (ext->is_weak)
8008 return 0;
8009
8010 pr_warn("extern (func ksym) '%s': func_proto [%d] incompatible with %s [%d]\n",
8011 ext->name, local_func_proto_id,
8012 mod_btf ? mod_btf->name : "vmlinux", kfunc_proto_id);
8013 return -EINVAL;
8014 }
8015
8016 /* set index for module BTF fd in fd_array, if unset */
8017 if (mod_btf && !mod_btf->fd_array_idx) {
8018 /* insn->off is s16 */
8019 if (obj->fd_array_cnt == INT16_MAX) {
8020 pr_warn("extern (func ksym) '%s': module BTF fd index %d too big to fit in bpf_insn offset\n",
8021 ext->name, mod_btf->fd_array_idx);
8022 return -E2BIG;
8023 }
8024 /* Cannot use index 0 for module BTF fd */
8025 if (!obj->fd_array_cnt)
8026 obj->fd_array_cnt = 1;
8027
8028 ret = libbpf_ensure_mem((void **)&obj->fd_array, &obj->fd_array_cap, sizeof(int),
8029 obj->fd_array_cnt + 1);
8030 if (ret)
8031 return ret;
8032 mod_btf->fd_array_idx = obj->fd_array_cnt;
8033 /* we assume module BTF FD is always >0 */
8034 obj->fd_array[obj->fd_array_cnt++] = mod_btf->fd;
8035 }
8036
8037 ext->is_set = true;
8038 ext->ksym.kernel_btf_id = kfunc_id;
8039 ext->ksym.btf_fd_idx = mod_btf ? mod_btf->fd_array_idx : 0;
8040 /* Also set kernel_btf_obj_fd to make sure that bpf_object__relocate_data()
8041 * populates FD into ld_imm64 insn when it's used to point to kfunc.
8042 * {kernel_btf_id, btf_fd_idx} -> fixup bpf_call.
8043 * {kernel_btf_id, kernel_btf_obj_fd} -> fixup ld_imm64.
8044 */
8045 ext->ksym.kernel_btf_obj_fd = mod_btf ? mod_btf->fd : 0;
8046 pr_debug("extern (func ksym) '%s': resolved to %s [%d]\n",
8047 ext->name, mod_btf ? mod_btf->name : "vmlinux", kfunc_id);
8048
8049 return 0;
8050 }
8051
8052 static int bpf_object__resolve_ksyms_btf_id(struct bpf_object *obj)
8053 {
8054 const struct btf_type *t;
8055 struct extern_desc *ext;
8056 int i, err;
8057
8058 for (i = 0; i < obj->nr_extern; i++) {
8059 ext = &obj->externs[i];
8060 if (ext->type != EXT_KSYM || !ext->ksym.type_id)
8061 continue;
8062
8063 if (obj->gen_loader) {
8064 ext->is_set = true;
8065 ext->ksym.kernel_btf_obj_fd = 0;
8066 ext->ksym.kernel_btf_id = 0;
8067 continue;
8068 }
8069 t = btf__type_by_id(obj->btf, ext->btf_id);
8070 if (btf_is_var(t))
8071 err = bpf_object__resolve_ksym_var_btf_id(obj, ext);
8072 else
8073 err = bpf_object__resolve_ksym_func_btf_id(obj, ext);
8074 if (err)
8075 return err;
8076 }
8077 return 0;
8078 }
8079
8080 static int bpf_object__resolve_externs(struct bpf_object *obj,
8081 const char *extra_kconfig)
8082 {
8083 bool need_config = false, need_kallsyms = false;
8084 bool need_vmlinux_btf = false;
8085 struct extern_desc *ext;
8086 void *kcfg_data = NULL;
8087 int err, i;
8088
8089 if (obj->nr_extern == 0)
8090 return 0;
8091
8092 if (obj->kconfig_map_idx >= 0)
8093 kcfg_data = obj->maps[obj->kconfig_map_idx].mmaped;
8094
8095 for (i = 0; i < obj->nr_extern; i++) {
8096 ext = &obj->externs[i];
8097
8098 if (ext->type == EXT_KSYM) {
8099 if (ext->ksym.type_id)
8100 need_vmlinux_btf = true;
8101 else
8102 need_kallsyms = true;
8103 continue;
8104 } else if (ext->type == EXT_KCFG) {
8105 void *ext_ptr = kcfg_data + ext->kcfg.data_off;
8106 __u64 value = 0;
8107
8108 /* Kconfig externs need actual /proc/config.gz */
8109 if (str_has_pfx(ext->name, "CONFIG_")) {
8110 need_config = true;
8111 continue;
8112 }
8113
8114 /* Virtual kcfg externs are handled specially by libbpf */
8115 if (strcmp(ext->name, "LINUX_KERNEL_VERSION") == 0) {
8116 value = get_kernel_version();
8117 if (!value) {
8118 pr_warn("extern (kcfg) '%s': failed to get kernel version\n", ext->name);
8119 return -EINVAL;
8120 }
8121 } else if (strcmp(ext->name, "LINUX_HAS_BPF_COOKIE") == 0) {
8122 value = kernel_supports(obj, FEAT_BPF_COOKIE);
8123 } else if (strcmp(ext->name, "LINUX_HAS_SYSCALL_WRAPPER") == 0) {
8124 value = kernel_supports(obj, FEAT_SYSCALL_WRAPPER);
8125 } else if (!str_has_pfx(ext->name, "LINUX_") || !ext->is_weak) {
8126 /* Currently libbpf supports only CONFIG_ and LINUX_ prefixed
8127 * __kconfig externs, where LINUX_ ones are virtual and filled out
8128 * by libbpf itself (their values don't come from Kconfig).
8129 * If a LINUX_xxx variable is not recognized by libbpf, but is marked
8130 * __weak, it defaults to zero value, just like for CONFIG_xxx
8131 * externs.
8132 */
8133 pr_warn("extern (kcfg) '%s': unrecognized virtual extern\n", ext->name);
8134 return -EINVAL;
8135 }
8136
8137 err = set_kcfg_value_num(ext, ext_ptr, value);
8138 if (err)
8139 return err;
8140 pr_debug("extern (kcfg) '%s': set to 0x%llx\n",
8141 ext->name, (long long)value);
8142 } else {
8143 pr_warn("extern '%s': unrecognized extern kind\n", ext->name);
8144 return -EINVAL;
8145 }
8146 }
8147 if (need_config && extra_kconfig) {
8148 err = bpf_object__read_kconfig_mem(obj, extra_kconfig, kcfg_data);
8149 if (err)
8150 return -EINVAL;
8151 need_config = false;
8152 for (i = 0; i < obj->nr_extern; i++) {
8153 ext = &obj->externs[i];
8154 if (ext->type == EXT_KCFG && !ext->is_set) {
8155 need_config = true;
8156 break;
8157 }
8158 }
8159 }
8160 if (need_config) {
8161 err = bpf_object__read_kconfig_file(obj, kcfg_data);
8162 if (err)
8163 return -EINVAL;
8164 }
8165 if (need_kallsyms) {
8166 err = bpf_object__read_kallsyms_file(obj);
8167 if (err)
8168 return -EINVAL;
8169 }
8170 if (need_vmlinux_btf) {
8171 err = bpf_object__resolve_ksyms_btf_id(obj);
8172 if (err)
8173 return -EINVAL;
8174 }
8175 for (i = 0; i < obj->nr_extern; i++) {
8176 ext = &obj->externs[i];
8177
8178 if (!ext->is_set && !ext->is_weak) {
8179 pr_warn("extern '%s' (strong): not resolved\n", ext->name);
8180 return -ESRCH;
8181 } else if (!ext->is_set) {
8182 pr_debug("extern '%s' (weak): not resolved, defaulting to zero\n",
8183 ext->name);
8184 }
8185 }
8186
8187 return 0;
8188 }
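
/* For illustration (hedged sketch of the BPF-program side), the externs
 * resolved above are declared in BPF C roughly like:
 *
 *	extern unsigned int LINUX_KERNEL_VERSION __kconfig;
 *	extern int CONFIG_HZ __kconfig __weak;
 *	extern const void bpf_prog_fops __ksym __weak;
 *	extern void bpf_task_release(struct task_struct *p) __ksym __weak;
 *
 * Strong externs that remain unresolved fail the load with -ESRCH; weak ones
 * default to zero, as the loop above warns and logs.
 */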
8189
8190 static void bpf_map_prepare_vdata(const struct bpf_map *map)
8191 {
8192 struct bpf_struct_ops *st_ops;
8193 __u32 i;
8194
8195 st_ops = map->st_ops;
8196 for (i = 0; i < btf_vlen(st_ops->type); i++) {
8197 struct bpf_program *prog = st_ops->progs[i];
8198 void *kern_data;
8199 int prog_fd;
8200
8201 if (!prog)
8202 continue;
8203
8204 prog_fd = bpf_program__fd(prog);
8205 kern_data = st_ops->kern_vdata + st_ops->kern_func_off[i];
8206 *(unsigned long *)kern_data = prog_fd;
8207 }
8208 }
8209
8210 static int bpf_object_prepare_struct_ops(struct bpf_object *obj)
8211 {
8212 struct bpf_map *map;
8213 int i;
8214
8215 for (i = 0; i < obj->nr_maps; i++) {
8216 map = &obj->maps[i];
8217
8218 if (!bpf_map__is_struct_ops(map))
8219 continue;
8220
8221 if (!map->autocreate)
8222 continue;
8223
8224 bpf_map_prepare_vdata(map);
8225 }
8226
8227 return 0;
8228 }
8229
8230 static int bpf_object_load(struct bpf_object *obj, int extra_log_level, const char *target_btf_path)
8231 {
8232 int err, i;
8233
8234 if (!obj)
8235 return libbpf_err(-EINVAL);
8236
8237 if (obj->loaded) {
8238 pr_warn("object '%s': load can't be attempted twice\n", obj->name);
8239 return libbpf_err(-EINVAL);
8240 }
8241
8242 if (obj->gen_loader)
8243 bpf_gen__init(obj->gen_loader, extra_log_level, obj->nr_programs, obj->nr_maps);
8244
8245 err = bpf_object_prepare_token(obj);
8246 err = err ? : bpf_object__probe_loading(obj);
8247 err = err ? : bpf_object__load_vmlinux_btf(obj, false);
8248 err = err ? : bpf_object__resolve_externs(obj, obj->kconfig);
8249 err = err ? : bpf_object__sanitize_maps(obj);
8250 err = err ? : bpf_object__init_kern_struct_ops_maps(obj);
8251 err = err ? : bpf_object_adjust_struct_ops_autoload(obj);
8252 err = err ? : bpf_object__relocate(obj, obj->btf_custom_path ? : target_btf_path);
8253 err = err ? : bpf_object__sanitize_and_load_btf(obj);
8254 err = err ? : bpf_object__create_maps(obj);
8255 err = err ? : bpf_object__load_progs(obj, extra_log_level);
8256 err = err ? : bpf_object_init_prog_arrays(obj);
8257 err = err ? : bpf_object_prepare_struct_ops(obj);
8258
8259 if (obj->gen_loader) {
8260 /* reset FDs */
8261 if (obj->btf)
8262 btf__set_fd(obj->btf, -1);
8263 if (!err)
8264 err = bpf_gen__finish(obj->gen_loader, obj->nr_programs, obj->nr_maps);
8265 }
8266
8267 /* clean up fd_array */
8268 zfree(&obj->fd_array);
8269
8270 /* clean up module BTFs */
8271 for (i = 0; i < obj->btf_module_cnt; i++) {
8272 close(obj->btf_modules[i].fd);
8273 btf__free(obj->btf_modules[i].btf);
8274 free(obj->btf_modules[i].name);
8275 }
8276 free(obj->btf_modules);
8277
8278 /* clean up vmlinux BTF */
8279 btf__free(obj->btf_vmlinux);
8280 obj->btf_vmlinux = NULL;
8281
8282 obj->loaded = true; /* whether the load succeeded or not */
8283
8284 if (err)
8285 goto out;
8286
8287 return 0;
8288 out:
8289 /* unpin any maps that were auto-pinned during load */
8290 for (i = 0; i < obj->nr_maps; i++)
8291 if (obj->maps[i].pinned && !obj->maps[i].reused)
8292 bpf_map__unpin(&obj->maps[i], NULL);
8293
8294 bpf_object_unload(obj);
8295 pr_warn("failed to load object '%s'\n", obj->path);
8296 return libbpf_err(err);
8297 }
8298
8299 int bpf_object__load(struct bpf_object *obj)
8300 {
8301 return bpf_object_load(obj, 0, NULL);
8302 }
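
/* Typical lifecycle from the caller's point of view (illustrative sketch;
 * the object path and program name are hypothetical):
 *
 *	struct bpf_object *obj = bpf_object__open_file("prog.bpf.o", NULL);
 *	if (!obj || bpf_object__load(obj))
 *		goto err;
 *	struct bpf_program *p = bpf_object__find_program_by_name(obj, "handler");
 *	struct bpf_link *link = bpf_program__attach(p);
 *	...
 *	bpf_link__destroy(link);
 *	bpf_object__close(obj);
 */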
8303
8304 static int make_parent_dir(const char *path)
8305 {
8306 char *cp, errmsg[STRERR_BUFSIZE];
8307 char *dname, *dir;
8308 int err = 0;
8309
8310 dname = strdup(path);
8311 if (dname == NULL)
8312 return -ENOMEM;
8313
8314 dir = dirname(dname);
8315 if (mkdir(dir, 0700) && errno != EEXIST)
8316 err = -errno;
8317
8318 free(dname);
8319 if (err) {
8320 cp = libbpf_strerror_r(-err, errmsg, sizeof(errmsg));
8321 pr_warn("failed to mkdir %s: %s\n", path, cp);
8322 }
8323 return err;
8324 }
8325
8326 static int check_path(const char *path)
8327 {
8328 char *cp, errmsg[STRERR_BUFSIZE];
8329 struct statfs st_fs;
8330 char *dname, *dir;
8331 int err = 0;
8332
8333 if (path == NULL)
8334 return -EINVAL;
8335
8336 dname = strdup(path);
8337 if (dname == NULL)
8338 return -ENOMEM;
8339
8340 dir = dirname(dname);
8341 if (statfs(dir, &st_fs)) {
8342 cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg));
8343 pr_warn("failed to statfs %s: %s\n", dir, cp);
8344 err = -errno;
8345 }
8346 free(dname);
8347
8348 if (!err && st_fs.f_type != BPF_FS_MAGIC) {
8349 pr_warn("specified path %s is not on BPF FS\n", path);
8350 err = -EINVAL;
8351 }
8352
8353 return err;
8354 }
8355
8356 int bpf_program__pin(struct bpf_program *prog, const char *path)
8357 {
8358 char *cp, errmsg[STRERR_BUFSIZE];
8359 int err;
8360
8361 if (prog->fd < 0) {
8362 pr_warn("prog '%s': can't pin program that wasn't loaded\n", prog->name);
8363 return libbpf_err(-EINVAL);
8364 }
8365
8366 err = make_parent_dir(path);
8367 if (err)
8368 return libbpf_err(err);
8369
8370 err = check_path(path);
8371 if (err)
8372 return libbpf_err(err);
8373
8374 if (bpf_obj_pin(prog->fd, path)) {
8375 err = -errno;
8376 cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg));
8377 pr_warn("prog '%s': failed to pin at '%s': %s\n", prog->name, path, cp);
8378 return libbpf_err(err);
8379 }
8380
8381 pr_debug("prog '%s': pinned at '%s'\n", prog->name, path);
8382 return 0;
8383 }
8384
8385 int bpf_program__unpin(struct bpf_program *prog, const char *path)
8386 {
8387 int err;
8388
8389 if (prog->fd < 0) {
8390 pr_warn("prog '%s': can't unpin program that wasn't loaded\n", prog->name);
8391 return libbpf_err(-EINVAL);
8392 }
8393
8394 err = check_path(path);
8395 if (err)
8396 return libbpf_err(err);
8397
8398 err = unlink(path);
8399 if (err)
8400 return libbpf_err(-errno);
8401
8402 pr_debug("prog '%s': unpinned from '%s'\n", prog->name, path);
8403 return 0;
8404 }
8405
8406 int bpf_map__pin(struct bpf_map *map, const char *path)
8407 {
8408 char *cp, errmsg[STRERR_BUFSIZE];
8409 int err;
8410
8411 if (map == NULL) {
8412 pr_warn("invalid map pointer\n");
8413 return libbpf_err(-EINVAL);
8414 }
8415
8416 if (map->pin_path) {
8417 if (path && strcmp(path, map->pin_path)) {
8418 pr_warn("map '%s' already has pin path '%s' different from '%s'\n",
8419 bpf_map__name(map), map->pin_path, path);
8420 return libbpf_err(-EINVAL);
8421 } else if (map->pinned) {
8422 pr_debug("map '%s' already pinned at '%s'; not re-pinning\n",
8423 bpf_map__name(map), map->pin_path);
8424 return 0;
8425 }
8426 } else {
8427 if (!path) {
8428 pr_warn("missing a path to pin map '%s' at\n",
8429 bpf_map__name(map));
8430 return libbpf_err(-EINVAL);
8431 } else if (map->pinned) {
8432 pr_warn("map '%s' already pinned\n", bpf_map__name(map));
8433 return libbpf_err(-EEXIST);
8434 }
8435
8436 map->pin_path = strdup(path);
8437 if (!map->pin_path) {
8438 err = -errno;
8439 goto out_err;
8440 }
8441 }
8442
8443 err = make_parent_dir(map->pin_path);
8444 if (err)
8445 return libbpf_err(err);
8446
8447 err = check_path(map->pin_path);
8448 if (err)
8449 return libbpf_err(err);
8450
8451 if (bpf_obj_pin(map->fd, map->pin_path)) {
8452 err = -errno;
8453 goto out_err;
8454 }
8455
8456 map->pinned = true;
8457 pr_debug("pinned map '%s'\n", map->pin_path);
8458
8459 return 0;
8460
8461 out_err:
8462 cp = libbpf_strerror_r(-err, errmsg, sizeof(errmsg));
8463 pr_warn("failed to pin map: %s\n", cp);
8464 return libbpf_err(err);
8465 }
8466
8467 int bpf_map__unpin(struct bpf_map *map, const char *path)
8468 {
8469 int err;
8470
8471 if (map == NULL) {
8472 pr_warn("invalid map pointer\n");
8473 return libbpf_err(-EINVAL);
8474 }
8475
8476 if (map->pin_path) {
8477 if (path && strcmp(path, map->pin_path)) {
8478 pr_warn("map '%s' already has pin path '%s' different from '%s'\n",
8479 bpf_map__name(map), map->pin_path, path);
8480 return libbpf_err(-EINVAL);
8481 }
8482 path = map->pin_path;
8483 } else if (!path) {
8484 pr_warn("no path to unpin map '%s' from\n",
8485 bpf_map__name(map));
8486 return libbpf_err(-EINVAL);
8487 }
8488
8489 err = check_path(path);
8490 if (err)
8491 return libbpf_err(err);
8492
8493 err = unlink(path);
8494 if (err != 0)
8495 return libbpf_err(-errno);
8496
8497 map->pinned = false;
8498 pr_debug("unpinned map '%s' from '%s'\n", bpf_map__name(map), path);
8499
8500 return 0;
8501 }
8502
8503 int bpf_map__set_pin_path(struct bpf_map *map, const char *path)
8504 {
8505 char *new = NULL;
8506
8507 if (path) {
8508 new = strdup(path);
8509 if (!new)
8510 return libbpf_err(-errno);
8511 }
8512
8513 free(map->pin_path);
8514 map->pin_path = new;
8515 return 0;
8516 }
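
/* Usage sketch (illustrative; the path is hypothetical): setting a pin path
 * before load makes map creation reuse an already-pinned map at that
 * location, or pin the newly created one there:
 *
 *	struct bpf_map *m = bpf_object__find_map_by_name(obj, "my_map");
 *	bpf_map__set_pin_path(m, "/sys/fs/bpf/my_map");
 *	bpf_object__load(obj);
 */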
8517
8518 __alias(bpf_map__pin_path)
8519 const char *bpf_map__get_pin_path(const struct bpf_map *map);
8520
8521 const char *bpf_map__pin_path(const struct bpf_map *map)
8522 {
8523 return map->pin_path;
8524 }
8525
8526 bool bpf_map__is_pinned(const struct bpf_map *map)
8527 {
8528 return map->pinned;
8529 }
8530
8531 static void sanitize_pin_path(char *s)
8532 {
8533 /* bpffs disallows periods in path names */
8534 while (*s) {
8535 if (*s == '.')
8536 *s = '_';
8537 s++;
8538 }
8539 }
8540
8541 int bpf_object__pin_maps(struct bpf_object *obj, const char *path)
8542 {
8543 struct bpf_map *map;
8544 int err;
8545
8546 if (!obj)
8547 return libbpf_err(-ENOENT);
8548
8549 if (!obj->loaded) {
8550 pr_warn("object not yet loaded; load it first\n");
8551 return libbpf_err(-ENOENT);
8552 }
8553
8554 bpf_object__for_each_map(map, obj) {
8555 char *pin_path = NULL;
8556 char buf[PATH_MAX];
8557
8558 if (!map->autocreate)
8559 continue;
8560
8561 if (path) {
8562 err = pathname_concat(buf, sizeof(buf), path, bpf_map__name(map));
8563 if (err)
8564 goto err_unpin_maps;
8565 sanitize_pin_path(buf);
8566 pin_path = buf;
8567 } else if (!map->pin_path) {
8568 continue;
8569 }
8570
8571 err = bpf_map__pin(map, pin_path);
8572 if (err)
8573 goto err_unpin_maps;
8574 }
8575
8576 return 0;
8577
8578 err_unpin_maps:
8579 while ((map = bpf_object__prev_map(obj, map))) {
8580 if (!map->pin_path)
8581 continue;
8582
8583 bpf_map__unpin(map, NULL);
8584 }
8585
8586 return libbpf_err(err);
8587 }
8588
8589 int bpf_object__unpin_maps(struct bpf_object *obj, const char *path)
8590 {
8591 struct bpf_map *map;
8592 int err;
8593
8594 if (!obj)
8595 return libbpf_err(-ENOENT);
8596
8597 bpf_object__for_each_map(map, obj) {
8598 char *pin_path = NULL;
8599 char buf[PATH_MAX];
8600
8601 if (path) {
8602 err = pathname_concat(buf, sizeof(buf), path, bpf_map__name(map));
8603 if (err)
8604 return libbpf_err(err);
8605 sanitize_pin_path(buf);
8606 pin_path = buf;
8607 } else if (!map->pin_path) {
8608 continue;
8609 }
8610
8611 err = bpf_map__unpin(map, pin_path);
8612 if (err)
8613 return libbpf_err(err);
8614 }
8615
8616 return 0;
8617 }
8618
8619 int bpf_object__pin_programs(struct bpf_object *obj, const char *path)
8620 {
8621 struct bpf_program *prog;
8622 char buf[PATH_MAX];
8623 int err;
8624
8625 if (!obj)
8626 return libbpf_err(-ENOENT);
8627
8628 if (!obj->loaded) {
8629 pr_warn("object not yet loaded; load it first\n");
8630 return libbpf_err(-ENOENT);
8631 }
8632
8633 bpf_object__for_each_program(prog, obj) {
8634 err = pathname_concat(buf, sizeof(buf), path, prog->name);
8635 if (err)
8636 goto err_unpin_programs;
8637
8638 err = bpf_program__pin(prog, buf);
8639 if (err)
8640 goto err_unpin_programs;
8641 }
8642
8643 return 0;
8644
8645 err_unpin_programs:
8646 while ((prog = bpf_object__prev_program(obj, prog))) {
8647 if (pathname_concat(buf, sizeof(buf), path, prog->name))
8648 continue;
8649
8650 bpf_program__unpin(prog, buf);
8651 }
8652
8653 return libbpf_err(err);
8654 }
8655
8656 int bpf_object__unpin_programs(struct bpf_object *obj, const char *path)
8657 {
8658 struct bpf_program *prog;
8659 int err;
8660
8661 if (!obj)
8662 return libbpf_err(-ENOENT);
8663
8664 bpf_object__for_each_program(prog, obj) {
8665 char buf[PATH_MAX];
8666
8667 err = pathname_concat(buf, sizeof(buf), path, prog->name);
8668 if (err)
8669 return libbpf_err(err);
8670
8671 err = bpf_program__unpin(prog, buf);
8672 if (err)
8673 return libbpf_err(err);
8674 }
8675
8676 return 0;
8677 }
8678
8679 int bpf_object__pin(struct bpf_object *obj, const char *path)
8680 {
8681 int err;
8682
8683 err = bpf_object__pin_maps(obj, path);
8684 if (err)
8685 return libbpf_err(err);
8686
8687 err = bpf_object__pin_programs(obj, path);
8688 if (err) {
8689 bpf_object__unpin_maps(obj, path);
8690 return libbpf_err(err);
8691 }
8692
8693 return 0;
8694 }
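
/* Usage sketch (illustrative; the bpffs directory and error helper are
 * hypothetical):
 *
 *	if (bpf_object__pin(obj, "/sys/fs/bpf/myapp"))
 *		handle_error();
 *
 * Maps end up at /sys/fs/bpf/myapp/<map_name> (periods sanitized to '_'),
 * programs at /sys/fs/bpf/myapp/<prog_name>; on failure, anything pinned so
 * far is unpinned again before the error is returned, as implemented above.
 */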
8695
8696 int bpf_object__unpin(struct bpf_object *obj, const char *path)
8697 {
8698 int err;
8699
8700 err = bpf_object__unpin_programs(obj, path);
8701 if (err)
8702 return libbpf_err(err);
8703
8704 err = bpf_object__unpin_maps(obj, path);
8705 if (err)
8706 return libbpf_err(err);
8707
8708 return 0;
8709 }
8710
8711 static void bpf_map__destroy(struct bpf_map *map)
8712 {
8713 if (map->inner_map) {
8714 bpf_map__destroy(map->inner_map);
8715 zfree(&map->inner_map);
8716 }
8717
8718 zfree(&map->init_slots);
8719 map->init_slots_sz = 0;
8720
8721 if (map->mmaped) {
8722 size_t mmap_sz;
8723
8724 mmap_sz = bpf_map_mmap_sz(map->def.value_size, map->def.max_entries);
8725 munmap(map->mmaped, mmap_sz);
8726 map->mmaped = NULL;
8727 }
8728
8729 if (map->st_ops) {
8730 zfree(&map->st_ops->data);
8731 zfree(&map->st_ops->progs);
8732 zfree(&map->st_ops->kern_func_off);
8733 zfree(&map->st_ops);
8734 }
8735
8736 zfree(&map->name);
8737 zfree(&map->real_name);
8738 zfree(&map->pin_path);
8739
8740 if (map->fd >= 0)
8741 zclose(map->fd);
8742 }
8743
8744 void bpf_object__close(struct bpf_object *obj)
8745 {
8746 size_t i;
8747
8748 if (IS_ERR_OR_NULL(obj))
8749 return;
8750
8751 usdt_manager_free(obj->usdt_man);
8752 obj->usdt_man = NULL;
8753
8754 bpf_gen__free(obj->gen_loader);
8755 bpf_object__elf_finish(obj);
8756 bpf_object_unload(obj);
8757 btf__free(obj->btf);
8758 btf__free(obj->btf_vmlinux);
8759 btf_ext__free(obj->btf_ext);
8760
8761 for (i = 0; i < obj->nr_maps; i++)
8762 bpf_map__destroy(&obj->maps[i]);
8763
8764 zfree(&obj->btf_custom_path);
8765 zfree(&obj->kconfig);
8766
8767 for (i = 0; i < obj->nr_extern; i++)
8768 zfree(&obj->externs[i].essent_name);
8769
8770 zfree(&obj->externs);
8771 obj->nr_extern = 0;
8772
8773 zfree(&obj->maps);
8774 obj->nr_maps = 0;
8775
8776 if (obj->programs && obj->nr_programs) {
8777 for (i = 0; i < obj->nr_programs; i++)
8778 bpf_program__exit(&obj->programs[i]);
8779 }
8780 zfree(&obj->programs);
8781
8782 zfree(&obj->feat_cache);
8783 zfree(&obj->token_path);
8784 if (obj->token_fd > 0)
8785 close(obj->token_fd);
8786
8787 free(obj);
8788 }
8789
8790 const char *bpf_object__name(const struct bpf_object *obj)
8791 {
8792 return obj ? obj->name : libbpf_err_ptr(-EINVAL);
8793 }
8794
8795 unsigned int bpf_object__kversion(const struct bpf_object *obj)
8796 {
8797 return obj ? obj->kern_version : 0;
8798 }
8799
8800 struct btf *bpf_object__btf(const struct bpf_object *obj)
8801 {
8802 return obj ? obj->btf : NULL;
8803 }
8804
8805 int bpf_object__btf_fd(const struct bpf_object *obj)
8806 {
8807 return obj->btf ? btf__fd(obj->btf) : -1;
8808 }
8809
8810 int bpf_object__set_kversion(struct bpf_object *obj, __u32 kern_version)
8811 {
8812 if (obj->loaded)
8813 return libbpf_err(-EINVAL);
8814
8815 obj->kern_version = kern_version;
8816
8817 return 0;
8818 }
8819
8820 int bpf_object__gen_loader(struct bpf_object *obj, struct gen_loader_opts *opts)
8821 {
8822 struct bpf_gen *gen;
8823
8824 if (!opts)
8825 return -EFAULT;
8826 if (!OPTS_VALID(opts, gen_loader_opts))
8827 return -EINVAL;
8828 gen = calloc(sizeof(*gen), 1);
8829 if (!gen)
8830 return -ENOMEM;
8831 gen->opts = opts;
8832 obj->gen_loader = gen;
8833 return 0;
8834 }
8835
8836 static struct bpf_program *
8837 __bpf_program__iter(const struct bpf_program *p, const struct bpf_object *obj,
8838 bool forward)
8839 {
8840 size_t nr_programs = obj->nr_programs;
8841 ssize_t idx;
8842
8843 if (!nr_programs)
8844 return NULL;
8845
8846 if (!p)
8847 /* Iter from the beginning */
8848 return forward ? &obj->programs[0] :
8849 &obj->programs[nr_programs - 1];
8850
8851 if (p->obj != obj) {
8852 pr_warn("error: program handler doesn't match object\n");
8853 return errno = EINVAL, NULL;
8854 }
8855
8856 idx = (p - obj->programs) + (forward ? 1 : -1);
8857 if (idx >= obj->nr_programs || idx < 0)
8858 return NULL;
8859 return &obj->programs[idx];
8860 }
8861
8862 struct bpf_program *
8863 bpf_object__next_program(const struct bpf_object *obj, struct bpf_program *prev)
8864 {
8865 struct bpf_program *prog = prev;
8866
8867 do {
8868 prog = __bpf_program__iter(prog, obj, true);
8869 } while (prog && prog_is_subprog(obj, prog));
8870
8871 return prog;
8872 }
8873
8874 struct bpf_program *
8875 bpf_object__prev_program(const struct bpf_object *obj, struct bpf_program *next)
8876 {
8877 struct bpf_program *prog = next;
8878
8879 do {
8880 prog = __bpf_program__iter(prog, obj, false);
8881 } while (prog && prog_is_subprog(obj, prog));
8882
8883 return prog;
8884 }
8885
8886 void bpf_program__set_ifindex(struct bpf_program *prog, __u32 ifindex)
8887 {
8888 prog->prog_ifindex = ifindex;
8889 }
8890
8891 const char *bpf_program__name(const struct bpf_program *prog)
8892 {
8893 return prog->name;
8894 }
8895
8896 const char *bpf_program__section_name(const struct bpf_program *prog)
8897 {
8898 return prog->sec_name;
8899 }
8900
8901 bool bpf_program__autoload(const struct bpf_program *prog)
8902 {
8903 return prog->autoload;
8904 }
8905
8906 int bpf_program__set_autoload(struct bpf_program *prog, bool autoload)
8907 {
8908 if (prog->obj->loaded)
8909 return libbpf_err(-EINVAL);
8910
8911 prog->autoload = autoload;
8912 return 0;
8913 }
8914
8915 bool bpf_program__autoattach(const struct bpf_program *prog)
8916 {
8917 return prog->autoattach;
8918 }
8919
8920 void bpf_program__set_autoattach(struct bpf_program *prog, bool autoattach)
8921 {
8922 prog->autoattach = autoattach;
8923 }
8924
8925 const struct bpf_insn *bpf_program__insns(const struct bpf_program *prog)
8926 {
8927 return prog->insns;
8928 }
8929
8930 size_t bpf_program__insn_cnt(const struct bpf_program *prog)
8931 {
8932 return prog->insns_cnt;
8933 }
8934
8935 int bpf_program__set_insns(struct bpf_program *prog,
8936 struct bpf_insn *new_insns, size_t new_insn_cnt)
8937 {
8938 struct bpf_insn *insns;
8939
8940 if (prog->obj->loaded)
8941 return -EBUSY;
8942
8943 insns = libbpf_reallocarray(prog->insns, new_insn_cnt, sizeof(*insns));
8944 /* NULL is a valid return from reallocarray if the new count is zero */
8945 if (!insns && new_insn_cnt) {
8946 pr_warn("prog '%s': failed to realloc prog code\n", prog->name);
8947 return -ENOMEM;
8948 }
8949 memcpy(insns, new_insns, new_insn_cnt * sizeof(*insns));
8950
8951 prog->insns = insns;
8952 prog->insns_cnt = new_insn_cnt;
8953 return 0;
8954 }
8955
8956 int bpf_program__fd(const struct bpf_program *prog)
8957 {
8958 if (!prog)
8959 return libbpf_err(-EINVAL);
8960
8961 if (prog->fd < 0)
8962 return libbpf_err(-ENOENT);
8963
8964 return prog->fd;
8965 }
8966
8967 __alias(bpf_program__type)
8968 enum bpf_prog_type bpf_program__get_type(const struct bpf_program *prog);
8969
8970 enum bpf_prog_type bpf_program__type(const struct bpf_program *prog)
8971 {
8972 return prog->type;
8973 }
8974
8975 static size_t custom_sec_def_cnt;
8976 static struct bpf_sec_def *custom_sec_defs;
8977 static struct bpf_sec_def custom_fallback_def;
8978 static bool has_custom_fallback_def;
8979 static int last_custom_sec_def_handler_id;
8980
8981 int bpf_program__set_type(struct bpf_program *prog, enum bpf_prog_type type)
8982 {
8983 if (prog->obj->loaded)
8984 return libbpf_err(-EBUSY);
8985
8986 /* if type is not changed, do nothing */
8987 if (prog->type == type)
8988 return 0;
8989
8990 prog->type = type;
8991
8992 /* If a program type was changed, we need to reset associated SEC()
8993 * handler, as it will be invalid now. The only exception is a generic
8994 * fallback handler, which by definition is program type-agnostic and
8995 * is a catch-all custom handler, optionally set by the application,
8996 * so should be able to handle any type of BPF program.
8997 */
8998 if (prog->sec_def != &custom_fallback_def)
8999 prog->sec_def = NULL;
9000 return 0;
9001 }
9002
9003 __alias(bpf_program__expected_attach_type)
9004 enum bpf_attach_type bpf_program__get_expected_attach_type(const struct bpf_program *prog);
9005
9006 enum bpf_attach_type bpf_program__expected_attach_type(const struct bpf_program *prog)
9007 {
9008 return prog->expected_attach_type;
9009 }
9010
9011 int bpf_program__set_expected_attach_type(struct bpf_program *prog,
9012 enum bpf_attach_type type)
9013 {
9014 if (prog->obj->loaded)
9015 return libbpf_err(-EBUSY);
9016
9017 prog->expected_attach_type = type;
9018 return 0;
9019 }
9020
9021 __u32 bpf_program__flags(const struct bpf_program *prog)
9022 {
9023 return prog->prog_flags;
9024 }
9025
9026 int bpf_program__set_flags(struct bpf_program *prog, __u32 flags)
9027 {
9028 if (prog->obj->loaded)
9029 return libbpf_err(-EBUSY);
9030
9031 prog->prog_flags = flags;
9032 return 0;
9033 }
9034
9035 __u32 bpf_program__log_level(const struct bpf_program *prog)
9036 {
9037 return prog->log_level;
9038 }
9039
9040 int bpf_program__set_log_level(struct bpf_program *prog, __u32 log_level)
9041 {
9042 if (prog->obj->loaded)
9043 return libbpf_err(-EBUSY);
9044
9045 prog->log_level = log_level;
9046 return 0;
9047 }
9048
9049 const char *bpf_program__log_buf(const struct bpf_program *prog, size_t *log_size)
9050 {
9051 *log_size = prog->log_size;
9052 return prog->log_buf;
9053 }
9054
9055 int bpf_program__set_log_buf(struct bpf_program *prog, char *log_buf, size_t log_size)
9056 {
9057 if (log_size && !log_buf)
9058 return -EINVAL;
9059 if (prog->log_size > UINT_MAX)
9060 return -EINVAL;
9061 if (prog->obj->loaded)
9062 return -EBUSY;
9063
9064 prog->log_buf = log_buf;
9065 prog->log_size = log_size;
9066 return 0;
9067 }
9068
9069 #define SEC_DEF(sec_pfx, ptype, atype, flags, ...) { \
9070 .sec = (char *)sec_pfx, \
9071 .prog_type = BPF_PROG_TYPE_##ptype, \
9072 .expected_attach_type = atype, \
9073 .cookie = (long)(flags), \
9074 .prog_prepare_load_fn = libbpf_prepare_prog_load, \
9075 __VA_ARGS__ \
9076 }
9077
9078 static int attach_kprobe(const struct bpf_program *prog, long cookie, struct bpf_link **link);
9079 static int attach_uprobe(const struct bpf_program *prog, long cookie, struct bpf_link **link);
9080 static int attach_ksyscall(const struct bpf_program *prog, long cookie, struct bpf_link **link);
9081 static int attach_usdt(const struct bpf_program *prog, long cookie, struct bpf_link **link);
9082 static int attach_tp(const struct bpf_program *prog, long cookie, struct bpf_link **link);
9083 static int attach_raw_tp(const struct bpf_program *prog, long cookie, struct bpf_link **link);
9084 static int attach_trace(const struct bpf_program *prog, long cookie, struct bpf_link **link);
9085 static int attach_kprobe_multi(const struct bpf_program *prog, long cookie, struct bpf_link **link);
9086 static int attach_uprobe_multi(const struct bpf_program *prog, long cookie, struct bpf_link **link);
9087 static int attach_lsm(const struct bpf_program *prog, long cookie, struct bpf_link **link);
9088 static int attach_iter(const struct bpf_program *prog, long cookie, struct bpf_link **link);
9089
9090 static const struct bpf_sec_def section_defs[] = {
9091 SEC_DEF("socket", SOCKET_FILTER, 0, SEC_NONE),
9092 SEC_DEF("sk_reuseport/migrate", SK_REUSEPORT, BPF_SK_REUSEPORT_SELECT_OR_MIGRATE, SEC_ATTACHABLE),
9093 SEC_DEF("sk_reuseport", SK_REUSEPORT, BPF_SK_REUSEPORT_SELECT, SEC_ATTACHABLE),
9094 SEC_DEF("kprobe+", KPROBE, 0, SEC_NONE, attach_kprobe),
9095 SEC_DEF("uprobe+", KPROBE, 0, SEC_NONE, attach_uprobe),
9096 SEC_DEF("uprobe.s+", KPROBE, 0, SEC_SLEEPABLE, attach_uprobe),
9097 SEC_DEF("kretprobe+", KPROBE, 0, SEC_NONE, attach_kprobe),
9098 SEC_DEF("uretprobe+", KPROBE, 0, SEC_NONE, attach_uprobe),
9099 SEC_DEF("uretprobe.s+", KPROBE, 0, SEC_SLEEPABLE, attach_uprobe),
9100 SEC_DEF("kprobe.multi+", KPROBE, BPF_TRACE_KPROBE_MULTI, SEC_NONE, attach_kprobe_multi),
9101 SEC_DEF("kretprobe.multi+", KPROBE, BPF_TRACE_KPROBE_MULTI, SEC_NONE, attach_kprobe_multi),
9102 SEC_DEF("uprobe.multi+", KPROBE, BPF_TRACE_UPROBE_MULTI, SEC_NONE, attach_uprobe_multi),
9103 SEC_DEF("uretprobe.multi+", KPROBE, BPF_TRACE_UPROBE_MULTI, SEC_NONE, attach_uprobe_multi),
9104 SEC_DEF("uprobe.multi.s+", KPROBE, BPF_TRACE_UPROBE_MULTI, SEC_SLEEPABLE, attach_uprobe_multi),
9105 SEC_DEF("uretprobe.multi.s+", KPROBE, BPF_TRACE_UPROBE_MULTI, SEC_SLEEPABLE, attach_uprobe_multi),
9106 SEC_DEF("ksyscall+", KPROBE, 0, SEC_NONE, attach_ksyscall),
9107 SEC_DEF("kretsyscall+", KPROBE, 0, SEC_NONE, attach_ksyscall),
9108 SEC_DEF("usdt+", KPROBE, 0, SEC_USDT, attach_usdt),
9109 SEC_DEF("usdt.s+", KPROBE, 0, SEC_USDT | SEC_SLEEPABLE, attach_usdt),
9110 SEC_DEF("tc/ingress", SCHED_CLS, BPF_TCX_INGRESS, SEC_NONE), /* alias for tcx */
9111 SEC_DEF("tc/egress", SCHED_CLS, BPF_TCX_EGRESS, SEC_NONE), /* alias for tcx */
9112 SEC_DEF("tcx/ingress", SCHED_CLS, BPF_TCX_INGRESS, SEC_NONE),
9113 SEC_DEF("tcx/egress", SCHED_CLS, BPF_TCX_EGRESS, SEC_NONE),
9114 SEC_DEF("tc", SCHED_CLS, 0, SEC_NONE), /* deprecated / legacy, use tcx */
9115 SEC_DEF("classifier", SCHED_CLS, 0, SEC_NONE), /* deprecated / legacy, use tcx */
9116 SEC_DEF("action", SCHED_ACT, 0, SEC_NONE), /* deprecated / legacy, use tcx */
9117 SEC_DEF("netkit/primary", SCHED_CLS, BPF_NETKIT_PRIMARY, SEC_NONE),
9118 SEC_DEF("netkit/peer", SCHED_CLS, BPF_NETKIT_PEER, SEC_NONE),
9119 SEC_DEF("tracepoint+", TRACEPOINT, 0, SEC_NONE, attach_tp),
9120 SEC_DEF("tp+", TRACEPOINT, 0, SEC_NONE, attach_tp),
9121 SEC_DEF("raw_tracepoint+", RAW_TRACEPOINT, 0, SEC_NONE, attach_raw_tp),
9122 SEC_DEF("raw_tp+", RAW_TRACEPOINT, 0, SEC_NONE, attach_raw_tp),
9123 SEC_DEF("raw_tracepoint.w+", RAW_TRACEPOINT_WRITABLE, 0, SEC_NONE, attach_raw_tp),
9124 SEC_DEF("raw_tp.w+", RAW_TRACEPOINT_WRITABLE, 0, SEC_NONE, attach_raw_tp),
9125 SEC_DEF("tp_btf+", TRACING, BPF_TRACE_RAW_TP, SEC_ATTACH_BTF, attach_trace),
9126 SEC_DEF("fentry+", TRACING, BPF_TRACE_FENTRY, SEC_ATTACH_BTF, attach_trace),
9127 SEC_DEF("fmod_ret+", TRACING, BPF_MODIFY_RETURN, SEC_ATTACH_BTF, attach_trace),
9128 SEC_DEF("fexit+", TRACING, BPF_TRACE_FEXIT, SEC_ATTACH_BTF, attach_trace),
9129 SEC_DEF("fentry.s+", TRACING, BPF_TRACE_FENTRY, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_trace),
9130 SEC_DEF("fmod_ret.s+", TRACING, BPF_MODIFY_RETURN, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_trace),
9131 SEC_DEF("fexit.s+", TRACING, BPF_TRACE_FEXIT, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_trace),
9132 SEC_DEF("freplace+", EXT, 0, SEC_ATTACH_BTF, attach_trace),
9133 SEC_DEF("lsm+", LSM, BPF_LSM_MAC, SEC_ATTACH_BTF, attach_lsm),
9134 SEC_DEF("lsm.s+", LSM, BPF_LSM_MAC, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_lsm),
9135 SEC_DEF("lsm_cgroup+", LSM, BPF_LSM_CGROUP, SEC_ATTACH_BTF),
9136 SEC_DEF("iter+", TRACING, BPF_TRACE_ITER, SEC_ATTACH_BTF, attach_iter),
9137 SEC_DEF("iter.s+", TRACING, BPF_TRACE_ITER, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_iter),
9138 SEC_DEF("syscall", SYSCALL, 0, SEC_SLEEPABLE),
9139 SEC_DEF("xdp.frags/devmap", XDP, BPF_XDP_DEVMAP, SEC_XDP_FRAGS),
9140 SEC_DEF("xdp/devmap", XDP, BPF_XDP_DEVMAP, SEC_ATTACHABLE),
9141 SEC_DEF("xdp.frags/cpumap", XDP, BPF_XDP_CPUMAP, SEC_XDP_FRAGS),
9142 SEC_DEF("xdp/cpumap", XDP, BPF_XDP_CPUMAP, SEC_ATTACHABLE),
9143 SEC_DEF("xdp.frags", XDP, BPF_XDP, SEC_XDP_FRAGS),
9144 SEC_DEF("xdp", XDP, BPF_XDP, SEC_ATTACHABLE_OPT),
9145 SEC_DEF("perf_event", PERF_EVENT, 0, SEC_NONE),
9146 SEC_DEF("lwt_in", LWT_IN, 0, SEC_NONE),
9147 SEC_DEF("lwt_out", LWT_OUT, 0, SEC_NONE),
9148 SEC_DEF("lwt_xmit", LWT_XMIT, 0, SEC_NONE),
9149 SEC_DEF("lwt_seg6local", LWT_SEG6LOCAL, 0, SEC_NONE),
9150 SEC_DEF("sockops", SOCK_OPS, BPF_CGROUP_SOCK_OPS, SEC_ATTACHABLE_OPT),
9151 SEC_DEF("sk_skb/stream_parser", SK_SKB, BPF_SK_SKB_STREAM_PARSER, SEC_ATTACHABLE_OPT),
9152 SEC_DEF("sk_skb/stream_verdict",SK_SKB, BPF_SK_SKB_STREAM_VERDICT, SEC_ATTACHABLE_OPT),
9153 SEC_DEF("sk_skb", SK_SKB, 0, SEC_NONE),
9154 SEC_DEF("sk_msg", SK_MSG, BPF_SK_MSG_VERDICT, SEC_ATTACHABLE_OPT),
9155 SEC_DEF("lirc_mode2", LIRC_MODE2, BPF_LIRC_MODE2, SEC_ATTACHABLE_OPT),
9156 SEC_DEF("flow_dissector", FLOW_DISSECTOR, BPF_FLOW_DISSECTOR, SEC_ATTACHABLE_OPT),
9157 SEC_DEF("cgroup_skb/ingress", CGROUP_SKB, BPF_CGROUP_INET_INGRESS, SEC_ATTACHABLE_OPT),
9158 SEC_DEF("cgroup_skb/egress", CGROUP_SKB, BPF_CGROUP_INET_EGRESS, SEC_ATTACHABLE_OPT),
9159 SEC_DEF("cgroup/skb", CGROUP_SKB, 0, SEC_NONE),
9160 SEC_DEF("cgroup/sock_create", CGROUP_SOCK, BPF_CGROUP_INET_SOCK_CREATE, SEC_ATTACHABLE),
9161 SEC_DEF("cgroup/sock_release", CGROUP_SOCK, BPF_CGROUP_INET_SOCK_RELEASE, SEC_ATTACHABLE),
9162 SEC_DEF("cgroup/sock", CGROUP_SOCK, BPF_CGROUP_INET_SOCK_CREATE, SEC_ATTACHABLE_OPT),
9163 SEC_DEF("cgroup/post_bind4", CGROUP_SOCK, BPF_CGROUP_INET4_POST_BIND, SEC_ATTACHABLE),
9164 SEC_DEF("cgroup/post_bind6", CGROUP_SOCK, BPF_CGROUP_INET6_POST_BIND, SEC_ATTACHABLE),
9165 SEC_DEF("cgroup/bind4", CGROUP_SOCK_ADDR, BPF_CGROUP_INET4_BIND, SEC_ATTACHABLE),
9166 SEC_DEF("cgroup/bind6", CGROUP_SOCK_ADDR, BPF_CGROUP_INET6_BIND, SEC_ATTACHABLE),
9167 SEC_DEF("cgroup/connect4", CGROUP_SOCK_ADDR, BPF_CGROUP_INET4_CONNECT, SEC_ATTACHABLE),
9168 SEC_DEF("cgroup/connect6", CGROUP_SOCK_ADDR, BPF_CGROUP_INET6_CONNECT, SEC_ATTACHABLE),
9169 SEC_DEF("cgroup/connect_unix", CGROUP_SOCK_ADDR, BPF_CGROUP_UNIX_CONNECT, SEC_ATTACHABLE),
9170 SEC_DEF("cgroup/sendmsg4", CGROUP_SOCK_ADDR, BPF_CGROUP_UDP4_SENDMSG, SEC_ATTACHABLE),
9171 SEC_DEF("cgroup/sendmsg6", CGROUP_SOCK_ADDR, BPF_CGROUP_UDP6_SENDMSG, SEC_ATTACHABLE),
9172 SEC_DEF("cgroup/sendmsg_unix", CGROUP_SOCK_ADDR, BPF_CGROUP_UNIX_SENDMSG, SEC_ATTACHABLE),
9173 SEC_DEF("cgroup/recvmsg4", CGROUP_SOCK_ADDR, BPF_CGROUP_UDP4_RECVMSG, SEC_ATTACHABLE),
9174 SEC_DEF("cgroup/recvmsg6", CGROUP_SOCK_ADDR, BPF_CGROUP_UDP6_RECVMSG, SEC_ATTACHABLE),
9175 SEC_DEF("cgroup/recvmsg_unix", CGROUP_SOCK_ADDR, BPF_CGROUP_UNIX_RECVMSG, SEC_ATTACHABLE),
9176 SEC_DEF("cgroup/getpeername4", CGROUP_SOCK_ADDR, BPF_CGROUP_INET4_GETPEERNAME, SEC_ATTACHABLE),
9177 SEC_DEF("cgroup/getpeername6", CGROUP_SOCK_ADDR, BPF_CGROUP_INET6_GETPEERNAME, SEC_ATTACHABLE),
9178 SEC_DEF("cgroup/getpeername_unix", CGROUP_SOCK_ADDR, BPF_CGROUP_UNIX_GETPEERNAME, SEC_ATTACHABLE),
9179 SEC_DEF("cgroup/getsockname4", CGROUP_SOCK_ADDR, BPF_CGROUP_INET4_GETSOCKNAME, SEC_ATTACHABLE),
9180 SEC_DEF("cgroup/getsockname6", CGROUP_SOCK_ADDR, BPF_CGROUP_INET6_GETSOCKNAME, SEC_ATTACHABLE),
9181 SEC_DEF("cgroup/getsockname_unix", CGROUP_SOCK_ADDR, BPF_CGROUP_UNIX_GETSOCKNAME, SEC_ATTACHABLE),
9182 SEC_DEF("cgroup/sysctl", CGROUP_SYSCTL, BPF_CGROUP_SYSCTL, SEC_ATTACHABLE),
9183 SEC_DEF("cgroup/getsockopt", CGROUP_SOCKOPT, BPF_CGROUP_GETSOCKOPT, SEC_ATTACHABLE),
9184 SEC_DEF("cgroup/setsockopt", CGROUP_SOCKOPT, BPF_CGROUP_SETSOCKOPT, SEC_ATTACHABLE),
9185 SEC_DEF("cgroup/dev", CGROUP_DEVICE, BPF_CGROUP_DEVICE, SEC_ATTACHABLE_OPT),
9186 SEC_DEF("struct_ops+", STRUCT_OPS, 0, SEC_NONE),
9187 SEC_DEF("struct_ops.s+", STRUCT_OPS, 0, SEC_SLEEPABLE),
9188 SEC_DEF("sk_lookup", SK_LOOKUP, BPF_SK_LOOKUP, SEC_ATTACHABLE),
9189 SEC_DEF("netfilter", NETFILTER, BPF_NETFILTER, SEC_NONE),
9190 };
9191
9192 int libbpf_register_prog_handler(const char *sec,
9193 enum bpf_prog_type prog_type,
9194 enum bpf_attach_type exp_attach_type,
9195 const struct libbpf_prog_handler_opts *opts)
9196 {
9197 struct bpf_sec_def *sec_def;
9198
9199 if (!OPTS_VALID(opts, libbpf_prog_handler_opts))
9200 return libbpf_err(-EINVAL);
9201
9202 if (last_custom_sec_def_handler_id == INT_MAX) /* prevent overflow */
9203 return libbpf_err(-E2BIG);
9204
9205 if (sec) {
9206 sec_def = libbpf_reallocarray(custom_sec_defs, custom_sec_def_cnt + 1,
9207 sizeof(*sec_def));
9208 if (!sec_def)
9209 return libbpf_err(-ENOMEM);
9210
9211 custom_sec_defs = sec_def;
9212 sec_def = &custom_sec_defs[custom_sec_def_cnt];
9213 } else {
9214 if (has_custom_fallback_def)
9215 return libbpf_err(-EBUSY);
9216
9217 sec_def = &custom_fallback_def;
9218 }
9219
9220 sec_def->sec = sec ? strdup(sec) : NULL;
9221 if (sec && !sec_def->sec)
9222 return libbpf_err(-ENOMEM);
9223
9224 sec_def->prog_type = prog_type;
9225 sec_def->expected_attach_type = exp_attach_type;
9226 sec_def->cookie = OPTS_GET(opts, cookie, 0);
9227
9228 sec_def->prog_setup_fn = OPTS_GET(opts, prog_setup_fn, NULL);
9229 sec_def->prog_prepare_load_fn = OPTS_GET(opts, prog_prepare_load_fn, NULL);
9230 sec_def->prog_attach_fn = OPTS_GET(opts, prog_attach_fn, NULL);
9231
9232 sec_def->handler_id = ++last_custom_sec_def_handler_id;
9233
9234 if (sec)
9235 custom_sec_def_cnt++;
9236 else
9237 has_custom_fallback_def = true;
9238
9239 return sec_def->handler_id;
9240 }
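/* Editorial usage sketch for libbpf_register_prog_handler(): the section name
 * "my_sec+" and the cookie value are hypothetical, not libbpf defaults.
 *
 *	LIBBPF_OPTS(libbpf_prog_handler_opts, handler_opts, .cookie = 1);
 *	int id;
 *
 *	id = libbpf_register_prog_handler("my_sec+", BPF_PROG_TYPE_KPROBE,
 *					  0, &handler_opts);
 *	if (id < 0)
 *		... handle error ...
 *	...
 *	libbpf_unregister_prog_handler(id);
 *
 * Passing sec == NULL registers the single custom fallback handler used when
 * no other section definition matches (see has_custom_fallback_def).
 */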
9241
9242 int libbpf_unregister_prog_handler(int handler_id)
9243 {
9244 struct bpf_sec_def *sec_defs;
9245 int i;
9246
9247 if (handler_id <= 0)
9248 return libbpf_err(-EINVAL);
9249
9250 if (has_custom_fallback_def && custom_fallback_def.handler_id == handler_id) {
9251 memset(&custom_fallback_def, 0, sizeof(custom_fallback_def));
9252 has_custom_fallback_def = false;
9253 return 0;
9254 }
9255
9256 for (i = 0; i < custom_sec_def_cnt; i++) {
9257 if (custom_sec_defs[i].handler_id == handler_id)
9258 break;
9259 }
9260
9261 if (i == custom_sec_def_cnt)
9262 return libbpf_err(-ENOENT);
9263
9264 free(custom_sec_defs[i].sec);
9265 for (i = i + 1; i < custom_sec_def_cnt; i++)
9266 custom_sec_defs[i - 1] = custom_sec_defs[i];
9267 custom_sec_def_cnt--;
9268
9269 /* try to shrink the array, but it's ok if we couldn't */
9270 sec_defs = libbpf_reallocarray(custom_sec_defs, custom_sec_def_cnt, sizeof(*sec_defs));
9271 /* if new count is zero, reallocarray can return a valid NULL result;
9272 * in this case the previous pointer will be freed, so we *have to*
9273 * reassign old pointer to the new value (even if it's NULL)
9274 */
9275 if (sec_defs || custom_sec_def_cnt == 0)
9276 custom_sec_defs = sec_defs;
9277
9278 return 0;
9279 }
9280
9281 static bool sec_def_matches(const struct bpf_sec_def *sec_def, const char *sec_name)
9282 {
9283 size_t len = strlen(sec_def->sec);
9284
9285 /* "type/" always has to have proper SEC("type/extras") form */
9286 if (sec_def->sec[len - 1] == '/') {
9287 if (str_has_pfx(sec_name, sec_def->sec))
9288 return true;
9289 return false;
9290 }
9291
9292 /* "type+" means it can be either exact SEC("type") or
9293 * well-formed SEC("type/extras") with proper '/' separator
9294 */
9295 if (sec_def->sec[len - 1] == '+') {
9296 len--;
9297 /* not even a prefix */
9298 if (strncmp(sec_name, sec_def->sec, len) != 0)
9299 return false;
9300 /* exact match or has '/' separator */
9301 if (sec_name[len] == '\0' || sec_name[len] == '/')
9302 return true;
9303 return false;
9304 }
9305
9306 return strcmp(sec_name, sec_def->sec) == 0;
9307 }
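/* Editorial examples of the matching rules above (the "do_unlinkat" extras
 * below are illustrative):
 *   - a definition ending in '/' (e.g. "type/") matches only "type/extras";
 *   - "fentry+" matches both "fentry" and "fentry/do_unlinkat", but not
 *     "fentryfoo";
 *   - "xdp" (no suffix) matches only the exact string "xdp".
 */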
9308
9309 static const struct bpf_sec_def *find_sec_def(const char *sec_name)
9310 {
9311 const struct bpf_sec_def *sec_def;
9312 int i, n;
9313
9314 n = custom_sec_def_cnt;
9315 for (i = 0; i < n; i++) {
9316 sec_def = &custom_sec_defs[i];
9317 if (sec_def_matches(sec_def, sec_name))
9318 return sec_def;
9319 }
9320
9321 n = ARRAY_SIZE(section_defs);
9322 for (i = 0; i < n; i++) {
9323 sec_def = &section_defs[i];
9324 if (sec_def_matches(sec_def, sec_name))
9325 return sec_def;
9326 }
9327
9328 if (has_custom_fallback_def)
9329 return &custom_fallback_def;
9330
9331 return NULL;
9332 }
9333
9334 #define MAX_TYPE_NAME_SIZE 32
9335
9336 static char *libbpf_get_type_names(bool attach_type)
9337 {
9338 int i, len = ARRAY_SIZE(section_defs) * MAX_TYPE_NAME_SIZE;
9339 char *buf;
9340
9341 buf = malloc(len);
9342 if (!buf)
9343 return NULL;
9344
9345 buf[0] = '\0';
9346 /* Fill buf with all available section names */
9347 for (i = 0; i < ARRAY_SIZE(section_defs); i++) {
9348 const struct bpf_sec_def *sec_def = &section_defs[i];
9349
9350 if (attach_type) {
9351 if (sec_def->prog_prepare_load_fn != libbpf_prepare_prog_load)
9352 continue;
9353
9354 if (!(sec_def->cookie & SEC_ATTACHABLE))
9355 continue;
9356 }
9357
9358 if (strlen(buf) + strlen(section_defs[i].sec) + 2 > len) {
9359 free(buf);
9360 return NULL;
9361 }
9362 strcat(buf, " ");
9363 strcat(buf, section_defs[i].sec);
9364 }
9365
9366 return buf;
9367 }
9368
9369 int libbpf_prog_type_by_name(const char *name, enum bpf_prog_type *prog_type,
9370 enum bpf_attach_type *expected_attach_type)
9371 {
9372 const struct bpf_sec_def *sec_def;
9373 char *type_names;
9374
9375 if (!name)
9376 return libbpf_err(-EINVAL);
9377
9378 sec_def = find_sec_def(name);
9379 if (sec_def) {
9380 *prog_type = sec_def->prog_type;
9381 *expected_attach_type = sec_def->expected_attach_type;
9382 return 0;
9383 }
9384
9385 pr_debug("failed to guess program type from ELF section '%s'\n", name);
9386 type_names = libbpf_get_type_names(false);
9387 if (type_names != NULL) {
9388 pr_debug("supported section(type) names are:%s\n", type_names);
9389 free(type_names);
9390 }
9391
9392 return libbpf_err(-ESRCH);
9393 }
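/* Editorial usage sketch (the section name below is hypothetical):
 *
 *	enum bpf_prog_type prog_type;
 *	enum bpf_attach_type attach_type;
 *
 *	if (!libbpf_prog_type_by_name("fentry/do_unlinkat", &prog_type, &attach_type)) {
 *		... prog_type == BPF_PROG_TYPE_TRACING,
 *		    attach_type == BPF_TRACE_FENTRY ...
 *	}
 *
 * On no match the function returns -ESRCH and logs the list of supported
 * section names at debug level.
 */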
9394
9395 const char *libbpf_bpf_attach_type_str(enum bpf_attach_type t)
9396 {
9397 if (t < 0 || t >= ARRAY_SIZE(attach_type_name))
9398 return NULL;
9399
9400 return attach_type_name[t];
9401 }
9402
9403 const char *libbpf_bpf_link_type_str(enum bpf_link_type t)
9404 {
9405 if (t < 0 || t >= ARRAY_SIZE(link_type_name))
9406 return NULL;
9407
9408 return link_type_name[t];
9409 }
9410
9411 const char *libbpf_bpf_map_type_str(enum bpf_map_type t)
9412 {
9413 if (t < 0 || t >= ARRAY_SIZE(map_type_name))
9414 return NULL;
9415
9416 return map_type_name[t];
9417 }
9418
9419 const char *libbpf_bpf_prog_type_str(enum bpf_prog_type t)
9420 {
9421 if (t < 0 || t >= ARRAY_SIZE(prog_type_name))
9422 return NULL;
9423
9424 return prog_type_name[t];
9425 }
9426
9427 static struct bpf_map *find_struct_ops_map_by_offset(struct bpf_object *obj,
9428 int sec_idx,
9429 size_t offset)
9430 {
9431 struct bpf_map *map;
9432 size_t i;
9433
9434 for (i = 0; i < obj->nr_maps; i++) {
9435 map = &obj->maps[i];
9436 if (!bpf_map__is_struct_ops(map))
9437 continue;
9438 if (map->sec_idx == sec_idx &&
9439 map->sec_offset <= offset &&
9440 offset - map->sec_offset < map->def.value_size)
9441 return map;
9442 }
9443
9444 return NULL;
9445 }
9446
9447 /* Collect the relocations from ELF, populate st_ops->progs[], and update
9448 * st_ops->data for the shadow type.
9449 */
9450 static int bpf_object__collect_st_ops_relos(struct bpf_object *obj,
9451 Elf64_Shdr *shdr, Elf_Data *data)
9452 {
9453 const struct btf_member *member;
9454 struct bpf_struct_ops *st_ops;
9455 struct bpf_program *prog;
9456 unsigned int shdr_idx;
9457 const struct btf *btf;
9458 struct bpf_map *map;
9459 unsigned int moff, insn_idx;
9460 const char *name;
9461 __u32 member_idx;
9462 Elf64_Sym *sym;
9463 Elf64_Rel *rel;
9464 int i, nrels;
9465
9466 btf = obj->btf;
9467 nrels = shdr->sh_size / shdr->sh_entsize;
9468 for (i = 0; i < nrels; i++) {
9469 rel = elf_rel_by_idx(data, i);
9470 if (!rel) {
9471 pr_warn("struct_ops reloc: failed to get %d reloc\n", i);
9472 return -LIBBPF_ERRNO__FORMAT;
9473 }
9474
9475 sym = elf_sym_by_idx(obj, ELF64_R_SYM(rel->r_info));
9476 if (!sym) {
9477 pr_warn("struct_ops reloc: symbol %zx not found\n",
9478 (size_t)ELF64_R_SYM(rel->r_info));
9479 return -LIBBPF_ERRNO__FORMAT;
9480 }
9481
9482 name = elf_sym_str(obj, sym->st_name) ?: "<?>";
9483 map = find_struct_ops_map_by_offset(obj, shdr->sh_info, rel->r_offset);
9484 if (!map) {
9485 pr_warn("struct_ops reloc: cannot find map at rel->r_offset %zu\n",
9486 (size_t)rel->r_offset);
9487 return -EINVAL;
9488 }
9489
9490 moff = rel->r_offset - map->sec_offset;
9491 shdr_idx = sym->st_shndx;
9492 st_ops = map->st_ops;
9493 pr_debug("struct_ops reloc %s: for %lld value %lld shdr_idx %u rel->r_offset %zu map->sec_offset %zu name %d (\'%s\')\n",
9494 map->name,
9495 (long long)(rel->r_info >> 32),
9496 (long long)sym->st_value,
9497 shdr_idx, (size_t)rel->r_offset,
9498 map->sec_offset, sym->st_name, name);
9499
9500 if (shdr_idx >= SHN_LORESERVE) {
9501 pr_warn("struct_ops reloc %s: rel->r_offset %zu shdr_idx %u unsupported non-static function\n",
9502 map->name, (size_t)rel->r_offset, shdr_idx);
9503 return -LIBBPF_ERRNO__RELOC;
9504 }
9505 if (sym->st_value % BPF_INSN_SZ) {
9506 pr_warn("struct_ops reloc %s: invalid target program offset %llu\n",
9507 map->name, (unsigned long long)sym->st_value);
9508 return -LIBBPF_ERRNO__FORMAT;
9509 }
9510 insn_idx = sym->st_value / BPF_INSN_SZ;
9511
9512 member = find_member_by_offset(st_ops->type, moff * 8);
9513 if (!member) {
9514 pr_warn("struct_ops reloc %s: cannot find member at moff %u\n",
9515 map->name, moff);
9516 return -EINVAL;
9517 }
9518 member_idx = member - btf_members(st_ops->type);
9519 name = btf__name_by_offset(btf, member->name_off);
9520
9521 if (!resolve_func_ptr(btf, member->type, NULL)) {
9522 pr_warn("struct_ops reloc %s: cannot relocate non func ptr %s\n",
9523 map->name, name);
9524 return -EINVAL;
9525 }
9526
9527 prog = find_prog_by_sec_insn(obj, shdr_idx, insn_idx);
9528 if (!prog) {
9529 pr_warn("struct_ops reloc %s: cannot find prog at shdr_idx %u to relocate func ptr %s\n",
9530 map->name, shdr_idx, name);
9531 return -EINVAL;
9532 }
9533
9534 /* prevent the use of BPF prog with invalid type */
9535 if (prog->type != BPF_PROG_TYPE_STRUCT_OPS) {
9536 pr_warn("struct_ops reloc %s: prog %s is not struct_ops BPF program\n",
9537 map->name, prog->name);
9538 return -EINVAL;
9539 }
9540
9541 st_ops->progs[member_idx] = prog;
9542
9543 /* st_ops->data will be exposed to users, being returned by
9544 * bpf_map__initial_value() as a pointer to the shadow
9545 * type. All function pointers in the original struct type
9546 * should be converted to a pointer to struct bpf_program
9547 * in the shadow type.
9548 */
9549 *((struct bpf_program **)(st_ops->data + moff)) = prog;
9550 }
9551
9552 return 0;
9553 }
9554
9555 #define BTF_TRACE_PREFIX "btf_trace_"
9556 #define BTF_LSM_PREFIX "bpf_lsm_"
9557 #define BTF_ITER_PREFIX "bpf_iter_"
9558 #define BTF_MAX_NAME_SIZE 128
9559
9560 void btf_get_kernel_prefix_kind(enum bpf_attach_type attach_type,
9561 const char **prefix, int *kind)
9562 {
9563 switch (attach_type) {
9564 case BPF_TRACE_RAW_TP:
9565 *prefix = BTF_TRACE_PREFIX;
9566 *kind = BTF_KIND_TYPEDEF;
9567 break;
9568 case BPF_LSM_MAC:
9569 case BPF_LSM_CGROUP:
9570 *prefix = BTF_LSM_PREFIX;
9571 *kind = BTF_KIND_FUNC;
9572 break;
9573 case BPF_TRACE_ITER:
9574 *prefix = BTF_ITER_PREFIX;
9575 *kind = BTF_KIND_FUNC;
9576 break;
9577 default:
9578 *prefix = "";
9579 *kind = BTF_KIND_FUNC;
9580 }
9581 }
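/* Editorial examples of the prefix/kind pairs above (attach names are
 * hypothetical):
 *   BPF_TRACE_RAW_TP + "sched_switch" -> TYPEDEF "btf_trace_sched_switch"
 *   BPF_LSM_MAC      + "file_open"    -> FUNC    "bpf_lsm_file_open"
 *   BPF_TRACE_ITER   + "task"         -> FUNC    "bpf_iter_task"
 *   anything else    + "do_unlinkat"  -> FUNC    "do_unlinkat" (no prefix)
 * The concatenated name is then resolved with btf__find_by_name_kind() in
 * find_btf_by_prefix_kind() below.
 */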
9582
9583 static int find_btf_by_prefix_kind(const struct btf *btf, const char *prefix,
9584 const char *name, __u32 kind)
9585 {
9586 char btf_type_name[BTF_MAX_NAME_SIZE];
9587 int ret;
9588
9589 ret = snprintf(btf_type_name, sizeof(btf_type_name),
9590 "%s%s", prefix, name);
9591 /* snprintf returns the number of characters written excluding the
9592 * terminating null. So, if >= BTF_MAX_NAME_SIZE characters are written, it
9593 * indicates truncation.
9594 */
9595 if (ret < 0 || ret >= sizeof(btf_type_name))
9596 return -ENAMETOOLONG;
9597 return btf__find_by_name_kind(btf, btf_type_name, kind);
9598 }
9599
9600 static inline int find_attach_btf_id(struct btf *btf, const char *name,
9601 enum bpf_attach_type attach_type)
9602 {
9603 const char *prefix;
9604 int kind;
9605
9606 btf_get_kernel_prefix_kind(attach_type, &prefix, &kind);
9607 return find_btf_by_prefix_kind(btf, prefix, name, kind);
9608 }
9609
9610 int libbpf_find_vmlinux_btf_id(const char *name,
9611 enum bpf_attach_type attach_type)
9612 {
9613 struct btf *btf;
9614 int err;
9615
9616 btf = btf__load_vmlinux_btf();
9617 err = libbpf_get_error(btf);
9618 if (err) {
9619 pr_warn("vmlinux BTF is not found\n");
9620 return libbpf_err(err);
9621 }
9622
9623 err = find_attach_btf_id(btf, name, attach_type);
9624 if (err <= 0)
9625 pr_warn("%s is not found in vmlinux BTF\n", name);
9626
9627 btf__free(btf);
9628 return libbpf_err(err);
9629 }
9630
9631 static int libbpf_find_prog_btf_id(const char *name, __u32 attach_prog_fd)
9632 {
9633 struct bpf_prog_info info;
9634 __u32 info_len = sizeof(info);
9635 struct btf *btf;
9636 int err;
9637
9638 memset(&info, 0, info_len);
9639 err = bpf_prog_get_info_by_fd(attach_prog_fd, &info, &info_len);
9640 if (err) {
9641 pr_warn("failed bpf_prog_get_info_by_fd for FD %d: %d\n",
9642 attach_prog_fd, err);
9643 return err;
9644 }
9645
9646 err = -EINVAL;
9647 if (!info.btf_id) {
9648 pr_warn("The target program doesn't have BTF\n");
9649 goto out;
9650 }
9651 btf = btf__load_from_kernel_by_id(info.btf_id);
9652 err = libbpf_get_error(btf);
9653 if (err) {
9654 pr_warn("Failed to get BTF %d of the program: %d\n", info.btf_id, err);
9655 goto out;
9656 }
9657 err = btf__find_by_name_kind(btf, name, BTF_KIND_FUNC);
9658 btf__free(btf);
9659 if (err <= 0) {
9660 pr_warn("%s is not found in prog's BTF\n", name);
9661 goto out;
9662 }
9663 out:
9664 return err;
9665 }
9666
9667 static int find_kernel_btf_id(struct bpf_object *obj, const char *attach_name,
9668 enum bpf_attach_type attach_type,
9669 int *btf_obj_fd, int *btf_type_id)
9670 {
9671 int ret, i;
9672
9673 ret = find_attach_btf_id(obj->btf_vmlinux, attach_name, attach_type);
9674 if (ret > 0) {
9675 *btf_obj_fd = 0; /* vmlinux BTF */
9676 *btf_type_id = ret;
9677 return 0;
9678 }
9679 if (ret != -ENOENT)
9680 return ret;
9681
9682 ret = load_module_btfs(obj);
9683 if (ret)
9684 return ret;
9685
9686 for (i = 0; i < obj->btf_module_cnt; i++) {
9687 const struct module_btf *mod = &obj->btf_modules[i];
9688
9689 ret = find_attach_btf_id(mod->btf, attach_name, attach_type);
9690 if (ret > 0) {
9691 *btf_obj_fd = mod->fd;
9692 *btf_type_id = ret;
9693 return 0;
9694 }
9695 if (ret == -ENOENT)
9696 continue;
9697
9698 return ret;
9699 }
9700
9701 return -ESRCH;
9702 }
9703
9704 static int libbpf_find_attach_btf_id(struct bpf_program *prog, const char *attach_name,
9705 int *btf_obj_fd, int *btf_type_id)
9706 {
9707 enum bpf_attach_type attach_type = prog->expected_attach_type;
9708 __u32 attach_prog_fd = prog->attach_prog_fd;
9709 int err = 0;
9710
9711 /* BPF program's BTF ID */
9712 if (prog->type == BPF_PROG_TYPE_EXT || attach_prog_fd) {
9713 if (!attach_prog_fd) {
9714 pr_warn("prog '%s': attach program FD is not set\n", prog->name);
9715 return -EINVAL;
9716 }
9717 err = libbpf_find_prog_btf_id(attach_name, attach_prog_fd);
9718 if (err < 0) {
9719 pr_warn("prog '%s': failed to find BPF program (FD %d) BTF ID for '%s': %d\n",
9720 prog->name, attach_prog_fd, attach_name, err);
9721 return err;
9722 }
9723 *btf_obj_fd = 0;
9724 *btf_type_id = err;
9725 return 0;
9726 }
9727
9728 /* kernel/module BTF ID */
9729 if (prog->obj->gen_loader) {
9730 bpf_gen__record_attach_target(prog->obj->gen_loader, attach_name, attach_type);
9731 *btf_obj_fd = 0;
9732 *btf_type_id = 1;
9733 } else {
9734 err = find_kernel_btf_id(prog->obj, attach_name,
9735 attach_type, btf_obj_fd,
9736 btf_type_id);
9737 }
9738 if (err) {
9739 pr_warn("prog '%s': failed to find kernel BTF type ID of '%s': %d\n",
9740 prog->name, attach_name, err);
9741 return err;
9742 }
9743 return 0;
9744 }
9745
9746 int libbpf_attach_type_by_name(const char *name,
9747 enum bpf_attach_type *attach_type)
9748 {
9749 char *type_names;
9750 const struct bpf_sec_def *sec_def;
9751
9752 if (!name)
9753 return libbpf_err(-EINVAL);
9754
9755 sec_def = find_sec_def(name);
9756 if (!sec_def) {
9757 pr_debug("failed to guess attach type based on ELF section name '%s'\n", name);
9758 type_names = libbpf_get_type_names(true);
9759 if (type_names != NULL) {
9760 pr_debug("attachable section(type) names are:%s\n", type_names);
9761 free(type_names);
9762 }
9763
9764 return libbpf_err(-EINVAL);
9765 }
9766
9767 if (sec_def->prog_prepare_load_fn != libbpf_prepare_prog_load)
9768 return libbpf_err(-EINVAL);
9769 if (!(sec_def->cookie & SEC_ATTACHABLE))
9770 return libbpf_err(-EINVAL);
9771
9772 *attach_type = sec_def->expected_attach_type;
9773 return 0;
9774 }
9775
9776 int bpf_map__fd(const struct bpf_map *map)
9777 {
9778 if (!map)
9779 return libbpf_err(-EINVAL);
9780 if (!map_is_created(map))
9781 return -1;
9782 return map->fd;
9783 }
9784
9785 static bool map_uses_real_name(const struct bpf_map *map)
9786 {
9787 /* Since libbpf started to support custom .data.* and .rodata.* maps,
9788 * their user-visible name differs from kernel-visible name. Users see
9789 * such map's corresponding ELF section name as a map name.
9790 * This check distinguishes .data/.rodata from .data.* and .rodata.*
9791 * maps to know which name has to be returned to the user.
9792 */
9793 if (map->libbpf_type == LIBBPF_MAP_DATA && strcmp(map->real_name, DATA_SEC) != 0)
9794 return true;
9795 if (map->libbpf_type == LIBBPF_MAP_RODATA && strcmp(map->real_name, RODATA_SEC) != 0)
9796 return true;
9797 return false;
9798 }
9799
9800 const char *bpf_map__name(const struct bpf_map *map)
9801 {
9802 if (!map)
9803 return NULL;
9804
9805 if (map_uses_real_name(map))
9806 return map->real_name;
9807
9808 return map->name;
9809 }
9810
9811 enum bpf_map_type bpf_map__type(const struct bpf_map *map)
9812 {
9813 return map->def.type;
9814 }
9815
9816 int bpf_map__set_type(struct bpf_map *map, enum bpf_map_type type)
9817 {
9818 if (map_is_created(map))
9819 return libbpf_err(-EBUSY);
9820 map->def.type = type;
9821 return 0;
9822 }
9823
9824 __u32 bpf_map__map_flags(const struct bpf_map *map)
9825 {
9826 return map->def.map_flags;
9827 }
9828
9829 int bpf_map__set_map_flags(struct bpf_map *map, __u32 flags)
9830 {
9831 if (map_is_created(map))
9832 return libbpf_err(-EBUSY);
9833 map->def.map_flags = flags;
9834 return 0;
9835 }
9836
9837 __u64 bpf_map__map_extra(const struct bpf_map *map)
9838 {
9839 return map->map_extra;
9840 }
9841
9842 int bpf_map__set_map_extra(struct bpf_map *map, __u64 map_extra)
9843 {
9844 if (map_is_created(map))
9845 return libbpf_err(-EBUSY);
9846 map->map_extra = map_extra;
9847 return 0;
9848 }
9849
9850 __u32 bpf_map__numa_node(const struct bpf_map *map)
9851 {
9852 return map->numa_node;
9853 }
9854
9855 int bpf_map__set_numa_node(struct bpf_map *map, __u32 numa_node)
9856 {
9857 if (map_is_created(map))
9858 return libbpf_err(-EBUSY);
9859 map->numa_node = numa_node;
9860 return 0;
9861 }
9862
9863 __u32 bpf_map__key_size(const struct bpf_map *map)
9864 {
9865 return map->def.key_size;
9866 }
9867
9868 int bpf_map__set_key_size(struct bpf_map *map, __u32 size)
9869 {
9870 if (map_is_created(map))
9871 return libbpf_err(-EBUSY);
9872 map->def.key_size = size;
9873 return 0;
9874 }
9875
9876 __u32 bpf_map__value_size(const struct bpf_map *map)
9877 {
9878 return map->def.value_size;
9879 }
9880
9881 static int map_btf_datasec_resize(struct bpf_map *map, __u32 size)
9882 {
9883 struct btf *btf;
9884 struct btf_type *datasec_type, *var_type;
9885 struct btf_var_secinfo *var;
9886 const struct btf_type *array_type;
9887 const struct btf_array *array;
9888 int vlen, element_sz, new_array_id;
9889 __u32 nr_elements;
9890
9891 /* check btf existence */
9892 btf = bpf_object__btf(map->obj);
9893 if (!btf)
9894 return -ENOENT;
9895
9896 /* verify map is datasec */
9897 datasec_type = btf_type_by_id(btf, bpf_map__btf_value_type_id(map));
9898 if (!btf_is_datasec(datasec_type)) {
9899 pr_warn("map '%s': cannot be resized, map value type is not a datasec\n",
9900 bpf_map__name(map));
9901 return -EINVAL;
9902 }
9903
9904 /* verify datasec has at least one var */
9905 vlen = btf_vlen(datasec_type);
9906 if (vlen == 0) {
9907 pr_warn("map '%s': cannot be resized, map value datasec is empty\n",
9908 bpf_map__name(map));
9909 return -EINVAL;
9910 }
9911
9912 /* verify last var in the datasec is an array */
9913 var = &btf_var_secinfos(datasec_type)[vlen - 1];
9914 var_type = btf_type_by_id(btf, var->type);
9915 array_type = skip_mods_and_typedefs(btf, var_type->type, NULL);
9916 if (!btf_is_array(array_type)) {
9917 pr_warn("map '%s': cannot be resized, last var must be an array\n",
9918 bpf_map__name(map));
9919 return -EINVAL;
9920 }
9921
9922 /* verify request size aligns with array */
9923 array = btf_array(array_type);
9924 element_sz = btf__resolve_size(btf, array->type);
9925 if (element_sz <= 0 || (size - var->offset) % element_sz != 0) {
9926 pr_warn("map '%s': cannot be resized, element size (%d) doesn't align with new total size (%u)\n",
9927 bpf_map__name(map), element_sz, size);
9928 return -EINVAL;
9929 }
9930
9931 /* create a new array based on the existing array, but with new length */
9932 nr_elements = (size - var->offset) / element_sz;
9933 new_array_id = btf__add_array(btf, array->index_type, array->type, nr_elements);
9934 if (new_array_id < 0)
9935 return new_array_id;
9936
9937 /* adding a new btf type invalidates existing pointers to btf objects,
9938 * so refresh pointers before proceeding
9939 */
9940 datasec_type = btf_type_by_id(btf, map->btf_value_type_id);
9941 var = &btf_var_secinfos(datasec_type)[vlen - 1];
9942 var_type = btf_type_by_id(btf, var->type);
9943
9944 /* finally update btf info */
9945 datasec_type->size = size;
9946 var->size = size - var->offset;
9947 var_type->type = new_array_id;
9948
9949 return 0;
9950 }
9951
9952 int bpf_map__set_value_size(struct bpf_map *map, __u32 size)
9953 {
9954 if (map->obj->loaded || map->reused)
9955 return libbpf_err(-EBUSY);
9956
9957 if (map->mmaped) {
9958 int err;
9959 size_t mmap_old_sz, mmap_new_sz;
9960
9961 mmap_old_sz = bpf_map_mmap_sz(map->def.value_size, map->def.max_entries);
9962 mmap_new_sz = bpf_map_mmap_sz(size, map->def.max_entries);
9963 err = bpf_map_mmap_resize(map, mmap_old_sz, mmap_new_sz);
9964 if (err) {
9965 pr_warn("map '%s': failed to resize memory-mapped region: %d\n",
9966 bpf_map__name(map), err);
9967 return err;
9968 }
9969 err = map_btf_datasec_resize(map, size);
9970 if (err && err != -ENOENT) {
9971 pr_warn("map '%s': failed to adjust resized BTF, clearing BTF key/value info: %d\n",
9972 bpf_map__name(map), err);
9973 map->btf_value_type_id = 0;
9974 map->btf_key_type_id = 0;
9975 }
9976 }
9977
9978 map->def.value_size = size;
9979 return 0;
9980 }
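/* Editorial usage sketch: growing a global-data map before load. The map name
 * is hypothetical; the datasec's last variable must be an array, as enforced
 * by map_btf_datasec_resize() above.
 *
 *	struct bpf_map *map = bpf_object__find_map_by_name(obj, ".data.my_buf");
 *
 *	if (map)
 *		err = bpf_map__set_value_size(map, 2 * bpf_map__value_size(map));
 *
 * This has to happen after bpf_object__open*() but before bpf_object__load().
 */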
9981
9982 __u32 bpf_map__btf_key_type_id(const struct bpf_map *map)
9983 {
9984 return map ? map->btf_key_type_id : 0;
9985 }
9986
9987 __u32 bpf_map__btf_value_type_id(const struct bpf_map *map)
9988 {
9989 return map ? map->btf_value_type_id : 0;
9990 }
9991
9992 int bpf_map__set_initial_value(struct bpf_map *map,
9993 const void *data, size_t size)
9994 {
9995 if (map->obj->loaded || map->reused)
9996 return libbpf_err(-EBUSY);
9997
9998 if (!map->mmaped || map->libbpf_type == LIBBPF_MAP_KCONFIG ||
9999 size != map->def.value_size)
10000 return libbpf_err(-EINVAL);
10001
10002 memcpy(map->mmaped, data, size);
10003 return 0;
10004 }
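/* Editorial usage sketch: seeding .rodata before load (struct and variable
 * names are hypothetical, e.g. generated by a skeleton):
 *
 *	struct my_rodata cfg = { .debug = 1 };
 *	struct bpf_map *ro = bpf_object__find_map_by_name(obj, ".rodata");
 *
 *	if (ro)
 *		err = bpf_map__set_initial_value(ro, &cfg, sizeof(cfg));
 *
 * The size must equal bpf_map__value_size(), and KCONFIG maps are rejected.
 */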
10005
10006 void *bpf_map__initial_value(struct bpf_map *map, size_t *psize)
10007 {
10008 if (bpf_map__is_struct_ops(map)) {
10009 if (psize)
10010 *psize = map->def.value_size;
10011 return map->st_ops->data;
10012 }
10013
10014 if (!map->mmaped)
10015 return NULL;
10016 if (psize) *psize = map->def.value_size;
10017 return map->mmaped;
10018 }
10019
10020 bool bpf_map__is_internal(const struct bpf_map *map)
10021 {
10022 return map->libbpf_type != LIBBPF_MAP_UNSPEC;
10023 }
10024
10025 __u32 bpf_map__ifindex(const struct bpf_map *map)
10026 {
10027 return map->map_ifindex;
10028 }
10029
10030 int bpf_map__set_ifindex(struct bpf_map *map, __u32 ifindex)
10031 {
10032 if (map_is_created(map))
10033 return libbpf_err(-EBUSY);
10034 map->map_ifindex = ifindex;
10035 return 0;
10036 }
10037
10038 int bpf_map__set_inner_map_fd(struct bpf_map *map, int fd)
10039 {
10040 if (!bpf_map_type__is_map_in_map(map->def.type)) {
10041 pr_warn("error: unsupported map type\n");
10042 return libbpf_err(-EINVAL);
10043 }
10044 if (map->inner_map_fd != -1) {
10045 pr_warn("error: inner_map_fd already specified\n");
10046 return libbpf_err(-EINVAL);
10047 }
10048 if (map->inner_map) {
10049 bpf_map__destroy(map->inner_map);
10050 zfree(&map->inner_map);
10051 }
10052 map->inner_map_fd = fd;
10053 return 0;
10054 }
10055
10056 static struct bpf_map *
10057 __bpf_map__iter(const struct bpf_map *m, const struct bpf_object *obj, int i)
10058 {
10059 ssize_t idx;
10060 struct bpf_map *s, *e;
10061
10062 if (!obj || !obj->maps)
10063 return errno = EINVAL, NULL;
10064
10065 s = obj->maps;
10066 e = obj->maps + obj->nr_maps;
10067
10068 if ((m < s) || (m >= e)) {
10069 pr_warn("error in %s: map handler doesn't belong to object\n",
10070 __func__);
10071 return errno = EINVAL, NULL;
10072 }
10073
10074 idx = (m - obj->maps) + i;
10075 if (idx >= obj->nr_maps || idx < 0)
10076 return NULL;
10077 return &obj->maps[idx];
10078 }
10079
10080 struct bpf_map *
10081 bpf_object__next_map(const struct bpf_object *obj, const struct bpf_map *prev)
10082 {
10083 if (prev == NULL)
10084 return obj->maps;
10085
10086 return __bpf_map__iter(prev, obj, 1);
10087 }
10088
10089 struct bpf_map *
10090 bpf_object__prev_map(const struct bpf_object *obj, const struct bpf_map *next)
10091 {
10092 if (next == NULL) {
10093 if (!obj->nr_maps)
10094 return NULL;
10095 return obj->maps + obj->nr_maps - 1;
10096 }
10097
10098 return __bpf_map__iter(next, obj, -1);
10099 }
10100
10101 struct bpf_map *
10102 bpf_object__find_map_by_name(const struct bpf_object *obj, const char *name)
10103 {
10104 struct bpf_map *pos;
10105
10106 bpf_object__for_each_map(pos, obj) {
10107 /* if it's a special internal map name (which always starts
10108 * with dot) then check if that special name matches the
10109 * real map name (ELF section name)
10110 */
10111 if (name[0] == '.') {
10112 if (pos->real_name && strcmp(pos->real_name, name) == 0)
10113 return pos;
10114 continue;
10115 }
10116 /* otherwise map name has to be an exact match */
10117 if (map_uses_real_name(pos)) {
10118 if (strcmp(pos->real_name, name) == 0)
10119 return pos;
10120 continue;
10121 }
10122 if (strcmp(pos->name, name) == 0)
10123 return pos;
10124 }
10125 return errno = ENOENT, NULL;
10126 }
10127
10128 int
10129 bpf_object__find_map_fd_by_name(const struct bpf_object *obj, const char *name)
10130 {
10131 return bpf_map__fd(bpf_object__find_map_by_name(obj, name));
10132 }
10133
10134 static int validate_map_op(const struct bpf_map *map, size_t key_sz,
10135 size_t value_sz, bool check_value_sz)
10136 {
10137 if (!map_is_created(map)) /* map is not yet created */
10138 return -ENOENT;
10139
10140 if (map->def.key_size != key_sz) {
10141 pr_warn("map '%s': unexpected key size %zu provided, expected %u\n",
10142 map->name, key_sz, map->def.key_size);
10143 return -EINVAL;
10144 }
10145
10146 if (!check_value_sz)
10147 return 0;
10148
10149 switch (map->def.type) {
10150 case BPF_MAP_TYPE_PERCPU_ARRAY:
10151 case BPF_MAP_TYPE_PERCPU_HASH:
10152 case BPF_MAP_TYPE_LRU_PERCPU_HASH:
10153 case BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE: {
10154 int num_cpu = libbpf_num_possible_cpus();
10155 size_t elem_sz = roundup(map->def.value_size, 8);
10156
10157 if (value_sz != num_cpu * elem_sz) {
10158 pr_warn("map '%s': unexpected value size %zu provided for per-CPU map, expected %d * %zu = %zd\n",
10159 map->name, value_sz, num_cpu, elem_sz, num_cpu * elem_sz);
10160 return -EINVAL;
10161 }
10162 break;
10163 }
10164 default:
10165 if (map->def.value_size != value_sz) {
10166 pr_warn("map '%s': unexpected value size %zu provided, expected %u\n",
10167 map->name, value_sz, map->def.value_size);
10168 return -EINVAL;
10169 }
10170 break;
10171 }
10172 return 0;
10173 }
10174
10175 int bpf_map__lookup_elem(const struct bpf_map *map,
10176 const void *key, size_t key_sz,
10177 void *value, size_t value_sz, __u64 flags)
10178 {
10179 int err;
10180
10181 err = validate_map_op(map, key_sz, value_sz, true);
10182 if (err)
10183 return libbpf_err(err);
10184
10185 return bpf_map_lookup_elem_flags(map->fd, key, value, flags);
10186 }
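/* Editorial sketch of the per-CPU case validated above (map and key are
 * hypothetical, the value type is assumed to be __u64):
 *
 *	int ncpus = libbpf_num_possible_cpus();
 *	size_t elem_sz = 8;	(roundup(sizeof(__u64), 8))
 *	__u64 *vals = calloc(ncpus, elem_sz);
 *	__u32 key = 0;
 *
 *	if (vals)
 *		err = bpf_map__lookup_elem(map, &key, sizeof(key),
 *					   vals, ncpus * elem_sz, 0);
 */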
10187
10188 int bpf_map__update_elem(const struct bpf_map *map,
10189 const void *key, size_t key_sz,
10190 const void *value, size_t value_sz, __u64 flags)
10191 {
10192 int err;
10193
10194 err = validate_map_op(map, key_sz, value_sz, true);
10195 if (err)
10196 return libbpf_err(err);
10197
10198 return bpf_map_update_elem(map->fd, key, value, flags);
10199 }
10200
10201 int bpf_map__delete_elem(const struct bpf_map *map,
10202 const void *key, size_t key_sz, __u64 flags)
10203 {
10204 int err;
10205
10206 err = validate_map_op(map, key_sz, 0, false /* check_value_sz */);
10207 if (err)
10208 return libbpf_err(err);
10209
10210 return bpf_map_delete_elem_flags(map->fd, key, flags);
10211 }
10212
10213 int bpf_map__lookup_and_delete_elem(const struct bpf_map *map,
10214 const void *key, size_t key_sz,
10215 void *value, size_t value_sz, __u64 flags)
10216 {
10217 int err;
10218
10219 err = validate_map_op(map, key_sz, value_sz, true);
10220 if (err)
10221 return libbpf_err(err);
10222
10223 return bpf_map_lookup_and_delete_elem_flags(map->fd, key, value, flags);
10224 }
10225
10226 int bpf_map__get_next_key(const struct bpf_map *map,
10227 const void *cur_key, void *next_key, size_t key_sz)
10228 {
10229 int err;
10230
10231 err = validate_map_op(map, key_sz, 0, false /* check_value_sz */);
10232 if (err)
10233 return libbpf_err(err);
10234
10235 return bpf_map_get_next_key(map->fd, cur_key, next_key);
10236 }
10237
10238 long libbpf_get_error(const void *ptr)
10239 {
10240 if (!IS_ERR_OR_NULL(ptr))
10241 return 0;
10242
10243 if (IS_ERR(ptr))
10244 errno = -PTR_ERR(ptr);
10245
10246 /* If ptr == NULL, then errno should be already set by the failing
10247 * API, because libbpf never returns NULL on success and it now always
10248 * sets errno on error. So no extra errno handling for ptr == NULL
10249 * case.
10250 */
10251 return -errno;
10252 }
10253
10254 /* Replace link's underlying BPF program with the new one */
10255 int bpf_link__update_program(struct bpf_link *link, struct bpf_program *prog)
10256 {
10257 int ret;
10258
10259 ret = bpf_link_update(bpf_link__fd(link), bpf_program__fd(prog), NULL);
10260 return libbpf_err_errno(ret);
10261 }
10262
10263 /* Release "ownership" of underlying BPF resource (typically, BPF program
10264 * attached to some BPF hook, e.g., tracepoint, kprobe, etc). A disconnected
10265 * link, when destroyed through a bpf_link__destroy() call, won't attempt to
10266 * detach/unregister that BPF resource. This is useful in situations where,
10267 * say, an attached BPF program has to outlive the userspace program that
10268 * attached it. Depending on the type of BPF program, though, there might be
10269 * additional steps (like pinning the BPF program in BPF FS) necessary to
10270 * ensure that the exit of the userspace program doesn't trigger automatic
10271 * detachment and clean-up inside the kernel.
10272 */
10273 void bpf_link__disconnect(struct bpf_link *link)
10274 {
10275 link->disconnected = true;
10276 }
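/* Editorial usage sketch: keeping an attachment alive after this process
 * exits. Pinning first and then disconnecting is one common pattern (the pin
 * path is hypothetical):
 *
 *	if (!bpf_link__pin(link, "/sys/fs/bpf/my_link"))
 *		bpf_link__disconnect(link);
 *	bpf_link__destroy(link);	(frees memory, no detach happens now)
 */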
10277
10278 int bpf_link__destroy(struct bpf_link *link)
10279 {
10280 int err = 0;
10281
10282 if (IS_ERR_OR_NULL(link))
10283 return 0;
10284
10285 if (!link->disconnected && link->detach)
10286 err = link->detach(link);
10287 if (link->pin_path)
10288 free(link->pin_path);
10289 if (link->dealloc)
10290 link->dealloc(link);
10291 else
10292 free(link);
10293
10294 return libbpf_err(err);
10295 }
10296
10297 int bpf_link__fd(const struct bpf_link *link)
10298 {
10299 return link->fd;
10300 }
10301
10302 const char *bpf_link__pin_path(const struct bpf_link *link)
10303 {
10304 return link->pin_path;
10305 }
10306
10307 static int bpf_link__detach_fd(struct bpf_link *link)
10308 {
10309 return libbpf_err_errno(close(link->fd));
10310 }
10311
10312 struct bpf_link *bpf_link__open(const char *path)
10313 {
10314 struct bpf_link *link;
10315 int fd;
10316
10317 fd = bpf_obj_get(path);
10318 if (fd < 0) {
10319 fd = -errno;
10320 pr_warn("failed to open link at %s: %d\n", path, fd);
10321 return libbpf_err_ptr(fd);
10322 }
10323
10324 link = calloc(1, sizeof(*link));
10325 if (!link) {
10326 close(fd);
10327 return libbpf_err_ptr(-ENOMEM);
10328 }
10329 link->detach = &bpf_link__detach_fd;
10330 link->fd = fd;
10331
10332 link->pin_path = strdup(path);
10333 if (!link->pin_path) {
10334 bpf_link__destroy(link);
10335 return libbpf_err_ptr(-ENOMEM);
10336 }
10337
10338 return link;
10339 }
10340
10341 int bpf_link__detach(struct bpf_link *link)
10342 {
10343 return bpf_link_detach(link->fd) ? -errno : 0;
10344 }
10345
10346 int bpf_link__pin(struct bpf_link *link, const char *path)
10347 {
10348 int err;
10349
10350 if (link->pin_path)
10351 return libbpf_err(-EBUSY);
10352 err = make_parent_dir(path);
10353 if (err)
10354 return libbpf_err(err);
10355 err = check_path(path);
10356 if (err)
10357 return libbpf_err(err);
10358
10359 link->pin_path = strdup(path);
10360 if (!link->pin_path)
10361 return libbpf_err(-ENOMEM);
10362
10363 if (bpf_obj_pin(link->fd, link->pin_path)) {
10364 err = -errno;
10365 zfree(&link->pin_path);
10366 return libbpf_err(err);
10367 }
10368
10369 pr_debug("link fd=%d: pinned at %s\n", link->fd, link->pin_path);
10370 return 0;
10371 }
10372
10373 int bpf_link__unpin(struct bpf_link *link)
10374 {
10375 int err;
10376
10377 if (!link->pin_path)
10378 return libbpf_err(-EINVAL);
10379
10380 err = unlink(link->pin_path);
10381 if (err != 0)
10382 return -errno;
10383
10384 pr_debug("link fd=%d: unpinned from %s\n", link->fd, link->pin_path);
10385 zfree(&link->pin_path);
10386 return 0;
10387 }
10388
10389 struct bpf_link_perf {
10390 struct bpf_link link;
10391 int perf_event_fd;
10392 /* legacy kprobe support: keep track of probe identifier and type */
10393 char *legacy_probe_name;
10394 bool legacy_is_kprobe;
10395 bool legacy_is_retprobe;
10396 };
10397
10398 static int remove_kprobe_event_legacy(const char *probe_name, bool retprobe);
10399 static int remove_uprobe_event_legacy(const char *probe_name, bool retprobe);
10400
10401 static int bpf_link_perf_detach(struct bpf_link *link)
10402 {
10403 struct bpf_link_perf *perf_link = container_of(link, struct bpf_link_perf, link);
10404 int err = 0;
10405
10406 if (ioctl(perf_link->perf_event_fd, PERF_EVENT_IOC_DISABLE, 0) < 0)
10407 err = -errno;
10408
10409 if (perf_link->perf_event_fd != link->fd)
10410 close(perf_link->perf_event_fd);
10411 close(link->fd);
10412
10413 /* legacy uprobe/kprobe needs to be removed after perf event fd closure */
10414 if (perf_link->legacy_probe_name) {
10415 if (perf_link->legacy_is_kprobe) {
10416 err = remove_kprobe_event_legacy(perf_link->legacy_probe_name,
10417 perf_link->legacy_is_retprobe);
10418 } else {
10419 err = remove_uprobe_event_legacy(perf_link->legacy_probe_name,
10420 perf_link->legacy_is_retprobe);
10421 }
10422 }
10423
10424 return err;
10425 }
10426
10427 static void bpf_link_perf_dealloc(struct bpf_link *link)
10428 {
10429 struct bpf_link_perf *perf_link = container_of(link, struct bpf_link_perf, link);
10430
10431 free(perf_link->legacy_probe_name);
10432 free(perf_link);
10433 }
10434
10435 struct bpf_link *bpf_program__attach_perf_event_opts(const struct bpf_program *prog, int pfd,
10436 const struct bpf_perf_event_opts *opts)
10437 {
10438 char errmsg[STRERR_BUFSIZE];
10439 struct bpf_link_perf *link;
10440 int prog_fd, link_fd = -1, err;
10441 bool force_ioctl_attach;
10442
10443 if (!OPTS_VALID(opts, bpf_perf_event_opts))
10444 return libbpf_err_ptr(-EINVAL);
10445
10446 if (pfd < 0) {
10447 pr_warn("prog '%s': invalid perf event FD %d\n",
10448 prog->name, pfd);
10449 return libbpf_err_ptr(-EINVAL);
10450 }
10451 prog_fd = bpf_program__fd(prog);
10452 if (prog_fd < 0) {
10453 pr_warn("prog '%s': can't attach BPF program w/o FD (did you load it?)\n",
10454 prog->name);
10455 return libbpf_err_ptr(-EINVAL);
10456 }
10457
10458 link = calloc(1, sizeof(*link));
10459 if (!link)
10460 return libbpf_err_ptr(-ENOMEM);
10461 link->link.detach = &bpf_link_perf_detach;
10462 link->link.dealloc = &bpf_link_perf_dealloc;
10463 link->perf_event_fd = pfd;
10464
10465 force_ioctl_attach = OPTS_GET(opts, force_ioctl_attach, false);
10466 if (kernel_supports(prog->obj, FEAT_PERF_LINK) && !force_ioctl_attach) {
10467 DECLARE_LIBBPF_OPTS(bpf_link_create_opts, link_opts,
10468 .perf_event.bpf_cookie = OPTS_GET(opts, bpf_cookie, 0));
10469
10470 link_fd = bpf_link_create(prog_fd, pfd, BPF_PERF_EVENT, &link_opts);
10471 if (link_fd < 0) {
10472 err = -errno;
10473 pr_warn("prog '%s': failed to create BPF link for perf_event FD %d: %d (%s)\n",
10474 prog->name, pfd,
10475 err, libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
10476 goto err_out;
10477 }
10478 link->link.fd = link_fd;
10479 } else {
10480 if (OPTS_GET(opts, bpf_cookie, 0)) {
10481 pr_warn("prog '%s': user context value is not supported\n", prog->name);
10482 err = -EOPNOTSUPP;
10483 goto err_out;
10484 }
10485
10486 if (ioctl(pfd, PERF_EVENT_IOC_SET_BPF, prog_fd) < 0) {
10487 err = -errno;
10488 pr_warn("prog '%s': failed to attach to perf_event FD %d: %s\n",
10489 prog->name, pfd, libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
10490 if (err == -EPROTO)
10491 pr_warn("prog '%s': try add PERF_SAMPLE_CALLCHAIN to or remove exclude_callchain_[kernel|user] from pfd %d\n",
10492 prog->name, pfd);
10493 goto err_out;
10494 }
10495 link->link.fd = pfd;
10496 }
10497 if (ioctl(pfd, PERF_EVENT_IOC_ENABLE, 0) < 0) {
10498 err = -errno;
10499 pr_warn("prog '%s': failed to enable perf_event FD %d: %s\n",
10500 prog->name, pfd, libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
10501 goto err_out;
10502 }
10503
10504 return &link->link;
10505 err_out:
10506 if (link_fd >= 0)
10507 close(link_fd);
10508 free(link);
10509 return libbpf_err_ptr(err);
10510 }
10511
10512 struct bpf_link *bpf_program__attach_perf_event(const struct bpf_program *prog, int pfd)
10513 {
10514 return bpf_program__attach_perf_event_opts(prog, pfd, NULL);
10515 }
10516
10517 /*
10518 * this function is expected to parse an integer in the range of [0, 2^31-1]
10519 * from the given file using scanf format string fmt. If the actual parsed
10520 * value is negative, the result might be indistinguishable from an error
10521 */
10522 static int parse_uint_from_file(const char *file, const char *fmt)
10523 {
10524 char buf[STRERR_BUFSIZE];
10525 int err, ret;
10526 FILE *f;
10527
10528 f = fopen(file, "re");
10529 if (!f) {
10530 err = -errno;
10531 pr_debug("failed to open '%s': %s\n", file,
10532 libbpf_strerror_r(err, buf, sizeof(buf)));
10533 return err;
10534 }
10535 err = fscanf(f, fmt, &ret);
10536 if (err != 1) {
10537 err = err == EOF ? -EIO : -errno;
10538 pr_debug("failed to parse '%s': %s\n", file,
10539 libbpf_strerror_r(err, buf, sizeof(buf)));
10540 fclose(f);
10541 return err;
10542 }
10543 fclose(f);
10544 return ret;
10545 }
10546
10547 static int determine_kprobe_perf_type(void)
10548 {
10549 const char *file = "/sys/bus/event_source/devices/kprobe/type";
10550
10551 return parse_uint_from_file(file, "%d\n");
10552 }
10553
10554 static int determine_uprobe_perf_type(void)
10555 {
10556 const char *file = "/sys/bus/event_source/devices/uprobe/type";
10557
10558 return parse_uint_from_file(file, "%d\n");
10559 }
10560
10561 static int determine_kprobe_retprobe_bit(void)
10562 {
10563 const char *file = "/sys/bus/event_source/devices/kprobe/format/retprobe";
10564
10565 return parse_uint_from_file(file, "config:%d\n");
10566 }
10567
10568 static int determine_uprobe_retprobe_bit(void)
10569 {
10570 const char *file = "/sys/bus/event_source/devices/uprobe/format/retprobe";
10571
10572 return parse_uint_from_file(file, "config:%d\n");
10573 }
10574
10575 #define PERF_UPROBE_REF_CTR_OFFSET_BITS 32
10576 #define PERF_UPROBE_REF_CTR_OFFSET_SHIFT 32
10577
10578 static int perf_event_open_probe(bool uprobe, bool retprobe, const char *name,
10579 uint64_t offset, int pid, size_t ref_ctr_off)
10580 {
10581 const size_t attr_sz = sizeof(struct perf_event_attr);
10582 struct perf_event_attr attr;
10583 char errmsg[STRERR_BUFSIZE];
10584 int type, pfd;
10585
10586 if ((__u64)ref_ctr_off >= (1ULL << PERF_UPROBE_REF_CTR_OFFSET_BITS))
10587 return -EINVAL;
10588
10589 memset(&attr, 0, attr_sz);
10590
10591 type = uprobe ? determine_uprobe_perf_type()
10592 : determine_kprobe_perf_type();
10593 if (type < 0) {
10594 pr_warn("failed to determine %s perf type: %s\n",
10595 uprobe ? "uprobe" : "kprobe",
10596 libbpf_strerror_r(type, errmsg, sizeof(errmsg)));
10597 return type;
10598 }
10599 if (retprobe) {
10600 int bit = uprobe ? determine_uprobe_retprobe_bit()
10601 : determine_kprobe_retprobe_bit();
10602
10603 if (bit < 0) {
10604 pr_warn("failed to determine %s retprobe bit: %s\n",
10605 uprobe ? "uprobe" : "kprobe",
10606 libbpf_strerror_r(bit, errmsg, sizeof(errmsg)));
10607 return bit;
10608 }
10609 attr.config |= 1 << bit;
10610 }
10611 attr.size = attr_sz;
10612 attr.type = type;
10613 attr.config |= (__u64)ref_ctr_off << PERF_UPROBE_REF_CTR_OFFSET_SHIFT;
10614 attr.config1 = ptr_to_u64(name); /* kprobe_func or uprobe_path */
10615 attr.config2 = offset; /* kprobe_addr or probe_offset */
10616
10617 /* pid filter is meaningful only for uprobes */
10618 pfd = syscall(__NR_perf_event_open, &attr,
10619 pid < 0 ? -1 : pid /* pid */,
10620 pid == -1 ? 0 : -1 /* cpu */,
10621 -1 /* group_fd */, PERF_FLAG_FD_CLOEXEC);
10622 return pfd >= 0 ? pfd : -errno;
10623 }
10624
10625 static int append_to_file(const char *file, const char *fmt, ...)
10626 {
10627 int fd, n, err = 0;
10628 va_list ap;
10629 char buf[1024];
10630
10631 va_start(ap, fmt);
10632 n = vsnprintf(buf, sizeof(buf), fmt, ap);
10633 va_end(ap);
10634
10635 if (n < 0 || n >= sizeof(buf))
10636 return -EINVAL;
10637
10638 fd = open(file, O_WRONLY | O_APPEND | O_CLOEXEC, 0);
10639 if (fd < 0)
10640 return -errno;
10641
10642 if (write(fd, buf, n) < 0)
10643 err = -errno;
10644
10645 close(fd);
10646 return err;
10647 }
10648
10649 #define DEBUGFS "/sys/kernel/debug/tracing"
10650 #define TRACEFS "/sys/kernel/tracing"
10651
10652 static bool use_debugfs(void)
10653 {
10654 static int has_debugfs = -1;
10655
10656 if (has_debugfs < 0)
10657 has_debugfs = faccessat(AT_FDCWD, DEBUGFS, F_OK, AT_EACCESS) == 0;
10658
10659 return has_debugfs == 1;
10660 }
10661
10662 static const char *tracefs_path(void)
10663 {
10664 return use_debugfs() ? DEBUGFS : TRACEFS;
10665 }
10666
10667 static const char *tracefs_kprobe_events(void)
10668 {
10669 return use_debugfs() ? DEBUGFS"/kprobe_events" : TRACEFS"/kprobe_events";
10670 }
10671
10672 static const char *tracefs_uprobe_events(void)
10673 {
10674 return use_debugfs() ? DEBUGFS"/uprobe_events" : TRACEFS"/uprobe_events";
10675 }
10676
10677 static const char *tracefs_available_filter_functions(void)
10678 {
10679 return use_debugfs() ? DEBUGFS"/available_filter_functions"
10680 : TRACEFS"/available_filter_functions";
10681 }
10682
10683 static const char *tracefs_available_filter_functions_addrs(void)
10684 {
10685 return use_debugfs() ? DEBUGFS"/available_filter_functions_addrs"
10686 : TRACEFS"/available_filter_functions_addrs";
10687 }
10688
10689 static void gen_kprobe_legacy_event_name(char *buf, size_t buf_sz,
10690 const char *kfunc_name, size_t offset)
10691 {
10692 static int index = 0;
10693 int i;
10694
10695 snprintf(buf, buf_sz, "libbpf_%u_%s_0x%zx_%d", getpid(), kfunc_name, offset,
10696 __sync_fetch_and_add(&index, 1));
10697
10698 /* sanitize the generated event name: replace non-alphanumeric chars with '_' */
10699 for (i = 0; buf[i]; i++) {
10700 if (!isalnum(buf[i]))
10701 buf[i] = '_';
10702 }
10703 }
10704
10705 static int add_kprobe_event_legacy(const char *probe_name, bool retprobe,
10706 const char *kfunc_name, size_t offset)
10707 {
10708 return append_to_file(tracefs_kprobe_events(), "%c:%s/%s %s+0x%zx",
10709 retprobe ? 'r' : 'p',
10710 retprobe ? "kretprobes" : "kprobes",
10711 probe_name, kfunc_name, offset);
10712 }
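/* Editorial example of the text appended above (the probe name is a made-up
 * instance of what gen_kprobe_legacy_event_name() produces):
 *
 *	p:kprobes/libbpf_1234_do_unlinkat_0x0_0 do_unlinkat+0x0
 *
 * and for a retprobe:
 *
 *	r:kretprobes/libbpf_1234_do_unlinkat_0x0_0 do_unlinkat+0x0
 */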
10713
10714 static int remove_kprobe_event_legacy(const char *probe_name, bool retprobe)
10715 {
10716 return append_to_file(tracefs_kprobe_events(), "-:%s/%s",
10717 retprobe ? "kretprobes" : "kprobes", probe_name);
10718 }
10719
10720 static int determine_kprobe_perf_type_legacy(const char *probe_name, bool retprobe)
10721 {
10722 char file[256];
10723
10724 snprintf(file, sizeof(file), "%s/events/%s/%s/id",
10725 tracefs_path(), retprobe ? "kretprobes" : "kprobes", probe_name);
10726
10727 return parse_uint_from_file(file, "%d\n");
10728 }
10729
10730 static int perf_event_kprobe_open_legacy(const char *probe_name, bool retprobe,
10731 const char *kfunc_name, size_t offset, int pid)
10732 {
10733 const size_t attr_sz = sizeof(struct perf_event_attr);
10734 struct perf_event_attr attr;
10735 char errmsg[STRERR_BUFSIZE];
10736 int type, pfd, err;
10737
10738 err = add_kprobe_event_legacy(probe_name, retprobe, kfunc_name, offset);
10739 if (err < 0) {
10740 pr_warn("failed to add legacy kprobe event for '%s+0x%zx': %s\n",
10741 kfunc_name, offset,
10742 libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
10743 return err;
10744 }
10745 type = determine_kprobe_perf_type_legacy(probe_name, retprobe);
10746 if (type < 0) {
10747 err = type;
10748 pr_warn("failed to determine legacy kprobe event id for '%s+0x%zx': %s\n",
10749 kfunc_name, offset,
10750 libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
10751 goto err_clean_legacy;
10752 }
10753
10754 memset(&attr, 0, attr_sz);
10755 attr.size = attr_sz;
10756 attr.config = type;
10757 attr.type = PERF_TYPE_TRACEPOINT;
10758
10759 pfd = syscall(__NR_perf_event_open, &attr,
10760 pid < 0 ? -1 : pid, /* pid */
10761 pid == -1 ? 0 : -1, /* cpu */
10762 -1 /* group_fd */, PERF_FLAG_FD_CLOEXEC);
10763 if (pfd < 0) {
10764 err = -errno;
10765 pr_warn("legacy kprobe perf_event_open() failed: %s\n",
10766 libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
10767 goto err_clean_legacy;
10768 }
10769 return pfd;
10770
10771 err_clean_legacy:
10772 /* Clear the newly added legacy kprobe_event */
10773 remove_kprobe_event_legacy(probe_name, retprobe);
10774 return err;
10775 }
10776
10777 static const char *arch_specific_syscall_pfx(void)
10778 {
10779 #if defined(__x86_64__)
10780 return "x64";
10781 #elif defined(__i386__)
10782 return "ia32";
10783 #elif defined(__s390x__)
10784 return "s390x";
10785 #elif defined(__s390__)
10786 return "s390";
10787 #elif defined(__arm__)
10788 return "arm";
10789 #elif defined(__aarch64__)
10790 return "arm64";
10791 #elif defined(__mips__)
10792 return "mips";
10793 #elif defined(__riscv)
10794 return "riscv";
10795 #elif defined(__powerpc__)
10796 return "powerpc";
10797 #elif defined(__powerpc64__)
10798 return "powerpc64";
10799 #else
10800 return NULL;
10801 #endif
10802 }
10803
10804 int probe_kern_syscall_wrapper(int token_fd)
10805 {
10806 char syscall_name[64];
10807 const char *ksys_pfx;
10808
10809 ksys_pfx = arch_specific_syscall_pfx();
10810 if (!ksys_pfx)
10811 return 0;
10812
10813 snprintf(syscall_name, sizeof(syscall_name), "__%s_sys_bpf", ksys_pfx);
10814
10815 if (determine_kprobe_perf_type() >= 0) {
10816 int pfd;
10817
10818 pfd = perf_event_open_probe(false, false, syscall_name, 0, getpid(), 0);
10819 if (pfd >= 0)
10820 close(pfd);
10821
10822 return pfd >= 0 ? 1 : 0;
10823 } else { /* legacy mode */
10824 char probe_name[128];
10825
10826 gen_kprobe_legacy_event_name(probe_name, sizeof(probe_name), syscall_name, 0);
10827 if (add_kprobe_event_legacy(probe_name, false, syscall_name, 0) < 0)
10828 return 0;
10829
10830 (void)remove_kprobe_event_legacy(probe_name, false);
10831 return 1;
10832 }
10833 }
10834
10835 struct bpf_link *
10836 bpf_program__attach_kprobe_opts(const struct bpf_program *prog,
10837 const char *func_name,
10838 const struct bpf_kprobe_opts *opts)
10839 {
10840 DECLARE_LIBBPF_OPTS(bpf_perf_event_opts, pe_opts);
10841 enum probe_attach_mode attach_mode;
10842 char errmsg[STRERR_BUFSIZE];
10843 char *legacy_probe = NULL;
10844 struct bpf_link *link;
10845 size_t offset;
10846 bool retprobe, legacy;
10847 int pfd, err;
10848
10849 if (!OPTS_VALID(opts, bpf_kprobe_opts))
10850 return libbpf_err_ptr(-EINVAL);
10851
10852 attach_mode = OPTS_GET(opts, attach_mode, PROBE_ATTACH_MODE_DEFAULT);
10853 retprobe = OPTS_GET(opts, retprobe, false);
10854 offset = OPTS_GET(opts, offset, 0);
10855 pe_opts.bpf_cookie = OPTS_GET(opts, bpf_cookie, 0);
10856
10857 legacy = determine_kprobe_perf_type() < 0;
10858 switch (attach_mode) {
10859 case PROBE_ATTACH_MODE_LEGACY:
10860 legacy = true;
10861 pe_opts.force_ioctl_attach = true;
10862 break;
10863 case PROBE_ATTACH_MODE_PERF:
10864 if (legacy)
10865 return libbpf_err_ptr(-ENOTSUP);
10866 pe_opts.force_ioctl_attach = true;
10867 break;
10868 case PROBE_ATTACH_MODE_LINK:
10869 if (legacy || !kernel_supports(prog->obj, FEAT_PERF_LINK))
10870 return libbpf_err_ptr(-ENOTSUP);
10871 break;
10872 case PROBE_ATTACH_MODE_DEFAULT:
10873 break;
10874 default:
10875 return libbpf_err_ptr(-EINVAL);
10876 }
10877
10878 if (!legacy) {
10879 pfd = perf_event_open_probe(false /* uprobe */, retprobe,
10880 func_name, offset,
10881 -1 /* pid */, 0 /* ref_ctr_off */);
10882 } else {
10883 char probe_name[256];
10884
10885 gen_kprobe_legacy_event_name(probe_name, sizeof(probe_name),
10886 func_name, offset);
10887
10888 legacy_probe = strdup(probe_name);
10889 if (!legacy_probe)
10890 return libbpf_err_ptr(-ENOMEM);
10891
10892 pfd = perf_event_kprobe_open_legacy(legacy_probe, retprobe, func_name,
10893 offset, -1 /* pid */);
10894 }
10895 if (pfd < 0) {
10896 err = -errno;
10897 pr_warn("prog '%s': failed to create %s '%s+0x%zx' perf event: %s\n",
10898 prog->name, retprobe ? "kretprobe" : "kprobe",
10899 func_name, offset,
10900 libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
10901 goto err_out;
10902 }
10903 link = bpf_program__attach_perf_event_opts(prog, pfd, &pe_opts);
10904 err = libbpf_get_error(link);
10905 if (err) {
10906 close(pfd);
10907 pr_warn("prog '%s': failed to attach to %s '%s+0x%zx': %s\n",
10908 prog->name, retprobe ? "kretprobe" : "kprobe",
10909 func_name, offset,
10910 libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
10911 goto err_clean_legacy;
10912 }
10913 if (legacy) {
10914 struct bpf_link_perf *perf_link = container_of(link, struct bpf_link_perf, link);
10915
10916 perf_link->legacy_probe_name = legacy_probe;
10917 perf_link->legacy_is_kprobe = true;
10918 perf_link->legacy_is_retprobe = retprobe;
10919 }
10920
10921 return link;
10922
10923 err_clean_legacy:
10924 if (legacy)
10925 remove_kprobe_event_legacy(legacy_probe, retprobe);
10926 err_out:
10927 free(legacy_probe);
10928 return libbpf_err_ptr(err);
10929 }
10930
10931 struct bpf_link *bpf_program__attach_kprobe(const struct bpf_program *prog,
10932 bool retprobe,
10933 const char *func_name)
10934 {
10935 DECLARE_LIBBPF_OPTS(bpf_kprobe_opts, opts,
10936 .retprobe = retprobe,
10937 );
10938
10939 return bpf_program__attach_kprobe_opts(prog, func_name, &opts);
10940 }
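/* Editorial usage sketch of the two attach flavors above (the kernel function
 * name is hypothetical):
 *
 *	LIBBPF_OPTS(bpf_kprobe_opts, opts,
 *		.retprobe = true,
 *	);
 *	struct bpf_link *l1 = bpf_program__attach_kprobe(prog, false, "do_unlinkat");
 *	struct bpf_link *l2 = bpf_program__attach_kprobe_opts(prog, "do_unlinkat", &opts);
 *
 * Failures are reported through libbpf_err_ptr(); check the result with
 * libbpf_get_error() before use.
 */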
10941
10942 struct bpf_link *bpf_program__attach_ksyscall(const struct bpf_program *prog,
10943 const char *syscall_name,
10944 const struct bpf_ksyscall_opts *opts)
10945 {
10946 LIBBPF_OPTS(bpf_kprobe_opts, kprobe_opts);
10947 char func_name[128];
10948
10949 if (!OPTS_VALID(opts, bpf_ksyscall_opts))
10950 return libbpf_err_ptr(-EINVAL);
10951
10952 if (kernel_supports(prog->obj, FEAT_SYSCALL_WRAPPER)) {
10953 /* arch_specific_syscall_pfx() should never return NULL here
10954 * because it is guarded by kernel_supports(). However, since the
10955 * compiler does not know that, we keep an explicit NULL fallback
10956 * (the "?:" expression below) as well.
10957 */
10958 snprintf(func_name, sizeof(func_name), "__%s_sys_%s",
10959 arch_specific_syscall_pfx() ? : "", syscall_name);
10960 } else {
10961 snprintf(func_name, sizeof(func_name), "__se_sys_%s", syscall_name);
10962 }
10963
10964 kprobe_opts.retprobe = OPTS_GET(opts, retprobe, false);
10965 kprobe_opts.bpf_cookie = OPTS_GET(opts, bpf_cookie, 0);
10966
10967 return bpf_program__attach_kprobe_opts(prog, func_name, &kprobe_opts);
10968 }
10969
10970 /* Adapted from perf/util/string.c */
10971 bool glob_match(const char *str, const char *pat)
10972 {
10973 while (*str && *pat && *pat != '*') {
10974 if (*pat == '?') { /* Matches any single character */
10975 str++;
10976 pat++;
10977 continue;
10978 }
10979 if (*str != *pat)
10980 return false;
10981 str++;
10982 pat++;
10983 }
10984 /* Check wild card */
10985 if (*pat == '*') {
10986 while (*pat == '*')
10987 pat++;
10988 if (!*pat) /* Tail wild card matches all */
10989 return true;
10990 while (*str)
10991 if (glob_match(str++, pat))
10992 return true;
10993 }
10994 return !*str && !*pat;
10995 }
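/* Editorial examples for glob_match() (patterns are illustrative):
 *   glob_match("sys_openat2", "sys_*")     -> true
 *   glob_match("sys_openat2", "sys_open?") -> false ('?' matches exactly one
 *                                             character, two remain here)
 *   glob_match("tcp_v4_rcv", "*_rcv")      -> true
 */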
10996
10997 struct kprobe_multi_resolve {
10998 const char *pattern;
10999 unsigned long *addrs;
11000 size_t cap;
11001 size_t cnt;
11002 };
11003
11004 struct avail_kallsyms_data {
11005 char **syms;
11006 size_t cnt;
11007 struct kprobe_multi_resolve *res;
11008 };
11009
11010 static int avail_func_cmp(const void *a, const void *b)
11011 {
11012 return strcmp(*(const char **)a, *(const char **)b);
11013 }
11014
11015 static int avail_kallsyms_cb(unsigned long long sym_addr, char sym_type,
11016 const char *sym_name, void *ctx)
11017 {
11018 struct avail_kallsyms_data *data = ctx;
11019 struct kprobe_multi_resolve *res = data->res;
11020 int err;
11021
11022 if (!bsearch(&sym_name, data->syms, data->cnt, sizeof(*data->syms), avail_func_cmp))
11023 return 0;
11024
11025 err = libbpf_ensure_mem((void **)&res->addrs, &res->cap, sizeof(*res->addrs), res->cnt + 1);
11026 if (err)
11027 return err;
11028
11029 res->addrs[res->cnt++] = (unsigned long)sym_addr;
11030 return 0;
11031 }
11032
11033 static int libbpf_available_kallsyms_parse(struct kprobe_multi_resolve *res)
11034 {
11035 const char *available_functions_file = tracefs_available_filter_functions();
11036 struct avail_kallsyms_data data;
11037 char sym_name[500];
11038 FILE *f;
11039 int err = 0, ret, i;
11040 char **syms = NULL;
11041 size_t cap = 0, cnt = 0;
11042
11043 f = fopen(available_functions_file, "re");
11044 if (!f) {
11045 err = -errno;
11046 pr_warn("failed to open %s: %d\n", available_functions_file, err);
11047 return err;
11048 }
11049
11050 while (true) {
11051 char *name;
11052
11053 ret = fscanf(f, "%499s%*[^\n]\n", sym_name);
11054 if (ret == EOF && feof(f))
11055 break;
11056
11057 if (ret != 1) {
11058 pr_warn("failed to parse available_filter_functions entry: %d\n", ret);
11059 err = -EINVAL;
11060 goto cleanup;
11061 }
11062
11063 if (!glob_match(sym_name, res->pattern))
11064 continue;
11065
11066 err = libbpf_ensure_mem((void **)&syms, &cap, sizeof(*syms), cnt + 1);
11067 if (err)
11068 goto cleanup;
11069
11070 name = strdup(sym_name);
11071 if (!name) {
11072 err = -errno;
11073 goto cleanup;
11074 }
11075
11076 syms[cnt++] = name;
11077 }
11078
11079 /* no entries found, bail out */
11080 if (cnt == 0) {
11081 err = -ENOENT;
11082 goto cleanup;
11083 }
11084
11085 /* sort available functions */
11086 qsort(syms, cnt, sizeof(*syms), avail_func_cmp);
11087
11088 data.syms = syms;
11089 data.res = res;
11090 data.cnt = cnt;
11091 libbpf_kallsyms_parse(avail_kallsyms_cb, &data);
11092
11093 if (res->cnt == 0)
11094 err = -ENOENT;
11095
11096 cleanup:
11097 for (i = 0; i < cnt; i++)
11098 free((char *)syms[i]);
11099 free(syms);
11100
11101 fclose(f);
11102 return err;
11103 }
11104
11105 static bool has_available_filter_functions_addrs(void)
11106 {
11107 return access(tracefs_available_filter_functions_addrs(), R_OK) != -1;
11108 }
11109
11110 static int libbpf_available_kprobes_parse(struct kprobe_multi_resolve *res)
11111 {
11112 const char *available_path = tracefs_available_filter_functions_addrs();
11113 char sym_name[500];
11114 FILE *f;
11115 int ret, err = 0;
11116 unsigned long long sym_addr;
11117
11118 f = fopen(available_path, "re");
11119 if (!f) {
11120 err = -errno;
11121 pr_warn("failed to open %s: %d\n", available_path, err);
11122 return err;
11123 }
11124
11125 while (true) {
11126 ret = fscanf(f, "%llx %499s%*[^\n]\n", &sym_addr, sym_name);
11127 if (ret == EOF && feof(f))
11128 break;
11129
11130 if (ret != 2) {
11131 pr_warn("failed to parse available_filter_functions_addrs entry: %d\n",
11132 ret);
11133 err = -EINVAL;
11134 goto cleanup;
11135 }
11136
11137 if (!glob_match(sym_name, res->pattern))
11138 continue;
11139
11140 err = libbpf_ensure_mem((void **)&res->addrs, &res->cap,
11141 sizeof(*res->addrs), res->cnt + 1);
11142 if (err)
11143 goto cleanup;
11144
11145 res->addrs[res->cnt++] = (unsigned long)sym_addr;
11146 }
11147
11148 if (res->cnt == 0)
11149 err = -ENOENT;
11150
11151 cleanup:
11152 fclose(f);
11153 return err;
11154 }
11155
11156 struct bpf_link *
11157 bpf_program__attach_kprobe_multi_opts(const struct bpf_program *prog,
11158 const char *pattern,
11159 const struct bpf_kprobe_multi_opts *opts)
11160 {
11161 LIBBPF_OPTS(bpf_link_create_opts, lopts);
11162 struct kprobe_multi_resolve res = {
11163 .pattern = pattern,
11164 };
11165 struct bpf_link *link = NULL;
11166 char errmsg[STRERR_BUFSIZE];
11167 const unsigned long *addrs;
11168 int err, link_fd, prog_fd;
11169 const __u64 *cookies;
11170 const char **syms;
11171 bool retprobe;
11172 size_t cnt;
11173
11174 if (!OPTS_VALID(opts, bpf_kprobe_multi_opts))
11175 return libbpf_err_ptr(-EINVAL);
11176
11177 syms = OPTS_GET(opts, syms, false);
11178 addrs = OPTS_GET(opts, addrs, false);
11179 cnt = OPTS_GET(opts, cnt, false);
11180 cookies = OPTS_GET(opts, cookies, false);
11181
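/* Callers must pass either a glob pattern or an explicit syms/addrs array
 * with cnt, but not both; syms and addrs are mutually exclusive, and cookies
 * are only meaningful together with the array form.
 */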
11182 if (!pattern && !addrs && !syms)
11183 return libbpf_err_ptr(-EINVAL);
11184 if (pattern && (addrs || syms || cookies || cnt))
11185 return libbpf_err_ptr(-EINVAL);
11186 if (!pattern && !cnt)
11187 return libbpf_err_ptr(-EINVAL);
11188 if (addrs && syms)
11189 return libbpf_err_ptr(-EINVAL);
11190
11191 if (pattern) {
11192 if (has_available_filter_functions_addrs())
11193 err = libbpf_available_kprobes_parse(&res);
11194 else
11195 err = libbpf_available_kallsyms_parse(&res);
11196 if (err)
11197 goto error;
11198 addrs = res.addrs;
11199 cnt = res.cnt;
11200 }
11201
11202 retprobe = OPTS_GET(opts, retprobe, false);
11203
11204 lopts.kprobe_multi.syms = syms;
11205 lopts.kprobe_multi.addrs = addrs;
11206 lopts.kprobe_multi.cookies = cookies;
11207 lopts.kprobe_multi.cnt = cnt;
11208 lopts.kprobe_multi.flags = retprobe ? BPF_F_KPROBE_MULTI_RETURN : 0;
11209
11210 link = calloc(1, sizeof(*link));
11211 if (!link) {
11212 err = -ENOMEM;
11213 goto error;
11214 }
11215 link->detach = &bpf_link__detach_fd;
11216
11217 prog_fd = bpf_program__fd(prog);
11218 link_fd = bpf_link_create(prog_fd, 0, BPF_TRACE_KPROBE_MULTI, &lopts);
11219 if (link_fd < 0) {
11220 err = -errno;
11221 pr_warn("prog '%s': failed to attach: %s\n",
11222 prog->name, libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
11223 goto error;
11224 }
11225 link->fd = link_fd;
11226 free(res.addrs);
11227 return link;
11228
11229 error:
11230 free(link);
11231 free(res.addrs);
11232 return libbpf_err_ptr(err);
11233 }
11234
11235 static int attach_kprobe(const struct bpf_program *prog, long cookie, struct bpf_link **link)
11236 {
11237 DECLARE_LIBBPF_OPTS(bpf_kprobe_opts, opts);
11238 unsigned long offset = 0;
11239 const char *func_name;
11240 char *func;
11241 int n;
11242
11243 *link = NULL;
11244
11245 /* no auto-attach for SEC("kprobe") and SEC("kretprobe") */
11246 if (strcmp(prog->sec_name, "kprobe") == 0 || strcmp(prog->sec_name, "kretprobe") == 0)
11247 return 0;
11248
11249 opts.retprobe = str_has_pfx(prog->sec_name, "kretprobe/");
11250 if (opts.retprobe)
11251 func_name = prog->sec_name + sizeof("kretprobe/") - 1;
11252 else
11253 func_name = prog->sec_name + sizeof("kprobe/") - 1;
11254
11255 n = sscanf(func_name, "%m[a-zA-Z0-9_.]+%li", &func, &offset);
11256 if (n < 1) {
11257 pr_warn("kprobe name is invalid: %s\n", func_name);
11258 return -EINVAL;
11259 }
11260 if (opts.retprobe && offset != 0) {
11261 free(func);
11262 pr_warn("kretprobes do not support offset specification\n");
11263 return -EINVAL;
11264 }
11265
11266 opts.offset = offset;
11267 *link = bpf_program__attach_kprobe_opts(prog, func, &opts);
11268 free(func);
11269 return libbpf_get_error(*link);
11270 }
11271
11272 static int attach_ksyscall(const struct bpf_program *prog, long cookie, struct bpf_link **link)
11273 {
11274 LIBBPF_OPTS(bpf_ksyscall_opts, opts);
11275 const char *syscall_name;
11276
11277 *link = NULL;
11278
11279 /* no auto-attach for SEC("ksyscall") and SEC("kretsyscall") */
11280 if (strcmp(prog->sec_name, "ksyscall") == 0 || strcmp(prog->sec_name, "kretsyscall") == 0)
11281 return 0;
11282
11283 opts.retprobe = str_has_pfx(prog->sec_name, "kretsyscall/");
11284 if (opts.retprobe)
11285 syscall_name = prog->sec_name + sizeof("kretsyscall/") - 1;
11286 else
11287 syscall_name = prog->sec_name + sizeof("ksyscall/") - 1;
11288
11289 *link = bpf_program__attach_ksyscall(prog, syscall_name, &opts);
11290 return *link ? 0 : -errno;
11291 }
11292
11293 static int attach_kprobe_multi(const struct bpf_program *prog, long cookie, struct bpf_link **link)
11294 {
11295 LIBBPF_OPTS(bpf_kprobe_multi_opts, opts);
11296 const char *spec;
11297 char *pattern;
11298 int n;
11299
11300 *link = NULL;
11301
11302 /* no auto-attach for SEC("kprobe.multi") and SEC("kretprobe.multi") */
11303 if (strcmp(prog->sec_name, "kprobe.multi") == 0 ||
11304 strcmp(prog->sec_name, "kretprobe.multi") == 0)
11305 return 0;
11306
11307 opts.retprobe = str_has_pfx(prog->sec_name, "kretprobe.multi/");
11308 if (opts.retprobe)
11309 spec = prog->sec_name + sizeof("kretprobe.multi/") - 1;
11310 else
11311 spec = prog->sec_name + sizeof("kprobe.multi/") - 1;
11312
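/* e.g. SEC("kprobe.multi/tcp_*") yields the glob pattern "tcp_*"; '*' and
 * '?' wildcards are accepted (an illustrative section name, not one required
 * by libbpf).
 */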
11313 n = sscanf(spec, "%m[a-zA-Z0-9_.*?]", &pattern);
11314 if (n < 1) {
11315 pr_warn("kprobe multi pattern is invalid: %s\n", spec);
11316 return -EINVAL;
11317 }
11318
11319 *link = bpf_program__attach_kprobe_multi_opts(prog, pattern, &opts);
11320 free(pattern);
11321 return libbpf_get_error(*link);
11322 }
11323
11324 static int attach_uprobe_multi(const struct bpf_program *prog, long cookie, struct bpf_link **link)
11325 {
11326 char *probe_type = NULL, *binary_path = NULL, *func_name = NULL;
11327 LIBBPF_OPTS(bpf_uprobe_multi_opts, opts);
11328 int n, ret = -EINVAL;
11329
11330 *link = NULL;
11331
11332 n = sscanf(prog->sec_name, "%m[^/]/%m[^:]:%m[^\n]",
11333 &probe_type, &binary_path, &func_name);
11334 switch (n) {
11335 case 1:
11336 /* handle SEC("u[ret]probe") - format is valid, but auto-attach is impossible. */
11337 ret = 0;
11338 break;
11339 case 3:
11340 opts.retprobe = strcmp(probe_type, "uretprobe.multi") == 0;
11341 *link = bpf_program__attach_uprobe_multi(prog, -1, binary_path, func_name, &opts);
11342 ret = libbpf_get_error(*link);
11343 break;
11344 default:
11345 pr_warn("prog '%s': invalid format of section definition '%s'\n", prog->name,
11346 prog->sec_name);
11347 break;
11348 }
11349 free(probe_type);
11350 free(binary_path);
11351 free(func_name);
11352 return ret;
11353 }
11354
11355 static void gen_uprobe_legacy_event_name(char *buf, size_t buf_sz,
11356 const char *binary_path, uint64_t offset)
11357 {
11358 int i;
11359
11360 snprintf(buf, buf_sz, "libbpf_%u_%s_0x%zx", getpid(), binary_path, (size_t)offset);
11361
11362 /* sanitize binary_path in the probe name */
11363 for (i = 0; buf[i]; i++) {
11364 if (!isalnum(buf[i]))
11365 buf[i] = '_';
11366 }
11367 }
11368
11369 static inline int add_uprobe_event_legacy(const char *probe_name, bool retprobe,
11370 const char *binary_path, size_t offset)
11371 {
11372 return append_to_file(tracefs_uprobe_events(), "%c:%s/%s %s:0x%zx",
11373 retprobe ? 'r' : 'p',
11374 retprobe ? "uretprobes" : "uprobes",
11375 probe_name, binary_path, offset);
11376 }
11377
11378 static inline int remove_uprobe_event_legacy(const char *probe_name, bool retprobe)
11379 {
11380 return append_to_file(tracefs_uprobe_events(), "-:%s/%s",
11381 retprobe ? "uretprobes" : "uprobes", probe_name);
11382 }
11383
11384 static int determine_uprobe_perf_type_legacy(const char *probe_name, bool retprobe)
11385 {
11386 char file[512];
11387
11388 snprintf(file, sizeof(file), "%s/events/%s/%s/id",
11389 tracefs_path(), retprobe ? "uretprobes" : "uprobes", probe_name);
11390
11391 return parse_uint_from_file(file, "%d\n");
11392 }
11393
11394 static int perf_event_uprobe_open_legacy(const char *probe_name, bool retprobe,
11395 const char *binary_path, size_t offset, int pid)
11396 {
11397 const size_t attr_sz = sizeof(struct perf_event_attr);
11398 struct perf_event_attr attr;
11399 int type, pfd, err;
11400
11401 err = add_uprobe_event_legacy(probe_name, retprobe, binary_path, offset);
11402 if (err < 0) {
11403 pr_warn("failed to add legacy uprobe event for %s:0x%zx: %d\n",
11404 binary_path, (size_t)offset, err);
11405 return err;
11406 }
11407 type = determine_uprobe_perf_type_legacy(probe_name, retprobe);
11408 if (type < 0) {
11409 err = type;
11410 pr_warn("failed to determine legacy uprobe event id for %s:0x%zx: %d\n",
11411 binary_path, offset, err);
11412 goto err_clean_legacy;
11413 }
11414
11415 memset(&attr, 0, attr_sz);
11416 attr.size = attr_sz;
11417 attr.config = type;
11418 attr.type = PERF_TYPE_TRACEPOINT;
11419
11420 pfd = syscall(__NR_perf_event_open, &attr,
11421 pid < 0 ? -1 : pid, /* pid */
11422 pid == -1 ? 0 : -1, /* cpu */
11423 -1 /* group_fd */, PERF_FLAG_FD_CLOEXEC);
11424 if (pfd < 0) {
11425 err = -errno;
11426 pr_warn("legacy uprobe perf_event_open() failed: %d\n", err);
11427 goto err_clean_legacy;
11428 }
11429 return pfd;
11430
11431 err_clean_legacy:
11432 /* Clear the newly added legacy uprobe_event */
11433 remove_uprobe_event_legacy(probe_name, retprobe);
11434 return err;
11435 }
11436
11437 /* Find offset of function name in archive specified by path. Currently
11438 * supported are .zip files that do not compress their contents, as used on
11439 * Android in the form of APKs, for example. "file_name" is the name of the ELF
11440 * file inside the archive. "func_name" matches symbol name or name@@LIB for
11441 * library functions.
11442 *
11443 * An overview of the APK format specifically provided here:
11444 * https://en.wikipedia.org/w/index.php?title=Apk_(file_format)&oldid=1139099120#Package_contents
11445 */
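/* Hypothetical example, purely for illustration:
 *   archive_path = "/data/app/com.example/base.apk"
 *   file_name    = "lib/arm64-v8a/libexample.so"
 *   func_name    = "process_request"
 * The value returned is the symbol's offset within the ELF file plus the
 * entry's data offset inside the archive.
 */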
11446 static long elf_find_func_offset_from_archive(const char *archive_path, const char *file_name,
11447 const char *func_name)
11448 {
11449 struct zip_archive *archive;
11450 struct zip_entry entry;
11451 long ret;
11452 Elf *elf;
11453
11454 archive = zip_archive_open(archive_path);
11455 if (IS_ERR(archive)) {
11456 ret = PTR_ERR(archive);
11457 pr_warn("zip: failed to open %s: %ld\n", archive_path, ret);
11458 return ret;
11459 }
11460
11461 ret = zip_archive_find_entry(archive, file_name, &entry);
11462 if (ret) {
11463 pr_warn("zip: could not find archive member %s in %s: %ld\n", file_name,
11464 archive_path, ret);
11465 goto out;
11466 }
11467 pr_debug("zip: found entry for %s in %s at 0x%lx\n", file_name, archive_path,
11468 (unsigned long)entry.data_offset);
11469
11470 if (entry.compression) {
11471 pr_warn("zip: entry %s of %s is compressed and cannot be handled\n", file_name,
11472 archive_path);
11473 ret = -LIBBPF_ERRNO__FORMAT;
11474 goto out;
11475 }
11476
11477 elf = elf_memory((void *)entry.data, entry.data_length);
11478 if (!elf) {
11479 pr_warn("elf: could not read elf file %s from %s: %s\n", file_name, archive_path,
11480 elf_errmsg(-1));
11481 ret = -LIBBPF_ERRNO__LIBELF;
11482 goto out;
11483 }
11484
11485 ret = elf_find_func_offset(elf, file_name, func_name);
11486 if (ret > 0) {
11487 pr_debug("elf: symbol address match for %s of %s in %s: 0x%x + 0x%lx = 0x%lx\n",
11488 func_name, file_name, archive_path, entry.data_offset, ret,
11489 ret + entry.data_offset);
11490 ret += entry.data_offset;
11491 }
11492 elf_end(elf);
11493
11494 out:
11495 zip_archive_close(archive);
11496 return ret;
11497 }
11498
11499 static const char *arch_specific_lib_paths(void)
11500 {
11501 /*
11502 * Based on https://packages.debian.org/sid/libc6.
11503 *
11504 * Assume that the traced program is built for the same architecture
11505 * as libbpf, which should cover the vast majority of cases.
11506 */
11507 #if defined(__x86_64__)
11508 return "/lib/x86_64-linux-gnu";
11509 #elif defined(__i386__)
11510 return "/lib/i386-linux-gnu";
11511 #elif defined(__s390x__)
11512 return "/lib/s390x-linux-gnu";
11513 #elif defined(__s390__)
11514 return "/lib/s390-linux-gnu";
11515 #elif defined(__arm__) && defined(__SOFTFP__)
11516 return "/lib/arm-linux-gnueabi";
11517 #elif defined(__arm__) && !defined(__SOFTFP__)
11518 return "/lib/arm-linux-gnueabihf";
11519 #elif defined(__aarch64__)
11520 return "/lib/aarch64-linux-gnu";
11521 #elif defined(__mips__) && defined(__MIPSEL__) && _MIPS_SZLONG == 64
11522 return "/lib/mips64el-linux-gnuabi64";
11523 #elif defined(__mips__) && defined(__MIPSEL__) && _MIPS_SZLONG == 32
11524 return "/lib/mipsel-linux-gnu";
11525 #elif defined(__powerpc64__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
11526 return "/lib/powerpc64le-linux-gnu";
11527 #elif defined(__sparc__) && defined(__arch64__)
11528 return "/lib/sparc64-linux-gnu";
11529 #elif defined(__riscv) && __riscv_xlen == 64
11530 return "/lib/riscv64-linux-gnu";
11531 #else
11532 return NULL;
11533 #endif
11534 }
11535
11536 /* Get full path to program/shared library. */
11537 static int resolve_full_path(const char *file, char *result, size_t result_sz)
11538 {
11539 const char *search_paths[3] = {};
11540 int i, perm;
11541
11542 if (str_has_sfx(file, ".so") || strstr(file, ".so.")) {
11543 search_paths[0] = getenv("LD_LIBRARY_PATH");
11544 search_paths[1] = "/usr/lib64:/usr/lib";
11545 search_paths[2] = arch_specific_lib_paths();
11546 perm = R_OK;
11547 } else {
11548 search_paths[0] = getenv("PATH");
11549 search_paths[1] = "/usr/bin:/usr/sbin";
11550 perm = R_OK | X_OK;
11551 }
11552
11553 for (i = 0; i < ARRAY_SIZE(search_paths); i++) {
11554 const char *s;
11555
11556 if (!search_paths[i])
11557 continue;
11558 for (s = search_paths[i]; s != NULL; s = strchr(s, ':')) {
11559 char *next_path;
11560 int seg_len;
11561
11562 if (s[0] == ':')
11563 s++;
11564 next_path = strchr(s, ':');
11565 seg_len = next_path ? next_path - s : strlen(s);
11566 if (!seg_len)
11567 continue;
11568 snprintf(result, result_sz, "%.*s/%s", seg_len, s, file);
11569 /* ensure it has required permissions */
11570 if (faccessat(AT_FDCWD, result, perm, AT_EACCESS) < 0)
11571 continue;
11572 pr_debug("resolved '%s' to '%s'\n", file, result);
11573 return 0;
11574 }
11575 }
11576 return -ENOENT;
11577 }
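/* For example, resolving "libc.so.6" would typically produce something like
 * "/lib/x86_64-linux-gnu/libc.so.6" on an x86-64 Debian-style system, while
 * resolving "bash" searches $PATH plus /usr/bin:/usr/sbin (illustrative
 * outcomes only; the result depends on the host's filesystem layout).
 */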
11578
11579 struct bpf_link *
11580 bpf_program__attach_uprobe_multi(const struct bpf_program *prog,
11581 pid_t pid,
11582 const char *path,
11583 const char *func_pattern,
11584 const struct bpf_uprobe_multi_opts *opts)
11585 {
11586 const unsigned long *ref_ctr_offsets = NULL, *offsets = NULL;
11587 LIBBPF_OPTS(bpf_link_create_opts, lopts);
11588 unsigned long *resolved_offsets = NULL;
11589 int err = 0, link_fd, prog_fd;
11590 struct bpf_link *link = NULL;
11591 char errmsg[STRERR_BUFSIZE];
11592 char full_path[PATH_MAX];
11593 const __u64 *cookies;
11594 const char **syms;
11595 size_t cnt;
11596
11597 if (!OPTS_VALID(opts, bpf_uprobe_multi_opts))
11598 return libbpf_err_ptr(-EINVAL);
11599
11600 syms = OPTS_GET(opts, syms, NULL);
11601 offsets = OPTS_GET(opts, offsets, NULL);
11602 ref_ctr_offsets = OPTS_GET(opts, ref_ctr_offsets, NULL);
11603 cookies = OPTS_GET(opts, cookies, NULL);
11604 cnt = OPTS_GET(opts, cnt, 0);
11605
11606 /*
11607 * The user can specify two mutually exclusive sets of inputs:
11608 *
11609 * 1) use only path/func_pattern/pid arguments
11610 *
11611 * 2) use path/pid with allowed combinations of:
11612 * syms/offsets/ref_ctr_offsets/cookies/cnt
11613 *
11614 * - syms and offsets are mutually exclusive
11615 * - ref_ctr_offsets and cookies are optional
11616 *
11617 * Any other usage results in error.
11618 */
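/* Hypothetical calls illustrating the two input styles (paths and names are
 * examples only):
 *
 *   bpf_program__attach_uprobe_multi(prog, -1, "/usr/lib/libc.so.6",
 *                                    "pthread_*", NULL);
 *
 *   LIBBPF_OPTS(bpf_uprobe_multi_opts, my_opts, .syms = syms, .cnt = n);
 *   bpf_program__attach_uprobe_multi(prog, -1, "/usr/lib/libc.so.6",
 *                                    NULL, &my_opts);
 */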
11619
11620 if (!path)
11621 return libbpf_err_ptr(-EINVAL);
11622 if (!func_pattern && cnt == 0)
11623 return libbpf_err_ptr(-EINVAL);
11624
11625 if (func_pattern) {
11626 if (syms || offsets || ref_ctr_offsets || cookies || cnt)
11627 return libbpf_err_ptr(-EINVAL);
11628 } else {
11629 if (!!syms == !!offsets)
11630 return libbpf_err_ptr(-EINVAL);
11631 }
11632
11633 if (func_pattern) {
11634 if (!strchr(path, '/')) {
11635 err = resolve_full_path(path, full_path, sizeof(full_path));
11636 if (err) {
11637 pr_warn("prog '%s': failed to resolve full path for '%s': %d\n",
11638 prog->name, path, err);
11639 return libbpf_err_ptr(err);
11640 }
11641 path = full_path;
11642 }
11643
11644 err = elf_resolve_pattern_offsets(path, func_pattern,
11645 &resolved_offsets, &cnt);
11646 if (err < 0)
11647 return libbpf_err_ptr(err);
11648 offsets = resolved_offsets;
11649 } else if (syms) {
11650 err = elf_resolve_syms_offsets(path, cnt, syms, &resolved_offsets, STT_FUNC);
11651 if (err < 0)
11652 return libbpf_err_ptr(err);
11653 offsets = resolved_offsets;
11654 }
11655
11656 lopts.uprobe_multi.path = path;
11657 lopts.uprobe_multi.offsets = offsets;
11658 lopts.uprobe_multi.ref_ctr_offsets = ref_ctr_offsets;
11659 lopts.uprobe_multi.cookies = cookies;
11660 lopts.uprobe_multi.cnt = cnt;
11661 lopts.uprobe_multi.flags = OPTS_GET(opts, retprobe, false) ? BPF_F_UPROBE_MULTI_RETURN : 0;
11662
11663 if (pid == 0)
11664 pid = getpid();
11665 if (pid > 0)
11666 lopts.uprobe_multi.pid = pid;
11667
11668 link = calloc(1, sizeof(*link));
11669 if (!link) {
11670 err = -ENOMEM;
11671 goto error;
11672 }
11673 link->detach = &bpf_link__detach_fd;
11674
11675 prog_fd = bpf_program__fd(prog);
11676 link_fd = bpf_link_create(prog_fd, 0, BPF_TRACE_UPROBE_MULTI, &lopts);
11677 if (link_fd < 0) {
11678 err = -errno;
11679 pr_warn("prog '%s': failed to attach multi-uprobe: %s\n",
11680 prog->name, libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
11681 goto error;
11682 }
11683 link->fd = link_fd;
11684 free(resolved_offsets);
11685 return link;
11686
11687 error:
11688 free(resolved_offsets);
11689 free(link);
11690 return libbpf_err_ptr(err);
11691 }
11692
11693 LIBBPF_API struct bpf_link *
11694 bpf_program__attach_uprobe_opts(const struct bpf_program *prog, pid_t pid,
11695 const char *binary_path, size_t func_offset,
11696 const struct bpf_uprobe_opts *opts)
11697 {
11698 const char *archive_path = NULL, *archive_sep = NULL;
11699 char errmsg[STRERR_BUFSIZE], *legacy_probe = NULL;
11700 DECLARE_LIBBPF_OPTS(bpf_perf_event_opts, pe_opts);
11701 enum probe_attach_mode attach_mode;
11702 char full_path[PATH_MAX];
11703 struct bpf_link *link;
11704 size_t ref_ctr_off;
11705 int pfd, err;
11706 bool retprobe, legacy;
11707 const char *func_name;
11708
11709 if (!OPTS_VALID(opts, bpf_uprobe_opts))
11710 return libbpf_err_ptr(-EINVAL);
11711
11712 attach_mode = OPTS_GET(opts, attach_mode, PROBE_ATTACH_MODE_DEFAULT);
11713 retprobe = OPTS_GET(opts, retprobe, false);
11714 ref_ctr_off = OPTS_GET(opts, ref_ctr_offset, 0);
11715 pe_opts.bpf_cookie = OPTS_GET(opts, bpf_cookie, 0);
11716
11717 if (!binary_path)
11718 return libbpf_err_ptr(-EINVAL);
11719
11720 /* Check if "binary_path" refers to an archive. */
11721 archive_sep = strstr(binary_path, "!/");
11722 if (archive_sep) {
11723 full_path[0] = '\0';
11724 libbpf_strlcpy(full_path, binary_path,
11725 min(sizeof(full_path), (size_t)(archive_sep - binary_path + 1)));
11726 archive_path = full_path;
11727 binary_path = archive_sep + 2;
11728 } else if (!strchr(binary_path, '/')) {
11729 err = resolve_full_path(binary_path, full_path, sizeof(full_path));
11730 if (err) {
11731 pr_warn("prog '%s': failed to resolve full path for '%s': %d\n",
11732 prog->name, binary_path, err);
11733 return libbpf_err_ptr(err);
11734 }
11735 binary_path = full_path;
11736 }
11737 func_name = OPTS_GET(opts, func_name, NULL);
11738 if (func_name) {
11739 long sym_off;
11740
11741 if (archive_path) {
11742 sym_off = elf_find_func_offset_from_archive(archive_path, binary_path,
11743 func_name);
11744 binary_path = archive_path;
11745 } else {
11746 sym_off = elf_find_func_offset_from_file(binary_path, func_name);
11747 }
11748 if (sym_off < 0)
11749 return libbpf_err_ptr(sym_off);
11750 func_offset += sym_off;
11751 }
11752
11753 legacy = determine_uprobe_perf_type() < 0;
11754 switch (attach_mode) {
11755 case PROBE_ATTACH_MODE_LEGACY:
11756 legacy = true;
11757 pe_opts.force_ioctl_attach = true;
11758 break;
11759 case PROBE_ATTACH_MODE_PERF:
11760 if (legacy)
11761 return libbpf_err_ptr(-ENOTSUP);
11762 pe_opts.force_ioctl_attach = true;
11763 break;
11764 case PROBE_ATTACH_MODE_LINK:
11765 if (legacy || !kernel_supports(prog->obj, FEAT_PERF_LINK))
11766 return libbpf_err_ptr(-ENOTSUP);
11767 break;
11768 case PROBE_ATTACH_MODE_DEFAULT:
11769 break;
11770 default:
11771 return libbpf_err_ptr(-EINVAL);
11772 }
11773
11774 if (!legacy) {
11775 pfd = perf_event_open_probe(true /* uprobe */, retprobe, binary_path,
11776 func_offset, pid, ref_ctr_off);
11777 } else {
11778 char probe_name[PATH_MAX + 64];
11779
11780 if (ref_ctr_off)
11781 return libbpf_err_ptr(-EINVAL);
11782
11783 gen_uprobe_legacy_event_name(probe_name, sizeof(probe_name),
11784 binary_path, func_offset);
11785
11786 legacy_probe = strdup(probe_name);
11787 if (!legacy_probe)
11788 return libbpf_err_ptr(-ENOMEM);
11789
11790 pfd = perf_event_uprobe_open_legacy(legacy_probe, retprobe,
11791 binary_path, func_offset, pid);
11792 }
11793 if (pfd < 0) {
11794 err = -errno;
11795 pr_warn("prog '%s': failed to create %s '%s:0x%zx' perf event: %s\n",
11796 prog->name, retprobe ? "uretprobe" : "uprobe",
11797 binary_path, func_offset,
11798 libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
11799 goto err_out;
11800 }
11801
11802 link = bpf_program__attach_perf_event_opts(prog, pfd, &pe_opts);
11803 err = libbpf_get_error(link);
11804 if (err) {
11805 close(pfd);
11806 pr_warn("prog '%s': failed to attach to %s '%s:0x%zx': %s\n",
11807 prog->name, retprobe ? "uretprobe" : "uprobe",
11808 binary_path, func_offset,
11809 libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
11810 goto err_clean_legacy;
11811 }
11812 if (legacy) {
11813 struct bpf_link_perf *perf_link = container_of(link, struct bpf_link_perf, link);
11814
11815 perf_link->legacy_probe_name = legacy_probe;
11816 perf_link->legacy_is_kprobe = false;
11817 perf_link->legacy_is_retprobe = retprobe;
11818 }
11819 return link;
11820
11821 err_clean_legacy:
11822 if (legacy)
11823 remove_uprobe_event_legacy(legacy_probe, retprobe);
11824 err_out:
11825 free(legacy_probe);
11826 return libbpf_err_ptr(err);
11827 }
11828
11829 /* Format of u[ret]probe section definition supporting auto-attach:
11830 * u[ret]probe/binary:function[+offset]
11831 *
11832 * binary can be an absolute/relative path or a filename; the latter is resolved to a
11833 * full binary path via bpf_program__attach_uprobe_opts.
11834 *
11835 * Specifying uprobe+ ensures we carry out strict matching; either "uprobe" must be
11836 * specified (and auto-attach is not possible) or the above format is specified for
11837 * auto-attach.
11838 */
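/* A few illustrative section names accepted by the parser below (the
 * binaries and functions are examples only):
 *   SEC("uprobe//usr/lib/libc.so.6:malloc")
 *   SEC("uretprobe/libc.so.6:free")
 *   SEC("uprobe//usr/bin/bash:readline+0x10")
 */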
11839 static int attach_uprobe(const struct bpf_program *prog, long cookie, struct bpf_link **link)
11840 {
11841 DECLARE_LIBBPF_OPTS(bpf_uprobe_opts, opts);
11842 char *probe_type = NULL, *binary_path = NULL, *func_name = NULL, *func_off;
11843 int n, c, ret = -EINVAL;
11844 long offset = 0;
11845
11846 *link = NULL;
11847
11848 n = sscanf(prog->sec_name, "%m[^/]/%m[^:]:%m[^\n]",
11849 &probe_type, &binary_path, &func_name);
11850 switch (n) {
11851 case 1:
11852 /* handle SEC("u[ret]probe") - format is valid, but auto-attach is impossible. */
11853 ret = 0;
11854 break;
11855 case 2:
11856 pr_warn("prog '%s': section '%s' missing ':function[+offset]' specification\n",
11857 prog->name, prog->sec_name);
11858 break;
11859 case 3:
11860 /* check if the user specified `+offset`; if so, it must be the
11861 * last part of the string, so make sure sscanf read up to EOL
11862 */
11863 func_off = strrchr(func_name, '+');
11864 if (func_off) {
11865 n = sscanf(func_off, "+%li%n", &offset, &c);
11866 if (n == 1 && *(func_off + c) == '\0')
11867 func_off[0] = '\0';
11868 else
11869 offset = 0;
11870 }
11871 opts.retprobe = strcmp(probe_type, "uretprobe") == 0 ||
11872 strcmp(probe_type, "uretprobe.s") == 0;
11873 if (opts.retprobe && offset != 0) {
11874 pr_warn("prog '%s': uretprobes do not support offset specification\n",
11875 prog->name);
11876 break;
11877 }
11878 opts.func_name = func_name;
11879 *link = bpf_program__attach_uprobe_opts(prog, -1, binary_path, offset, &opts);
11880 ret = libbpf_get_error(*link);
11881 break;
11882 default:
11883 pr_warn("prog '%s': invalid format of section definition '%s'\n", prog->name,
11884 prog->sec_name);
11885 break;
11886 }
11887 free(probe_type);
11888 free(binary_path);
11889 free(func_name);
11890
11891 return ret;
11892 }
11893
11894 struct bpf_link *bpf_program__attach_uprobe(const struct bpf_program *prog,
11895 bool retprobe, pid_t pid,
11896 const char *binary_path,
11897 size_t func_offset)
11898 {
11899 DECLARE_LIBBPF_OPTS(bpf_uprobe_opts, opts, .retprobe = retprobe);
11900
11901 return bpf_program__attach_uprobe_opts(prog, pid, binary_path, func_offset, &opts);
11902 }
11903
11904 struct bpf_link *bpf_program__attach_usdt(const struct bpf_program *prog,
11905 pid_t pid, const char *binary_path,
11906 const char *usdt_provider, const char *usdt_name,
11907 const struct bpf_usdt_opts *opts)
11908 {
11909 char resolved_path[512];
11910 struct bpf_object *obj = prog->obj;
11911 struct bpf_link *link;
11912 __u64 usdt_cookie;
11913 int err;
11914
11915 if (!OPTS_VALID(opts, bpf_usdt_opts))
11916 return libbpf_err_ptr(-EINVAL);
11917
11918 if (bpf_program__fd(prog) < 0) {
11919 pr_warn("prog '%s': can't attach BPF program w/o FD (did you load it?)\n",
11920 prog->name);
11921 return libbpf_err_ptr(-EINVAL);
11922 }
11923
11924 if (!binary_path)
11925 return libbpf_err_ptr(-EINVAL);
11926
11927 if (!strchr(binary_path, '/')) {
11928 err = resolve_full_path(binary_path, resolved_path, sizeof(resolved_path));
11929 if (err) {
11930 pr_warn("prog '%s': failed to resolve full path for '%s': %d\n",
11931 prog->name, binary_path, err);
11932 return libbpf_err_ptr(err);
11933 }
11934 binary_path = resolved_path;
11935 }
11936
11937 /* USDT manager is instantiated lazily on first USDT attach. It will
11938 * be destroyed together with BPF object in bpf_object__close().
11939 */
11940 if (IS_ERR(obj->usdt_man))
11941 return libbpf_ptr(obj->usdt_man);
11942 if (!obj->usdt_man) {
11943 obj->usdt_man = usdt_manager_new(obj);
11944 if (IS_ERR(obj->usdt_man))
11945 return libbpf_ptr(obj->usdt_man);
11946 }
11947
11948 usdt_cookie = OPTS_GET(opts, usdt_cookie, 0);
11949 link = usdt_manager_attach_usdt(obj->usdt_man, prog, pid, binary_path,
11950 usdt_provider, usdt_name, usdt_cookie);
11951 err = libbpf_get_error(link);
11952 if (err)
11953 return libbpf_err_ptr(err);
11954 return link;
11955 }
11956
11957 static int attach_usdt(const struct bpf_program *prog, long cookie, struct bpf_link **link)
11958 {
11959 char *path = NULL, *provider = NULL, *name = NULL;
11960 const char *sec_name;
11961 int n, err;
11962
11963 sec_name = bpf_program__section_name(prog);
11964 if (strcmp(sec_name, "usdt") == 0) {
11965 /* no auto-attach for just SEC("usdt") */
11966 *link = NULL;
11967 return 0;
11968 }
11969
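/* e.g. SEC("usdt/libc.so.6:libc:setjmp") targets the libc:setjmp USDT in
 * libc.so.6 (an illustrative example; any <path>:<provider>:<name> triple
 * matching the format below is accepted).
 */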
11970 n = sscanf(sec_name, "usdt/%m[^:]:%m[^:]:%m[^:]", &path, &provider, &name);
11971 if (n != 3) {
11972 pr_warn("invalid section '%s', expected SEC(\"usdt/<path>:<provider>:<name>\")\n",
11973 sec_name);
11974 err = -EINVAL;
11975 } else {
11976 *link = bpf_program__attach_usdt(prog, -1 /* any process */, path,
11977 provider, name, NULL);
11978 err = libbpf_get_error(*link);
11979 }
11980 free(path);
11981 free(provider);
11982 free(name);
11983 return err;
11984 }
11985
11986 static int determine_tracepoint_id(const char *tp_category,
11987 const char *tp_name)
11988 {
11989 char file[PATH_MAX];
11990 int ret;
11991
11992 ret = snprintf(file, sizeof(file), "%s/events/%s/%s/id",
11993 tracefs_path(), tp_category, tp_name);
11994 if (ret < 0)
11995 return -errno;
11996 if (ret >= sizeof(file)) {
11997 pr_debug("tracepoint %s/%s path is too long\n",
11998 tp_category, tp_name);
11999 return -E2BIG;
12000 }
12001 return parse_uint_from_file(file, "%d\n");
12002 }
12003
12004 static int perf_event_open_tracepoint(const char *tp_category,
12005 const char *tp_name)
12006 {
12007 const size_t attr_sz = sizeof(struct perf_event_attr);
12008 struct perf_event_attr attr;
12009 char errmsg[STRERR_BUFSIZE];
12010 int tp_id, pfd, err;
12011
12012 tp_id = determine_tracepoint_id(tp_category, tp_name);
12013 if (tp_id < 0) {
12014 pr_warn("failed to determine tracepoint '%s/%s' perf event ID: %s\n",
12015 tp_category, tp_name,
12016 libbpf_strerror_r(tp_id, errmsg, sizeof(errmsg)));
12017 return tp_id;
12018 }
12019
12020 memset(&attr, 0, attr_sz);
12021 attr.type = PERF_TYPE_TRACEPOINT;
12022 attr.size = attr_sz;
12023 attr.config = tp_id;
12024
12025 pfd = syscall(__NR_perf_event_open, &attr, -1 /* pid */, 0 /* cpu */,
12026 -1 /* group_fd */, PERF_FLAG_FD_CLOEXEC);
12027 if (pfd < 0) {
12028 err = -errno;
12029 pr_warn("tracepoint '%s/%s' perf_event_open() failed: %s\n",
12030 tp_category, tp_name,
12031 libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
12032 return err;
12033 }
12034 return pfd;
12035 }
12036
12037 struct bpf_link *bpf_program__attach_tracepoint_opts(const struct bpf_program *prog,
12038 const char *tp_category,
12039 const char *tp_name,
12040 const struct bpf_tracepoint_opts *opts)
12041 {
12042 DECLARE_LIBBPF_OPTS(bpf_perf_event_opts, pe_opts);
12043 char errmsg[STRERR_BUFSIZE];
12044 struct bpf_link *link;
12045 int pfd, err;
12046
12047 if (!OPTS_VALID(opts, bpf_tracepoint_opts))
12048 return libbpf_err_ptr(-EINVAL);
12049
12050 pe_opts.bpf_cookie = OPTS_GET(opts, bpf_cookie, 0);
12051
12052 pfd = perf_event_open_tracepoint(tp_category, tp_name);
12053 if (pfd < 0) {
12054 pr_warn("prog '%s': failed to create tracepoint '%s/%s' perf event: %s\n",
12055 prog->name, tp_category, tp_name,
12056 libbpf_strerror_r(pfd, errmsg, sizeof(errmsg)));
12057 return libbpf_err_ptr(pfd);
12058 }
12059 link = bpf_program__attach_perf_event_opts(prog, pfd, &pe_opts);
12060 err = libbpf_get_error(link);
12061 if (err) {
12062 close(pfd);
12063 pr_warn("prog '%s': failed to attach to tracepoint '%s/%s': %s\n",
12064 prog->name, tp_category, tp_name,
12065 libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
12066 return libbpf_err_ptr(err);
12067 }
12068 return link;
12069 }
12070
12071 struct bpf_link *bpf_program__attach_tracepoint(const struct bpf_program *prog,
12072 const char *tp_category,
12073 const char *tp_name)
12074 {
12075 return bpf_program__attach_tracepoint_opts(prog, tp_category, tp_name, NULL);
12076 }
12077
12078 static int attach_tp(const struct bpf_program *prog, long cookie, struct bpf_link **link)
12079 {
12080 char *sec_name, *tp_cat, *tp_name;
12081
12082 *link = NULL;
12083
12084 /* no auto-attach for SEC("tp") or SEC("tracepoint") */
12085 if (strcmp(prog->sec_name, "tp") == 0 || strcmp(prog->sec_name, "tracepoint") == 0)
12086 return 0;
12087
12088 sec_name = strdup(prog->sec_name);
12089 if (!sec_name)
12090 return -ENOMEM;
12091
12092 /* extract "tp/<category>/<name>" or "tracepoint/<category>/<name>" */
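/* e.g. SEC("tp/syscalls/sys_enter_openat") -> category "syscalls",
 * name "sys_enter_openat" (illustrative; any existing tracepoint works)
 */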
12093 if (str_has_pfx(prog->sec_name, "tp/"))
12094 tp_cat = sec_name + sizeof("tp/") - 1;
12095 else
12096 tp_cat = sec_name + sizeof("tracepoint/") - 1;
12097 tp_name = strchr(tp_cat, '/');
12098 if (!tp_name) {
12099 free(sec_name);
12100 return -EINVAL;
12101 }
12102 *tp_name = '\0';
12103 tp_name++;
12104
12105 *link = bpf_program__attach_tracepoint(prog, tp_cat, tp_name);
12106 free(sec_name);
12107 return libbpf_get_error(*link);
12108 }
12109
12110 struct bpf_link *bpf_program__attach_raw_tracepoint(const struct bpf_program *prog,
12111 const char *tp_name)
12112 {
12113 char errmsg[STRERR_BUFSIZE];
12114 struct bpf_link *link;
12115 int prog_fd, pfd;
12116
12117 prog_fd = bpf_program__fd(prog);
12118 if (prog_fd < 0) {
12119 pr_warn("prog '%s': can't attach before loaded\n", prog->name);
12120 return libbpf_err_ptr(-EINVAL);
12121 }
12122
12123 link = calloc(1, sizeof(*link));
12124 if (!link)
12125 return libbpf_err_ptr(-ENOMEM);
12126 link->detach = &bpf_link__detach_fd;
12127
12128 pfd = bpf_raw_tracepoint_open(tp_name, prog_fd);
12129 if (pfd < 0) {
12130 pfd = -errno;
12131 free(link);
12132 pr_warn("prog '%s': failed to attach to raw tracepoint '%s': %s\n",
12133 prog->name, tp_name, libbpf_strerror_r(pfd, errmsg, sizeof(errmsg)));
12134 return libbpf_err_ptr(pfd);
12135 }
12136 link->fd = pfd;
12137 return link;
12138 }
12139
12140 static int attach_raw_tp(const struct bpf_program *prog, long cookie, struct bpf_link **link)
12141 {
12142 static const char *const prefixes[] = {
12143 "raw_tp",
12144 "raw_tracepoint",
12145 "raw_tp.w",
12146 "raw_tracepoint.w",
12147 };
12148 size_t i;
12149 const char *tp_name = NULL;
12150
12151 *link = NULL;
12152
12153 for (i = 0; i < ARRAY_SIZE(prefixes); i++) {
12154 size_t pfx_len;
12155
12156 if (!str_has_pfx(prog->sec_name, prefixes[i]))
12157 continue;
12158
12159 pfx_len = strlen(prefixes[i]);
12160 /* no auto-attach for a bare prefix, e.g., SEC("raw_tp") */
12161 if (prog->sec_name[pfx_len] == '\0')
12162 return 0;
12163
12164 if (prog->sec_name[pfx_len] != '/')
12165 continue;
12166
12167 tp_name = prog->sec_name + pfx_len + 1;
12168 break;
12169 }
12170
12171 if (!tp_name) {
12172 pr_warn("prog '%s': invalid section name '%s'\n",
12173 prog->name, prog->sec_name);
12174 return -EINVAL;
12175 }
12176
12177 *link = bpf_program__attach_raw_tracepoint(prog, tp_name);
12178 return libbpf_get_error(*link);
12179 }
12180
12181 /* Common logic for all BPF program types that attach to a btf_id */
12182 static struct bpf_link *bpf_program__attach_btf_id(const struct bpf_program *prog,
12183 const struct bpf_trace_opts *opts)
12184 {
12185 LIBBPF_OPTS(bpf_link_create_opts, link_opts);
12186 char errmsg[STRERR_BUFSIZE];
12187 struct bpf_link *link;
12188 int prog_fd, pfd;
12189
12190 if (!OPTS_VALID(opts, bpf_trace_opts))
12191 return libbpf_err_ptr(-EINVAL);
12192
12193 prog_fd = bpf_program__fd(prog);
12194 if (prog_fd < 0) {
12195 pr_warn("prog '%s': can't attach before loaded\n", prog->name);
12196 return libbpf_err_ptr(-EINVAL);
12197 }
12198
12199 link = calloc(1, sizeof(*link));
12200 if (!link)
12201 return libbpf_err_ptr(-ENOMEM);
12202 link->detach = &bpf_link__detach_fd;
12203
12204 /* libbpf is smart enough to redirect to BPF_RAW_TRACEPOINT_OPEN on old kernels */
12205 link_opts.tracing.cookie = OPTS_GET(opts, cookie, 0);
12206 pfd = bpf_link_create(prog_fd, 0, bpf_program__expected_attach_type(prog), &link_opts);
12207 if (pfd < 0) {
12208 pfd = -errno;
12209 free(link);
12210 pr_warn("prog '%s': failed to attach: %s\n",
12211 prog->name, libbpf_strerror_r(pfd, errmsg, sizeof(errmsg)));
12212 return libbpf_err_ptr(pfd);
12213 }
12214 link->fd = pfd;
12215 return link;
12216 }
12217
12218 struct bpf_link *bpf_program__attach_trace(const struct bpf_program *prog)
12219 {
12220 return bpf_program__attach_btf_id(prog, NULL);
12221 }
12222
12223 struct bpf_link *bpf_program__attach_trace_opts(const struct bpf_program *prog,
12224 const struct bpf_trace_opts *opts)
12225 {
12226 return bpf_program__attach_btf_id(prog, opts);
12227 }
12228
12229 struct bpf_link *bpf_program__attach_lsm(const struct bpf_program *prog)
12230 {
12231 return bpf_program__attach_btf_id(prog, NULL);
12232 }
12233
12234 static int attach_trace(const struct bpf_program *prog, long cookie, struct bpf_link **link)
12235 {
12236 *link = bpf_program__attach_trace(prog);
12237 return libbpf_get_error(*link);
12238 }
12239
12240 static int attach_lsm(const struct bpf_program *prog, long cookie, struct bpf_link **link)
12241 {
12242 *link = bpf_program__attach_lsm(prog);
12243 return libbpf_get_error(*link);
12244 }
12245
12246 static struct bpf_link *
12247 bpf_program_attach_fd(const struct bpf_program *prog,
12248 int target_fd, const char *target_name,
12249 const struct bpf_link_create_opts *opts)
12250 {
12251 enum bpf_attach_type attach_type;
12252 char errmsg[STRERR_BUFSIZE];
12253 struct bpf_link *link;
12254 int prog_fd, link_fd;
12255
12256 prog_fd = bpf_program__fd(prog);
12257 if (prog_fd < 0) {
12258 pr_warn("prog '%s': can't attach before loaded\n", prog->name);
12259 return libbpf_err_ptr(-EINVAL);
12260 }
12261
12262 link = calloc(1, sizeof(*link));
12263 if (!link)
12264 return libbpf_err_ptr(-ENOMEM);
12265 link->detach = &bpf_link__detach_fd;
12266
12267 attach_type = bpf_program__expected_attach_type(prog);
12268 link_fd = bpf_link_create(prog_fd, target_fd, attach_type, opts);
12269 if (link_fd < 0) {
12270 link_fd = -errno;
12271 free(link);
12272 pr_warn("prog '%s': failed to attach to %s: %s\n",
12273 prog->name, target_name,
12274 libbpf_strerror_r(link_fd, errmsg, sizeof(errmsg)));
12275 return libbpf_err_ptr(link_fd);
12276 }
12277 link->fd = link_fd;
12278 return link;
12279 }
12280
12281 struct bpf_link *
12282 bpf_program__attach_cgroup(const struct bpf_program *prog, int cgroup_fd)
12283 {
12284 return bpf_program_attach_fd(prog, cgroup_fd, "cgroup", NULL);
12285 }
12286
12287 struct bpf_link *
12288 bpf_program__attach_netns(const struct bpf_program *prog, int netns_fd)
12289 {
12290 return bpf_program_attach_fd(prog, netns_fd, "netns", NULL);
12291 }
12292
12293 struct bpf_link *bpf_program__attach_xdp(const struct bpf_program *prog, int ifindex)
12294 {
12295 /* target_fd/target_ifindex use the same field in LINK_CREATE */
12296 return bpf_program_attach_fd(prog, ifindex, "xdp", NULL);
12297 }
12298
12299 struct bpf_link *
12300 bpf_program__attach_tcx(const struct bpf_program *prog, int ifindex,
12301 const struct bpf_tcx_opts *opts)
12302 {
12303 LIBBPF_OPTS(bpf_link_create_opts, link_create_opts);
12304 __u32 relative_id;
12305 int relative_fd;
12306
12307 if (!OPTS_VALID(opts, bpf_tcx_opts))
12308 return libbpf_err_ptr(-EINVAL);
12309
12310 relative_id = OPTS_GET(opts, relative_id, 0);
12311 relative_fd = OPTS_GET(opts, relative_fd, 0);
12312
12313 /* validate we don't have unexpected combinations of non-zero fields */
12314 if (!ifindex) {
12315 pr_warn("prog '%s': target netdevice ifindex cannot be zero\n",
12316 prog->name);
12317 return libbpf_err_ptr(-EINVAL);
12318 }
12319 if (relative_fd && relative_id) {
12320 pr_warn("prog '%s': relative_fd and relative_id cannot be set at the same time\n",
12321 prog->name);
12322 return libbpf_err_ptr(-EINVAL);
12323 }
12324
12325 link_create_opts.tcx.expected_revision = OPTS_GET(opts, expected_revision, 0);
12326 link_create_opts.tcx.relative_fd = relative_fd;
12327 link_create_opts.tcx.relative_id = relative_id;
12328 link_create_opts.flags = OPTS_GET(opts, flags, 0);
12329
12330 /* target_fd/target_ifindex use the same field in LINK_CREATE */
12331 return bpf_program_attach_fd(prog, ifindex, "tcx", &link_create_opts);
12332 }
12333
12334 struct bpf_link *
12335 bpf_program__attach_netkit(const struct bpf_program *prog, int ifindex,
12336 const struct bpf_netkit_opts *opts)
12337 {
12338 LIBBPF_OPTS(bpf_link_create_opts, link_create_opts);
12339 __u32 relative_id;
12340 int relative_fd;
12341
12342 if (!OPTS_VALID(opts, bpf_netkit_opts))
12343 return libbpf_err_ptr(-EINVAL);
12344
12345 relative_id = OPTS_GET(opts, relative_id, 0);
12346 relative_fd = OPTS_GET(opts, relative_fd, 0);
12347
12348 /* validate we don't have unexpected combinations of non-zero fields */
12349 if (!ifindex) {
12350 pr_warn("prog '%s': target netdevice ifindex cannot be zero\n",
12351 prog->name);
12352 return libbpf_err_ptr(-EINVAL);
12353 }
12354 if (relative_fd && relative_id) {
12355 pr_warn("prog '%s': relative_fd and relative_id cannot be set at the same time\n",
12356 prog->name);
12357 return libbpf_err_ptr(-EINVAL);
12358 }
12359
12360 link_create_opts.netkit.expected_revision = OPTS_GET(opts, expected_revision, 0);
12361 link_create_opts.netkit.relative_fd = relative_fd;
12362 link_create_opts.netkit.relative_id = relative_id;
12363 link_create_opts.flags = OPTS_GET(opts, flags, 0);
12364
12365 return bpf_program_attach_fd(prog, ifindex, "netkit", &link_create_opts);
12366 }
12367
12368 struct bpf_link *bpf_program__attach_freplace(const struct bpf_program *prog,
12369 int target_fd,
12370 const char *attach_func_name)
12371 {
12372 int btf_id;
12373
12374 if (!!target_fd != !!attach_func_name) {
12375 pr_warn("prog '%s': supply none or both of target_fd and attach_func_name\n",
12376 prog->name);
12377 return libbpf_err_ptr(-EINVAL);
12378 }
12379
12380 if (prog->type != BPF_PROG_TYPE_EXT) {
12381 pr_warn("prog '%s': only BPF_PROG_TYPE_EXT can attach as freplace",
12382 prog->name);
12383 return libbpf_err_ptr(-EINVAL);
12384 }
12385
12386 if (target_fd) {
12387 LIBBPF_OPTS(bpf_link_create_opts, target_opts);
12388
12389 btf_id = libbpf_find_prog_btf_id(attach_func_name, target_fd);
12390 if (btf_id < 0)
12391 return libbpf_err_ptr(btf_id);
12392
12393 target_opts.target_btf_id = btf_id;
12394
12395 return bpf_program_attach_fd(prog, target_fd, "freplace",
12396 &target_opts);
12397 } else {
12398 /* no target, so use raw_tracepoint_open for compatibility
12399 * with old kernels
12400 */
12401 return bpf_program__attach_trace(prog);
12402 }
12403 }
12404
12405 struct bpf_link *
12406 bpf_program__attach_iter(const struct bpf_program *prog,
12407 const struct bpf_iter_attach_opts *opts)
12408 {
12409 DECLARE_LIBBPF_OPTS(bpf_link_create_opts, link_create_opts);
12410 char errmsg[STRERR_BUFSIZE];
12411 struct bpf_link *link;
12412 int prog_fd, link_fd;
12413 __u32 target_fd = 0;
12414
12415 if (!OPTS_VALID(opts, bpf_iter_attach_opts))
12416 return libbpf_err_ptr(-EINVAL);
12417
12418 link_create_opts.iter_info = OPTS_GET(opts, link_info, (void *)0);
12419 link_create_opts.iter_info_len = OPTS_GET(opts, link_info_len, 0);
12420
12421 prog_fd = bpf_program__fd(prog);
12422 if (prog_fd < 0) {
12423 pr_warn("prog '%s': can't attach before loaded\n", prog->name);
12424 return libbpf_err_ptr(-EINVAL);
12425 }
12426
12427 link = calloc(1, sizeof(*link));
12428 if (!link)
12429 return libbpf_err_ptr(-ENOMEM);
12430 link->detach = &bpf_link__detach_fd;
12431
12432 link_fd = bpf_link_create(prog_fd, target_fd, BPF_TRACE_ITER,
12433 &link_create_opts);
12434 if (link_fd < 0) {
12435 link_fd = -errno;
12436 free(link);
12437 pr_warn("prog '%s': failed to attach to iterator: %s\n",
12438 prog->name, libbpf_strerror_r(link_fd, errmsg, sizeof(errmsg)));
12439 return libbpf_err_ptr(link_fd);
12440 }
12441 link->fd = link_fd;
12442 return link;
12443 }
12444
12445 static int attach_iter(const struct bpf_program *prog, long cookie, struct bpf_link **link)
12446 {
12447 *link = bpf_program__attach_iter(prog, NULL);
12448 return libbpf_get_error(*link);
12449 }
12450
12451 struct bpf_link *bpf_program__attach_netfilter(const struct bpf_program *prog,
12452 const struct bpf_netfilter_opts *opts)
12453 {
12454 LIBBPF_OPTS(bpf_link_create_opts, lopts);
12455 struct bpf_link *link;
12456 int prog_fd, link_fd;
12457
12458 if (!OPTS_VALID(opts, bpf_netfilter_opts))
12459 return libbpf_err_ptr(-EINVAL);
12460
12461 prog_fd = bpf_program__fd(prog);
12462 if (prog_fd < 0) {
12463 pr_warn("prog '%s': can't attach before loaded\n", prog->name);
12464 return libbpf_err_ptr(-EINVAL);
12465 }
12466
12467 link = calloc(1, sizeof(*link));
12468 if (!link)
12469 return libbpf_err_ptr(-ENOMEM);
12470
12471 link->detach = &bpf_link__detach_fd;
12472
12473 lopts.netfilter.pf = OPTS_GET(opts, pf, 0);
12474 lopts.netfilter.hooknum = OPTS_GET(opts, hooknum, 0);
12475 lopts.netfilter.priority = OPTS_GET(opts, priority, 0);
12476 lopts.netfilter.flags = OPTS_GET(opts, flags, 0);
12477
12478 link_fd = bpf_link_create(prog_fd, 0, BPF_NETFILTER, &lopts);
12479 if (link_fd < 0) {
12480 char errmsg[STRERR_BUFSIZE];
12481
12482 link_fd = -errno;
12483 free(link);
12484 pr_warn("prog '%s': failed to attach to netfilter: %s\n",
12485 prog->name, libbpf_strerror_r(link_fd, errmsg, sizeof(errmsg)));
12486 return libbpf_err_ptr(link_fd);
12487 }
12488 link->fd = link_fd;
12489
12490 return link;
12491 }
12492
12493 struct bpf_link *bpf_program__attach(const struct bpf_program *prog)
12494 {
12495 struct bpf_link *link = NULL;
12496 int err;
12497
12498 if (!prog->sec_def || !prog->sec_def->prog_attach_fn)
12499 return libbpf_err_ptr(-EOPNOTSUPP);
12500
12501 err = prog->sec_def->prog_attach_fn(prog, prog->sec_def->cookie, &link);
12502 if (err)
12503 return libbpf_err_ptr(err);
12504
12505 /* When calling bpf_program__attach() explicitly, auto-attach support
12506 * is expected to work, so a NULL returned link is considered an error.
12507 * This is different for skeleton's attach, see comment in
12508 * bpf_object__attach_skeleton().
12509 */
12510 if (!link)
12511 return libbpf_err_ptr(-EOPNOTSUPP);
12512
12513 return link;
12514 }
12515
12516 struct bpf_link_struct_ops {
12517 struct bpf_link link;
12518 int map_fd;
12519 };
12520
12521 static int bpf_link__detach_struct_ops(struct bpf_link *link)
12522 {
12523 struct bpf_link_struct_ops *st_link;
12524 __u32 zero = 0;
12525
12526 st_link = container_of(link, struct bpf_link_struct_ops, link);
12527
12528 if (st_link->map_fd < 0)
12529 /* w/o a real link */
12530 return bpf_map_delete_elem(link->fd, &zero);
12531
12532 return close(link->fd);
12533 }
12534
12535 struct bpf_link *bpf_map__attach_struct_ops(const struct bpf_map *map)
12536 {
12537 struct bpf_link_struct_ops *link;
12538 __u32 zero = 0;
12539 int err, fd;
12540
12541 if (!bpf_map__is_struct_ops(map) || map->fd == -1)
12542 return libbpf_err_ptr(-EINVAL);
12543
12544 link = calloc(1, sizeof(*link));
12545 if (!link)
12546 return libbpf_err_ptr(-EINVAL);
12547
12548 /* kern_vdata should be prepared during the loading phase. */
12549 err = bpf_map_update_elem(map->fd, &zero, map->st_ops->kern_vdata, 0);
12550 /* It can be EBUSY if the map has been used to create or
12551 * update a link before. We don't allow updating the value of
12552 * a struct_ops once it is set. That ensures that the value
12553 * never changes. So, it is safe to skip EBUSY.
12554 */
12555 if (err && (!(map->def.map_flags & BPF_F_LINK) || err != -EBUSY)) {
12556 free(link);
12557 return libbpf_err_ptr(err);
12558 }
12559
12560 link->link.detach = bpf_link__detach_struct_ops;
12561
12562 if (!(map->def.map_flags & BPF_F_LINK)) {
12563 /* w/o a real link */
12564 link->link.fd = map->fd;
12565 link->map_fd = -1;
12566 return &link->link;
12567 }
12568
12569 fd = bpf_link_create(map->fd, 0, BPF_STRUCT_OPS, NULL);
12570 if (fd < 0) {
12571 free(link);
12572 return libbpf_err_ptr(fd);
12573 }
12574
12575 link->link.fd = fd;
12576 link->map_fd = map->fd;
12577
12578 return &link->link;
12579 }
12580
12581 /*
12582 * Swap the backing struct_ops map of a link with a new struct_ops map.
12583 */
12584 int bpf_link__update_map(struct bpf_link *link, const struct bpf_map *map)
12585 {
12586 struct bpf_link_struct_ops *st_ops_link;
12587 __u32 zero = 0;
12588 int err;
12589
12590 if (!bpf_map__is_struct_ops(map) || !map_is_created(map))
12591 return -EINVAL;
12592
12593 st_ops_link = container_of(link, struct bpf_link_struct_ops, link);
12594 /* Ensure the type of a link is correct */
12595 if (st_ops_link->map_fd < 0)
12596 return -EINVAL;
12597
12598 err = bpf_map_update_elem(map->fd, &zero, map->st_ops->kern_vdata, 0);
12599 /* It can be EBUSY if the map has been used to create or
12600 * update a link before. We don't allow updating the value of
12601 * a struct_ops once it is set. That ensures that the value
12602 * never changes. So, it is safe to skip EBUSY.
12603 */
12604 if (err && err != -EBUSY)
12605 return err;
12606
12607 err = bpf_link_update(link->fd, map->fd, NULL);
12608 if (err < 0)
12609 return err;
12610
12611 st_ops_link->map_fd = map->fd;
12612
12613 return 0;
12614 }
12615
12616 typedef enum bpf_perf_event_ret (*bpf_perf_event_print_t)(struct perf_event_header *hdr,
12617 void *private_data);
12618
12619 static enum bpf_perf_event_ret
12620 perf_event_read_simple(void *mmap_mem, size_t mmap_size, size_t page_size,
12621 void **copy_mem, size_t *copy_size,
12622 bpf_perf_event_print_t fn, void *private_data)
12623 {
12624 struct perf_event_mmap_page *header = mmap_mem;
12625 __u64 data_head = ring_buffer_read_head(header);
12626 __u64 data_tail = header->data_tail;
12627 void *base = ((__u8 *)header) + page_size;
12628 int ret = LIBBPF_PERF_EVENT_CONT;
12629 struct perf_event_header *ehdr;
12630 size_t ehdr_size;
12631
12632 while (data_head != data_tail) {
12633 ehdr = base + (data_tail & (mmap_size - 1));
12634 ehdr_size = ehdr->size;
12635
12636 if (((void *)ehdr) + ehdr_size > base + mmap_size) {
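/* record wraps around the end of the mmap'ed ring: reassemble it into
 * the (re)allocated copy_mem bounce buffer before invoking the callback
 */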
12637 void *copy_start = ehdr;
12638 size_t len_first = base + mmap_size - copy_start;
12639 size_t len_secnd = ehdr_size - len_first;
12640
12641 if (*copy_size < ehdr_size) {
12642 free(*copy_mem);
12643 *copy_mem = malloc(ehdr_size);
12644 if (!*copy_mem) {
12645 *copy_size = 0;
12646 ret = LIBBPF_PERF_EVENT_ERROR;
12647 break;
12648 }
12649 *copy_size = ehdr_size;
12650 }
12651
12652 memcpy(*copy_mem, copy_start, len_first);
12653 memcpy(*copy_mem + len_first, base, len_secnd);
12654 ehdr = *copy_mem;
12655 }
12656
12657 ret = fn(ehdr, private_data);
12658 data_tail += ehdr_size;
12659 if (ret != LIBBPF_PERF_EVENT_CONT)
12660 break;
12661 }
12662
12663 ring_buffer_write_tail(header, data_tail);
12664 return libbpf_err(ret);
12665 }
12666
12667 struct perf_buffer;
12668
12669 struct perf_buffer_params {
12670 struct perf_event_attr *attr;
12671 /* if event_cb is specified, it takes precedence */
12672 perf_buffer_event_fn event_cb;
12673 /* sample_cb and lost_cb are higher-level common-case callbacks */
12674 perf_buffer_sample_fn sample_cb;
12675 perf_buffer_lost_fn lost_cb;
12676 void *ctx;
12677 int cpu_cnt;
12678 int *cpus;
12679 int *map_keys;
12680 };
12681
12682 struct perf_cpu_buf {
12683 struct perf_buffer *pb;
12684 void *base; /* mmap()'ed memory */
12685 void *buf; /* for reconstructing segmented data */
12686 size_t buf_size;
12687 int fd;
12688 int cpu;
12689 int map_key;
12690 };
12691
12692 struct perf_buffer {
12693 perf_buffer_event_fn event_cb;
12694 perf_buffer_sample_fn sample_cb;
12695 perf_buffer_lost_fn lost_cb;
12696 void *ctx; /* passed into callbacks */
12697
12698 size_t page_size;
12699 size_t mmap_size;
12700 struct perf_cpu_buf **cpu_bufs;
12701 struct epoll_event *events;
12702 int cpu_cnt; /* number of allocated CPU buffers */
12703 int epoll_fd; /* epoll instance FD */
12704 int map_fd; /* BPF_MAP_TYPE_PERF_EVENT_ARRAY BPF map FD */
12705 };
12706
12707 static void perf_buffer__free_cpu_buf(struct perf_buffer *pb,
12708 struct perf_cpu_buf *cpu_buf)
12709 {
12710 if (!cpu_buf)
12711 return;
12712 if (cpu_buf->base &&
12713 munmap(cpu_buf->base, pb->mmap_size + pb->page_size))
12714 pr_warn("failed to munmap cpu_buf #%d\n", cpu_buf->cpu);
12715 if (cpu_buf->fd >= 0) {
12716 ioctl(cpu_buf->fd, PERF_EVENT_IOC_DISABLE, 0);
12717 close(cpu_buf->fd);
12718 }
12719 free(cpu_buf->buf);
12720 free(cpu_buf);
12721 }
12722
12723 void perf_buffer__free(struct perf_buffer *pb)
12724 {
12725 int i;
12726
12727 if (IS_ERR_OR_NULL(pb))
12728 return;
12729 if (pb->cpu_bufs) {
12730 for (i = 0; i < pb->cpu_cnt; i++) {
12731 struct perf_cpu_buf *cpu_buf = pb->cpu_bufs[i];
12732
12733 if (!cpu_buf)
12734 continue;
12735
12736 bpf_map_delete_elem(pb->map_fd, &cpu_buf->map_key);
12737 perf_buffer__free_cpu_buf(pb, cpu_buf);
12738 }
12739 free(pb->cpu_bufs);
12740 }
12741 if (pb->epoll_fd >= 0)
12742 close(pb->epoll_fd);
12743 free(pb->events);
12744 free(pb);
12745 }
12746
12747 static struct perf_cpu_buf *
12748 perf_buffer__open_cpu_buf(struct perf_buffer *pb, struct perf_event_attr *attr,
12749 int cpu, int map_key)
12750 {
12751 struct perf_cpu_buf *cpu_buf;
12752 char msg[STRERR_BUFSIZE];
12753 int err;
12754
12755 cpu_buf = calloc(1, sizeof(*cpu_buf));
12756 if (!cpu_buf)
12757 return ERR_PTR(-ENOMEM);
12758
12759 cpu_buf->pb = pb;
12760 cpu_buf->cpu = cpu;
12761 cpu_buf->map_key = map_key;
12762
12763 cpu_buf->fd = syscall(__NR_perf_event_open, attr, -1 /* pid */, cpu,
12764 -1, PERF_FLAG_FD_CLOEXEC);
12765 if (cpu_buf->fd < 0) {
12766 err = -errno;
12767 pr_warn("failed to open perf buffer event on cpu #%d: %s\n",
12768 cpu, libbpf_strerror_r(err, msg, sizeof(msg)));
12769 goto error;
12770 }
12771
12772 cpu_buf->base = mmap(NULL, pb->mmap_size + pb->page_size,
12773 PROT_READ | PROT_WRITE, MAP_SHARED,
12774 cpu_buf->fd, 0);
12775 if (cpu_buf->base == MAP_FAILED) {
12776 cpu_buf->base = NULL;
12777 err = -errno;
12778 pr_warn("failed to mmap perf buffer on cpu #%d: %s\n",
12779 cpu, libbpf_strerror_r(err, msg, sizeof(msg)));
12780 goto error;
12781 }
12782
12783 if (ioctl(cpu_buf->fd, PERF_EVENT_IOC_ENABLE, 0) < 0) {
12784 err = -errno;
12785 pr_warn("failed to enable perf buffer event on cpu #%d: %s\n",
12786 cpu, libbpf_strerror_r(err, msg, sizeof(msg)));
12787 goto error;
12788 }
12789
12790 return cpu_buf;
12791
12792 error:
12793 perf_buffer__free_cpu_buf(pb, cpu_buf);
12794 return (struct perf_cpu_buf *)ERR_PTR(err);
12795 }
12796
12797 static struct perf_buffer *__perf_buffer__new(int map_fd, size_t page_cnt,
12798 struct perf_buffer_params *p);
12799
12800 struct perf_buffer *perf_buffer__new(int map_fd, size_t page_cnt,
12801 perf_buffer_sample_fn sample_cb,
12802 perf_buffer_lost_fn lost_cb,
12803 void *ctx,
12804 const struct perf_buffer_opts *opts)
12805 {
12806 const size_t attr_sz = sizeof(struct perf_event_attr);
12807 struct perf_buffer_params p = {};
12808 struct perf_event_attr attr;
12809 __u32 sample_period;
12810
12811 if (!OPTS_VALID(opts, perf_buffer_opts))
12812 return libbpf_err_ptr(-EINVAL);
12813
12814 sample_period = OPTS_GET(opts, sample_period, 1);
12815 if (!sample_period)
12816 sample_period = 1;
12817
12818 memset(&attr, 0, attr_sz);
12819 attr.size = attr_sz;
12820 attr.config = PERF_COUNT_SW_BPF_OUTPUT;
12821 attr.type = PERF_TYPE_SOFTWARE;
12822 attr.sample_type = PERF_SAMPLE_RAW;
12823 attr.sample_period = sample_period;
12824 attr.wakeup_events = sample_period;
12825
12826 p.attr = &attr;
12827 p.sample_cb = sample_cb;
12828 p.lost_cb = lost_cb;
12829 p.ctx = ctx;
12830
12831 return libbpf_ptr(__perf_buffer__new(map_fd, page_cnt, &p));
12832 }
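/* Minimal caller-side usage sketch (hypothetical names: "events_map_fd" is
 * assumed to be the FD of a BPF_MAP_TYPE_PERF_EVENT_ARRAY map and
 * "handle_sample" to match perf_buffer_sample_fn):
 *
 *	pb = perf_buffer__new(events_map_fd, 64, handle_sample, NULL, NULL, NULL);
 *	if (!pb)
 *		return -errno;
 *	while (!stop)
 *		perf_buffer__poll(pb, 100);
 *	perf_buffer__free(pb);
 */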
12833
12834 struct perf_buffer *perf_buffer__new_raw(int map_fd, size_t page_cnt,
12835 struct perf_event_attr *attr,
12836 perf_buffer_event_fn event_cb, void *ctx,
12837 const struct perf_buffer_raw_opts *opts)
12838 {
12839 struct perf_buffer_params p = {};
12840
12841 if (!attr)
12842 return libbpf_err_ptr(-EINVAL);
12843
12844 if (!OPTS_VALID(opts, perf_buffer_raw_opts))
12845 return libbpf_err_ptr(-EINVAL);
12846
12847 p.attr = attr;
12848 p.event_cb = event_cb;
12849 p.ctx = ctx;
12850 p.cpu_cnt = OPTS_GET(opts, cpu_cnt, 0);
12851 p.cpus = OPTS_GET(opts, cpus, NULL);
12852 p.map_keys = OPTS_GET(opts, map_keys, NULL);
12853
12854 return libbpf_ptr(__perf_buffer__new(map_fd, page_cnt, &p));
12855 }
12856
12857 static struct perf_buffer *__perf_buffer__new(int map_fd, size_t page_cnt,
12858 struct perf_buffer_params *p)
12859 {
12860 const char *online_cpus_file = "/sys/devices/system/cpu/online";
12861 struct bpf_map_info map;
12862 char msg[STRERR_BUFSIZE];
12863 struct perf_buffer *pb;
12864 bool *online = NULL;
12865 __u32 map_info_len;
12866 int err, i, j, n;
12867
12868 if (page_cnt == 0 || (page_cnt & (page_cnt - 1))) {
12869 pr_warn("page count should be power of two, but is %zu\n",
12870 page_cnt);
12871 return ERR_PTR(-EINVAL);
12872 }
12873
12874 /* best-effort sanity checks */
12875 memset(&map, 0, sizeof(map));
12876 map_info_len = sizeof(map);
12877 err = bpf_map_get_info_by_fd(map_fd, &map, &map_info_len);
12878 if (err) {
12879 err = -errno;
12880 /* if BPF_OBJ_GET_INFO_BY_FD is supported, it will return
12881 * -EBADFD, -EFAULT, or -E2BIG on a real error
12882 */
12883 if (err != -EINVAL) {
12884 pr_warn("failed to get map info for map FD %d: %s\n",
12885 map_fd, libbpf_strerror_r(err, msg, sizeof(msg)));
12886 return ERR_PTR(err);
12887 }
12888 pr_debug("failed to get map info for FD %d; API not supported? Ignoring...\n",
12889 map_fd);
12890 } else {
12891 if (map.type != BPF_MAP_TYPE_PERF_EVENT_ARRAY) {
12892 pr_warn("map '%s' should be BPF_MAP_TYPE_PERF_EVENT_ARRAY\n",
12893 map.name);
12894 return ERR_PTR(-EINVAL);
12895 }
12896 }
12897
12898 pb = calloc(1, sizeof(*pb));
12899 if (!pb)
12900 return ERR_PTR(-ENOMEM);
12901
12902 pb->event_cb = p->event_cb;
12903 pb->sample_cb = p->sample_cb;
12904 pb->lost_cb = p->lost_cb;
12905 pb->ctx = p->ctx;
12906
12907 pb->page_size = getpagesize();
12908 pb->mmap_size = pb->page_size * page_cnt;
12909 pb->map_fd = map_fd;
12910
12911 pb->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
12912 if (pb->epoll_fd < 0) {
12913 err = -errno;
12914 pr_warn("failed to create epoll instance: %s\n",
12915 libbpf_strerror_r(err, msg, sizeof(msg)));
12916 goto error;
12917 }
12918
12919 if (p->cpu_cnt > 0) {
12920 pb->cpu_cnt = p->cpu_cnt;
12921 } else {
12922 pb->cpu_cnt = libbpf_num_possible_cpus();
12923 if (pb->cpu_cnt < 0) {
12924 err = pb->cpu_cnt;
12925 goto error;
12926 }
12927 if (map.max_entries && map.max_entries < pb->cpu_cnt)
12928 pb->cpu_cnt = map.max_entries;
12929 }
12930
12931 pb->events = calloc(pb->cpu_cnt, sizeof(*pb->events));
12932 if (!pb->events) {
12933 err = -ENOMEM;
12934 pr_warn("failed to allocate events: out of memory\n");
12935 goto error;
12936 }
12937 pb->cpu_bufs = calloc(pb->cpu_cnt, sizeof(*pb->cpu_bufs));
12938 if (!pb->cpu_bufs) {
12939 err = -ENOMEM;
12940 pr_warn("failed to allocate buffers: out of memory\n");
12941 goto error;
12942 }
12943
12944 err = parse_cpu_mask_file(online_cpus_file, &online, &n);
12945 if (err) {
12946 pr_warn("failed to get online CPU mask: %d\n", err);
12947 goto error;
12948 }
12949
12950 for (i = 0, j = 0; i < pb->cpu_cnt; i++) {
12951 struct perf_cpu_buf *cpu_buf;
12952 int cpu, map_key;
12953
12954 cpu = p->cpu_cnt > 0 ? p->cpus[i] : i;
12955 map_key = p->cpu_cnt > 0 ? p->map_keys[i] : i;
12956
12957 /* in case the user didn't explicitly request particular CPUs to
12958 * be attached to, skip offline/not-present CPUs
12959 */
12960 if (p->cpu_cnt <= 0 && (cpu >= n || !online[cpu]))
12961 continue;
12962
12963 cpu_buf = perf_buffer__open_cpu_buf(pb, p->attr, cpu, map_key);
12964 if (IS_ERR(cpu_buf)) {
12965 err = PTR_ERR(cpu_buf);
12966 goto error;
12967 }
12968
12969 pb->cpu_bufs[j] = cpu_buf;
12970
12971 err = bpf_map_update_elem(pb->map_fd, &map_key,
12972 &cpu_buf->fd, 0);
12973 if (err) {
12974 err = -errno;
12975 pr_warn("failed to set cpu #%d, key %d -> perf FD %d: %s\n",
12976 cpu, map_key, cpu_buf->fd,
12977 libbpf_strerror_r(err, msg, sizeof(msg)));
12978 goto error;
12979 }
12980
12981 pb->events[j].events = EPOLLIN;
12982 pb->events[j].data.ptr = cpu_buf;
12983 if (epoll_ctl(pb->epoll_fd, EPOLL_CTL_ADD, cpu_buf->fd,
12984 &pb->events[j]) < 0) {
12985 err = -errno;
12986 pr_warn("failed to epoll_ctl cpu #%d perf FD %d: %s\n",
12987 cpu, cpu_buf->fd,
12988 libbpf_strerror_r(err, msg, sizeof(msg)));
12989 goto error;
12990 }
12991 j++;
12992 }
12993 pb->cpu_cnt = j;
12994 free(online);
12995
12996 return pb;
12997
12998 error:
12999 free(online);
13000 if (pb)
13001 perf_buffer__free(pb);
13002 return ERR_PTR(err);
13003 }
13004
13005 struct perf_sample_raw {
13006 struct perf_event_header header;
13007 uint32_t size;
13008 char data[];
13009 };
13010
13011 struct perf_sample_lost {
13012 struct perf_event_header header;
13013 uint64_t id;
13014 uint64_t lost;
13015 uint64_t sample_id;
13016 };
13017
13018 static enum bpf_perf_event_ret
13019 perf_buffer__process_record(struct perf_event_header *e, void *ctx)
13020 {
13021 struct perf_cpu_buf *cpu_buf = ctx;
13022 struct perf_buffer *pb = cpu_buf->pb;
13023 void *data = e;
13024
13025 /* user wants full control over parsing perf event */
13026 if (pb->event_cb)
13027 return pb->event_cb(pb->ctx, cpu_buf->cpu, e);
13028
13029 switch (e->type) {
13030 case PERF_RECORD_SAMPLE: {
13031 struct perf_sample_raw *s = data;
13032
13033 if (pb->sample_cb)
13034 pb->sample_cb(pb->ctx, cpu_buf->cpu, s->data, s->size);
13035 break;
13036 }
13037 case PERF_RECORD_LOST: {
13038 struct perf_sample_lost *s = data;
13039
13040 if (pb->lost_cb)
13041 pb->lost_cb(pb->ctx, cpu_buf->cpu, s->lost);
13042 break;
13043 }
13044 default:
13045 pr_warn("unknown perf sample type %d\n", e->type);
13046 return LIBBPF_PERF_EVENT_ERROR;
13047 }
13048 return LIBBPF_PERF_EVENT_CONT;
13049 }
13050
13051 static int perf_buffer__process_records(struct perf_buffer *pb,
13052 struct perf_cpu_buf *cpu_buf)
13053 {
13054 enum bpf_perf_event_ret ret;
13055
13056 ret = perf_event_read_simple(cpu_buf->base, pb->mmap_size,
13057 pb->page_size, &cpu_buf->buf,
13058 &cpu_buf->buf_size,
13059 perf_buffer__process_record, cpu_buf);
13060 if (ret != LIBBPF_PERF_EVENT_CONT)
13061 return ret;
13062 return 0;
13063 }
13064
13065 int perf_buffer__epoll_fd(const struct perf_buffer *pb)
13066 {
13067 return pb->epoll_fd;
13068 }
13069
13070 int perf_buffer__poll(struct perf_buffer *pb, int timeout_ms)
13071 {
13072 int i, cnt, err;
13073
13074 cnt = epoll_wait(pb->epoll_fd, pb->events, pb->cpu_cnt, timeout_ms);
13075 if (cnt < 0)
13076 return -errno;
13077
13078 for (i = 0; i < cnt; i++) {
13079 struct perf_cpu_buf *cpu_buf = pb->events[i].data.ptr;
13080
13081 err = perf_buffer__process_records(pb, cpu_buf);
13082 if (err) {
13083 pr_warn("error while processing records: %d\n", err);
13084 return libbpf_err(err);
13085 }
13086 }
13087 return cnt;
13088 }
13089
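/* Illustrative only, not part of libbpf: a caller that already runs its own
 * event loop can register perf_buffer__epoll_fd() with that loop instead of
 * blocking in perf_buffer__poll(), and drain pending data with
 * perf_buffer__consume() once the descriptor becomes readable. Sketch,
 * assuming an existing epoll instance my_epoll_fd:
 *
 *	struct epoll_event ev = {
 *		.events = EPOLLIN,
 *		.data.ptr = pb,
 *	};
 *
 *	epoll_ctl(my_epoll_fd, EPOLL_CTL_ADD, perf_buffer__epoll_fd(pb), &ev);
 *	...
 *	// when the fd is reported readable by the loop:
 *	perf_buffer__consume(pb);
 */
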
13090 /* Return number of PERF_EVENT_ARRAY map slots set up by this perf_buffer
13091 * manager.
13092 */
13093 size_t perf_buffer__buffer_cnt(const struct perf_buffer *pb)
13094 {
13095 return pb->cpu_cnt;
13096 }
13097
13098 /*
13099 * Return perf_event FD of a ring buffer in *buf_idx* slot of
13100 * PERF_EVENT_ARRAY BPF map. This FD can be polled for new data using
13101 * select()/poll()/epoll() Linux syscalls.
13102 */
13103 int perf_buffer__buffer_fd(const struct perf_buffer *pb, size_t buf_idx)
13104 {
13105 struct perf_cpu_buf *cpu_buf;
13106
13107 if (buf_idx >= pb->cpu_cnt)
13108 return libbpf_err(-EINVAL);
13109
13110 cpu_buf = pb->cpu_bufs[buf_idx];
13111 if (!cpu_buf)
13112 return libbpf_err(-ENOENT);
13113
13114 return cpu_buf->fd;
13115 }
13116
13117 int perf_buffer__buffer(struct perf_buffer *pb, int buf_idx, void **buf, size_t *buf_size)
13118 {
13119 struct perf_cpu_buf *cpu_buf;
13120
13121 if (buf_idx >= pb->cpu_cnt)
13122 return libbpf_err(-EINVAL);
13123
13124 cpu_buf = pb->cpu_bufs[buf_idx];
13125 if (!cpu_buf)
13126 return libbpf_err(-ENOENT);
13127
13128 *buf = cpu_buf->base;
13129 *buf_size = pb->mmap_size;
13130 return 0;
13131 }
13132
13133 /*
13134 * Consume data from perf ring buffer corresponding to slot *buf_idx* in
13135 * PERF_EVENT_ARRAY BPF map without waiting/polling. If there is no data to
13136 * consume, do nothing and return success.
13137 * Returns:
13138 * - 0 on success;
13139 * - <0 on failure.
13140 */
13141 int perf_buffer__consume_buffer(struct perf_buffer *pb, size_t buf_idx)
13142 {
13143 struct perf_cpu_buf *cpu_buf;
13144
13145 if (buf_idx >= pb->cpu_cnt)
13146 return libbpf_err(-EINVAL);
13147
13148 cpu_buf = pb->cpu_bufs[buf_idx];
13149 if (!cpu_buf)
13150 return libbpf_err(-ENOENT);
13151
13152 return perf_buffer__process_records(pb, cpu_buf);
13153 }
13154
13155 int perf_buffer__consume(struct perf_buffer *pb)
13156 {
13157 int i, err;
13158
13159 for (i = 0; i < pb->cpu_cnt; i++) {
13160 struct perf_cpu_buf *cpu_buf = pb->cpu_bufs[i];
13161
13162 if (!cpu_buf)
13163 continue;
13164
13165 err = perf_buffer__process_records(pb, cpu_buf);
13166 if (err) {
13167 pr_warn("perf_buffer: failed to process records in buffer #%d: %d\n", i, err);
13168 return libbpf_err(err);
13169 }
13170 }
13171 return 0;
13172 }
13173
13174 int bpf_program__set_attach_target(struct bpf_program *prog,
13175 int attach_prog_fd,
13176 const char *attach_func_name)
13177 {
13178 int btf_obj_fd = 0, btf_id = 0, err;
13179
13180 if (!prog || attach_prog_fd < 0)
13181 return libbpf_err(-EINVAL);
13182
13183 if (prog->obj->loaded)
13184 return libbpf_err(-EINVAL);
13185
13186 if (attach_prog_fd && !attach_func_name) {
13187 /* remember attach_prog_fd and let bpf_program__load() find
13188 * BTF ID during the program load
13189 */
13190 prog->attach_prog_fd = attach_prog_fd;
13191 return 0;
13192 }
13193
13194 if (attach_prog_fd) {
13195 btf_id = libbpf_find_prog_btf_id(attach_func_name,
13196 attach_prog_fd);
13197 if (btf_id < 0)
13198 return libbpf_err(btf_id);
13199 } else {
13200 if (!attach_func_name)
13201 return libbpf_err(-EINVAL);
13202
13203 /* load btf_vmlinux, if not yet */
13204 err = bpf_object__load_vmlinux_btf(prog->obj, true);
13205 if (err)
13206 return libbpf_err(err);
13207 err = find_kernel_btf_id(prog->obj, attach_func_name,
13208 prog->expected_attach_type,
13209 &btf_obj_fd, &btf_id);
13210 if (err)
13211 return libbpf_err(err);
13212 }
13213
13214 prog->attach_btf_id = btf_id;
13215 prog->attach_btf_obj_fd = btf_obj_fd;
13216 prog->attach_prog_fd = attach_prog_fd;
13217 return 0;
13218 }
13219
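/* Illustrative only, not part of libbpf: bpf_program__set_attach_target()
 * lets an application pick the attach target at runtime, after open but
 * before load. Sketch for an fentry program; the object file name and the
 * program name "handler" are assumptions:
 *
 *	struct bpf_object *obj = bpf_object__open_file("prog.bpf.o", NULL);
 *	struct bpf_program *prog;
 *	int err;
 *
 *	prog = bpf_object__find_program_by_name(obj, "handler");
 *	// attach_prog_fd == 0 targets a kernel function by name
 *	err = bpf_program__set_attach_target(prog, 0, "tcp_v4_connect");
 *	if (!err)
 *		err = bpf_object__load(obj);
 */
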
13220 int parse_cpu_mask_str(const char *s, bool **mask, int *mask_sz)
13221 {
13222 int err = 0, n, len, start, end = -1;
13223 bool *tmp;
13224
13225 *mask = NULL;
13226 *mask_sz = 0;
13227
13228 /* Each substring separated by ',' has the format \d+-\d+ or \d+ */
13229 while (*s) {
13230 if (*s == ',' || *s == '\n') {
13231 s++;
13232 continue;
13233 }
13234 n = sscanf(s, "%d%n-%d%n", &start, &len, &end, &len);
13235 if (n <= 0 || n > 2) {
13236 pr_warn("Failed to get CPU range %s: %d\n", s, n);
13237 err = -EINVAL;
13238 goto cleanup;
13239 } else if (n == 1) {
13240 end = start;
13241 }
13242 if (start < 0 || start > end) {
13243 pr_warn("Invalid CPU range [%d,%d] in %s\n",
13244 start, end, s);
13245 err = -EINVAL;
13246 goto cleanup;
13247 }
13248 tmp = realloc(*mask, end + 1);
13249 if (!tmp) {
13250 err = -ENOMEM;
13251 goto cleanup;
13252 }
13253 *mask = tmp;
13254 memset(tmp + *mask_sz, 0, start - *mask_sz);
13255 memset(tmp + start, 1, end - start + 1);
13256 *mask_sz = end + 1;
13257 s += len;
13258 }
13259 if (!*mask_sz) {
13260 pr_warn("Empty CPU range\n");
13261 return -EINVAL;
13262 }
13263 return 0;
13264 cleanup:
13265 free(*mask);
13266 *mask = NULL;
13267 return err;
13268 }
13269
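/* Illustrative only: parse_cpu_mask_str() accepts the kernel's CPU list
 * format. For example, the string "0-2,5" yields *mask_sz == 6 with
 * mask[0..2] == true, mask[3..4] == false, and mask[5] == true:
 *
 *	bool *mask;
 *	int n, err;
 *
 *	err = parse_cpu_mask_str("0-2,5\n", &mask, &n);
 *	if (!err) {
 *		// n == 6 here; the caller owns and must free the mask
 *		free(mask);
 *	}
 */
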
13270 int parse_cpu_mask_file(const char *fcpu, bool **mask, int *mask_sz)
13271 {
13272 int fd, err = 0, len;
13273 char buf[128];
13274
13275 fd = open(fcpu, O_RDONLY | O_CLOEXEC);
13276 if (fd < 0) {
13277 err = -errno;
13278 pr_warn("Failed to open cpu mask file %s: %d\n", fcpu, err);
13279 return err;
13280 }
13281 len = read(fd, buf, sizeof(buf));
13282 close(fd);
13283 if (len <= 0) {
13284 err = len ? -errno : -EINVAL;
13285 pr_warn("Failed to read cpu mask from %s: %d\n", fcpu, err);
13286 return err;
13287 }
13288 if (len >= sizeof(buf)) {
13289 pr_warn("CPU mask is too big in file %s\n", fcpu);
13290 return -E2BIG;
13291 }
13292 buf[len] = '\0';
13293
13294 return parse_cpu_mask_str(buf, mask, mask_sz);
13295 }
13296
13297 int libbpf_num_possible_cpus(void)
13298 {
13299 static const char *fcpu = "/sys/devices/system/cpu/possible";
13300 static int cpus;
13301 int err, n, i, tmp_cpus;
13302 bool *mask;
13303
13304 tmp_cpus = READ_ONCE(cpus);
13305 if (tmp_cpus > 0)
13306 return tmp_cpus;
13307
13308 err = parse_cpu_mask_file(fcpu, &mask, &n);
13309 if (err)
13310 return libbpf_err(err);
13311
13312 tmp_cpus = 0;
13313 for (i = 0; i < n; i++) {
13314 if (mask[i])
13315 tmp_cpus++;
13316 }
13317 free(mask);
13318
13319 WRITE_ONCE(cpus, tmp_cpus);
13320 return tmp_cpus;
13321 }
13322
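/* Illustrative only, not part of libbpf: libbpf_num_possible_cpus() is
 * typically used to size the value buffer when reading per-CPU maps, since
 * the kernel returns one value per possible CPU. Sketch (the map fd, key,
 * and value type are assumptions):
 *
 *	int err, ncpus = libbpf_num_possible_cpus();
 *	if (ncpus < 0)
 *		return ncpus;
 *
 *	__u32 key = 0;
 *	__u64 *values = calloc(ncpus, sizeof(__u64));
 *	err = bpf_map_lookup_elem(percpu_map_fd, &key, values);
 *	// values[i] holds the entry for possible CPU #i
 */
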
13323 static int populate_skeleton_maps(const struct bpf_object *obj,
13324 struct bpf_map_skeleton *maps,
13325 size_t map_cnt)
13326 {
13327 int i;
13328
13329 for (i = 0; i < map_cnt; i++) {
13330 struct bpf_map **map = maps[i].map;
13331 const char *name = maps[i].name;
13332 void **mmaped = maps[i].mmaped;
13333
13334 *map = bpf_object__find_map_by_name(obj, name);
13335 if (!*map) {
13336 pr_warn("failed to find skeleton map '%s'\n", name);
13337 return -ESRCH;
13338 }
13339
13340 /* externs shouldn't be pre-setup from user code */
13341 if (mmaped && (*map)->libbpf_type != LIBBPF_MAP_KCONFIG)
13342 *mmaped = (*map)->mmaped;
13343 }
13344 return 0;
13345 }
13346
13347 static int populate_skeleton_progs(const struct bpf_object *obj,
13348 struct bpf_prog_skeleton *progs,
13349 size_t prog_cnt)
13350 {
13351 int i;
13352
13353 for (i = 0; i < prog_cnt; i++) {
13354 struct bpf_program **prog = progs[i].prog;
13355 const char *name = progs[i].name;
13356
13357 *prog = bpf_object__find_program_by_name(obj, name);
13358 if (!*prog) {
13359 pr_warn("failed to find skeleton program '%s'\n", name);
13360 return -ESRCH;
13361 }
13362 }
13363 return 0;
13364 }
13365
13366 int bpf_object__open_skeleton(struct bpf_object_skeleton *s,
13367 const struct bpf_object_open_opts *opts)
13368 {
13369 DECLARE_LIBBPF_OPTS(bpf_object_open_opts, skel_opts,
13370 .object_name = s->name,
13371 );
13372 struct bpf_object *obj;
13373 int err;
13374
13375 /* Attempt to preserve opts->object_name, unless explicitly overridden
13376 * by the user. Overwriting the object name for skeletons is discouraged,
13377 * as it breaks global data maps: their map names are prefixed with the
13378 * object name, and when the skeleton is generated, bpftool assumes that
13379 * this name will stay the same.
13380 */
13381 if (opts) {
13382 memcpy(&skel_opts, opts, sizeof(*opts));
13383 if (!opts->object_name)
13384 skel_opts.object_name = s->name;
13385 }
13386
13387 obj = bpf_object__open_mem(s->data, s->data_sz, &skel_opts);
13388 err = libbpf_get_error(obj);
13389 if (err) {
13390 pr_warn("failed to initialize skeleton BPF object '%s': %d\n",
13391 s->name, err);
13392 return libbpf_err(err);
13393 }
13394
13395 *s->obj = obj;
13396 err = populate_skeleton_maps(obj, s->maps, s->map_cnt);
13397 if (err) {
13398 pr_warn("failed to populate skeleton maps for '%s': %d\n", s->name, err);
13399 return libbpf_err(err);
13400 }
13401
13402 err = populate_skeleton_progs(obj, s->progs, s->prog_cnt);
13403 if (err) {
13404 pr_warn("failed to populate skeleton progs for '%s': %d\n", s->name, err);
13405 return libbpf_err(err);
13406 }
13407
13408 return 0;
13409 }
13410
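/* Illustrative only, not part of libbpf: applications don't normally call
 * bpf_object__open_skeleton() directly. bpftool generates a <name>.skel.h
 * header whose <name>__open() fills in a struct bpf_object_skeleton and
 * forwards it here. For a hypothetical skeleton named "minimal", the
 * generated wrappers are used roughly like this:
 *
 *	struct minimal_bpf *skel;
 *	int err;
 *
 *	skel = minimal_bpf__open();		// bpf_object__open_skeleton()
 *	if (!skel)
 *		return -errno;
 *	err = minimal_bpf__load(skel);		// bpf_object__load_skeleton()
 *	if (!err)
 *		err = minimal_bpf__attach(skel);	// bpf_object__attach_skeleton()
 *	minimal_bpf__destroy(skel);		// bpf_object__destroy_skeleton()
 */
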
13411 int bpf_object__open_subskeleton(struct bpf_object_subskeleton *s)
13412 {
13413 int err, len, var_idx, i;
13414 const char *var_name;
13415 const struct bpf_map *map;
13416 struct btf *btf;
13417 __u32 map_type_id;
13418 const struct btf_type *map_type, *var_type;
13419 const struct bpf_var_skeleton *var_skel;
13420 struct btf_var_secinfo *var;
13421
13422 if (!s->obj)
13423 return libbpf_err(-EINVAL);
13424
13425 btf = bpf_object__btf(s->obj);
13426 if (!btf) {
13427 pr_warn("subskeletons require BTF at runtime (object %s)\n",
13428 bpf_object__name(s->obj));
13429 return libbpf_err(-errno);
13430 }
13431
13432 err = populate_skeleton_maps(s->obj, s->maps, s->map_cnt);
13433 if (err) {
13434 pr_warn("failed to populate subskeleton maps: %d\n", err);
13435 return libbpf_err(err);
13436 }
13437
13438 err = populate_skeleton_progs(s->obj, s->progs, s->prog_cnt);
13439 if (err) {
13440 pr_warn("failed to populate subskeleton maps: %d\n", err);
13441 return libbpf_err(err);
13442 }
13443
13444 for (var_idx = 0; var_idx < s->var_cnt; var_idx++) {
13445 var_skel = &s->vars[var_idx];
13446 map = *var_skel->map;
13447 map_type_id = bpf_map__btf_value_type_id(map);
13448 map_type = btf__type_by_id(btf, map_type_id);
13449
13450 if (!btf_is_datasec(map_type)) {
13451 pr_warn("type for map '%1$s' is not a datasec: %2$s",
13452 bpf_map__name(map),
13453 __btf_kind_str(btf_kind(map_type)));
13454 return libbpf_err(-EINVAL);
13455 }
13456
13457 len = btf_vlen(map_type);
13458 var = btf_var_secinfos(map_type);
13459 for (i = 0; i < len; i++, var++) {
13460 var_type = btf__type_by_id(btf, var->type);
13461 var_name = btf__name_by_offset(btf, var_type->name_off);
13462 if (strcmp(var_name, var_skel->name) == 0) {
13463 *var_skel->addr = map->mmaped + var->offset;
13464 break;
13465 }
13466 }
13467 }
13468 return 0;
13469 }
13470
13471 void bpf_object__destroy_subskeleton(struct bpf_object_subskeleton *s)
13472 {
13473 if (!s)
13474 return;
13475 free(s->maps);
13476 free(s->progs);
13477 free(s->vars);
13478 free(s);
13479 }
13480
13481 int bpf_object__load_skeleton(struct bpf_object_skeleton *s)
13482 {
13483 int i, err;
13484
13485 err = bpf_object__load(*s->obj);
13486 if (err) {
13487 pr_warn("failed to load BPF skeleton '%s': %d\n", s->name, err);
13488 return libbpf_err(err);
13489 }
13490
13491 for (i = 0; i < s->map_cnt; i++) {
13492 struct bpf_map *map = *s->maps[i].map;
13493 size_t mmap_sz = bpf_map_mmap_sz(map->def.value_size, map->def.max_entries);
13494 int prot, map_fd = map->fd;
13495 void **mmaped = s->maps[i].mmaped;
13496
13497 if (!mmaped)
13498 continue;
13499
13500 if (!(map->def.map_flags & BPF_F_MMAPABLE)) {
13501 *mmaped = NULL;
13502 continue;
13503 }
13504
13505 if (map->def.map_flags & BPF_F_RDONLY_PROG)
13506 prot = PROT_READ;
13507 else
13508 prot = PROT_READ | PROT_WRITE;
13509
13510 /* Remap the anonymous mmap()-ed "map initialization image" as
13511 * BPF map-backed mmap()-ed memory, preserving the same memory
13512 * address. This causes the kernel to change the process's page
13513 * tables to point to a different piece of kernel memory, but from
13514 * the userspace point of view the memory address (and its
13515 * contents, which are identical at this point) stays the same.
13516 * This mapping will be released by bpf_object__close() as part of
13517 * the normal cleanup procedure, so we don't need to worry about it
13518 * from the skeleton's cleanup perspective.
13519 */
13520 *mmaped = mmap(map->mmaped, mmap_sz, prot, MAP_SHARED | MAP_FIXED, map_fd, 0);
13521 if (*mmaped == MAP_FAILED) {
13522 err = -errno;
13523 *mmaped = NULL;
13524 pr_warn("failed to re-mmap() map '%s': %d\n",
13525 bpf_map__name(map), err);
13526 return libbpf_err(err);
13527 }
13528 }
13529
13530 return 0;
13531 }
13532
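/* Illustrative only, not part of libbpf: the remapping done in
 * bpf_object__load_skeleton() is what makes the skeleton's global-data
 * pointers (e.g. skel->bss, skel->rodata) read and write the kernel-side
 * map memory directly once the object is loaded. Sketch with a hypothetical
 * global variable:
 *
 *	// BPF side: a global "int my_counter;" lands in the .bss map
 *	// userspace side, after <name>__load():
 *	skel->bss->my_counter = 0;	// writes straight into the BPF map
 *	int v = skel->bss->my_counter;	// sees updates made by BPF programs
 */
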
13533 int bpf_object__attach_skeleton(struct bpf_object_skeleton *s)
13534 {
13535 int i, err;
13536
13537 for (i = 0; i < s->prog_cnt; i++) {
13538 struct bpf_program *prog = *s->progs[i].prog;
13539 struct bpf_link **link = s->progs[i].link;
13540
13541 if (!prog->autoload || !prog->autoattach)
13542 continue;
13543
13544 /* auto-attaching not supported for this program */
13545 if (!prog->sec_def || !prog->sec_def->prog_attach_fn)
13546 continue;
13547
13548 /* if user already set the link manually, don't attempt auto-attach */
13549 if (*link)
13550 continue;
13551
13552 err = prog->sec_def->prog_attach_fn(prog, prog->sec_def->cookie, link);
13553 if (err) {
13554 pr_warn("prog '%s': failed to auto-attach: %d\n",
13555 bpf_program__name(prog), err);
13556 return libbpf_err(err);
13557 }
13558
13559 /* It's possible that for some SEC() definitions auto-attach
13560 * is supported only in some cases (e.g., when the definition
13561 * completely specifies the target information), but not in others.
13562 * SEC("uprobe") is one such case: if the user specified the target
13563 * binary and function name, such a BPF program can be
13564 * auto-attached, but if not, that shouldn't cause the skeleton's
13565 * attach to fail; the program should just be skipped.
13566 * attach_fn signals such a case by returning 0 (no error) and
13567 * setting the link to NULL.
13568 */
13569 }
13570
13571 return 0;
13572 }
13573
13574 void bpf_object__detach_skeleton(struct bpf_object_skeleton *s)
13575 {
13576 int i;
13577
13578 for (i = 0; i < s->prog_cnt; i++) {
13579 struct bpf_link **link = s->progs[i].link;
13580
13581 bpf_link__destroy(*link);
13582 *link = NULL;
13583 }
13584 }
13585
13586 void bpf_object__destroy_skeleton(struct bpf_object_skeleton *s)
13587 {
13588 if (!s)
13589 return;
13590
13591 if (s->progs)
13592 bpf_object__detach_skeleton(s);
13593 if (s->obj)
13594 bpf_object__close(*s->obj);
13595 free(s->maps);
13596 free(s->progs);
13597 free(s);
13598 }