// SPDX-License-Identifier: GPL-2.0-only
/* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
 * Copyright (c) 2016 Facebook
 * Copyright (c) 2018 Covalent IO, Inc. http://covalent.io
 */
#include <uapi/linux/btf.h>
#include <linux/bpf-cgroup.h>
#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/slab.h>
#include <linux/bpf.h>
#include <linux/btf.h>
#include <linux/bpf_verifier.h>
#include <linux/filter.h>
#include <net/netlink.h>
#include <linux/file.h>
#include <linux/vmalloc.h>
#include <linux/stringify.h>
#include <linux/bsearch.h>
#include <linux/sort.h>
#include <linux/perf_event.h>
#include <linux/ctype.h>
#include <linux/error-injection.h>
#include <linux/bpf_lsm.h>
#include <linux/btf_ids.h>
#include <linux/poison.h>
#include <linux/module.h>
#include <linux/cpumask.h>
#include <linux/bpf_mem_alloc.h>
static const struct bpf_verifier_ops * const bpf_verifier_ops[] = {
#define BPF_PROG_TYPE(_id, _name, prog_ctx_type, kern_ctx_type) \
	[_id] = & _name ## _verifier_ops,
#define BPF_MAP_TYPE(_id, _ops)
#define BPF_LINK_TYPE(_id, _name)
#include <linux/bpf_types.h>
#undef BPF_PROG_TYPE
#undef BPF_MAP_TYPE
#undef BPF_LINK_TYPE
};

struct bpf_mem_alloc bpf_global_percpu_ma;
static bool bpf_global_percpu_ma_set;
/* bpf_check() is a static code analyzer that walks eBPF program
 * instruction by instruction and updates register/stack state.
 * All paths of conditional branches are analyzed until 'bpf_exit' insn.
 *
 * The first pass is depth-first-search to check that the program is a DAG.
 * It rejects the following programs:
 * - larger than BPF_MAXINSNS insns
 * - if loop is present (detected via back-edge)
 * - unreachable insns exist (shouldn't be a forest. program = one function)
 * - out of bounds or malformed jumps
 * The second pass is all possible path descent from the 1st insn.
 * Since it's analyzing all paths through the program, the length of the
 * analysis is limited to 64k insn, which may be hit even if total number of
 * insn is less than 4K, but there are too many branches that change stack/regs.
 * Number of 'branches to be analyzed' is limited to 1k.
 *
 * On entry to each instruction, each register has a type, and the instruction
 * changes the types of the registers depending on instruction semantics.
 * If the instruction is BPF_MOV64_REG(BPF_REG_1, BPF_REG_5), then the type of R5 is
 * copied to R1.
 *
 * All registers are 64-bit.
 * R0 - return register
 * R1-R5 argument passing registers
 * R6-R9 callee saved registers
 * R10 - frame pointer read-only
 *
 * At the start of BPF program the register R1 contains a pointer to bpf_context
 * and has type PTR_TO_CTX.
 *
 * Verifier tracks arithmetic operations on pointers in case:
 *    BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
 *    BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -20),
 * 1st insn copies R10 (which has FRAME_PTR) type into R1
 * and 2nd arithmetic instruction is pattern matched to recognize
 * that it wants to construct a pointer to some element within stack.
 * So after 2nd insn, the register R1 has type PTR_TO_STACK
 * (and -20 constant is saved for further stack bounds checking).
 * Meaning that this reg is a pointer to stack plus known immediate constant.
 *
 * Most of the time the registers have SCALAR_VALUE type, which
 * means the register has some value, but it's not a valid pointer.
 * (like pointer plus pointer becomes SCALAR_VALUE type)
 *
 * When verifier sees load or store instructions the type of base register
 * can be: PTR_TO_MAP_VALUE, PTR_TO_CTX, PTR_TO_STACK, PTR_TO_SOCKET. These are
 * four pointer types recognized by check_mem_access() function.
 *
 * PTR_TO_MAP_VALUE means that this register is pointing to 'map element value'
 * and the range of [ptr, ptr + map's value_size) is accessible.
 *
 * registers used to pass values to function calls are checked against
 * function argument constraints.
 *
 * ARG_PTR_TO_MAP_KEY is one of such argument constraints.
 * It means that the register type passed to this function must be
 * PTR_TO_STACK and it will be used inside the function as
 * 'pointer to map element key'
 *
 * For example the argument constraints for bpf_map_lookup_elem():
 *   .ret_type = RET_PTR_TO_MAP_VALUE_OR_NULL,
 *   .arg1_type = ARG_CONST_MAP_PTR,
 *   .arg2_type = ARG_PTR_TO_MAP_KEY,
 *
 * ret_type says that this function returns 'pointer to map elem value or null'
 * function expects 1st argument to be a const pointer to 'struct bpf_map' and
 * 2nd argument should be a pointer to stack, which will be used inside
 * the helper function as a pointer to map element key.
 *
 * On the kernel side the helper function looks like:
 * u64 bpf_map_lookup_elem(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
 * {
 *    struct bpf_map *map = (struct bpf_map *) (unsigned long) r1;
 *    void *key = (void *) (unsigned long) r2;
 *
 *    here kernel can access 'key' and 'map' pointers safely, knowing that
 *    [key, key + map->key_size) bytes are valid and were initialized on
 *    the stack of eBPF program.
 * }
 *
 * Corresponding eBPF program may look like:
 *    BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),  // after this insn R2 type is FRAME_PTR
 *    BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), // after this insn R2 type is PTR_TO_STACK
 *    BPF_LD_MAP_FD(BPF_REG_1, map_fd),      // after this insn R1 type is CONST_PTR_TO_MAP
 *    BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
 * here verifier looks at prototype of map_lookup_elem() and sees:
 * .arg1_type == ARG_CONST_MAP_PTR and R1->type == CONST_PTR_TO_MAP, which is ok,
 * Now verifier knows that this map has key of R1->map_ptr->key_size bytes
 *
 * Then .arg2_type == ARG_PTR_TO_MAP_KEY and R2->type == PTR_TO_STACK, ok so far,
 * Now verifier checks that [R2, R2 + map's key_size) are within stack limits
 * and were initialized prior to this call.
 * If it's ok, then verifier allows this BPF_CALL insn and looks at
 * .ret_type which is RET_PTR_TO_MAP_VALUE_OR_NULL, so it sets
 * R0->type = PTR_TO_MAP_VALUE_OR_NULL which means bpf_map_lookup_elem() function
 * returns either pointer to map value or NULL.
 *
 * When type PTR_TO_MAP_VALUE_OR_NULL passes through 'if (reg != 0) goto +off'
 * insn, the register holding that pointer in the true branch changes state to
 * PTR_TO_MAP_VALUE and the same register changes state to CONST_IMM in the false
 * branch. See check_cond_jmp_op().
 *
 * After the call R0 is set to return type of the function and registers R1-R5
 * are set to NOT_INIT to indicate that they are no longer readable.
 *
 * The following reference types represent a potential reference to a kernel
 * resource which, after first being allocated, must be checked and freed by
 * the BPF program:
 * - PTR_TO_SOCKET_OR_NULL, PTR_TO_SOCKET
 *
 * When the verifier sees a helper call return a reference type, it allocates a
 * pointer id for the reference and stores it in the current function state.
 * Similar to the way that PTR_TO_MAP_VALUE_OR_NULL is converted into
 * PTR_TO_MAP_VALUE, PTR_TO_SOCKET_OR_NULL becomes PTR_TO_SOCKET when the type
 * passes through a NULL-check conditional. For the branch wherein the state is
 * changed to CONST_IMM, the verifier releases the reference.
 *
 * For each helper function that allocates a reference, such as
 * bpf_sk_lookup_tcp(), there is a corresponding release function, such as
 * bpf_sk_release(). When a reference type passes into the release function,
 * the verifier also releases the reference. If any unchecked or unreleased
 * reference remains at the end of the program, the verifier rejects it.
 */
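/* Illustrative only (not part of the original file): the insn sequence in the
 * comment above is roughly what a BPF C program like the following compiles
 * to; 'my_map' and 'value' are made-up names for this sketch. The verifier
 * enforces that the PTR_TO_MAP_VALUE_OR_NULL returned by
 * bpf_map_lookup_elem() is NULL-checked before it is dereferenced:
 *
 *	int key = 0;
 *	long *value = bpf_map_lookup_elem(&my_map, &key);
 *	if (!value)		// R0 is PTR_TO_MAP_VALUE_OR_NULL here
 *		return 0;	// NULL branch: pointer may not be used
 *	*value += 1;		// non-NULL branch: R0 became PTR_TO_MAP_VALUE
 */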
/* verifier_state + insn_idx are pushed to stack when branch is encountered */
struct bpf_verifier_stack_elem {
	/* verifier state is 'st'
	 * before processing instruction 'insn_idx'
	 * and after processing instruction 'prev_insn_idx'
	 */
	struct bpf_verifier_state st;
	int insn_idx;
	int prev_insn_idx;
	struct bpf_verifier_stack_elem *next;
	/* length of verifier log at the time this state was pushed on stack */
	u32 log_pos;
};
#define BPF_COMPLEXITY_LIMIT_JMP_SEQ	8192
#define BPF_COMPLEXITY_LIMIT_STATES	64

#define BPF_MAP_KEY_POISON	(1ULL << 63)
#define BPF_MAP_KEY_SEEN	(1ULL << 62)

#define BPF_MAP_PTR_UNPRIV	1UL
#define BPF_MAP_PTR_POISON	((void *)((0xeB9FUL << 1) +	\
					  POISON_POINTER_DELTA))
#define BPF_MAP_PTR(X)		((struct bpf_map *)((X) & ~BPF_MAP_PTR_UNPRIV))
static int acquire_reference_state(struct bpf_verifier_env *env, int insn_idx);
static int release_reference(struct bpf_verifier_env *env, int ref_obj_id);
static void invalidate_non_owning_refs(struct bpf_verifier_env *env);
static bool in_rbtree_lock_required_cb(struct bpf_verifier_env *env);
static int ref_set_non_owning(struct bpf_verifier_env *env,
			      struct bpf_reg_state *reg);
static void specialize_kfunc(struct bpf_verifier_env *env,
			     u32 func_id, u16 offset, unsigned long *addr);
static bool is_trusted_reg(const struct bpf_reg_state *reg);
static bool bpf_map_ptr_poisoned(const struct bpf_insn_aux_data *aux)
{
	return BPF_MAP_PTR(aux->map_ptr_state) == BPF_MAP_PTR_POISON;
}

static bool bpf_map_ptr_unpriv(const struct bpf_insn_aux_data *aux)
{
	return aux->map_ptr_state & BPF_MAP_PTR_UNPRIV;
}

static void bpf_map_ptr_store(struct bpf_insn_aux_data *aux,
			      const struct bpf_map *map, bool unpriv)
{
	BUILD_BUG_ON((unsigned long)BPF_MAP_PTR_POISON & BPF_MAP_PTR_UNPRIV);
	unpriv |= bpf_map_ptr_unpriv(aux);
	aux->map_ptr_state = (unsigned long)map |
			     (unpriv ? BPF_MAP_PTR_UNPRIV : 0UL);
}

static bool bpf_map_key_poisoned(const struct bpf_insn_aux_data *aux)
{
	return aux->map_key_state & BPF_MAP_KEY_POISON;
}

static bool bpf_map_key_unseen(const struct bpf_insn_aux_data *aux)
{
	return !(aux->map_key_state & BPF_MAP_KEY_SEEN);
}

static u64 bpf_map_key_immediate(const struct bpf_insn_aux_data *aux)
{
	return aux->map_key_state & ~(BPF_MAP_KEY_SEEN | BPF_MAP_KEY_POISON);
}

static void bpf_map_key_store(struct bpf_insn_aux_data *aux, u64 state)
{
	bool poisoned = bpf_map_key_poisoned(aux);

	aux->map_key_state = state | BPF_MAP_KEY_SEEN |
			     (poisoned ? BPF_MAP_KEY_POISON : 0ULL);
}
static bool bpf_helper_call(const struct bpf_insn *insn)
{
	return insn->code == (BPF_JMP | BPF_CALL) &&
	       insn->src_reg == 0;
}

static bool bpf_pseudo_call(const struct bpf_insn *insn)
{
	return insn->code == (BPF_JMP | BPF_CALL) &&
	       insn->src_reg == BPF_PSEUDO_CALL;
}

static bool bpf_pseudo_kfunc_call(const struct bpf_insn *insn)
{
	return insn->code == (BPF_JMP | BPF_CALL) &&
	       insn->src_reg == BPF_PSEUDO_KFUNC_CALL;
}
struct bpf_call_arg_meta {
	struct bpf_map *map_ptr;
	struct btf_field *kptr_field;
};

struct bpf_kfunc_call_arg_meta {
	const struct btf_type *func_proto;
	const char *func_name;
	/* arg_{btf,btf_id,owning_ref} are used by kfunc-specific handling,
	 * generally to pass info about user-defined local kptr types to later
	 * verification logic
	 *   bpf_obj_drop/bpf_percpu_obj_drop
	 *     Record the local kptr type to be drop'd
	 *   bpf_refcount_acquire (via KF_ARG_PTR_TO_REFCOUNTED_KPTR arg type)
	 *     Record the local kptr type to be refcount_incr'd and use
	 *     arg_owning_ref to determine whether refcount_acquire should be
	 */
	struct {
		struct btf_field *field;
	} arg_list_head;
	struct {
		struct btf_field *field;
	} arg_rbtree_root;
	struct {
		enum bpf_dynptr_type type;
	} initialized_dynptr;
};

struct btf *btf_vmlinux;
static DEFINE_MUTEX(bpf_verifier_lock);
static DEFINE_MUTEX(bpf_percpu_ma_lock);
static const struct bpf_line_info *
find_linfo(const struct bpf_verifier_env *env, u32 insn_off)
{
	const struct bpf_line_info *linfo;
	const struct bpf_prog *prog;
	u32 i, nr_linfo;

	prog = env->prog;
	nr_linfo = prog->aux->nr_linfo;

	if (!nr_linfo || insn_off >= prog->len)
		return NULL;

	linfo = prog->aux->linfo;
	for (i = 1; i < nr_linfo; i++)
		if (insn_off < linfo[i].insn_off)
			break;

	return &linfo[i - 1];
}
__printf(2, 3) static void verbose(void *private_data, const char *fmt, ...)
{
	struct bpf_verifier_env *env = private_data;
	va_list args;

	if (!bpf_verifier_log_needed(&env->log))
		return;

	va_start(args, fmt);
	bpf_verifier_vlog(&env->log, fmt, args);
	va_end(args);
}

static const char *ltrim(const char *s)
{
	while (isspace(*s))
		s++;

	return s;
}
__printf(3, 4) static void verbose_linfo(struct bpf_verifier_env *env,
					 u32 insn_off,
					 const char *prefix_fmt, ...)
{
	const struct bpf_line_info *linfo;
	va_list args;

	if (!bpf_verifier_log_needed(&env->log))
		return;

	linfo = find_linfo(env, insn_off);
	if (!linfo || linfo == env->prev_linfo)
		return;

	va_start(args, prefix_fmt);
	bpf_verifier_vlog(&env->log, prefix_fmt, args);
	va_end(args);

	verbose(env, "%s\n",
		ltrim(btf_name_by_offset(env->prog->aux->btf,
					 linfo->line_off)));

	env->prev_linfo = linfo;
}
static void verbose_invalid_scalar(struct bpf_verifier_env *env,
				   struct bpf_reg_state *reg,
				   struct tnum *range, const char *ctx,
				   const char *reg_name)
{
	char tn_buf[48];

	verbose(env, "At %s the register %s ", ctx, reg_name);
	if (!tnum_is_unknown(reg->var_off)) {
		tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
		verbose(env, "has value %s", tn_buf);
	} else {
		verbose(env, "has unknown scalar value");
	}
	tnum_strn(tn_buf, sizeof(tn_buf), *range);
	verbose(env, " should have been in %s\n", tn_buf);
}
static bool type_is_pkt_pointer(enum bpf_reg_type type)
{
	type = base_type(type);
	return type == PTR_TO_PACKET ||
	       type == PTR_TO_PACKET_META;
}

static bool type_is_sk_pointer(enum bpf_reg_type type)
{
	return type == PTR_TO_SOCKET ||
		type == PTR_TO_SOCK_COMMON ||
		type == PTR_TO_TCP_SOCK ||
		type == PTR_TO_XDP_SOCK;
}

static bool type_may_be_null(u32 type)
{
	return type & PTR_MAYBE_NULL;
}

static bool reg_not_null(const struct bpf_reg_state *reg)
{
	enum bpf_reg_type type;

	type = reg->type;
	if (type_may_be_null(type))
		return false;

	type = base_type(type);
	return type == PTR_TO_SOCKET ||
		type == PTR_TO_TCP_SOCK ||
		type == PTR_TO_MAP_VALUE ||
		type == PTR_TO_MAP_KEY ||
		type == PTR_TO_SOCK_COMMON ||
		(type == PTR_TO_BTF_ID && is_trusted_reg(reg)) ||
		type == PTR_TO_MEM;
}

static bool type_is_ptr_alloc_obj(u32 type)
{
	return base_type(type) == PTR_TO_BTF_ID && type_flag(type) & MEM_ALLOC;
}

static bool type_is_non_owning_ref(u32 type)
{
	return type_is_ptr_alloc_obj(type) && type_flag(type) & NON_OWN_REF;
}
static struct btf_record *reg_btf_record(const struct bpf_reg_state *reg)
{
	struct btf_record *rec = NULL;
	struct btf_struct_meta *meta;

	if (reg->type == PTR_TO_MAP_VALUE) {
		rec = reg->map_ptr->record;
	} else if (type_is_ptr_alloc_obj(reg->type)) {
		meta = btf_find_struct_meta(reg->btf, reg->btf_id);
		if (meta)
			rec = meta->record;
	}
	return rec;
}

static bool subprog_is_global(const struct bpf_verifier_env *env, int subprog)
{
	struct bpf_func_info_aux *aux = env->prog->aux->func_info_aux;

	return aux && aux[subprog].linkage == BTF_FUNC_GLOBAL;
}

static bool reg_may_point_to_spin_lock(const struct bpf_reg_state *reg)
{
	return btf_record_has_field(reg_btf_record(reg), BPF_SPIN_LOCK);
}

static bool type_is_rdonly_mem(u32 type)
{
	return type & MEM_RDONLY;
}
static bool is_acquire_function(enum bpf_func_id func_id,
				const struct bpf_map *map)
{
	enum bpf_map_type map_type = map ? map->map_type : BPF_MAP_TYPE_UNSPEC;

	if (func_id == BPF_FUNC_sk_lookup_tcp ||
	    func_id == BPF_FUNC_sk_lookup_udp ||
	    func_id == BPF_FUNC_skc_lookup_tcp ||
	    func_id == BPF_FUNC_ringbuf_reserve ||
	    func_id == BPF_FUNC_kptr_xchg)
		return true;

	if (func_id == BPF_FUNC_map_lookup_elem &&
	    (map_type == BPF_MAP_TYPE_SOCKMAP ||
	     map_type == BPF_MAP_TYPE_SOCKHASH))
		return true;

	return false;
}

static bool is_ptr_cast_function(enum bpf_func_id func_id)
{
	return func_id == BPF_FUNC_tcp_sock ||
		func_id == BPF_FUNC_sk_fullsock ||
		func_id == BPF_FUNC_skc_to_tcp_sock ||
		func_id == BPF_FUNC_skc_to_tcp6_sock ||
		func_id == BPF_FUNC_skc_to_udp6_sock ||
		func_id == BPF_FUNC_skc_to_mptcp_sock ||
		func_id == BPF_FUNC_skc_to_tcp_timewait_sock ||
		func_id == BPF_FUNC_skc_to_tcp_request_sock;
}

static bool is_dynptr_ref_function(enum bpf_func_id func_id)
{
	return func_id == BPF_FUNC_dynptr_data;
}
static bool is_sync_callback_calling_kfunc(u32 btf_id);
static bool is_bpf_throw_kfunc(struct bpf_insn *insn);

static bool is_sync_callback_calling_function(enum bpf_func_id func_id)
{
	return func_id == BPF_FUNC_for_each_map_elem ||
	       func_id == BPF_FUNC_find_vma ||
	       func_id == BPF_FUNC_loop ||
	       func_id == BPF_FUNC_user_ringbuf_drain;
}

static bool is_async_callback_calling_function(enum bpf_func_id func_id)
{
	return func_id == BPF_FUNC_timer_set_callback;
}

static bool is_callback_calling_function(enum bpf_func_id func_id)
{
	return is_sync_callback_calling_function(func_id) ||
	       is_async_callback_calling_function(func_id);
}

static bool is_sync_callback_calling_insn(struct bpf_insn *insn)
{
	return (bpf_helper_call(insn) && is_sync_callback_calling_function(insn->imm)) ||
	       (bpf_pseudo_kfunc_call(insn) && is_sync_callback_calling_kfunc(insn->imm));
}

static bool is_storage_get_function(enum bpf_func_id func_id)
{
	return func_id == BPF_FUNC_sk_storage_get ||
	       func_id == BPF_FUNC_inode_storage_get ||
	       func_id == BPF_FUNC_task_storage_get ||
	       func_id == BPF_FUNC_cgrp_storage_get;
}

static bool helper_multiple_ref_obj_use(enum bpf_func_id func_id,
					const struct bpf_map *map)
{
	int ref_obj_uses = 0;

	if (is_ptr_cast_function(func_id))
		ref_obj_uses++;
	if (is_acquire_function(func_id, map))
		ref_obj_uses++;
	if (is_dynptr_ref_function(func_id))
		ref_obj_uses++;

	return ref_obj_uses > 1;
}

static bool is_cmpxchg_insn(const struct bpf_insn *insn)
{
	return BPF_CLASS(insn->code) == BPF_STX &&
	       BPF_MODE(insn->code) == BPF_ATOMIC &&
	       insn->imm == BPF_CMPXCHG;
}
/* string representation of 'enum bpf_reg_type'
 *
 * Note that reg_type_str() can not appear more than once in a single verbose()
 * statement.
 */
static const char *reg_type_str(struct bpf_verifier_env *env,
				enum bpf_reg_type type)
{
	char postfix[16] = {0}, prefix[64] = {0};
	static const char * const str[] = {
		[SCALAR_VALUE]		= "scalar",
		[PTR_TO_CTX]		= "ctx",
		[CONST_PTR_TO_MAP]	= "map_ptr",
		[PTR_TO_MAP_VALUE]	= "map_value",
		[PTR_TO_STACK]		= "fp",
		[PTR_TO_PACKET]		= "pkt",
		[PTR_TO_PACKET_META]	= "pkt_meta",
		[PTR_TO_PACKET_END]	= "pkt_end",
		[PTR_TO_FLOW_KEYS]	= "flow_keys",
		[PTR_TO_SOCKET]		= "sock",
		[PTR_TO_SOCK_COMMON]	= "sock_common",
		[PTR_TO_TCP_SOCK]	= "tcp_sock",
		[PTR_TO_TP_BUFFER]	= "tp_buffer",
		[PTR_TO_XDP_SOCK]	= "xdp_sock",
		[PTR_TO_BTF_ID]		= "ptr_",
		[PTR_TO_MEM]		= "mem",
		[PTR_TO_BUF]		= "buf",
		[PTR_TO_FUNC]		= "func",
		[PTR_TO_MAP_KEY]	= "map_key",
		[CONST_PTR_TO_DYNPTR]	= "dynptr_ptr",
	};

	if (type & PTR_MAYBE_NULL) {
		if (base_type(type) == PTR_TO_BTF_ID)
			strncpy(postfix, "or_null_", 16);
		else
			strncpy(postfix, "_or_null", 16);
	}

	snprintf(prefix, sizeof(prefix), "%s%s%s%s%s%s%s",
		 type & MEM_RDONLY ? "rdonly_" : "",
		 type & MEM_RINGBUF ? "ringbuf_" : "",
		 type & MEM_USER ? "user_" : "",
		 type & MEM_PERCPU ? "percpu_" : "",
		 type & MEM_RCU ? "rcu_" : "",
		 type & PTR_UNTRUSTED ? "untrusted_" : "",
		 type & PTR_TRUSTED ? "trusted_" : ""
	);

	snprintf(env->tmp_str_buf, TMP_STR_BUF_LEN, "%s%s%s",
		 prefix, str[base_type(type)], postfix);
	return env->tmp_str_buf;
}
static char slot_type_char[] = {
	[STACK_INVALID]	= '?',
	[STACK_SPILL]	= 'r',
	[STACK_MISC]	= 'm',
	[STACK_ZERO]	= '0',
	[STACK_DYNPTR]	= 'd',
	[STACK_ITER]	= 'i',
};

static void print_liveness(struct bpf_verifier_env *env,
			   enum bpf_reg_liveness live)
{
	if (live & (REG_LIVE_READ | REG_LIVE_WRITTEN | REG_LIVE_DONE))
		verbose(env, "_");
	if (live & REG_LIVE_READ)
		verbose(env, "r");
	if (live & REG_LIVE_WRITTEN)
		verbose(env, "w");
	if (live & REG_LIVE_DONE)
		verbose(env, "D");
}

static int __get_spi(s32 off)
{
	return (-off - 1) / BPF_REG_SIZE;
}
static struct bpf_func_state *func(struct bpf_verifier_env *env,
				   const struct bpf_reg_state *reg)
{
	struct bpf_verifier_state *cur = env->cur_state;

	return cur->frame[reg->frameno];
}

static bool is_spi_bounds_valid(struct bpf_func_state *state, int spi, int nr_slots)
{
	int allocated_slots = state->allocated_stack / BPF_REG_SIZE;

	/* We need to check that slots between [spi - nr_slots + 1, spi] are
	 * within [0, allocated_stack).
	 *
	 * Please note that the spi grows downwards. For example, a dynptr
	 * takes the size of two stack slots; the first slot will be at
	 * spi and the second slot will be at spi - 1.
	 */
	return spi - nr_slots + 1 >= 0 && spi < allocated_slots;
}
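/* Worked example (illustrative only, not from the original source): with
 * allocated_stack = 32 there are four slots, spi 0..3. A two-slot object
 * stored at fp-16 has off = -16, so __get_spi(-16) = (16 - 1) / 8 = 1, and
 * is_spi_bounds_valid() then checks that slots [spi - 1, spi] = [0, 1] fall
 * within [0, 4).
 */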
static int stack_slot_obj_get_spi(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
				  const char *obj_kind, int nr_slots)
{
	int off, spi;

	if (!tnum_is_const(reg->var_off)) {
		verbose(env, "%s has to be at a constant offset\n", obj_kind);
		return -EINVAL;
	}

	off = reg->off + reg->var_off.value;
	if (off % BPF_REG_SIZE) {
		verbose(env, "cannot pass in %s at an offset=%d\n", obj_kind, off);
		return -EINVAL;
	}

	spi = __get_spi(off);
	if (spi + 1 < nr_slots) {
		verbose(env, "cannot pass in %s at an offset=%d\n", obj_kind, off);
		return -EINVAL;
	}

	if (!is_spi_bounds_valid(func(env, reg), spi, nr_slots))
		return -ERANGE;
	return spi;
}

static int dynptr_get_spi(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
{
	return stack_slot_obj_get_spi(env, reg, "dynptr", BPF_DYNPTR_NR_SLOTS);
}

static int iter_get_spi(struct bpf_verifier_env *env, struct bpf_reg_state *reg, int nr_slots)
{
	return stack_slot_obj_get_spi(env, reg, "iter", nr_slots);
}
static const char *btf_type_name(const struct btf *btf, u32 id)
{
	return btf_name_by_offset(btf, btf_type_by_id(btf, id)->name_off);
}

static const char *dynptr_type_str(enum bpf_dynptr_type type)
{
	switch (type) {
	case BPF_DYNPTR_TYPE_LOCAL:
		return "local";
	case BPF_DYNPTR_TYPE_RINGBUF:
		return "ringbuf";
	case BPF_DYNPTR_TYPE_SKB:
		return "skb";
	case BPF_DYNPTR_TYPE_XDP:
		return "xdp";
	case BPF_DYNPTR_TYPE_INVALID:
		return "<invalid>";
	default:
		WARN_ONCE(1, "unknown dynptr type %d\n", type);
		return "<unknown>";
	}
}

static const char *iter_type_str(const struct btf *btf, u32 btf_id)
{
	if (!btf || btf_id == 0)
		return "<invalid>";

	/* we already validated that type is valid and has conforming name */
	return btf_type_name(btf, btf_id) + sizeof(ITER_PREFIX) - 1;
}

static const char *iter_state_str(enum bpf_iter_state state)
{
	switch (state) {
	case BPF_ITER_STATE_ACTIVE:
		return "active";
	case BPF_ITER_STATE_DRAINED:
		return "drained";
	case BPF_ITER_STATE_INVALID:
		return "<invalid>";
	default:
		WARN_ONCE(1, "unknown iter state %d\n", state);
		return "<unknown>";
	}
}
static void mark_reg_scratched(struct bpf_verifier_env *env, u32 regno)
{
	env->scratched_regs |= 1U << regno;
}

static void mark_stack_slot_scratched(struct bpf_verifier_env *env, u32 spi)
{
	env->scratched_stack_slots |= 1ULL << spi;
}

static bool reg_scratched(const struct bpf_verifier_env *env, u32 regno)
{
	return (env->scratched_regs >> regno) & 1;
}

static bool stack_slot_scratched(const struct bpf_verifier_env *env, u64 regno)
{
	return (env->scratched_stack_slots >> regno) & 1;
}

static bool verifier_state_scratched(const struct bpf_verifier_env *env)
{
	return env->scratched_regs || env->scratched_stack_slots;
}

static void mark_verifier_state_clean(struct bpf_verifier_env *env)
{
	env->scratched_regs = 0U;
	env->scratched_stack_slots = 0ULL;
}

/* Used for printing the entire verifier state. */
static void mark_verifier_state_scratched(struct bpf_verifier_env *env)
{
	env->scratched_regs = ~0U;
	env->scratched_stack_slots = ~0ULL;
}
static enum bpf_dynptr_type arg_to_dynptr_type(enum bpf_arg_type arg_type)
{
	switch (arg_type & DYNPTR_TYPE_FLAG_MASK) {
	case DYNPTR_TYPE_LOCAL:
		return BPF_DYNPTR_TYPE_LOCAL;
	case DYNPTR_TYPE_RINGBUF:
		return BPF_DYNPTR_TYPE_RINGBUF;
	case DYNPTR_TYPE_SKB:
		return BPF_DYNPTR_TYPE_SKB;
	case DYNPTR_TYPE_XDP:
		return BPF_DYNPTR_TYPE_XDP;
	default:
		return BPF_DYNPTR_TYPE_INVALID;
	}
}

static enum bpf_type_flag get_dynptr_type_flag(enum bpf_dynptr_type type)
{
	switch (type) {
	case BPF_DYNPTR_TYPE_LOCAL:
		return DYNPTR_TYPE_LOCAL;
	case BPF_DYNPTR_TYPE_RINGBUF:
		return DYNPTR_TYPE_RINGBUF;
	case BPF_DYNPTR_TYPE_SKB:
		return DYNPTR_TYPE_SKB;
	case BPF_DYNPTR_TYPE_XDP:
		return DYNPTR_TYPE_XDP;
	default:
		WARN_ON_ONCE(1);
		return 0;
	}
}

static bool dynptr_type_refcounted(enum bpf_dynptr_type type)
{
	return type == BPF_DYNPTR_TYPE_RINGBUF;
}
static void __mark_dynptr_reg(struct bpf_reg_state *reg,
			      enum bpf_dynptr_type type,
			      bool first_slot, int dynptr_id);

static void __mark_reg_not_init(const struct bpf_verifier_env *env,
				struct bpf_reg_state *reg);

static void mark_dynptr_stack_regs(struct bpf_verifier_env *env,
				   struct bpf_reg_state *sreg1,
				   struct bpf_reg_state *sreg2,
				   enum bpf_dynptr_type type)
{
	int id = ++env->id_gen;

	__mark_dynptr_reg(sreg1, type, true, id);
	__mark_dynptr_reg(sreg2, type, false, id);
}

static void mark_dynptr_cb_reg(struct bpf_verifier_env *env,
			       struct bpf_reg_state *reg,
			       enum bpf_dynptr_type type)
{
	__mark_dynptr_reg(reg, type, true, ++env->id_gen);
}

static int destroy_if_dynptr_stack_slot(struct bpf_verifier_env *env,
					struct bpf_func_state *state, int spi);
static int mark_stack_slots_dynptr(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
				   enum bpf_arg_type arg_type, int insn_idx, int clone_ref_obj_id)
{
	struct bpf_func_state *state = func(env, reg);
	enum bpf_dynptr_type type;
	int spi, i, err;

	spi = dynptr_get_spi(env, reg);
	if (spi < 0)
		return spi;

	/* We cannot assume both spi and spi - 1 belong to the same dynptr,
	 * hence we need to call destroy_if_dynptr_stack_slot twice for both,
	 * to ensure that for the following example:
	 *
	 * So marking spi = 2 should lead to destruction of both d1 and d2. In
	 * case they do belong to same dynptr, second call won't see slot_type
	 * as STACK_DYNPTR and will simply skip destruction.
	 */
	err = destroy_if_dynptr_stack_slot(env, state, spi);
	if (err)
		return err;
	err = destroy_if_dynptr_stack_slot(env, state, spi - 1);
	if (err)
		return err;

	for (i = 0; i < BPF_REG_SIZE; i++) {
		state->stack[spi].slot_type[i] = STACK_DYNPTR;
		state->stack[spi - 1].slot_type[i] = STACK_DYNPTR;
	}

	type = arg_to_dynptr_type(arg_type);
	if (type == BPF_DYNPTR_TYPE_INVALID)
		return -EINVAL;

	mark_dynptr_stack_regs(env, &state->stack[spi].spilled_ptr,
			       &state->stack[spi - 1].spilled_ptr, type);

	if (dynptr_type_refcounted(type)) {
		/* The id is used to track proper releasing */
		int id;

		if (clone_ref_obj_id)
			id = clone_ref_obj_id;
		else
			id = acquire_reference_state(env, insn_idx);

		if (id < 0)
			return id;

		state->stack[spi].spilled_ptr.ref_obj_id = id;
		state->stack[spi - 1].spilled_ptr.ref_obj_id = id;
	}

	state->stack[spi].spilled_ptr.live |= REG_LIVE_WRITTEN;
	state->stack[spi - 1].spilled_ptr.live |= REG_LIVE_WRITTEN;

	return 0;
}
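/* Illustrative only (not from the original source): a dynptr placed at fp-16
 * occupies two stack slots, spi = 1 (bytes fp-16..fp-9, first_slot = true)
 * and spi - 1 = 0 (bytes fp-8..fp-1), with all sixteen slot_type bytes set to
 * STACK_DYNPTR and both spilled_ptr registers sharing one dynptr id (and one
 * ref_obj_id for a refcounted dynptr such as a ringbuf one).
 */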
static void invalidate_dynptr(struct bpf_verifier_env *env, struct bpf_func_state *state, int spi)
{
	int i;

	for (i = 0; i < BPF_REG_SIZE; i++) {
		state->stack[spi].slot_type[i] = STACK_INVALID;
		state->stack[spi - 1].slot_type[i] = STACK_INVALID;
	}

	__mark_reg_not_init(env, &state->stack[spi].spilled_ptr);
	__mark_reg_not_init(env, &state->stack[spi - 1].spilled_ptr);

	/* Why do we need to set REG_LIVE_WRITTEN for STACK_INVALID slot?
	 *
	 * While we don't allow reading STACK_INVALID, it is still possible to
	 * do <8 byte writes marking some but not all slots as STACK_MISC. Then,
	 * helpers or insns can do partial read of that part without failing,
	 * but check_stack_range_initialized, check_stack_read_var_off, and
	 * check_stack_read_fixed_off will do mark_reg_read for all 8-bytes of
	 * the slot conservatively. Hence we need to prevent those liveness
	 * marking walks.
	 *
	 * This was not a problem before because STACK_INVALID is only set by
	 * default (where the default reg state has its reg->parent as NULL), or
	 * in clean_live_states after REG_LIVE_DONE (at which point
	 * mark_reg_read won't walk reg->parent chain), but not randomly during
	 * verifier state exploration (like we did above). Hence, for our case
	 * parentage chain will still be live (i.e. reg->parent may be
	 * non-NULL), while earlier reg->parent was NULL, so we need
	 * REG_LIVE_WRITTEN to screen off read marker propagation when it is
	 * done later on reads or by mark_dynptr_read, to avoid unnecessarily
	 * marking registers in verifier state.
	 */
	state->stack[spi].spilled_ptr.live |= REG_LIVE_WRITTEN;
	state->stack[spi - 1].spilled_ptr.live |= REG_LIVE_WRITTEN;
}
static int unmark_stack_slots_dynptr(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
{
	struct bpf_func_state *state = func(env, reg);
	int spi, ref_obj_id, i;

	spi = dynptr_get_spi(env, reg);
	if (spi < 0)
		return spi;

	if (!dynptr_type_refcounted(state->stack[spi].spilled_ptr.dynptr.type)) {
		invalidate_dynptr(env, state, spi);
		return 0;
	}

	ref_obj_id = state->stack[spi].spilled_ptr.ref_obj_id;

	/* If the dynptr has a ref_obj_id, then we need to invalidate
	 * two things:
	 *
	 * 1) Any dynptrs with a matching ref_obj_id (clones)
	 * 2) Any slices derived from this dynptr.
	 */

	/* Invalidate any slices associated with this dynptr */
	WARN_ON_ONCE(release_reference(env, ref_obj_id));

	/* Invalidate any dynptr clones */
	for (i = 1; i < state->allocated_stack / BPF_REG_SIZE; i++) {
		if (state->stack[i].spilled_ptr.ref_obj_id != ref_obj_id)
			continue;

		/* it should always be the case that if the ref obj id
		 * matches then the stack slot also belongs to a
		 * dynptr
		 */
		if (state->stack[i].slot_type[0] != STACK_DYNPTR) {
			verbose(env, "verifier internal error: misconfigured ref_obj_id\n");
			return -EFAULT;
		}
		if (state->stack[i].spilled_ptr.dynptr.first_slot)
			invalidate_dynptr(env, state, i);
	}

	return 0;
}

static void __mark_reg_unknown(const struct bpf_verifier_env *env,
			       struct bpf_reg_state *reg);

static void mark_reg_invalid(const struct bpf_verifier_env *env, struct bpf_reg_state *reg)
{
	if (!env->allow_ptr_leaks)
		__mark_reg_not_init(env, reg);
	else
		__mark_reg_unknown(env, reg);
}
static int destroy_if_dynptr_stack_slot(struct bpf_verifier_env *env,
					struct bpf_func_state *state, int spi)
{
	struct bpf_func_state *fstate;
	struct bpf_reg_state *dreg;
	int i, dynptr_id;

	/* We always ensure that STACK_DYNPTR is never set partially,
	 * hence just checking for slot_type[0] is enough. This is
	 * different for STACK_SPILL, where it may be only set for
	 * 1 byte, so code has to use is_spilled_reg.
	 */
	if (state->stack[spi].slot_type[0] != STACK_DYNPTR)
		return 0;

	/* Reposition spi to first slot */
	if (!state->stack[spi].spilled_ptr.dynptr.first_slot)
		spi = spi + 1;

	if (dynptr_type_refcounted(state->stack[spi].spilled_ptr.dynptr.type)) {
		verbose(env, "cannot overwrite referenced dynptr\n");
		return -EINVAL;
	}

	mark_stack_slot_scratched(env, spi);
	mark_stack_slot_scratched(env, spi - 1);

	/* Writing partially to one dynptr stack slot destroys both. */
	for (i = 0; i < BPF_REG_SIZE; i++) {
		state->stack[spi].slot_type[i] = STACK_INVALID;
		state->stack[spi - 1].slot_type[i] = STACK_INVALID;
	}

	dynptr_id = state->stack[spi].spilled_ptr.id;
	/* Invalidate any slices associated with this dynptr */
	bpf_for_each_reg_in_vstate(env->cur_state, fstate, dreg, ({
		/* Dynptr slices are only PTR_TO_MEM_OR_NULL and PTR_TO_MEM */
		if (dreg->type != (PTR_TO_MEM | PTR_MAYBE_NULL) && dreg->type != PTR_TO_MEM)
			continue;
		if (dreg->dynptr_id == dynptr_id)
			mark_reg_invalid(env, dreg);
	}));

	/* Do not release reference state, we are destroying dynptr on stack,
	 * not using some helper to release it. Just reset register.
	 */
	__mark_reg_not_init(env, &state->stack[spi].spilled_ptr);
	__mark_reg_not_init(env, &state->stack[spi - 1].spilled_ptr);

	/* Same reason as unmark_stack_slots_dynptr above */
	state->stack[spi].spilled_ptr.live |= REG_LIVE_WRITTEN;
	state->stack[spi - 1].spilled_ptr.live |= REG_LIVE_WRITTEN;

	return 0;
}
static bool is_dynptr_reg_valid_uninit(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
{
	int spi;

	if (reg->type == CONST_PTR_TO_DYNPTR)
		return false;

	spi = dynptr_get_spi(env, reg);

	/* -ERANGE (i.e. spi not falling into allocated stack slots) isn't an
	 * error because this just means the stack state hasn't been updated yet.
	 * We will do check_mem_access to check and update stack bounds later.
	 */
	if (spi < 0 && spi != -ERANGE)
		return false;

	/* We don't need to check if the stack slots are marked by previous
	 * dynptr initializations because we allow overwriting existing unreferenced
	 * STACK_DYNPTR slots, see mark_stack_slots_dynptr which calls
	 * destroy_if_dynptr_stack_slot to ensure dynptr objects at the slots we are
	 * touching are completely destructed before we reinitialize them for a new
	 * one. For referenced ones, destroy_if_dynptr_stack_slot returns an error early
	 * instead of delaying it until the end where the user will get "Unreleased
	 * reference" error.
	 */
	return true;
}

static bool is_dynptr_reg_valid_init(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
{
	struct bpf_func_state *state = func(env, reg);
	int i, spi;

	/* This already represents first slot of initialized bpf_dynptr.
	 *
	 * CONST_PTR_TO_DYNPTR already has fixed and var_off as 0 due to
	 * check_func_arg_reg_off's logic, so we don't need to check its
	 * offset and alignment.
	 */
	if (reg->type == CONST_PTR_TO_DYNPTR)
		return true;

	spi = dynptr_get_spi(env, reg);
	if (spi < 0)
		return false;
	if (!state->stack[spi].spilled_ptr.dynptr.first_slot)
		return false;

	for (i = 0; i < BPF_REG_SIZE; i++) {
		if (state->stack[spi].slot_type[i] != STACK_DYNPTR ||
		    state->stack[spi - 1].slot_type[i] != STACK_DYNPTR)
			return false;
	}

	return true;
}
static bool is_dynptr_type_expected(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
				    enum bpf_arg_type arg_type)
{
	struct bpf_func_state *state = func(env, reg);
	enum bpf_dynptr_type dynptr_type;
	int spi;

	/* ARG_PTR_TO_DYNPTR takes any type of dynptr */
	if (arg_type == ARG_PTR_TO_DYNPTR)
		return true;

	dynptr_type = arg_to_dynptr_type(arg_type);
	if (reg->type == CONST_PTR_TO_DYNPTR) {
		return reg->dynptr.type == dynptr_type;
	} else {
		spi = dynptr_get_spi(env, reg);
		if (spi < 0)
			return false;
		return state->stack[spi].spilled_ptr.dynptr.type == dynptr_type;
	}
}

static void __mark_reg_known_zero(struct bpf_reg_state *reg);

static bool in_rcu_cs(struct bpf_verifier_env *env);

static bool is_kfunc_rcu_protected(struct bpf_kfunc_call_arg_meta *meta);
static int mark_stack_slots_iter(struct bpf_verifier_env *env,
				 struct bpf_kfunc_call_arg_meta *meta,
				 struct bpf_reg_state *reg, int insn_idx,
				 struct btf *btf, u32 btf_id, int nr_slots)
{
	struct bpf_func_state *state = func(env, reg);
	int spi, i, j, id;

	spi = iter_get_spi(env, reg, nr_slots);
	if (spi < 0)
		return spi;

	id = acquire_reference_state(env, insn_idx);
	if (id < 0)
		return id;

	for (i = 0; i < nr_slots; i++) {
		struct bpf_stack_state *slot = &state->stack[spi - i];
		struct bpf_reg_state *st = &slot->spilled_ptr;

		__mark_reg_known_zero(st);
		st->type = PTR_TO_STACK; /* we don't have dedicated reg type */
		if (is_kfunc_rcu_protected(meta)) {
			if (in_rcu_cs(env))
				st->type |= MEM_RCU;
			else
				st->type |= PTR_UNTRUSTED;
		}
		st->live |= REG_LIVE_WRITTEN;
		st->ref_obj_id = i == 0 ? id : 0;
		st->iter.btf = btf;
		st->iter.btf_id = btf_id;
		st->iter.state = BPF_ITER_STATE_ACTIVE;
		st->iter.depth = 0;

		for (j = 0; j < BPF_REG_SIZE; j++)
			slot->slot_type[j] = STACK_ITER;

		mark_stack_slot_scratched(env, spi - i);
	}

	return 0;
}
static int unmark_stack_slots_iter(struct bpf_verifier_env *env,
				   struct bpf_reg_state *reg, int nr_slots)
{
	struct bpf_func_state *state = func(env, reg);
	int spi, i, j;

	spi = iter_get_spi(env, reg, nr_slots);
	if (spi < 0)
		return spi;

	for (i = 0; i < nr_slots; i++) {
		struct bpf_stack_state *slot = &state->stack[spi - i];
		struct bpf_reg_state *st = &slot->spilled_ptr;

		if (i == 0)
			WARN_ON_ONCE(release_reference(env, st->ref_obj_id));

		__mark_reg_not_init(env, st);

		/* see unmark_stack_slots_dynptr() for why we need to set REG_LIVE_WRITTEN */
		st->live |= REG_LIVE_WRITTEN;

		for (j = 0; j < BPF_REG_SIZE; j++)
			slot->slot_type[j] = STACK_INVALID;

		mark_stack_slot_scratched(env, spi - i);
	}

	return 0;
}
static bool is_iter_reg_valid_uninit(struct bpf_verifier_env *env,
				     struct bpf_reg_state *reg, int nr_slots)
{
	struct bpf_func_state *state = func(env, reg);
	int spi, i, j;

	/* For -ERANGE (i.e. spi not falling into allocated stack slots), we
	 * will do check_mem_access to check and update stack bounds later, so
	 * return true for that case.
	 */
	spi = iter_get_spi(env, reg, nr_slots);
	if (spi == -ERANGE)
		return true;
	if (spi < 0)
		return false;

	for (i = 0; i < nr_slots; i++) {
		struct bpf_stack_state *slot = &state->stack[spi - i];

		for (j = 0; j < BPF_REG_SIZE; j++)
			if (slot->slot_type[j] == STACK_ITER)
				return false;
	}

	return true;
}

static int is_iter_reg_valid_init(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
				  struct btf *btf, u32 btf_id, int nr_slots)
{
	struct bpf_func_state *state = func(env, reg);
	int spi, i, j;

	spi = iter_get_spi(env, reg, nr_slots);
	if (spi < 0)
		return -EINVAL;

	for (i = 0; i < nr_slots; i++) {
		struct bpf_stack_state *slot = &state->stack[spi - i];
		struct bpf_reg_state *st = &slot->spilled_ptr;

		if (st->type & PTR_UNTRUSTED)
			return -EPROTO;
		/* only main (first) slot has ref_obj_id set */
		if (i == 0 && !st->ref_obj_id)
			return -EINVAL;
		if (i != 0 && st->ref_obj_id)
			return -EINVAL;
		if (st->iter.btf != btf || st->iter.btf_id != btf_id)
			return -EINVAL;

		for (j = 0; j < BPF_REG_SIZE; j++)
			if (slot->slot_type[j] != STACK_ITER)
				return -EINVAL;
	}

	return 0;
}
/* Check if given stack slot is "special":
 *   - spilled register state (STACK_SPILL);
 *   - dynptr state (STACK_DYNPTR);
 *   - iter state (STACK_ITER).
 */
static bool is_stack_slot_special(const struct bpf_stack_state *stack)
{
	enum bpf_stack_slot_type type = stack->slot_type[BPF_REG_SIZE - 1];

	switch (type) {
	case STACK_SPILL:
	case STACK_DYNPTR:
	case STACK_ITER:
		return true;
	case STACK_INVALID:
	case STACK_MISC:
	case STACK_ZERO:
		return false;
	default:
		WARN_ONCE(1, "unknown stack slot type %d\n", type);
		return true;
	}
}

/* The reg state of a pointer or a bounded scalar was saved when
 * it was spilled to the stack.
 */
static bool is_spilled_reg(const struct bpf_stack_state *stack)
{
	return stack->slot_type[BPF_REG_SIZE - 1] == STACK_SPILL;
}

static bool is_spilled_scalar_reg(const struct bpf_stack_state *stack)
{
	return stack->slot_type[BPF_REG_SIZE - 1] == STACK_SPILL &&
	       stack->spilled_ptr.type == SCALAR_VALUE;
}

static void scrub_spilled_slot(u8 *stype)
{
	if (*stype != STACK_INVALID)
		*stype = STACK_MISC;
}
static void print_scalar_ranges(struct bpf_verifier_env *env,
				const struct bpf_reg_state *reg,
				const char **sep)
{
	struct {
		const char *name;
		u64 val;
		bool omit;
	} minmaxs[] = {
		{"smin", reg->smin_value, reg->smin_value == S64_MIN},
		{"smax", reg->smax_value, reg->smax_value == S64_MAX},
		{"umin", reg->umin_value, reg->umin_value == 0},
		{"umax", reg->umax_value, reg->umax_value == U64_MAX},
		{"smin32", (s64)reg->s32_min_value, reg->s32_min_value == S32_MIN},
		{"smax32", (s64)reg->s32_max_value, reg->s32_max_value == S32_MAX},
		{"umin32", reg->u32_min_value, reg->u32_min_value == 0},
		{"umax32", reg->u32_max_value, reg->u32_max_value == U32_MAX},
	}, *m1, *m2, *mend = &minmaxs[ARRAY_SIZE(minmaxs)];
	bool neg1, neg2;

	for (m1 = &minmaxs[0]; m1 < mend; m1++) {
		if (m1->omit)
			continue;

		neg1 = m1->name[0] == 's' && (s64)m1->val < 0;

		verbose(env, "%s%s=", *sep, m1->name);
		*sep = ",";

		for (m2 = m1 + 2; m2 < mend; m2 += 2) {
			if (m2->omit || m2->val != m1->val)
				continue;
			/* don't mix negatives with positives */
			neg2 = m2->name[0] == 's' && (s64)m2->val < 0;
			if (neg2 != neg1)
				continue;

			m2->omit = true;
			verbose(env, "%s=", m2->name);
		}

		verbose(env, m1->name[0] == 's' ? "%lld" : "%llu", m1->val);
	}
}
static void print_verifier_state(struct bpf_verifier_env *env,
				 const struct bpf_func_state *state,
				 bool print_all)
{
	const struct bpf_reg_state *reg;
	enum bpf_reg_type t;
	int i;

	if (state->frameno)
		verbose(env, " frame%d:", state->frameno);
	for (i = 0; i < MAX_BPF_REG; i++) {
		reg = &state->regs[i];
		t = reg->type;
		if (t == NOT_INIT)
			continue;
		if (!print_all && !reg_scratched(env, i))
			continue;
		verbose(env, " R%d", i);
		print_liveness(env, reg->live);
		verbose(env, "=");
		if (t == SCALAR_VALUE && reg->precise)
			verbose(env, "P");
		if ((t == SCALAR_VALUE || t == PTR_TO_STACK) &&
		    tnum_is_const(reg->var_off)) {
			/* reg->off should be 0 for SCALAR_VALUE */
			verbose(env, "%s", t == SCALAR_VALUE ? "" : reg_type_str(env, t));
			verbose(env, "%lld", reg->var_off.value + reg->off);
		} else {
			const char *sep = "";

			verbose(env, "%s", reg_type_str(env, t));
			if (base_type(t) == PTR_TO_BTF_ID)
				verbose(env, "%s", btf_type_name(reg->btf, reg->btf_id));
			verbose(env, "(");
/*
 * _a stands for append, was shortened to avoid multiline statements below.
 * This macro is used to output a comma separated list of attributes.
 */
#define verbose_a(fmt, ...) ({ verbose(env, "%s" fmt, sep, __VA_ARGS__); sep = ","; })

			if (reg->id)
				verbose_a("id=%d", reg->id);
			if (reg->ref_obj_id)
				verbose_a("ref_obj_id=%d", reg->ref_obj_id);
			if (type_is_non_owning_ref(reg->type))
				verbose_a("%s", "non_own_ref");
			if (t != SCALAR_VALUE)
				verbose_a("off=%d", reg->off);
			if (type_is_pkt_pointer(t))
				verbose_a("r=%d", reg->range);
			else if (base_type(t) == CONST_PTR_TO_MAP ||
				 base_type(t) == PTR_TO_MAP_KEY ||
				 base_type(t) == PTR_TO_MAP_VALUE)
				verbose_a("ks=%d,vs=%d",
					  reg->map_ptr->key_size,
					  reg->map_ptr->value_size);
			if (tnum_is_const(reg->var_off)) {
				/* Typically an immediate SCALAR_VALUE, but
				 * could be a pointer whose offset is too big
				 * for reg->off
				 */
				verbose_a("imm=%llx", reg->var_off.value);
			} else {
				print_scalar_ranges(env, reg, &sep);
				if (!tnum_is_unknown(reg->var_off)) {
					char tn_buf[48];

					tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
					verbose_a("var_off=%s", tn_buf);
				}
			}
#undef verbose_a

			verbose(env, ")");
		}
	}
	for (i = 0; i < state->allocated_stack / BPF_REG_SIZE; i++) {
		char types_buf[BPF_REG_SIZE + 1];
		bool valid = false;
		int j;

		for (j = 0; j < BPF_REG_SIZE; j++) {
			if (state->stack[i].slot_type[j] != STACK_INVALID)
				valid = true;
			types_buf[j] = slot_type_char[state->stack[i].slot_type[j]];
		}
		types_buf[BPF_REG_SIZE] = 0;
		if (!valid)
			continue;
		if (!print_all && !stack_slot_scratched(env, i))
			continue;
		switch (state->stack[i].slot_type[BPF_REG_SIZE - 1]) {
		case STACK_SPILL:
			reg = &state->stack[i].spilled_ptr;
			t = reg->type;

			verbose(env, " fp%d", (-i - 1) * BPF_REG_SIZE);
			print_liveness(env, reg->live);
			verbose(env, "=%s", t == SCALAR_VALUE ? "" : reg_type_str(env, t));
			if (t == SCALAR_VALUE && reg->precise)
				verbose(env, "P");
			if (t == SCALAR_VALUE && tnum_is_const(reg->var_off))
				verbose(env, "%lld", reg->var_off.value + reg->off);
			break;
		case STACK_DYNPTR:
			i += BPF_DYNPTR_NR_SLOTS - 1;
			reg = &state->stack[i].spilled_ptr;

			verbose(env, " fp%d", (-i - 1) * BPF_REG_SIZE);
			print_liveness(env, reg->live);
			verbose(env, "=dynptr_%s", dynptr_type_str(reg->dynptr.type));
			if (reg->ref_obj_id)
				verbose(env, "(ref_id=%d)", reg->ref_obj_id);
			break;
		case STACK_ITER:
			/* only main slot has ref_obj_id set; skip others */
			reg = &state->stack[i].spilled_ptr;
			if (!reg->ref_obj_id)
				continue;

			verbose(env, " fp%d", (-i - 1) * BPF_REG_SIZE);
			print_liveness(env, reg->live);
			verbose(env, "=iter_%s(ref_id=%d,state=%s,depth=%u)",
				iter_type_str(reg->iter.btf, reg->iter.btf_id),
				reg->ref_obj_id, iter_state_str(reg->iter.state),
				reg->iter.depth);
			break;
		default:
			reg = &state->stack[i].spilled_ptr;

			for (j = 0; j < BPF_REG_SIZE; j++)
				types_buf[j] = slot_type_char[state->stack[i].slot_type[j]];
			types_buf[BPF_REG_SIZE] = 0;

			verbose(env, " fp%d", (-i - 1) * BPF_REG_SIZE);
			print_liveness(env, reg->live);
			verbose(env, "=%s", types_buf);
			break;
		}
	}
	if (state->acquired_refs && state->refs[0].id) {
		verbose(env, " refs=%d", state->refs[0].id);
		for (i = 1; i < state->acquired_refs; i++)
			if (state->refs[i].id)
				verbose(env, ",%d", state->refs[i].id);
	}
	if (state->in_callback_fn)
		verbose(env, " cb");
	if (state->in_async_callback_fn)
		verbose(env, " async_cb");
	verbose(env, "\n");
	if (!print_all)
		mark_verifier_state_clean(env);
}
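/* Illustrative only (not from the original source, exact fields depend on the
 * register types present and on the kernel version): a state dump appended to
 * an instruction line by the function above typically looks like
 *
 *   0: R1=ctx(off=0,imm=0) R10=fp0
 */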
static inline u32 vlog_alignment(u32 pos)
{
	return round_up(max(pos + BPF_LOG_MIN_ALIGNMENT / 2, BPF_LOG_ALIGNMENT),
			BPF_LOG_MIN_ALIGNMENT) - pos - 1;
}

static void print_insn_state(struct bpf_verifier_env *env,
			     const struct bpf_func_state *state)
{
	if (env->prev_log_pos && env->prev_log_pos == env->log.end_pos) {
		/* remove new line character */
		bpf_vlog_reset(&env->log, env->prev_log_pos - 1);
		verbose(env, "%*c;", vlog_alignment(env->prev_insn_print_pos), ' ');
	} else {
		verbose(env, "%d:", env->insn_idx);
	}
	print_verifier_state(env, state, false);
}
/* copy array src of length n * size bytes to dst. dst is reallocated if it's too
 * small to hold src. This is different from krealloc since we don't want to preserve
 * the contents of dst.
 *
 * Leaves dst untouched if src is NULL or length is zero. Returns NULL if memory could
 * not be allocated.
 */
static void *copy_array(void *dst, const void *src, size_t n, size_t size, gfp_t flags)
{
	size_t alloc_bytes;
	void *orig = dst;
	size_t bytes;

	if (ZERO_OR_NULL_PTR(src))
		goto out;

	if (unlikely(check_mul_overflow(n, size, &bytes)))
		return NULL;

	alloc_bytes = max(ksize(orig), kmalloc_size_roundup(bytes));
	dst = krealloc(orig, alloc_bytes, flags);
	if (!dst) {
		kfree(orig);
		return NULL;
	}

	memcpy(dst, src, bytes);
out:
	return dst ? dst : ZERO_SIZE_PTR;
}

/* resize an array from old_n items to new_n items. the array is reallocated if it's too
 * small to hold new_n items. new items are zeroed out if the array grows.
 *
 * Contrary to krealloc_array, does not free arr if new_n is zero.
 */
static void *realloc_array(void *arr, size_t old_n, size_t new_n, size_t size)
{
	size_t alloc_size;
	void *new_arr;

	if (!new_n || old_n == new_n)
		goto out;

	alloc_size = kmalloc_size_roundup(size_mul(new_n, size));
	new_arr = krealloc(arr, alloc_size, GFP_KERNEL);
	if (!new_arr) {
		kfree(arr);
		return NULL;
	}
	arr = new_arr;

	if (new_n > old_n)
		memset(arr + old_n * size, 0, (new_n - old_n) * size);

out:
	return arr ? arr : ZERO_SIZE_PTR;
}
static int copy_reference_state(struct bpf_func_state *dst, const struct bpf_func_state *src)
{
	dst->refs = copy_array(dst->refs, src->refs, src->acquired_refs,
			       sizeof(struct bpf_reference_state), GFP_KERNEL);
	if (!dst->refs)
		return -ENOMEM;

	dst->acquired_refs = src->acquired_refs;
	return 0;
}

static int copy_stack_state(struct bpf_func_state *dst, const struct bpf_func_state *src)
{
	size_t n = src->allocated_stack / BPF_REG_SIZE;

	dst->stack = copy_array(dst->stack, src->stack, n, sizeof(struct bpf_stack_state),
				GFP_KERNEL);
	if (!dst->stack)
		return -ENOMEM;

	dst->allocated_stack = src->allocated_stack;
	return 0;
}

static int resize_reference_state(struct bpf_func_state *state, size_t n)
{
	state->refs = realloc_array(state->refs, state->acquired_refs, n,
				    sizeof(struct bpf_reference_state));
	if (!state->refs)
		return -ENOMEM;

	state->acquired_refs = n;
	return 0;
}

static int grow_stack_state(struct bpf_func_state *state, int size)
{
	size_t old_n = state->allocated_stack / BPF_REG_SIZE, n = size / BPF_REG_SIZE;

	if (old_n >= n)
		return 0;

	state->stack = realloc_array(state->stack, old_n, n, sizeof(struct bpf_stack_state));
	if (!state->stack)
		return -ENOMEM;

	state->allocated_stack = size;
	return 0;
}
/* Acquire a pointer id from the env and update the state->refs to include
 * this new pointer reference.
 * On success, returns a valid pointer id to associate with the register
 * On failure, returns a negative errno.
 */
static int acquire_reference_state(struct bpf_verifier_env *env, int insn_idx)
{
	struct bpf_func_state *state = cur_func(env);
	int new_ofs = state->acquired_refs;
	int id, err;

	err = resize_reference_state(state, state->acquired_refs + 1);
	if (err)
		return err;
	id = ++env->id_gen;
	state->refs[new_ofs].id = id;
	state->refs[new_ofs].insn_idx = insn_idx;
	state->refs[new_ofs].callback_ref = state->in_callback_fn ? state->frameno : 0;

	return id;
}
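/* Illustrative pairing only (not a complete call chain from this file): when
 * a helper such as bpf_sk_lookup_tcp() returns a referenced pointer, the call
 * handling code acquires an id and stores it in the destination register, and
 * a later bpf_sk_release() of the same pointer ends up releasing that id:
 *
 *	id = acquire_reference_state(env, insn_idx);
 *	if (id < 0)
 *		return id;
 *	regs[BPF_REG_0].ref_obj_id = id;
 *	...
 *	err = release_reference_state(cur_func(env), id);
 */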
/* release function corresponding to acquire_reference_state(). Idempotent. */
static int release_reference_state(struct bpf_func_state *state, int ptr_id)
{
	int i, last_idx;

	last_idx = state->acquired_refs - 1;
	for (i = 0; i < state->acquired_refs; i++) {
		if (state->refs[i].id == ptr_id) {
			/* Cannot release caller references in callbacks */
			if (state->in_callback_fn && state->refs[i].callback_ref != state->frameno)
				return -EINVAL;
			if (last_idx && i != last_idx)
				memcpy(&state->refs[i], &state->refs[last_idx],
				       sizeof(*state->refs));
			memset(&state->refs[last_idx], 0, sizeof(*state->refs));
			state->acquired_refs--;
			return 0;
		}
	}
	return -EINVAL;
}
static void free_func_state(struct bpf_func_state *state)
{
	if (!state)
		return;
	kfree(state->refs);
	kfree(state->stack);
	kfree(state);
}

static void clear_jmp_history(struct bpf_verifier_state *state)
{
	kfree(state->jmp_history);
	state->jmp_history = NULL;
	state->jmp_history_cnt = 0;
}

static void free_verifier_state(struct bpf_verifier_state *state,
				bool free_self)
{
	int i;

	for (i = 0; i <= state->curframe; i++) {
		free_func_state(state->frame[i]);
		state->frame[i] = NULL;
	}
	clear_jmp_history(state);
	if (free_self)
		kfree(state);
}

/* copy verifier state from src to dst growing dst stack space
 * when necessary to accommodate larger src stack
 */
static int copy_func_state(struct bpf_func_state *dst,
			   const struct bpf_func_state *src)
{
	int err;

	memcpy(dst, src, offsetof(struct bpf_func_state, acquired_refs));
	err = copy_reference_state(dst, src);
	if (err)
		return err;
	return copy_stack_state(dst, src);
}
static int copy_verifier_state(struct bpf_verifier_state *dst_state,
			       const struct bpf_verifier_state *src)
{
	struct bpf_func_state *dst;
	int i, err;

	dst_state->jmp_history = copy_array(dst_state->jmp_history, src->jmp_history,
					    src->jmp_history_cnt, sizeof(struct bpf_idx_pair),
					    GFP_USER);
	if (!dst_state->jmp_history)
		return -ENOMEM;
	dst_state->jmp_history_cnt = src->jmp_history_cnt;

	/* if dst has more stack frames than src frame, free them, this is also
	 * necessary in case of exceptional exits using bpf_throw.
	 */
	for (i = src->curframe + 1; i <= dst_state->curframe; i++) {
		free_func_state(dst_state->frame[i]);
		dst_state->frame[i] = NULL;
	}
	dst_state->speculative = src->speculative;
	dst_state->active_rcu_lock = src->active_rcu_lock;
	dst_state->curframe = src->curframe;
	dst_state->active_lock.ptr = src->active_lock.ptr;
	dst_state->active_lock.id = src->active_lock.id;
	dst_state->branches = src->branches;
	dst_state->parent = src->parent;
	dst_state->first_insn_idx = src->first_insn_idx;
	dst_state->last_insn_idx = src->last_insn_idx;
	dst_state->dfs_depth = src->dfs_depth;
	dst_state->callback_unroll_depth = src->callback_unroll_depth;
	dst_state->used_as_loop_entry = src->used_as_loop_entry;
	for (i = 0; i <= src->curframe; i++) {
		dst = dst_state->frame[i];
		if (!dst) {
			dst = kzalloc(sizeof(*dst), GFP_KERNEL);
			if (!dst)
				return -ENOMEM;
			dst_state->frame[i] = dst;
		}
		err = copy_func_state(dst, src->frame[i]);
		if (err)
			return err;
	}
	return 0;
}
static u32 state_htab_size(struct bpf_verifier_env *env)
{
	return env->prog->len;
}

static struct bpf_verifier_state_list **explored_state(struct bpf_verifier_env *env, int idx)
{
	struct bpf_verifier_state *cur = env->cur_state;
	struct bpf_func_state *state = cur->frame[cur->curframe];

	return &env->explored_states[(idx ^ state->callsite) % state_htab_size(env)];
}

static bool same_callsites(struct bpf_verifier_state *a, struct bpf_verifier_state *b)
{
	int fr;

	if (a->curframe != b->curframe)
		return false;

	for (fr = a->curframe; fr >= 0; fr--)
		if (a->frame[fr]->callsite != b->frame[fr]->callsite)
			return false;

	return true;
}
/* Open coded iterators allow back-edges in the state graph in order to
 * check unbounded loops driven by iterators.
 *
 * In is_state_visited() it is necessary to know if explored states are
 * part of some loops in order to decide whether non-exact states
 * comparison could be used:
 * - non-exact states comparison establishes sub-state relation and uses
 *   read and precision marks to do so, these marks are propagated from
 *   children states and thus are not guaranteed to be final in a loop;
 * - exact states comparison just checks if current and explored states
 *   are identical (and thus form a back-edge).
 *
 * Paper "A New Algorithm for Identifying Loops in Decompilation"
 * by Tao Wei, Jian Mao, Wei Zou and Yu Chen [1] presents a convenient
 * algorithm for loop structure detection and gives an overview of
 * relevant terminology. It also has helpful illustrations.
 *
 * [1] https://api.semanticscholar.org/CorpusID:15784067
 *
 * We use a similar algorithm but because loop nested structure is
 * irrelevant for verifier ours is significantly simpler and resembles
 * strongly connected components algorithm from Sedgewick's textbook.
 *
 * Define topmost loop entry as a first node of the loop traversed in a
 * depth first search starting from initial state. The goal of the loop
 * tracking algorithm is to associate topmost loop entries with states
 * derived from these entries.
 *
 * For each step in the DFS states traversal algorithm needs to identify
 * the following situations (the original source illustrates them with
 * ASCII diagrams of the DFS tree; only the textual descriptions survive
 * here):
 *
 * (A) successor state of cur not yet traversed;
 * (B) successor state of cur or its entry are in current DFS path, thus
 *     cur and succ are members of the same outermost loop;
 * (C) successor state of cur is a part of some loop but this loop
 *     does not include cur or successor state is not in a loop at all.
 *
 * Algorithm could be described as the following python code:
 *
 *     traversed = set()   # Set of traversed nodes
 *     entries = {}        # Mapping from node to loop entry
 *     depths = {}         # Depth level assigned to graph node
 *     path = set()        # Current DFS path
 *
 *     # Find outermost loop entry known for n
 *     def get_loop_entry(n):
 *         h = entries.get(n, None)
 *         while h in entries and entries[h] != h:
 *             h = entries[h]
 *         return h
 *
 *     # Update n's loop entry if h's outermost entry comes
 *     # before n's outermost entry in current DFS path.
 *     def update_loop_entry(n, h):
 *         n1 = get_loop_entry(n) or n
 *         h1 = get_loop_entry(h) or h
 *         if h1 in path and depths[h1] <= depths[n1]:
 *             entries[n] = h1
 *
 *     def dfs(n, depth):
 *         traversed.add(n)
 *         path.add(n)
 *         depths[n] = depth
 *         for succ in G.successors(n):
 *             if succ not in traversed:
 *                 # Case A: explore succ and update cur's loop entry
 *                 #         only if succ's entry is in current DFS path.
 *                 dfs(succ, depth + 1)
 *                 h = get_loop_entry(succ)
 *                 update_loop_entry(n, h)
 *             else:
 *                 # Case B or C depending on `h1 in path` check in update_loop_entry().
 *                 update_loop_entry(n, succ)
 *         path.remove(n)
 *
 * To adapt this algorithm for use with verifier:
 * - use st->branch == 0 as a signal that DFS of succ had been finished
 *   and cur's loop entry has to be updated (case A), handle this in
 *   update_branch_counts();
 * - use st->branch > 0 as a signal that st is in the current DFS path;
 * - handle cases B and C in is_state_visited();
 * - update topmost loop entry for intermediate states in get_loop_entry().
 */
static struct bpf_verifier_state *get_loop_entry(struct bpf_verifier_state *st)
{
	struct bpf_verifier_state *topmost = st->loop_entry, *old;

	while (topmost && topmost->loop_entry && topmost != topmost->loop_entry)
		topmost = topmost->loop_entry;
	/* Update loop entries for intermediate states to avoid this
	 * traversal in future get_loop_entry() calls.
	 */
	while (st && st->loop_entry != topmost) {
		old = st->loop_entry;
		st->loop_entry = topmost;
		st = old;
	}
	return topmost;
}
static void update_loop_entry(struct bpf_verifier_state *cur, struct bpf_verifier_state *hdr)
{
	struct bpf_verifier_state *cur1, *hdr1;

	cur1 = get_loop_entry(cur) ?: cur;
	hdr1 = get_loop_entry(hdr) ?: hdr;
	/* The hdr1->branches check decides between cases B and C in
	 * comment for get_loop_entry(). If hdr1->branches == 0 then
	 * hdr's topmost loop entry is not in current DFS path,
	 * hence 'cur' and 'hdr' are not in the same loop and there is
	 * no need to update cur->loop_entry.
	 */
	if (hdr1->branches && hdr1->dfs_depth <= cur1->dfs_depth) {
		cur->loop_entry = hdr;
		hdr->used_as_loop_entry = true;
	}
}
2023 static void update_branch_counts(struct bpf_verifier_env
*env
, struct bpf_verifier_state
*st
)
2026 u32 br
= --st
->branches
;
2028 /* br == 0 signals that DFS exploration for 'st' is finished,
2029 * thus it is necessary to update parent's loop entry if it
2030 * turned out that st is a part of some loop.
2031 * This is a part of 'case A' in get_loop_entry() comment.
2033 if (br
== 0 && st
->parent
&& st
->loop_entry
)
2034 update_loop_entry(st
->parent
, st
->loop_entry
);
2036 /* WARN_ON(br > 1) technically makes sense here,
2037 * but see comment in push_stack(), hence:
2039 WARN_ONCE((int)br
< 0,
2040 "BUG update_branch_counts:branches_to_explore=%d\n",
static int pop_stack(struct bpf_verifier_env *env, int *prev_insn_idx,
		     int *insn_idx, bool pop_log)
{
	struct bpf_verifier_state *cur = env->cur_state;
	struct bpf_verifier_stack_elem *elem, *head = env->head;
	int err;

	if (env->head == NULL)
		return -ENOENT;

	if (cur) {
		err = copy_verifier_state(cur, &head->st);
		if (err)
			return err;
	}
	if (pop_log)
		bpf_vlog_reset(&env->log, head->log_pos);
	if (insn_idx)
		*insn_idx = head->insn_idx;
	if (prev_insn_idx)
		*prev_insn_idx = head->prev_insn_idx;
	elem = head->next;
	free_verifier_state(&head->st, false);
	kfree(head);
	env->head = elem;
	env->stack_size--;
	return 0;
}
static struct bpf_verifier_state *push_stack(struct bpf_verifier_env *env,
					     int insn_idx, int prev_insn_idx,
					     bool speculative)
{
	struct bpf_verifier_state *cur = env->cur_state;
	struct bpf_verifier_stack_elem *elem;
	int err;

	elem = kzalloc(sizeof(struct bpf_verifier_stack_elem), GFP_KERNEL);
	if (!elem)
		goto err;

	elem->insn_idx = insn_idx;
	elem->prev_insn_idx = prev_insn_idx;
	elem->next = env->head;
	elem->log_pos = env->log.end_pos;
	env->head = elem;
	env->stack_size++;
	err = copy_verifier_state(&elem->st, cur);
	if (err)
		goto err;
	elem->st.speculative |= speculative;
	if (env->stack_size > BPF_COMPLEXITY_LIMIT_JMP_SEQ) {
		verbose(env, "The sequence of %d jumps is too complex.\n",
			env->stack_size);
		goto err;
	}
	if (elem->st.parent) {
		++elem->st.parent->branches;
		/* WARN_ON(branches > 2) technically makes sense here,
		 * but:
		 * 1. speculative states will bump 'branches' for non-branch
		 *    instructions
		 * 2. is_state_visited() heuristics may decide not to create
		 *    a new state for a sequence of branches and all such current
		 *    and cloned states will be pointing to a single parent state
		 *    which might have large 'branches' count.
		 */
	}
	return &elem->st;
err:
	free_verifier_state(env->cur_state, true);
	env->cur_state = NULL;
	/* pop all elements and return */
	while (!pop_stack(env, NULL, NULL, false));
	return NULL;
}
#define CALLER_SAVED_REGS 6
static const int caller_saved[CALLER_SAVED_REGS] = {
	BPF_REG_0, BPF_REG_1, BPF_REG_2, BPF_REG_3, BPF_REG_4, BPF_REG_5
};
/* This helper doesn't clear reg->id */
static void ___mark_reg_known(struct bpf_reg_state *reg, u64 imm)
{
	reg->var_off = tnum_const(imm);
	reg->smin_value = (s64)imm;
	reg->smax_value = (s64)imm;
	reg->umin_value = imm;
	reg->umax_value = imm;

	reg->s32_min_value = (s32)imm;
	reg->s32_max_value = (s32)imm;
	reg->u32_min_value = (u32)imm;
	reg->u32_max_value = (u32)imm;
}
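/* Illustrative example (value chosen arbitrarily): after
 * ___mark_reg_known(reg, 0x100000005ULL) all 64-bit bounds collapse to
 * 0x100000005, var_off becomes the constant tnum (value = imm, mask = 0),
 * and the 32-bit bounds track only the low subregister, collapsing to 5.
 */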
/* Mark the unknown part of a register (variable offset or scalar value) as
 * known to have the value @imm.
 */
static void __mark_reg_known(struct bpf_reg_state *reg, u64 imm)
{
	/* Clear off and union(map_ptr, range) */
	memset(((u8 *)reg) + sizeof(reg->type), 0,
	       offsetof(struct bpf_reg_state, var_off) - sizeof(reg->type));
	reg->ref_obj_id = 0;
	___mark_reg_known(reg, imm);
}

static void __mark_reg32_known(struct bpf_reg_state *reg, u64 imm)
{
	reg->var_off = tnum_const_subreg(reg->var_off, imm);
	reg->s32_min_value = (s32)imm;
	reg->s32_max_value = (s32)imm;
	reg->u32_min_value = (u32)imm;
	reg->u32_max_value = (u32)imm;
}

/* Mark the 'variable offset' part of a register as zero. This should be
 * used only on registers holding a pointer type.
 */
static void __mark_reg_known_zero(struct bpf_reg_state *reg)
{
	__mark_reg_known(reg, 0);
}

static void __mark_reg_const_zero(struct bpf_reg_state *reg)
{
	__mark_reg_known(reg, 0);
	reg->type = SCALAR_VALUE;
}
static void mark_reg_known_zero(struct bpf_verifier_env *env,
				struct bpf_reg_state *regs, u32 regno)
{
	if (WARN_ON(regno >= MAX_BPF_REG)) {
		verbose(env, "mark_reg_known_zero(regs, %u)\n", regno);
		/* Something bad happened, let's kill all regs */
		for (regno = 0; regno < MAX_BPF_REG; regno++)
			__mark_reg_not_init(env, regs + regno);
		return;
	}
	__mark_reg_known_zero(regs + regno);
}
static void __mark_dynptr_reg(struct bpf_reg_state *reg, enum bpf_dynptr_type type,
			      bool first_slot, int dynptr_id)
{
	/* reg->type has no meaning for STACK_DYNPTR, but when we set reg for
	 * callback arguments, it does need to be CONST_PTR_TO_DYNPTR, so simply
	 * set it unconditionally as it is ignored for STACK_DYNPTR anyway.
	 */
	__mark_reg_known_zero(reg);
	reg->type = CONST_PTR_TO_DYNPTR;
	/* Give each dynptr a unique id to uniquely associate slices to it. */
	reg->id = dynptr_id;
	reg->dynptr.type = type;
	reg->dynptr.first_slot = first_slot;
}
static void mark_ptr_not_null_reg(struct bpf_reg_state *reg)
{
	if (base_type(reg->type) == PTR_TO_MAP_VALUE) {
		const struct bpf_map *map = reg->map_ptr;

		if (map->inner_map_meta) {
			reg->type = CONST_PTR_TO_MAP;
			reg->map_ptr = map->inner_map_meta;
			/* transfer reg's id which is unique for every map_lookup_elem
			 * as UID of the inner map.
			 */
			if (btf_record_has_field(map->inner_map_meta->record, BPF_TIMER))
				reg->map_uid = reg->id;
		} else if (map->map_type == BPF_MAP_TYPE_XSKMAP) {
			reg->type = PTR_TO_XDP_SOCK;
		} else if (map->map_type == BPF_MAP_TYPE_SOCKMAP ||
			   map->map_type == BPF_MAP_TYPE_SOCKHASH) {
			reg->type = PTR_TO_SOCKET;
		} else {
			reg->type = PTR_TO_MAP_VALUE;
		}
		return;
	}

	reg->type &= ~PTR_MAYBE_NULL;
}
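/* Example (sketch): after a successful NULL check, a value returned by
 * bpf_map_lookup_elem() on a map-in-map (inner_map_meta != NULL) becomes
 * CONST_PTR_TO_MAP pointing at the inner map's meta, a sockmap/sockhash
 * lookup becomes PTR_TO_SOCKET, and a plain array/hash map lookup simply
 * stays PTR_TO_MAP_VALUE with the PTR_MAYBE_NULL flag cleared.
 */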
static void mark_reg_graph_node(struct bpf_reg_state *regs, u32 regno,
				struct btf_field_graph_root *ds_head)
{
	__mark_reg_known_zero(&regs[regno]);
	regs[regno].type = PTR_TO_BTF_ID | MEM_ALLOC;
	regs[regno].btf = ds_head->btf;
	regs[regno].btf_id = ds_head->value_btf_id;
	regs[regno].off = ds_head->node_offset;
}

static bool reg_is_pkt_pointer(const struct bpf_reg_state *reg)
{
	return type_is_pkt_pointer(reg->type);
}

static bool reg_is_pkt_pointer_any(const struct bpf_reg_state *reg)
{
	return reg_is_pkt_pointer(reg) ||
	       reg->type == PTR_TO_PACKET_END;
}

static bool reg_is_dynptr_slice_pkt(const struct bpf_reg_state *reg)
{
	return base_type(reg->type) == PTR_TO_MEM &&
	       (reg->type & DYNPTR_TYPE_SKB || reg->type & DYNPTR_TYPE_XDP);
}

/* Unmodified PTR_TO_PACKET[_META,_END] register from ctx access. */
static bool reg_is_init_pkt_pointer(const struct bpf_reg_state *reg,
				    enum bpf_reg_type which)
{
	/* The register can already have a range from prior markings.
	 * This is fine as long as it hasn't been advanced from its
	 * original position.
	 */
	return reg->type == which &&
	       reg->off == 0 &&
	       reg->range == 0 &&
	       tnum_equals_const(reg->var_off, 0);
}
/* Reset the min/max bounds of a register */
static void __mark_reg_unbounded(struct bpf_reg_state *reg)
{
	reg->smin_value = S64_MIN;
	reg->smax_value = S64_MAX;
	reg->umin_value = 0;
	reg->umax_value = U64_MAX;

	reg->s32_min_value = S32_MIN;
	reg->s32_max_value = S32_MAX;
	reg->u32_min_value = 0;
	reg->u32_max_value = U32_MAX;
}

static void __mark_reg64_unbounded(struct bpf_reg_state *reg)
{
	reg->smin_value = S64_MIN;
	reg->smax_value = S64_MAX;
	reg->umin_value = 0;
	reg->umax_value = U64_MAX;
}

static void __mark_reg32_unbounded(struct bpf_reg_state *reg)
{
	reg->s32_min_value = S32_MIN;
	reg->s32_max_value = S32_MAX;
	reg->u32_min_value = 0;
	reg->u32_max_value = U32_MAX;
}
static void __update_reg32_bounds(struct bpf_reg_state *reg)
{
	struct tnum var32_off = tnum_subreg(reg->var_off);

	/* min signed is max(sign bit) | min(other bits) */
	reg->s32_min_value = max_t(s32, reg->s32_min_value,
				   var32_off.value | (var32_off.mask & S32_MIN));
	/* max signed is min(sign bit) | max(other bits) */
	reg->s32_max_value = min_t(s32, reg->s32_max_value,
				   var32_off.value | (var32_off.mask & S32_MAX));
	reg->u32_min_value = max_t(u32, reg->u32_min_value, (u32)var32_off.value);
	reg->u32_max_value = min(reg->u32_max_value,
				 (u32)(var32_off.value | var32_off.mask));
}

static void __update_reg64_bounds(struct bpf_reg_state *reg)
{
	/* min signed is max(sign bit) | min(other bits) */
	reg->smin_value = max_t(s64, reg->smin_value,
				reg->var_off.value | (reg->var_off.mask & S64_MIN));
	/* max signed is min(sign bit) | max(other bits) */
	reg->smax_value = min_t(s64, reg->smax_value,
				reg->var_off.value | (reg->var_off.mask & S64_MAX));
	reg->umin_value = max(reg->umin_value, reg->var_off.value);
	reg->umax_value = min(reg->umax_value,
			      reg->var_off.value | reg->var_off.mask);
}

static void __update_reg_bounds(struct bpf_reg_state *reg)
{
	__update_reg32_bounds(reg);
	__update_reg64_bounds(reg);
}
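/* Worked example (hypothetical tnum): with var_off = (value=0x10, mask=0x0f),
 * bits 4..7 are known to be 0001 and bits 0..3 are unknown, so the possible
 * values are 0x10..0x1f. The helpers above therefore raise umin to at least
 * 0x10 (all unknown bits zero) and lower umax to at most 0x1f (all unknown
 * bits one); existing bounds are only ever tightened, never widened.
 */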
/* Uses signed min/max values to inform unsigned, and vice-versa */
static void __reg32_deduce_bounds(struct bpf_reg_state *reg)
{
	/* Learn sign from signed bounds.
	 * If we cannot cross the sign boundary, then signed and unsigned bounds
	 * are the same, so combine. This works even in the negative case, e.g.
	 * -3 s<= x s<= -1 implies 0xf...fd u<= x u<= 0xf...ff.
	 */
	if (reg->s32_min_value >= 0 || reg->s32_max_value < 0) {
		reg->s32_min_value = reg->u32_min_value =
			max_t(u32, reg->s32_min_value, reg->u32_min_value);
		reg->s32_max_value = reg->u32_max_value =
			min_t(u32, reg->s32_max_value, reg->u32_max_value);
		return;
	}
	/* Learn sign from unsigned bounds. Signed bounds cross the sign
	 * boundary, so we must be careful.
	 */
	if ((s32)reg->u32_max_value >= 0) {
		/* Positive. We can't learn anything from the smin, but smax
		 * is positive, hence safe.
		 */
		reg->s32_min_value = reg->u32_min_value;
		reg->s32_max_value = reg->u32_max_value =
			min_t(u32, reg->s32_max_value, reg->u32_max_value);
	} else if ((s32)reg->u32_min_value < 0) {
		/* Negative. We can't learn anything from the smax, but smin
		 * is negative, hence safe.
		 */
		reg->s32_min_value = reg->u32_min_value =
			max_t(u32, reg->s32_min_value, reg->u32_min_value);
		reg->s32_max_value = reg->u32_max_value;
	}
}

static void __reg64_deduce_bounds(struct bpf_reg_state *reg)
{
	/* Learn sign from signed bounds.
	 * If we cannot cross the sign boundary, then signed and unsigned bounds
	 * are the same, so combine. This works even in the negative case, e.g.
	 * -3 s<= x s<= -1 implies 0xf...fd u<= x u<= 0xf...ff.
	 */
	if (reg->smin_value >= 0 || reg->smax_value < 0) {
		reg->smin_value = reg->umin_value = max_t(u64, reg->smin_value,
							  reg->umin_value);
		reg->smax_value = reg->umax_value = min_t(u64, reg->smax_value,
							  reg->umax_value);
		return;
	}
	/* Learn sign from unsigned bounds. Signed bounds cross the sign
	 * boundary, so we must be careful.
	 */
	if ((s64)reg->umax_value >= 0) {
		/* Positive. We can't learn anything from the smin, but smax
		 * is positive, hence safe.
		 */
		reg->smin_value = reg->umin_value;
		reg->smax_value = reg->umax_value = min_t(u64, reg->smax_value,
							  reg->umax_value);
	} else if ((s64)reg->umin_value < 0) {
		/* Negative. We can't learn anything from the smax, but smin
		 * is negative, hence safe.
		 */
		reg->smin_value = reg->umin_value = max_t(u64, reg->smin_value,
							  reg->umin_value);
		reg->smax_value = reg->umax_value;
	}
}

static void __reg_deduce_bounds(struct bpf_reg_state *reg)
{
	__reg32_deduce_bounds(reg);
	__reg64_deduce_bounds(reg);
}
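/* Worked example (illustrative numbers): if smin = 4 and smax = 100, the
 * value cannot cross the sign boundary, so signed and unsigned ranges
 * describe the same set and both collapse to [4, 100]. Conversely, knowing
 * only umax = 50 implies smax <= 50, since every value <= 50 is non-negative
 * when reinterpreted as signed.
 */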
/* Attempts to improve var_off based on unsigned min/max information */
static void __reg_bound_offset(struct bpf_reg_state *reg)
{
	struct tnum var64_off = tnum_intersect(reg->var_off,
					       tnum_range(reg->umin_value,
							  reg->umax_value));
	struct tnum var32_off = tnum_intersect(tnum_subreg(var64_off),
					       tnum_range(reg->u32_min_value,
							  reg->u32_max_value));

	reg->var_off = tnum_or(tnum_clear_subreg(var64_off), var32_off);
}

static void reg_bounds_sync(struct bpf_reg_state *reg)
{
	/* We might have learned new bounds from the var_off. */
	__update_reg_bounds(reg);
	/* We might have learned something about the sign bit. */
	__reg_deduce_bounds(reg);
	/* We might have learned some bits from the bounds. */
	__reg_bound_offset(reg);
	/* Intersecting with the old var_off might have improved our bounds
	 * slightly, e.g. if umax was 0x7f...f and var_off was (0; 0xf...fc),
	 * then new var_off is (0; 0x7f...fc) which improves our umax.
	 */
	__update_reg_bounds(reg);
}
static bool __reg32_bound_s64(s32 a)
{
	return a >= 0 && a <= S32_MAX;
}

static void __reg_assign_32_into_64(struct bpf_reg_state *reg)
{
	reg->umin_value = reg->u32_min_value;
	reg->umax_value = reg->u32_max_value;

	/* Attempt to pull 32-bit signed bounds into 64-bit bounds but must
	 * be positive otherwise set to worse case bounds and refine later
	 * from tnum.
	 */
	if (__reg32_bound_s64(reg->s32_min_value) &&
	    __reg32_bound_s64(reg->s32_max_value)) {
		reg->smin_value = reg->s32_min_value;
		reg->smax_value = reg->s32_max_value;
	} else {
		reg->smin_value = 0;
		reg->smax_value = U32_MAX;
	}
}

static void __reg_combine_32_into_64(struct bpf_reg_state *reg)
{
	/* special case when 64-bit register has upper 32-bit register
	 * zeroed. Typically happens after zext or <<32, >>32 sequence
	 * allowing us to use 32-bit bounds directly,
	 */
	if (tnum_equals_const(tnum_clear_subreg(reg->var_off), 0)) {
		__reg_assign_32_into_64(reg);
	} else {
		/* Otherwise the best we can do is push lower 32bit known and
		 * unknown bits into register (var_off set from jmp logic)
		 * then learn as much as possible from the 64-bit tnum
		 * known and unknown bits. The previous smin/smax bounds are
		 * invalid here because of jmp32 compare so mark them unknown
		 * so they do not impact tnum bounds calculation.
		 */
		__mark_reg64_unbounded(reg);
	}
	reg_bounds_sync(reg);
}

static bool __reg64_bound_s32(s64 a)
{
	return a >= S32_MIN && a <= S32_MAX;
}

static bool __reg64_bound_u32(u64 a)
{
	return a >= U32_MIN && a <= U32_MAX;
}

static void __reg_combine_64_into_32(struct bpf_reg_state *reg)
{
	__mark_reg32_unbounded(reg);
	if (__reg64_bound_s32(reg->smin_value) && __reg64_bound_s32(reg->smax_value)) {
		reg->s32_min_value = (s32)reg->smin_value;
		reg->s32_max_value = (s32)reg->smax_value;
	}
	if (__reg64_bound_u32(reg->umin_value) && __reg64_bound_u32(reg->umax_value)) {
		reg->u32_min_value = (u32)reg->umin_value;
		reg->u32_max_value = (u32)reg->umax_value;
	}
	reg_bounds_sync(reg);
}
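/* Illustrative example: 64-bit bounds of [16, 100] fit both s32 and u32, so
 * the 32-bit bounds become [16, 100] as well. Bounds of [0, 1ULL << 33] do
 * not: the low subregister may wrap, so the 32-bit bounds are left unbounded
 * by __mark_reg32_unbounded() and can only be refined via the tnum later.
 */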
/* Mark a register as having a completely unknown (scalar) value. */
static void __mark_reg_unknown(const struct bpf_verifier_env *env,
			       struct bpf_reg_state *reg)
{
	/*
	 * Clear type, off, and union(map_ptr, range) and
	 * padding between 'type' and union
	 */
	memset(reg, 0, offsetof(struct bpf_reg_state, var_off));
	reg->type = SCALAR_VALUE;
	reg->ref_obj_id = 0;
	reg->var_off = tnum_unknown;
	reg->precise = !env->bpf_capable;
	__mark_reg_unbounded(reg);
}

static void mark_reg_unknown(struct bpf_verifier_env *env,
			     struct bpf_reg_state *regs, u32 regno)
{
	if (WARN_ON(regno >= MAX_BPF_REG)) {
		verbose(env, "mark_reg_unknown(regs, %u)\n", regno);
		/* Something bad happened, let's kill all regs except FP */
		for (regno = 0; regno < BPF_REG_FP; regno++)
			__mark_reg_not_init(env, regs + regno);
		return;
	}
	__mark_reg_unknown(env, regs + regno);
}

static void __mark_reg_not_init(const struct bpf_verifier_env *env,
				struct bpf_reg_state *reg)
{
	__mark_reg_unknown(env, reg);
	reg->type = NOT_INIT;
}

static void mark_reg_not_init(struct bpf_verifier_env *env,
			      struct bpf_reg_state *regs, u32 regno)
{
	if (WARN_ON(regno >= MAX_BPF_REG)) {
		verbose(env, "mark_reg_not_init(regs, %u)\n", regno);
		/* Something bad happened, let's kill all regs except FP */
		for (regno = 0; regno < BPF_REG_FP; regno++)
			__mark_reg_not_init(env, regs + regno);
		return;
	}
	__mark_reg_not_init(env, regs + regno);
}

static void mark_btf_ld_reg(struct bpf_verifier_env *env,
			    struct bpf_reg_state *regs, u32 regno,
			    enum bpf_reg_type reg_type,
			    struct btf *btf, u32 btf_id,
			    enum bpf_type_flag flag)
{
	if (reg_type == SCALAR_VALUE) {
		mark_reg_unknown(env, regs, regno);
		return;
	}
	mark_reg_known_zero(env, regs, regno);
	regs[regno].type = PTR_TO_BTF_ID | flag;
	regs[regno].btf = btf;
	regs[regno].btf_id = btf_id;
}
#define DEF_NOT_SUBREG	(0)
static void init_reg_state(struct bpf_verifier_env *env,
			   struct bpf_func_state *state)
{
	struct bpf_reg_state *regs = state->regs;
	int i;

	for (i = 0; i < MAX_BPF_REG; i++) {
		mark_reg_not_init(env, regs, i);
		regs[i].live = REG_LIVE_NONE;
		regs[i].parent = NULL;
		regs[i].subreg_def = DEF_NOT_SUBREG;
	}

	/* frame pointer */
	regs[BPF_REG_FP].type = PTR_TO_STACK;
	mark_reg_known_zero(env, regs, BPF_REG_FP);
	regs[BPF_REG_FP].frameno = state->frameno;
}

#define BPF_MAIN_FUNC (-1)
static void init_func_state(struct bpf_verifier_env *env,
			    struct bpf_func_state *state,
			    int callsite, int frameno, int subprogno)
{
	state->callsite = callsite;
	state->frameno = frameno;
	state->subprogno = subprogno;
	state->callback_ret_range = tnum_range(0, 0);
	init_reg_state(env, state);
	mark_verifier_state_scratched(env);
}
/* Similar to push_stack(), but for async callbacks */
static struct bpf_verifier_state *push_async_cb(struct bpf_verifier_env *env,
						int insn_idx, int prev_insn_idx,
						int subprog)
{
	struct bpf_verifier_stack_elem *elem;
	struct bpf_func_state *frame;

	elem = kzalloc(sizeof(struct bpf_verifier_stack_elem), GFP_KERNEL);
	if (!elem)
		goto err;

	elem->insn_idx = insn_idx;
	elem->prev_insn_idx = prev_insn_idx;
	elem->next = env->head;
	elem->log_pos = env->log.end_pos;
	env->head = elem;
	env->stack_size++;
	if (env->stack_size > BPF_COMPLEXITY_LIMIT_JMP_SEQ) {
		verbose(env,
			"The sequence of %d jumps is too complex for async cb.\n",
			env->stack_size);
		goto err;
	}
	/* Unlike push_stack() do not copy_verifier_state().
	 * The caller state doesn't matter.
	 * This is async callback. It starts in a fresh stack.
	 * Initialize it similar to do_check_common().
	 */
	elem->st.branches = 1;
	frame = kzalloc(sizeof(*frame), GFP_KERNEL);
	if (!frame)
		goto err;
	init_func_state(env, frame,
			BPF_MAIN_FUNC /* callsite */,
			0 /* frameno within this callchain */,
			subprog /* subprog number within this prog */);
	elem->st.frame[0] = frame;
	return &elem->st;
err:
	free_verifier_state(env->cur_state, true);
	env->cur_state = NULL;
	/* pop all elements and return */
	while (!pop_stack(env, NULL, NULL, false));
	return NULL;
}

enum reg_arg_type {
	SRC_OP,		/* register is used as source operand */
	DST_OP,		/* register is used as destination operand */
	DST_OP_NO_MARK	/* same as above, check only, don't mark */
};
static int cmp_subprogs(const void *a, const void *b)
{
	return ((struct bpf_subprog_info *)a)->start -
	       ((struct bpf_subprog_info *)b)->start;
}

static int find_subprog(struct bpf_verifier_env *env, int off)
{
	struct bpf_subprog_info *p;

	p = bsearch(&off, env->subprog_info, env->subprog_cnt,
		    sizeof(env->subprog_info[0]), cmp_subprogs);
	if (!p)
		return -ENOENT;
	return p - env->subprog_info;
}

static int add_subprog(struct bpf_verifier_env *env, int off)
{
	int insn_cnt = env->prog->len;
	int ret;

	if (off >= insn_cnt || off < 0) {
		verbose(env, "call to invalid destination\n");
		return -EINVAL;
	}
	ret = find_subprog(env, off);
	if (ret >= 0)
		return ret;
	if (env->subprog_cnt >= BPF_MAX_SUBPROGS) {
		verbose(env, "too many subprograms\n");
		return -E2BIG;
	}
	/* determine subprog starts. The end is one before the next starts */
	env->subprog_info[env->subprog_cnt++].start = off;
	sort(env->subprog_info, env->subprog_cnt,
	     sizeof(env->subprog_info[0]), cmp_subprogs, NULL);
	return env->subprog_cnt - 1;
}
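/* Example (illustrative): for subprogram starts {0, 10, 25}, repeated
 * add_subprog() calls keep subprog_info[].start sorted, so
 * find_subprog(env, 10) bsearch()es the table and returns index 1, while
 * find_subprog(env, 11) returns -ENOENT because 11 is not a recorded start.
 */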
static int bpf_find_exception_callback_insn_off(struct bpf_verifier_env *env)
{
	struct bpf_prog_aux *aux = env->prog->aux;
	struct btf *btf = aux->btf;
	const struct btf_type *t;
	u32 main_btf_id, id;
	const char *name;
	int ret, i;

	/* Non-zero func_info_cnt implies valid btf */
	if (!aux->func_info_cnt)
		return 0;
	main_btf_id = aux->func_info[0].type_id;

	t = btf_type_by_id(btf, main_btf_id);
	if (!t) {
		verbose(env, "invalid btf id for main subprog in func_info\n");
		return -EINVAL;
	}

	name = btf_find_decl_tag_value(btf, t, -1, "exception_callback:");
	if (IS_ERR(name)) {
		ret = PTR_ERR(name);
		/* If there is no tag present, there is no exception callback */
		if (ret == -ENOENT)
			ret = 0;
		else if (ret == -EEXIST)
			verbose(env, "multiple exception callback tags for main subprog\n");
		return ret;
	}

	ret = btf_find_by_name_kind(btf, name, BTF_KIND_FUNC);
	if (ret < 0) {
		verbose(env, "exception callback '%s' could not be found in BTF\n", name);
		return ret;
	}
	id = ret;
	t = btf_type_by_id(btf, id);
	if (btf_func_linkage(t) != BTF_FUNC_GLOBAL) {
		verbose(env, "exception callback '%s' must have global linkage\n", name);
		return -EINVAL;
	}
	ret = 0;
	for (i = 0; i < aux->func_info_cnt; i++) {
		if (aux->func_info[i].type_id != id)
			continue;
		ret = aux->func_info[i].insn_off;
		/* Further func_info and subprog checks will also happen
		 * later, so assume this is the right insn_off for now.
		 */
		if (!ret) {
			verbose(env, "invalid exception callback insn_off in func_info: 0\n");
			ret = -EINVAL;
		}
		break;
	}
	if (!ret) {
		verbose(env, "exception callback type id not found in func_info\n");
		ret = -EINVAL;
	}
	return ret;
}
#define MAX_KFUNC_DESCS 256
#define MAX_KFUNC_BTFS	256

struct bpf_kfunc_desc {
	struct btf_func_model func_model;
	u32 func_id;
	s32 imm;
	u16 offset;
	unsigned long addr;
};

struct bpf_kfunc_btf {
	struct btf *btf;
	struct module *module;
	u16 offset;
};

struct bpf_kfunc_desc_tab {
	/* Sorted by func_id (BTF ID) and offset (fd_array offset) during
	 * verification. JITs do lookups by bpf_insn, where func_id may not be
	 * available, therefore at the end of verification do_misc_fixups()
	 * sorts this by imm and offset.
	 */
	struct bpf_kfunc_desc descs[MAX_KFUNC_DESCS];
	u32 nr_descs;
};

struct bpf_kfunc_btf_tab {
	struct bpf_kfunc_btf descs[MAX_KFUNC_BTFS];
	u32 nr_descs;
};

static int kfunc_desc_cmp_by_id_off(const void *a, const void *b)
{
	const struct bpf_kfunc_desc *d0 = a;
	const struct bpf_kfunc_desc *d1 = b;

	/* func_id is not greater than BTF_MAX_TYPE */
	return d0->func_id - d1->func_id ?: d0->offset - d1->offset;
}

static int kfunc_btf_cmp_by_off(const void *a, const void *b)
{
	const struct bpf_kfunc_btf *d0 = a;
	const struct bpf_kfunc_btf *d1 = b;

	return d0->offset - d1->offset;
}
static const struct bpf_kfunc_desc *
find_kfunc_desc(const struct bpf_prog *prog, u32 func_id, u16 offset)
{
	struct bpf_kfunc_desc desc = {
		.func_id = func_id,
		.offset = offset,
	};
	struct bpf_kfunc_desc_tab *tab;

	tab = prog->aux->kfunc_tab;
	return bsearch(&desc, tab->descs, tab->nr_descs,
		       sizeof(tab->descs[0]), kfunc_desc_cmp_by_id_off);
}

int bpf_get_kfunc_addr(const struct bpf_prog *prog, u32 func_id,
		       u16 btf_fd_idx, u8 **func_addr)
{
	const struct bpf_kfunc_desc *desc;

	desc = find_kfunc_desc(prog, func_id, btf_fd_idx);
	if (!desc)
		return -EFAULT;

	*func_addr = (u8 *)desc->addr;
	return 0;
}
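/* Lookup example (illustrative): with descriptors for (func_id=10, offset=0)
 * and (func_id=10, offset=3), kfunc_desc_cmp_by_id_off() orders entries by
 * func_id first and offset second, so find_kfunc_desc(prog, 10, 3) finds the
 * second entry; a (func_id, offset) pair that was never added yields NULL
 * and bpf_get_kfunc_addr() reports -EFAULT.
 */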
static struct btf *__find_kfunc_desc_btf(struct bpf_verifier_env *env,
					 s16 offset)
{
	struct bpf_kfunc_btf kf_btf = { .offset = offset };
	struct bpf_kfunc_btf_tab *tab;
	struct bpf_kfunc_btf *b;
	struct module *mod;
	struct btf *btf;
	int btf_fd;

	tab = env->prog->aux->kfunc_btf_tab;
	b = bsearch(&kf_btf, tab->descs, tab->nr_descs,
		    sizeof(tab->descs[0]), kfunc_btf_cmp_by_off);
	if (!b) {
		if (tab->nr_descs == MAX_KFUNC_BTFS) {
			verbose(env, "too many different module BTFs\n");
			return ERR_PTR(-E2BIG);
		}

		if (bpfptr_is_null(env->fd_array)) {
			verbose(env, "kfunc offset > 0 without fd_array is invalid\n");
			return ERR_PTR(-EPROTO);
		}

		if (copy_from_bpfptr_offset(&btf_fd, env->fd_array,
					    offset * sizeof(btf_fd),
					    sizeof(btf_fd)))
			return ERR_PTR(-EFAULT);

		btf = btf_get_by_fd(btf_fd);
		if (IS_ERR(btf)) {
			verbose(env, "invalid module BTF fd specified\n");
			return btf;
		}

		if (!btf_is_module(btf)) {
			verbose(env, "BTF fd for kfunc is not a module BTF\n");
			btf_put(btf);
			return ERR_PTR(-EINVAL);
		}

		mod = btf_try_get_module(btf);
		if (!mod) {
			btf_put(btf);
			return ERR_PTR(-ENXIO);
		}

		b = &tab->descs[tab->nr_descs++];
		b->btf = btf;
		b->module = mod;
		b->offset = offset;

		sort(tab->descs, tab->nr_descs, sizeof(tab->descs[0]),
		     kfunc_btf_cmp_by_off, NULL);
	}
	return b->btf;
}

void bpf_free_kfunc_btf_tab(struct bpf_kfunc_btf_tab *tab)
{
	if (!tab)
		return;
	while (tab->nr_descs--) {
		module_put(tab->descs[tab->nr_descs].module);
		btf_put(tab->descs[tab->nr_descs].btf);
	}
	kfree(tab);
}

static struct btf *find_kfunc_desc_btf(struct bpf_verifier_env *env, s16 offset)
{
	if (offset) {
		if (offset < 0) {
			/* In the future, this can be allowed to increase limit
			 * of fd index into fd_array, interpreted as u16.
			 */
			verbose(env, "negative offset disallowed for kernel module function call\n");
			return ERR_PTR(-EINVAL);
		}

		return __find_kfunc_desc_btf(env, offset);
	}
	return btf_vmlinux ?: ERR_PTR(-ENOENT);
}
static int add_kfunc_call(struct bpf_verifier_env *env, u32 func_id, s16 offset)
{
	const struct btf_type *func, *func_proto;
	struct bpf_kfunc_btf_tab *btf_tab;
	struct bpf_kfunc_desc_tab *tab;
	struct bpf_prog_aux *prog_aux;
	struct bpf_kfunc_desc *desc;
	const char *func_name;
	struct btf *desc_btf;
	unsigned long call_imm;
	unsigned long addr;
	int err;

	prog_aux = env->prog->aux;
	tab = prog_aux->kfunc_tab;
	btf_tab = prog_aux->kfunc_btf_tab;
	if (!tab) {
		if (!btf_vmlinux) {
			verbose(env, "calling kernel function is not supported without CONFIG_DEBUG_INFO_BTF\n");
			return -ENOTSUPP;
		}

		if (!env->prog->jit_requested) {
			verbose(env, "JIT is required for calling kernel function\n");
			return -ENOTSUPP;
		}

		if (!bpf_jit_supports_kfunc_call()) {
			verbose(env, "JIT does not support calling kernel function\n");
			return -ENOTSUPP;
		}

		if (!env->prog->gpl_compatible) {
			verbose(env, "cannot call kernel function from non-GPL compatible program\n");
			return -EINVAL;
		}

		tab = kzalloc(sizeof(*tab), GFP_KERNEL);
		if (!tab)
			return -ENOMEM;
		prog_aux->kfunc_tab = tab;
	}

	/* func_id == 0 is always invalid, but instead of returning an error, be
	 * conservative and wait until the code elimination pass before returning
	 * error, so that invalid calls that get pruned out can be in BPF programs
	 * loaded from userspace. It is also required that offset be untouched
	 * for such calls.
	 */
	if (!func_id && !offset)
		return 0;

	if (!btf_tab && offset) {
		btf_tab = kzalloc(sizeof(*btf_tab), GFP_KERNEL);
		if (!btf_tab)
			return -ENOMEM;
		prog_aux->kfunc_btf_tab = btf_tab;
	}

	desc_btf = find_kfunc_desc_btf(env, offset);
	if (IS_ERR(desc_btf)) {
		verbose(env, "failed to find BTF for kernel function\n");
		return PTR_ERR(desc_btf);
	}

	if (find_kfunc_desc(env->prog, func_id, offset))
		return 0;

	if (tab->nr_descs == MAX_KFUNC_DESCS) {
		verbose(env, "too many different kernel function calls\n");
		return -E2BIG;
	}

	func = btf_type_by_id(desc_btf, func_id);
	if (!func || !btf_type_is_func(func)) {
		verbose(env, "kernel btf_id %u is not a function\n",
			func_id);
		return -EINVAL;
	}
	func_proto = btf_type_by_id(desc_btf, func->type);
	if (!func_proto || !btf_type_is_func_proto(func_proto)) {
		verbose(env, "kernel function btf_id %u does not have a valid func_proto\n",
			func_id);
		return -EINVAL;
	}

	func_name = btf_name_by_offset(desc_btf, func->name_off);
	addr = kallsyms_lookup_name(func_name);
	if (!addr) {
		verbose(env, "cannot find address for kernel function %s\n",
			func_name);
		return -EINVAL;
	}
	specialize_kfunc(env, func_id, offset, &addr);

	if (bpf_jit_supports_far_kfunc_call()) {
		call_imm = func_id;
	} else {
		call_imm = BPF_CALL_IMM(addr);
		/* Check whether the relative offset overflows desc->imm */
		if ((unsigned long)(s32)call_imm != call_imm) {
			verbose(env, "address of kernel function %s is out of range\n",
				func_name);
			return -EINVAL;
		}
	}

	if (bpf_dev_bound_kfunc_id(func_id)) {
		err = bpf_dev_bound_kfunc_check(&env->log, prog_aux);
		if (err)
			return err;
	}

	desc = &tab->descs[tab->nr_descs++];
	desc->func_id = func_id;
	desc->imm = call_imm;
	desc->offset = offset;
	desc->addr = addr;
	err = btf_distill_func_proto(&env->log, desc_btf,
				     func_proto, func_name,
				     &desc->func_model);
	if (!err)
		sort(tab->descs, tab->nr_descs, sizeof(tab->descs[0]),
		     kfunc_desc_cmp_by_id_off, NULL);
	return err;
}
static int kfunc_desc_cmp_by_imm_off(const void *a, const void *b)
{
	const struct bpf_kfunc_desc *d0 = a;
	const struct bpf_kfunc_desc *d1 = b;

	if (d0->imm != d1->imm)
		return d0->imm < d1->imm ? -1 : 1;
	if (d0->offset != d1->offset)
		return d0->offset < d1->offset ? -1 : 1;
	return 0;
}

static void sort_kfunc_descs_by_imm_off(struct bpf_prog *prog)
{
	struct bpf_kfunc_desc_tab *tab;

	tab = prog->aux->kfunc_tab;
	if (!tab)
		return;

	sort(tab->descs, tab->nr_descs, sizeof(tab->descs[0]),
	     kfunc_desc_cmp_by_imm_off, NULL);
}

bool bpf_prog_has_kfunc_call(const struct bpf_prog *prog)
{
	return !!prog->aux->kfunc_tab;
}

const struct btf_func_model *
bpf_jit_find_kfunc_model(const struct bpf_prog *prog,
			 const struct bpf_insn *insn)
{
	const struct bpf_kfunc_desc desc = {
		.imm = insn->imm,
		.offset = insn->off,
	};
	const struct bpf_kfunc_desc *res;
	struct bpf_kfunc_desc_tab *tab;

	tab = prog->aux->kfunc_tab;
	res = bsearch(&desc, tab->descs, tab->nr_descs,
		      sizeof(tab->descs[0]), kfunc_desc_cmp_by_imm_off);

	return res ? &res->func_model : NULL;
}
static int add_subprog_and_kfunc(struct bpf_verifier_env *env)
{
	struct bpf_subprog_info *subprog = env->subprog_info;
	int i, ret, insn_cnt = env->prog->len, ex_cb_insn;
	struct bpf_insn *insn = env->prog->insnsi;

	/* Add entry function. */
	ret = add_subprog(env, 0);
	if (ret)
		return ret;

	for (i = 0; i < insn_cnt; i++, insn++) {
		if (!bpf_pseudo_func(insn) && !bpf_pseudo_call(insn) &&
		    !bpf_pseudo_kfunc_call(insn))
			continue;

		if (!env->bpf_capable) {
			verbose(env, "loading/calling other bpf or kernel functions are allowed for CAP_BPF and CAP_SYS_ADMIN\n");
			return -EPERM;
		}

		if (bpf_pseudo_func(insn) || bpf_pseudo_call(insn))
			ret = add_subprog(env, i + insn->imm + 1);
		else
			ret = add_kfunc_call(env, insn->imm, insn->off);

		if (ret < 0)
			return ret;
	}

	ret = bpf_find_exception_callback_insn_off(env);
	if (ret < 0)
		return ret;
	ex_cb_insn = ret;

	/* If ex_cb_insn > 0, this means that the main program has a subprog
	 * marked using BTF decl tag to serve as the exception callback.
	 */
	if (ex_cb_insn) {
		ret = add_subprog(env, ex_cb_insn);
		if (ret < 0)
			return ret;
		for (i = 1; i < env->subprog_cnt; i++) {
			if (env->subprog_info[i].start != ex_cb_insn)
				continue;
			env->exception_callback_subprog = i;
			break;
		}
	}

	/* Add a fake 'exit' subprog which could simplify subprog iteration
	 * logic. 'subprog_cnt' should not be increased.
	 */
	subprog[env->subprog_cnt].start = insn_cnt;

	if (env->log.level & BPF_LOG_LEVEL2)
		for (i = 0; i < env->subprog_cnt; i++)
			verbose(env, "func#%d @%d\n", i, subprog[i].start);

	return 0;
}
static int check_subprogs(struct bpf_verifier_env *env)
{
	int i, subprog_start, subprog_end, off, cur_subprog = 0;
	struct bpf_subprog_info *subprog = env->subprog_info;
	struct bpf_insn *insn = env->prog->insnsi;
	int insn_cnt = env->prog->len;

	/* now check that all jumps are within the same subprog */
	subprog_start = subprog[cur_subprog].start;
	subprog_end = subprog[cur_subprog + 1].start;
	for (i = 0; i < insn_cnt; i++) {
		u8 code = insn[i].code;

		if (code == (BPF_JMP | BPF_CALL) &&
		    insn[i].src_reg == 0 &&
		    insn[i].imm == BPF_FUNC_tail_call)
			subprog[cur_subprog].has_tail_call = true;
		if (BPF_CLASS(code) == BPF_LD &&
		    (BPF_MODE(code) == BPF_ABS || BPF_MODE(code) == BPF_IND))
			subprog[cur_subprog].has_ld_abs = true;
		if (BPF_CLASS(code) != BPF_JMP && BPF_CLASS(code) != BPF_JMP32)
			goto next;
		if (BPF_OP(code) == BPF_EXIT || BPF_OP(code) == BPF_CALL)
			goto next;
		if (code == (BPF_JMP32 | BPF_JA))
			off = i + insn[i].imm + 1;
		else
			off = i + insn[i].off + 1;
		if (off < subprog_start || off >= subprog_end) {
			verbose(env, "jump out of range from insn %d to %d\n", i, off);
			return -EINVAL;
		}
next:
		if (i == subprog_end - 1) {
			/* to avoid fall-through from one subprog into another
			 * the last insn of the subprog should be either exit
			 * or unconditional jump back or bpf_throw call
			 */
			if (code != (BPF_JMP | BPF_EXIT) &&
			    code != (BPF_JMP32 | BPF_JA) &&
			    code != (BPF_JMP | BPF_JA)) {
				verbose(env, "last insn is not an exit or jmp\n");
				return -EINVAL;
			}
			subprog_start = subprog_end;
			cur_subprog++;
			if (cur_subprog < env->subprog_cnt)
				subprog_end = subprog[cur_subprog + 1].start;
		}
	}
	return 0;
}
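/* Example (illustrative): with subprogram starts {0, 10} and 20 instructions
 * total, the ranges checked are [0, 10) and [10, 20). A jump at insn 7 that
 * targets insn 12 fails the off < subprog_start || off >= subprog_end test
 * above: a subprogram may only be entered via a call, never by a plain jump
 * across the boundary.
 */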
/* Parentage chain of this register (or stack slot) should take care of all
 * issues like callee-saved registers, stack slot allocation time, etc.
 */
static int mark_reg_read(struct bpf_verifier_env *env,
			 const struct bpf_reg_state *state,
			 struct bpf_reg_state *parent, u8 flag)
{
	bool writes = parent == state->parent; /* Observe write marks */
	int cnt = 0;

	while (parent) {
		/* if read wasn't screened by an earlier write ... */
		if (writes && state->live & REG_LIVE_WRITTEN)
			break;
		if (parent->live & REG_LIVE_DONE) {
			verbose(env, "verifier BUG type %s var_off %lld off %d\n",
				reg_type_str(env, parent->type),
				parent->var_off.value, parent->off);
			return -EFAULT;
		}
		/* The first condition is more likely to be true than the
		 * second, checked it first.
		 */
		if ((parent->live & REG_LIVE_READ) == flag ||
		    parent->live & REG_LIVE_READ64)
			/* The parentage chain never changes and
			 * this parent was already marked as LIVE_READ.
			 * There is no need to keep walking the chain again and
			 * keep re-marking all parents as LIVE_READ.
			 * This case happens when the same register is read
			 * multiple times without writes into it in-between.
			 * Also, if parent has the stronger REG_LIVE_READ64 set,
			 * then no need to set the weak REG_LIVE_READ32.
			 */
			break;
		/* ... then we depend on parent's value */
		parent->live |= flag;
		/* REG_LIVE_READ64 overrides REG_LIVE_READ32. */
		if (flag == REG_LIVE_READ64)
			parent->live &= ~REG_LIVE_READ32;
		state = parent;
		parent = state->parent;
		writes = true;
		cnt++;
	}

	if (env->longest_mark_read_walk < cnt)
		env->longest_mark_read_walk = cnt;
	return 0;
}
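/* Example (sketch): when a register is read, its read mark is propagated
 * parent by parent up the chain until it reaches a state where that register
 * has REG_LIVE_WRITTEN (the write screens the read) or a parent that already
 * carries an equal or stronger read mark. States above the screening write
 * never see this read and can be pruned without matching that register.
 */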
static int mark_dynptr_read(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
{
	struct bpf_func_state *state = func(env, reg);
	int spi, ret;

	/* For CONST_PTR_TO_DYNPTR, it must have already been done by
	 * check_reg_arg in check_helper_call and mark_btf_func_reg_size in
	 * check_kfunc_call.
	 */
	if (reg->type == CONST_PTR_TO_DYNPTR)
		return 0;
	spi = dynptr_get_spi(env, reg);
	if (spi < 0)
		return spi;
	/* Caller ensures dynptr is valid and initialized, which means spi is in
	 * bounds and spi is the first dynptr slot. Simply mark stack slot as
	 * read.
	 */
	ret = mark_reg_read(env, &state->stack[spi].spilled_ptr,
			    state->stack[spi].spilled_ptr.parent, REG_LIVE_READ64);
	if (ret)
		return ret;
	return mark_reg_read(env, &state->stack[spi - 1].spilled_ptr,
			     state->stack[spi - 1].spilled_ptr.parent, REG_LIVE_READ64);
}

static int mark_iter_read(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
			  int spi, int nr_slots)
{
	struct bpf_func_state *state = func(env, reg);
	int err, i;

	for (i = 0; i < nr_slots; i++) {
		struct bpf_reg_state *st = &state->stack[spi - i].spilled_ptr;

		err = mark_reg_read(env, st, st->parent, REG_LIVE_READ64);
		if (err)
			return err;

		mark_stack_slot_scratched(env, spi - i);
	}

	return 0;
}
/* This function is supposed to be used by the following 32-bit optimization
 * code only. It returns TRUE if the source or destination register operates
 * on 64-bit, otherwise return FALSE.
 */
static bool is_reg64(struct bpf_verifier_env *env, struct bpf_insn *insn,
		     u32 regno, struct bpf_reg_state *reg, enum reg_arg_type t)
{
	u8 code, class, op;

	code = insn->code;
	class = BPF_CLASS(code);
	op = BPF_OP(code);
	if (class == BPF_JMP) {
		/* BPF_EXIT for "main" will reach here. Return TRUE
		 * conservatively.
		 */
		if (op == BPF_EXIT)
			return true;
		if (op == BPF_CALL) {
			/* BPF to BPF call will reach here because of marking
			 * caller saved clobber with DST_OP_NO_MARK for which we
			 * don't care the register def because they are anyway
			 * marked as NOT_INIT already.
			 */
			if (insn->src_reg == BPF_PSEUDO_CALL)
				return false;
			/* Helper call will reach here because of arg type
			 * check, conservatively return TRUE.
			 */
			if (t == SRC_OP)
				return true;

			return false;
		}
	}

	if (class == BPF_ALU64 && op == BPF_END && (insn->imm == 16 || insn->imm == 32))
		return false;

	if (class == BPF_ALU64 || class == BPF_JMP ||
	    (class == BPF_ALU && op == BPF_END && insn->imm == 64))
		return true;

	if (class == BPF_ALU || class == BPF_JMP32)
		return false;

	if (class == BPF_LDX) {
		if (t != SRC_OP)
			return BPF_SIZE(code) == BPF_DW || BPF_MODE(code) == BPF_MEMSX;
		/* LDX source must be ptr. */
		return true;
	}

	if (class == BPF_STX) {
		/* BPF_STX (including atomic variants) has multiple source
		 * operands, one of which is a ptr. Check whether the caller is
		 * asking about it.
		 */
		if (t == SRC_OP && reg->type != SCALAR_VALUE)
			return true;
		return BPF_SIZE(code) == BPF_DW;
	}

	if (class == BPF_LD) {
		u8 mode = BPF_MODE(code);

		/* LD_IMM64 */
		if (mode == BPF_IMM)
			return true;

		/* Both LD_IND and LD_ABS return 32-bit data. */
		if (t != SRC_OP)
			return false;

		/* Implicit ctx ptr. */
		if (regno == BPF_REG_6)
			return true;

		/* Explicit source could be any width. */
		return true;
	}

	if (class == BPF_ST)
		/* The only source register for BPF_ST is a ptr. */
		return true;

	/* Conservatively return true at default. */
	return true;
}
/* Return the regno defined by the insn, or -1. */
static int insn_def_regno(const struct bpf_insn *insn)
{
	switch (BPF_CLASS(insn->code)) {
	case BPF_JMP:
	case BPF_JMP32:
	case BPF_ST:
		return -1;
	case BPF_STX:
		if (BPF_MODE(insn->code) == BPF_ATOMIC &&
		    (insn->imm & BPF_FETCH)) {
			if (insn->imm == BPF_CMPXCHG)
				return BPF_REG_0;
			else
				return insn->src_reg;
		} else {
			return -1;
		}
	default:
		return insn->dst_reg;
	}
}

/* Return TRUE if INSN has defined any 32-bit value explicitly. */
static bool insn_has_def32(struct bpf_verifier_env *env, struct bpf_insn *insn)
{
	int dst_reg = insn_def_regno(insn);

	if (dst_reg == -1)
		return false;

	return !is_reg64(env, insn, dst_reg, NULL, DST_OP);
}

static void mark_insn_zext(struct bpf_verifier_env *env,
			   struct bpf_reg_state *reg)
{
	s32 def_idx = reg->subreg_def;

	if (def_idx == DEF_NOT_SUBREG)
		return;

	env->insn_aux_data[def_idx - 1].zext_dst = true;
	/* The dst will be zero extended, so won't be sub-register anymore. */
	reg->subreg_def = DEF_NOT_SUBREG;
}
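/* Example (illustrative): "w2 = w1" defines only the low 32 bits of r2, so
 * __check_reg_arg() records the defining insn in r2's subreg_def. If r2 is
 * later read as a full 64-bit source, mark_insn_zext() sets zext_dst on that
 * defining instruction so that an explicit zero-extension can be emitted for
 * JITs that do not implicitly clear the upper half on 32-bit writes.
 */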
static int __check_reg_arg(struct bpf_verifier_env *env, struct bpf_reg_state *regs, u32 regno,
			   enum reg_arg_type t)
{
	struct bpf_insn *insn = env->prog->insnsi + env->insn_idx;
	struct bpf_reg_state *reg;
	bool rw64;

	if (regno >= MAX_BPF_REG) {
		verbose(env, "R%d is invalid\n", regno);
		return -EINVAL;
	}

	mark_reg_scratched(env, regno);

	reg = &regs[regno];
	rw64 = is_reg64(env, insn, regno, reg, t);
	if (t == SRC_OP) {
		/* check whether register used as source operand can be read */
		if (reg->type == NOT_INIT) {
			verbose(env, "R%d !read_ok\n", regno);
			return -EACCES;
		}
		/* We don't need to worry about FP liveness because it's read-only */
		if (regno == BPF_REG_FP)
			return 0;

		if (rw64)
			mark_insn_zext(env, reg);

		return mark_reg_read(env, reg, reg->parent,
				     rw64 ? REG_LIVE_READ64 : REG_LIVE_READ32);
	} else {
		/* check whether register used as dest operand can be written to */
		if (regno == BPF_REG_FP) {
			verbose(env, "frame pointer is read only\n");
			return -EACCES;
		}
		reg->live |= REG_LIVE_WRITTEN;
		reg->subreg_def = rw64 ? DEF_NOT_SUBREG : env->insn_idx + 1;
		if (t == DST_OP)
			mark_reg_unknown(env, regs, regno);
	}
	return 0;
}

static int check_reg_arg(struct bpf_verifier_env *env, u32 regno,
			 enum reg_arg_type t)
{
	struct bpf_verifier_state *vstate = env->cur_state;
	struct bpf_func_state *state = vstate->frame[vstate->curframe];

	return __check_reg_arg(env, state->regs, regno, t);
}
static void mark_jmp_point(struct bpf_verifier_env *env, int idx)
{
	env->insn_aux_data[idx].jmp_point = true;
}

static bool is_jmp_point(struct bpf_verifier_env *env, int insn_idx)
{
	return env->insn_aux_data[insn_idx].jmp_point;
}

/* for any branch, call, exit record the history of jmps in the given state */
static int push_jmp_history(struct bpf_verifier_env *env,
			    struct bpf_verifier_state *cur)
{
	u32 cnt = cur->jmp_history_cnt;
	struct bpf_idx_pair *p;
	size_t alloc_size;

	if (!is_jmp_point(env, env->insn_idx))
		return 0;

	cnt++;
	alloc_size = kmalloc_size_roundup(size_mul(cnt, sizeof(*p)));
	p = krealloc(cur->jmp_history, alloc_size, GFP_USER);
	if (!p)
		return -ENOMEM;
	p[cnt - 1].idx = env->insn_idx;
	p[cnt - 1].prev_idx = env->prev_insn_idx;
	cur->jmp_history = p;
	cur->jmp_history_cnt = cnt;
	return 0;
}
/* Backtrack one insn at a time. If idx is not at the top of recorded
 * history then previous instruction came from straight line execution.
 * Return -ENOENT if we exhausted all instructions within given state.
 *
 * It's legal to have a bit of a looping with the same starting and ending
 * insn index within the same state, e.g.: 3->4->5->3, so just because current
 * instruction index is the same as state's first_idx doesn't mean we are
 * done. If there is still some jump history left, we should keep going. We
 * need to take into account that we might have a jump history between given
 * state's parent and itself, due to checkpointing. In this case, we'll have
 * history entry recording a jump from last instruction of parent state and
 * first instruction of given state.
 */
static int get_prev_insn_idx(struct bpf_verifier_state *st, int i,
			     u32 *history)
{
	u32 cnt = *history;

	if (i == st->first_insn_idx) {
		if (cnt == 0)
			return -ENOENT;
		if (cnt == 1 && st->jmp_history[0].idx == i)
			return -ENOENT;
	}

	if (cnt && st->jmp_history[cnt - 1].idx == i) {
		i = st->jmp_history[cnt - 1].prev_idx;
		(*history)--;
	} else {
		i--;
	}
	return i;
}

static const char *disasm_kfunc_name(void *data, const struct bpf_insn *insn)
{
	const struct btf_type *func;
	struct btf *desc_btf;

	if (insn->src_reg != BPF_PSEUDO_KFUNC_CALL)
		return NULL;

	desc_btf = find_kfunc_desc_btf(data, insn->off);
	if (IS_ERR(desc_btf))
		return "<error>";

	func = btf_type_by_id(desc_btf, insn->imm);
	return btf_name_by_offset(desc_btf, func->name_off);
}
static inline void bt_init(struct backtrack_state *bt, u32 frame)
{
	bt->frame = frame;
}

static inline void bt_reset(struct backtrack_state *bt)
{
	struct bpf_verifier_env *env = bt->env;

	memset(bt, 0, sizeof(*bt));
	bt->env = env;
}

static inline u32 bt_empty(struct backtrack_state *bt)
{
	u64 mask = 0;
	int i;

	for (i = 0; i <= bt->frame; i++)
		mask |= bt->reg_masks[i] | bt->stack_masks[i];

	return mask == 0;
}

static inline int bt_subprog_enter(struct backtrack_state *bt)
{
	if (bt->frame == MAX_CALL_FRAMES - 1) {
		verbose(bt->env, "BUG subprog enter from frame %d\n", bt->frame);
		WARN_ONCE(1, "verifier backtracking bug");
		return -EFAULT;
	}
	bt->frame++;
	return 0;
}

static inline int bt_subprog_exit(struct backtrack_state *bt)
{
	if (bt->frame == 0) {
		verbose(bt->env, "BUG subprog exit from frame 0\n");
		WARN_ONCE(1, "verifier backtracking bug");
		return -EFAULT;
	}
	bt->frame--;
	return 0;
}

static inline void bt_set_frame_reg(struct backtrack_state *bt, u32 frame, u32 reg)
{
	bt->reg_masks[frame] |= 1 << reg;
}

static inline void bt_clear_frame_reg(struct backtrack_state *bt, u32 frame, u32 reg)
{
	bt->reg_masks[frame] &= ~(1 << reg);
}

static inline void bt_set_reg(struct backtrack_state *bt, u32 reg)
{
	bt_set_frame_reg(bt, bt->frame, reg);
}

static inline void bt_clear_reg(struct backtrack_state *bt, u32 reg)
{
	bt_clear_frame_reg(bt, bt->frame, reg);
}

static inline void bt_set_frame_slot(struct backtrack_state *bt, u32 frame, u32 slot)
{
	bt->stack_masks[frame] |= 1ull << slot;
}

static inline void bt_clear_frame_slot(struct backtrack_state *bt, u32 frame, u32 slot)
{
	bt->stack_masks[frame] &= ~(1ull << slot);
}

static inline void bt_set_slot(struct backtrack_state *bt, u32 slot)
{
	bt_set_frame_slot(bt, bt->frame, slot);
}

static inline void bt_clear_slot(struct backtrack_state *bt, u32 slot)
{
	bt_clear_frame_slot(bt, bt->frame, slot);
}

static inline u32 bt_frame_reg_mask(struct backtrack_state *bt, u32 frame)
{
	return bt->reg_masks[frame];
}

static inline u32 bt_reg_mask(struct backtrack_state *bt)
{
	return bt->reg_masks[bt->frame];
}

static inline u64 bt_frame_stack_mask(struct backtrack_state *bt, u32 frame)
{
	return bt->stack_masks[frame];
}

static inline u64 bt_stack_mask(struct backtrack_state *bt)
{
	return bt->stack_masks[bt->frame];
}

static inline bool bt_is_reg_set(struct backtrack_state *bt, u32 reg)
{
	return bt->reg_masks[bt->frame] & (1 << reg);
}

static inline bool bt_is_slot_set(struct backtrack_state *bt, u32 slot)
{
	return bt->stack_masks[bt->frame] & (1ull << slot);
}
/* format registers bitmask, e.g., "r0,r2,r4" for 0x15 mask */
static void fmt_reg_mask(char *buf, ssize_t buf_sz, u32 reg_mask)
{
	DECLARE_BITMAP(mask, 64);
	bool first = true;
	int i, n;

	buf[0] = '\0';

	bitmap_from_u64(mask, reg_mask);
	for_each_set_bit(i, mask, 32) {
		n = snprintf(buf, buf_sz, "%sr%d", first ? "" : ",", i);
		first = false;
		buf += n;
		buf_sz -= n;
		if (buf_sz < 0)
			break;
	}
}

/* format stack slots bitmask, e.g., "-8,-24,-40" for 0x15 mask */
static void fmt_stack_mask(char *buf, ssize_t buf_sz, u64 stack_mask)
{
	DECLARE_BITMAP(mask, 64);
	bool first = true;
	int i, n;

	buf[0] = '\0';

	bitmap_from_u64(mask, stack_mask);
	for_each_set_bit(i, mask, 64) {
		n = snprintf(buf, buf_sz, "%s%d", first ? "" : ",", -(i + 1) * 8);
		first = false;
		buf += n;
		buf_sz -= n;
		if (buf_sz < 0)
			break;
	}
}
static bool calls_callback(struct bpf_verifier_env *env, int insn_idx);

/* For given verifier state backtrack_insn() is called from the last insn to
 * the first insn. Its purpose is to compute a bitmask of registers and
 * stack slots that needs precision in the parent verifier state.
 *
 * @idx is an index of the instruction we are currently processing;
 * @subseq_idx is an index of the subsequent instruction that:
 *   - *would be* executed next, if jump history is viewed in forward order;
 *   - *was* processed previously during backtracking.
 */
static int backtrack_insn(struct bpf_verifier_env *env, int idx, int subseq_idx,
			  struct backtrack_state *bt)
{
	const struct bpf_insn_cbs cbs = {
		.cb_call	= disasm_kfunc_name,
		.cb_print	= verbose,
		.private_data	= env,
	};
	struct bpf_insn *insn = env->prog->insnsi + idx;
	u8 class = BPF_CLASS(insn->code);
	u8 opcode = BPF_OP(insn->code);
	u8 mode = BPF_MODE(insn->code);
	u32 dreg = insn->dst_reg;
	u32 sreg = insn->src_reg;
	u32 spi, i;

	if (insn->code == 0)
		return 0;
	if (env->log.level & BPF_LOG_LEVEL2) {
		fmt_reg_mask(env->tmp_str_buf, TMP_STR_BUF_LEN, bt_reg_mask(bt));
		verbose(env, "mark_precise: frame%d: regs=%s ",
			bt->frame, env->tmp_str_buf);
		fmt_stack_mask(env->tmp_str_buf, TMP_STR_BUF_LEN, bt_stack_mask(bt));
		verbose(env, "stack=%s before ", env->tmp_str_buf);
		verbose(env, "%d: ", idx);
		print_bpf_insn(&cbs, insn, env->allow_ptr_leaks);
	}

	if (class == BPF_ALU || class == BPF_ALU64) {
		if (!bt_is_reg_set(bt, dreg))
			return 0;
		if (opcode == BPF_END || opcode == BPF_NEG) {
			/* sreg is reserved and unused
			 * dreg still need precision before this insn
			 */
			return 0;
		} else if (opcode == BPF_MOV) {
			if (BPF_SRC(insn->code) == BPF_X) {
				/* dreg = sreg or dreg = (s8, s16, s32)sreg
				 * dreg needs precision after this insn
				 * sreg needs precision before this insn
				 */
				bt_clear_reg(bt, dreg);
				bt_set_reg(bt, sreg);
			} else {
				/* dreg = K
				 * dreg needs precision after this insn.
				 * Corresponding register is already marked
				 * as precise=true in this verifier state.
				 * No further markings in parent are necessary
				 */
				bt_clear_reg(bt, dreg);
			}
		} else {
			if (BPF_SRC(insn->code) == BPF_X) {
				/* dreg += sreg
				 * both dreg and sreg need precision
				 * before this insn
				 */
				bt_set_reg(bt, sreg);
			} /* else dreg += K
			   * dreg still needs precision before this insn
			   */
		}
	} else if (class == BPF_LDX) {
		if (!bt_is_reg_set(bt, dreg))
			return 0;
		bt_clear_reg(bt, dreg);

		/* scalars can only be spilled into stack w/o losing precision.
		 * Load from any other memory can be zero extended.
		 * The desire to keep that precision is already indicated
		 * by 'precise' mark in corresponding register of this state.
		 * No further tracking necessary.
		 */
		if (insn->src_reg != BPF_REG_FP)
			return 0;

		/* dreg = *(u64 *)[fp - off] was a fill from the stack.
		 * that [fp - off] slot contains scalar that needs to be
		 * tracked with precision
		 */
		spi = (-insn->off - 1) / BPF_REG_SIZE;
		if (spi >= 64) {
			verbose(env, "BUG spi %d\n", spi);
			WARN_ONCE(1, "verifier backtracking bug");
			return -EFAULT;
		}
		bt_set_slot(bt, spi);
	} else if (class == BPF_STX || class == BPF_ST) {
		if (bt_is_reg_set(bt, dreg))
			/* stx & st shouldn't be using _scalar_ dst_reg
			 * to access memory. It means backtracking
			 * encountered a case of pointer subtraction.
			 */
			return -ENOTSUPP;
		/* scalars can only be spilled into stack */
		if (insn->dst_reg != BPF_REG_FP)
			return 0;
		spi = (-insn->off - 1) / BPF_REG_SIZE;
		if (spi >= 64) {
			verbose(env, "BUG spi %d\n", spi);
			WARN_ONCE(1, "verifier backtracking bug");
			return -EFAULT;
		}
		if (!bt_is_slot_set(bt, spi))
			return 0;
		bt_clear_slot(bt, spi);
		if (class == BPF_STX)
			bt_set_reg(bt, sreg);
	} else if (class == BPF_JMP || class == BPF_JMP32) {
		if (bpf_pseudo_call(insn)) {
			int subprog_insn_idx, subprog;

			subprog_insn_idx = idx + insn->imm + 1;
			subprog = find_subprog(env, subprog_insn_idx);
			if (subprog < 0)
				return -EFAULT;

			if (subprog_is_global(env, subprog)) {
				/* check that jump history doesn't have any
				 * extra instructions from subprog; the next
				 * instruction after call to global subprog
				 * should be literally next instruction in
				 * caller program
				 */
				WARN_ONCE(idx + 1 != subseq_idx, "verifier backtracking bug");
				/* r1-r5 are invalidated after subprog call,
				 * so for global func call it shouldn't be set
				 * anymore
				 */
				if (bt_reg_mask(bt) & BPF_REGMASK_ARGS) {
					verbose(env, "BUG regs %x\n", bt_reg_mask(bt));
					WARN_ONCE(1, "verifier backtracking bug");
					return -EFAULT;
				}
				/* global subprog always sets R0 */
				bt_clear_reg(bt, BPF_REG_0);
				return 0;
			} else {
				/* static subprog call instruction, which
				 * means that we are exiting current subprog,
				 * so only r1-r5 could be still requested as
				 * precise, r0 and r6-r10 or any stack slot in
				 * the current frame should be zero by now
				 */
				if (bt_reg_mask(bt) & ~BPF_REGMASK_ARGS) {
					verbose(env, "BUG regs %x\n", bt_reg_mask(bt));
					WARN_ONCE(1, "verifier backtracking bug");
					return -EFAULT;
				}
				/* we don't track register spills perfectly,
				 * so fallback to force-precise instead of failing */
				if (bt_stack_mask(bt) != 0)
					return -ENOTSUPP;
				/* propagate r1-r5 to the caller */
				for (i = BPF_REG_1; i <= BPF_REG_5; i++) {
					if (bt_is_reg_set(bt, i)) {
						bt_clear_reg(bt, i);
						bt_set_frame_reg(bt, bt->frame - 1, i);
					}
				}
				if (bt_subprog_exit(bt))
					return -EFAULT;
				return 0;
			}
		} else if (is_sync_callback_calling_insn(insn) && idx != subseq_idx - 1) {
			/* exit from callback subprog to callback-calling helper or
			 * kfunc call. Use idx/subseq_idx check to discern it from
			 * straight line code backtracking.
			 * Unlike the subprog call handling above, we shouldn't
			 * propagate precision of r1-r5 (if any requested), as they are
			 * not actually arguments passed directly to callback subprogs
			 */
			if (bt_reg_mask(bt) & ~BPF_REGMASK_ARGS) {
				verbose(env, "BUG regs %x\n", bt_reg_mask(bt));
				WARN_ONCE(1, "verifier backtracking bug");
				return -EFAULT;
			}
			if (bt_stack_mask(bt) != 0)
				return -ENOTSUPP;
			/* clear r1-r5 in callback subprog's mask */
			for (i = BPF_REG_1; i <= BPF_REG_5; i++)
				bt_clear_reg(bt, i);
			if (bt_subprog_exit(bt))
				return -EFAULT;
			return 0;
		} else if (opcode == BPF_CALL) {
			/* kfunc with imm==0 is invalid and fixup_kfunc_call will
			 * catch this error later. Make backtracking conservative
			 * with ENOTSUPP.
			 */
			if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL && insn->imm == 0)
				return -ENOTSUPP;
			/* regular helper call sets R0 */
			bt_clear_reg(bt, BPF_REG_0);
			if (bt_reg_mask(bt) & BPF_REGMASK_ARGS) {
				/* if backtracing was looking for registers R1-R5
				 * they should have been found already.
				 */
				verbose(env, "BUG regs %x\n", bt_reg_mask(bt));
				WARN_ONCE(1, "verifier backtracking bug");
				return -EFAULT;
			}
		} else if (opcode == BPF_EXIT) {
			bool r0_precise;

			/* Backtracking to a nested function call, 'idx' is a part of
			 * the inner frame 'subseq_idx' is a part of the outer frame.
			 * In case of a regular function call, instructions giving
			 * precision to registers R1-R5 should have been found already.
			 * In case of a callback, it is ok to have R1-R5 marked for
			 * backtracking, as these registers are set by the function
			 * invoking callback.
			 */
			if (subseq_idx >= 0 && calls_callback(env, subseq_idx))
				for (i = BPF_REG_1; i <= BPF_REG_5; i++)
					bt_clear_reg(bt, i);
			if (bt_reg_mask(bt) & BPF_REGMASK_ARGS) {
				verbose(env, "BUG regs %x\n", bt_reg_mask(bt));
				WARN_ONCE(1, "verifier backtracking bug");
				return -EFAULT;
			}

			/* BPF_EXIT in subprog or callback always returns
			 * right after the call instruction, so by checking
			 * whether the instruction at subseq_idx-1 is subprog
			 * call or not we can distinguish actual exit from
			 * *subprog* from exit from *callback*. In the former
			 * case, we need to propagate r0 precision, if
			 * necessary. In the latter we never do that.
			 */
			r0_precise = subseq_idx - 1 >= 0 &&
				     bpf_pseudo_call(&env->prog->insnsi[subseq_idx - 1]) &&
				     bt_is_reg_set(bt, BPF_REG_0);

			bt_clear_reg(bt, BPF_REG_0);
			if (bt_subprog_enter(bt))
				return -EFAULT;

			if (r0_precise)
				bt_set_reg(bt, BPF_REG_0);
			/* r6-r9 and stack slots will stay set in caller frame
			 * bitmasks until we return back from callee(s)
			 */
			return 0;
		} else if (BPF_SRC(insn->code) == BPF_X) {
			if (!bt_is_reg_set(bt, dreg) && !bt_is_reg_set(bt, sreg))
				return 0;
			/* dreg <cond> sreg
			 * Both dreg and sreg need precision before
			 * this insn. If only sreg was marked precise
			 * before it would be equally necessary to
			 * propagate it to dreg.
			 */
			bt_set_reg(bt, dreg);
			bt_set_reg(bt, sreg);
			/* else dreg <cond> K
			 * Only dreg still needs precision before
			 * this insn, so for the K-based conditional
			 * there is nothing new to be marked.
			 */
		}
	} else if (class == BPF_LD) {
		if (!bt_is_reg_set(bt, dreg))
			return 0;
		bt_clear_reg(bt, dreg);
		/* It's ld_imm64 or ld_abs or ld_ind.
		 * For ld_imm64 no further tracking of precision
		 * into parent is necessary
		 */
		if (mode == BPF_IND || mode == BPF_ABS)
			/* to be analyzed */
			return -ENOTSUPP;
	}
	return 0;
}
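/* Backtracking example (illustrative instruction indexes and registers):
 *   10: r6 = 3
 *   11: r1 = r6
 *   12: r1 += r7
 *   13: if r1 > 8 goto +5
 * Requesting precision for r1 at insn 13 walks backwards: insn 12 keeps r1
 * marked and additionally marks r7, insn 11 transfers the r1 mark to r6, and
 * insn 10 clears r6 because the constant write fully defines it. Only r7 is
 * left to be resolved in earlier states.
 */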
/* the scalar precision tracking algorithm:
 * . at the start all registers have precise=false.
 * . scalar ranges are tracked as normal through alu and jmp insns.
 * . once precise value of the scalar register is used in:
 *   .  ptr + scalar alu
 *   . if (scalar cond K|scalar)
 *   . helper_call(.., scalar, ...) where ARG_CONST is expected
 *   backtrack through the verifier states and mark all registers and
 *   stack slots with spilled constants that these scalar regisers
 *   should be precise.
 * . during state pruning two registers (or spilled stack slots)
 *   are equivalent if both are not precise.
 *
 * Note the verifier cannot simply walk register parentage chain,
 * since many different registers and stack slots could have been
 * used to compute single precise scalar.
 *
 * The approach of starting with precise=true for all registers and then
 * backtrack to mark a register as not precise when the verifier detects
 * that program doesn't care about specific value (e.g., when helper
 * takes register as ARG_ANYTHING parameter) is not safe.
 *
 * It's ok to walk single parentage chain of the verifier states.
 * It's possible that this backtracking will go all the way till 1st insn.
 * All other branches will be explored for needing precision later.
 *
 * The backtracking needs to deal with cases like:
 *   R8=map_value(id=0,off=0,ks=4,vs=1952,imm=0) R9_w=map_value(id=0,off=40,ks=4,vs=1952,imm=0)
 *   r9 -= r8
 *   r5 = r9
 *   if r5 > 0x79f goto pc+7
 *    R5_w=inv(id=0,umax_value=1951,var_off=(0x0; 0x7ff))
 *   r5 += 1
 *   ...
 *   call bpf_perf_event_output#25
 *     where .arg5_type = ARG_CONST_SIZE_OR_ZERO
 *
 * and this case:
 *   r6 = 1
 *   call foo // uses callee's r6 inside to compute r0
 *   r0 += r6
 *   if r0 == 0 goto
 *
 * to track above reg_mask/stack_mask needs to be independent for each frame.
 *
 * Also if parent's curframe > frame where backtracking started,
 * the verifier need to mark registers in both frames, otherwise callees
 * may incorrectly prune callers. This is similar to
 * commit 7640ead93924 ("bpf: verifier: make sure callees don't prune with caller differences")
 *
 * For now backtracking falls back into conservative marking.
 */
static void mark_all_scalars_precise(struct bpf_verifier_env *env,
				     struct bpf_verifier_state *st)
{
	struct bpf_func_state *func;
	struct bpf_reg_state *reg;
	int i, j;

	if (env->log.level & BPF_LOG_LEVEL2) {
		verbose(env, "mark_precise: frame%d: falling back to forcing all scalars precise\n",
			st->curframe);
	}

	/* big hammer: mark all scalars precise in this path.
	 * pop_stack may still get !precise scalars.
	 * We also skip current state and go straight to first parent state,
	 * because precision markings in current non-checkpointed state are
	 * not needed. See why in the comment in __mark_chain_precision below.
	 */
	for (st = st->parent; st; st = st->parent) {
		for (i = 0; i <= st->curframe; i++) {
			func = st->frame[i];
			for (j = 0; j < BPF_REG_FP; j++) {
				reg = &func->regs[j];
				if (reg->type != SCALAR_VALUE || reg->precise)
					continue;
				reg->precise = true;
				if (env->log.level & BPF_LOG_LEVEL2) {
					verbose(env, "force_precise: frame%d: forcing r%d to be precise\n",
						i, j);
				}
			}
			for (j = 0; j < func->allocated_stack / BPF_REG_SIZE; j++) {
				if (!is_spilled_reg(&func->stack[j]))
					continue;
				reg = &func->stack[j].spilled_ptr;
				if (reg->type != SCALAR_VALUE || reg->precise)
					continue;
				reg->precise = true;
				if (env->log.level & BPF_LOG_LEVEL2) {
					verbose(env, "force_precise: frame%d: forcing fp%d to be precise\n",
						i, -(j + 1) * 8);
				}
			}
		}
	}
}
static void mark_all_scalars_imprecise(struct bpf_verifier_env *env, struct bpf_verifier_state *st)
{
	struct bpf_func_state *func;
	struct bpf_reg_state *reg;
	int i, j;

	for (i = 0; i <= st->curframe; i++) {
		func = st->frame[i];
		for (j = 0; j < BPF_REG_FP; j++) {
			reg = &func->regs[j];
			if (reg->type != SCALAR_VALUE)
				continue;
			reg->precise = false;
		}
		for (j = 0; j < func->allocated_stack / BPF_REG_SIZE; j++) {
			if (!is_spilled_reg(&func->stack[j]))
				continue;
			reg = &func->stack[j].spilled_ptr;
			if (reg->type != SCALAR_VALUE)
				continue;
			reg->precise = false;
		}
	}
}
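/* Intended usage sketch (see the callers for the authoritative flow): right
 * before a state is checkpointed, precise marks inherited from the parent
 * chain are dropped, e.g.:
 *
 *   mark_all_scalars_imprecise(env, cur);
 *   // ... then copy 'cur' into the new explored (checkpointed) state ...
 *
 * so the checkpoint starts maximally permissive and regains precise marks
 * only if a child state later propagates them back.
 */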
static bool idset_contains(struct bpf_idset *s, u32 id)
{
	u32 i;

	for (i = 0; i < s->count; ++i)
		if (s->ids[i] == id)
			return true;

	return false;
}

static int idset_push(struct bpf_idset *s, u32 id)
{
	if (WARN_ON_ONCE(s->count >= ARRAY_SIZE(s->ids)))
		return -EFAULT;
	s->ids[s->count++] = id;
	return 0;
}

static void idset_reset(struct bpf_idset *s)
{
	s->count = 0;
}
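/* Usage sketch for the idset helpers above (illustrative only):
 *
 *   idset_reset(&env->idset_scratch);
 *   if (idset_push(&env->idset_scratch, reg->id))   // -EFAULT on overflow
 *           return -EFAULT;
 *   if (idset_contains(&env->idset_scratch, id))
 *           // 'id' belongs to a register that must stay precise
 */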
/* Collect a set of IDs for all registers currently marked as precise in env->bt.
 * Mark all registers with these IDs as precise.
 */
static int mark_precise_scalar_ids(struct bpf_verifier_env *env, struct bpf_verifier_state *st)
{
	struct bpf_idset *precise_ids = &env->idset_scratch;
	struct backtrack_state *bt = &env->bt;
	struct bpf_func_state *func;
	struct bpf_reg_state *reg;
	DECLARE_BITMAP(mask, 64);
	int i, fr;

	idset_reset(precise_ids);

	for (fr = bt->frame; fr >= 0; fr--) {
		func = st->frame[fr];

		bitmap_from_u64(mask, bt_frame_reg_mask(bt, fr));
		for_each_set_bit(i, mask, 32) {
			reg = &func->regs[i];
			if (!reg->id || reg->type != SCALAR_VALUE)
				continue;
			if (idset_push(precise_ids, reg->id))
				return -EFAULT;
		}

		bitmap_from_u64(mask, bt_frame_stack_mask(bt, fr));
		for_each_set_bit(i, mask, 64) {
			if (i >= func->allocated_stack / BPF_REG_SIZE)
				break;
			if (!is_spilled_scalar_reg(&func->stack[i]))
				continue;
			reg = &func->stack[i].spilled_ptr;
			if (!reg->id)
				continue;
			if (idset_push(precise_ids, reg->id))
				return -EFAULT;
		}
	}

	for (fr = 0; fr <= st->curframe; ++fr) {
		func = st->frame[fr];

		for (i = BPF_REG_0; i < BPF_REG_10; ++i) {
			reg = &func->regs[i];
			if (!reg->id || reg->type != SCALAR_VALUE)
				continue;
			if (!idset_contains(precise_ids, reg->id))
				continue;
			bt_set_frame_reg(bt, fr, i);
		}
		for (i = 0; i < func->allocated_stack / BPF_REG_SIZE; ++i) {
			if (!is_spilled_scalar_reg(&func->stack[i]))
				continue;
			reg = &func->stack[i].spilled_ptr;
			if (!reg->id)
				continue;
			if (!idset_contains(precise_ids, reg->id))
				continue;
			bt_set_frame_slot(bt, fr, i);
		}
	}

	return 0;
}
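/* Illustrative case handled by the helper above (assumed program fragment):
 *
 *   r7 = r6          ; r6 and r7 now share scalar id A
 *   ...
 *   r3 += r6         ; r3 is a map value pointer, so only r6 gets requested
 *
 * Pass one collects id A from the registers already marked in env->bt;
 * pass two then also marks r7 (and any spilled copy with id A), keeping the
 * ranges propagated by find_equal_scalars() sound.
 */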
/* __mark_chain_precision() backtracks BPF program instruction sequence and
 * chain of verifier states making sure that register *regno* (if regno >= 0)
 * and/or stack slot *spi* (if spi >= 0) are marked as precisely tracked
 * SCALARS, as well as any other registers and slots that contribute to
 * a tracked state of given registers/stack slots, depending on specific BPF
 * assembly instructions (see backtrack_insns() for exact instruction handling
 * logic). This backtracking relies on recorded jmp_history and is able to
 * traverse entire chain of parent states. This process ends only when all the
 * necessary registers/slots and their transitive dependencies are marked as
 * precise.
 *
 * One important and subtle aspect is that precise marks *do not matter* in
 * the currently verified state (current state). It is important to understand
 * why this is the case.
 *
 * First, note that current state is the state that is not yet "checkpointed",
 * i.e., it is not yet put into env->explored_states, and it has no children
 * states as well. It's ephemeral, and can end up either a) being discarded if
 * compatible explored state is found at some point or BPF_EXIT instruction is
 * reached or b) checkpointed and put into env->explored_states, branching out
 * into one or more children states.
 *
 * In the former case, precise markings in current state are completely
 * ignored by state comparison code (see regsafe() for details). Only
 * checkpointed ("old") state precise markings are important, and if old
 * state's register/slot is precise, regsafe() assumes current state's
 * register/slot as precise and checks value ranges exactly and precisely. If
 * states turn out to be compatible, current state's necessary precise
 * markings and any required parent states' precise markings are enforced
 * after the fact with propagate_precision() logic. But it's important to
 * realize that in this case, even after marking current state
 * registers/slots as precise, we immediately discard current state. So what
 * actually matters is any of the precise markings propagated into current
 * state's parent states, which are always checkpointed (due to b) case above).
 * As such, for scenario a) it doesn't matter if current state has precise
 * markings set or not.
 *
 * Now, for the scenario b), checkpointing and forking into child(ren)
 * state(s). Note that before current state gets to checkpointing step, any
 * processed instruction always assumes precise SCALAR register/slot
 * knowledge: if precise value or range is useful to prune jump branch, BPF
 * verifier takes this opportunity enthusiastically. Similarly, when
 * register's value is used to calculate offset or memory address, exact
 * knowledge of SCALAR range is assumed, checked, and enforced. So, similar to
 * what we mentioned above about state comparison ignoring precise markings
 * during state comparison, BPF verifier ignores and also assumes precise
 * markings *at will* during instruction verification process. But as verifier
 * assumes precision, it also propagates any precision dependencies across
 * parent states, which are not yet finalized, so can be further restricted
 * based on new knowledge gained from restrictions enforced by their children
 * states. This is so that once those parent states are finalized, i.e., when
 * they have no more active children state, state comparison logic in
 * is_state_visited() would enforce strict and precise SCALAR ranges, if
 * required for correctness.
 *
 * To build a bit more intuition, note also that once a state is checkpointed,
 * the path we took to get to that state is not important. This is crucial
 * property for state pruning. When state is checkpointed and finalized at
 * some instruction index, it can be correctly and safely used to "short
 * circuit" any *compatible* state that reaches exactly the same instruction
 * index. I.e., if we jumped to that instruction from a completely different
 * code path than original finalized state was derived from, it doesn't
 * matter, current state can be discarded because from that instruction
 * forward having a compatible state will ensure we will safely reach the
 * exit. States describe preconditions for further exploration, but completely
 * forget the history of how we got here.
 *
 * This also means that even if we needed precise SCALAR range to get to
 * finalized state, but from that point forward *that same* SCALAR register is
 * never used in a precise context (i.e., its precise value is not needed for
 * correctness), it's correct and safe to mark such register as "imprecise"
 * (i.e., precise marking set to false). This is what we rely on when we do
 * not set precise marking in current state. If no child state requires
 * precision for any given SCALAR register, it's safe to dictate that it can
 * be imprecise. If any child state does require this register to be precise,
 * we'll mark it precise later retroactively during precise markings
 * propagation from child state to parent states.
 *
 * Skipping precise marking setting in current state is a mild version of
 * relying on the above observation. But we can utilize this property even
 * more aggressively by proactively forgetting any precise marking in the
 * current state (which we inherited from the parent state), right before we
 * checkpoint it and branch off into new child state. This is done by
 * mark_all_scalars_imprecise() to hopefully get more permissive and generic
 * finalized states which help in short circuiting more future states.
 */
static int __mark_chain_precision(struct bpf_verifier_env *env, int regno)
{
	struct backtrack_state *bt = &env->bt;
	struct bpf_verifier_state *st = env->cur_state;
	int first_idx = st->first_insn_idx;
	int last_idx = env->insn_idx;
	int subseq_idx = -1;
	struct bpf_func_state *func;
	struct bpf_reg_state *reg;
	bool skip_first = true;
	int i, fr, err;

	if (!env->bpf_capable)
		return 0;

	/* set frame number from which we are starting to backtrack */
	bt_init(bt, env->cur_state->curframe);

	/* Do sanity checks against current state of register and/or stack
	 * slot, but don't set precise flag in current state, as precision
	 * tracking in the current state is unnecessary.
	 */
	func = st->frame[bt->frame];
	if (regno >= 0) {
		reg = &func->regs[regno];
		if (reg->type != SCALAR_VALUE) {
			WARN_ONCE(1, "backtracing misuse");
			return -EFAULT;
		}
		bt_set_reg(bt, regno);
	}

	if (bt_empty(bt))
		return 0;

	for (;;) {
		DECLARE_BITMAP(mask, 64);
		u32 history = st->jmp_history_cnt;

		if (env->log.level & BPF_LOG_LEVEL2) {
			verbose(env, "mark_precise: frame%d: last_idx %d first_idx %d subseq_idx %d \n",
				bt->frame, last_idx, first_idx, subseq_idx);
		}

		/* If some register with scalar ID is marked as precise,
		 * make sure that all registers sharing this ID are also precise.
		 * This is needed to estimate effect of find_equal_scalars().
		 * Do this at the last instruction of each state,
		 * bpf_reg_state::id fields are valid for these instructions.
		 *
		 * Allows to track precision in situation like below:
		 *
		 *    r2 = unknown value
		 *    ...
		 *    r1 = r2                 // r1 and r2 now share the same ID
		 *    ...
		 *    --- state #1 {r1.id = A, r2.id = A} ---
		 *    ...
		 *    if (r2 > 10) goto exit; // find_equal_scalars() assigns range to r1
		 *    ...
		 *    --- state #2 {r1.id = A, r2.id = A} ---
		 *    ...
		 *    r3 += r1                // need to mark both r1 and r2
		 */
		if (mark_precise_scalar_ids(env, st))
			return -EFAULT;

		if (last_idx < 0) {
			/* we are at the entry into subprog, which
			 * is expected for global funcs, but only if
			 * requested precise registers are R1-R5
			 * (which are global func's input arguments)
			 */
			if (st->curframe == 0 &&
			    st->frame[0]->subprogno > 0 &&
			    st->frame[0]->callsite == BPF_MAIN_FUNC &&
			    bt_stack_mask(bt) == 0 &&
			    (bt_reg_mask(bt) & ~BPF_REGMASK_ARGS) == 0) {
				bitmap_from_u64(mask, bt_reg_mask(bt));
				for_each_set_bit(i, mask, 32) {
					reg = &st->frame[0]->regs[i];
					bt_clear_reg(bt, i);
					if (reg->type == SCALAR_VALUE)
						reg->precise = true;
				}
				return 0;
			}

			verbose(env, "BUG backtracking func entry subprog %d reg_mask %x stack_mask %llx\n",
				st->frame[0]->subprogno, bt_reg_mask(bt), bt_stack_mask(bt));
			WARN_ONCE(1, "verifier backtracking bug");
			return -EFAULT;
		}

		for (i = last_idx;;) {
			if (skip_first) {
				err = 0;
				skip_first = false;
			} else {
				err = backtrack_insn(env, i, subseq_idx, bt);
			}
			if (err == -ENOTSUPP) {
				mark_all_scalars_precise(env, env->cur_state);
				bt_reset(bt);
				return 0;
			} else if (err) {
				return err;
			}
			if (bt_empty(bt))
				/* Found assignment(s) into tracked register in this state.
				 * Since this state is already marked, just return.
				 * Nothing to be tracked further in the parent state.
				 */
				return 0;
			subseq_idx = i;
			i = get_prev_insn_idx(st, i, &history);
			if (i == -ENOENT)
				break;
			if (i >= env->prog->len) {
				/* This can happen if backtracking reached insn 0
				 * and there are still reg_mask or stack_mask
				 * to backtrack.
				 * It means the backtracking missed the spot where
				 * particular register was initialized with a constant.
				 */
				verbose(env, "BUG backtracking idx %d\n", i);
				WARN_ONCE(1, "verifier backtracking bug");
				return -EFAULT;
			}
		}

		st = st->parent;
		if (!st)
			break;

		for (fr = bt->frame; fr >= 0; fr--) {
			func = st->frame[fr];
			bitmap_from_u64(mask, bt_frame_reg_mask(bt, fr));
			for_each_set_bit(i, mask, 32) {
				reg = &func->regs[i];
				if (reg->type != SCALAR_VALUE) {
					bt_clear_frame_reg(bt, fr, i);
					continue;
				}
				if (reg->precise)
					bt_clear_frame_reg(bt, fr, i);
				else
					reg->precise = true;
			}

			bitmap_from_u64(mask, bt_frame_stack_mask(bt, fr));
			for_each_set_bit(i, mask, 64) {
				if (i >= func->allocated_stack / BPF_REG_SIZE) {
					/* the sequence of instructions:
					 * 3: (7b) *(u64 *)(r3 -8) = r0
					 * 4: (79) r4 = *(u64 *)(r10 -8)
					 * doesn't contain jmps. It's backtracked
					 * as a single block.
					 * During backtracking insn 3 is not recognized as
					 * stack access, so at the end of backtracking
					 * stack slot fp-8 is still marked in stack_mask.
					 * However the parent state may not have accessed
					 * fp-8 and it's "unallocated" stack space.
					 * In such case fallback to conservative.
					 */
					mark_all_scalars_precise(env, env->cur_state);
					bt_reset(bt);
					return 0;
				}

				if (!is_spilled_scalar_reg(&func->stack[i])) {
					bt_clear_frame_slot(bt, fr, i);
					continue;
				}
				reg = &func->stack[i].spilled_ptr;
				if (reg->precise)
					bt_clear_frame_slot(bt, fr, i);
				else
					reg->precise = true;
			}
			if (env->log.level & BPF_LOG_LEVEL2) {
				fmt_reg_mask(env->tmp_str_buf, TMP_STR_BUF_LEN,
					     bt_frame_reg_mask(bt, fr));
				verbose(env, "mark_precise: frame%d: parent state regs=%s ",
					fr, env->tmp_str_buf);
				fmt_stack_mask(env->tmp_str_buf, TMP_STR_BUF_LEN,
					       bt_frame_stack_mask(bt, fr));
				verbose(env, "stack=%s: ", env->tmp_str_buf);
				print_verifier_state(env, func, true);
			}
		}

		if (bt_empty(bt))
			return 0;

		subseq_idx = first_idx;
		last_idx = st->last_insn_idx;
		first_idx = st->first_insn_idx;
	}

	/* if we still have requested precise regs or slots, we missed
	 * something (e.g., stack access through non-r10 register), so
	 * fallback to marking all precise
	 */
	if (!bt_empty(bt)) {
		mark_all_scalars_precise(env, env->cur_state);
		bt_reset(bt);
	}

	return 0;
}
int mark_chain_precision(struct bpf_verifier_env *env, int regno)
{
	return __mark_chain_precision(env, regno);
}

/* mark_chain_precision_batch() assumes that env->bt is set in the caller to
 * desired reg and stack masks across all relevant frames
 */
static int mark_chain_precision_batch(struct bpf_verifier_env *env)
{
	return __mark_chain_precision(env, -1);
}
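/* Usage sketch for the batch variant (assumed caller, based on the comment
 * above): fill env->bt first, then backtrack once for all of them:
 *
 *   bt_init(bt, env->cur_state->curframe);
 *   bt_set_frame_reg(bt, bt->frame, BPF_REG_1);
 *   bt_set_frame_reg(bt, bt->frame, BPF_REG_2);
 *   err = mark_chain_precision_batch(env);
 */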
static bool is_spillable_regtype(enum bpf_reg_type type)
{
	switch (base_type(type)) {
	case PTR_TO_MAP_VALUE:
	case PTR_TO_STACK:
	case PTR_TO_CTX:
	case PTR_TO_PACKET:
	case PTR_TO_PACKET_META:
	case PTR_TO_PACKET_END:
	case PTR_TO_FLOW_KEYS:
	case CONST_PTR_TO_MAP:
	case PTR_TO_SOCKET:
	case PTR_TO_SOCK_COMMON:
	case PTR_TO_TCP_SOCK:
	case PTR_TO_XDP_SOCK:
	case PTR_TO_BTF_ID:
	case PTR_TO_BUF:
	case PTR_TO_MEM:
	case PTR_TO_FUNC:
	case PTR_TO_MAP_KEY:
		return true;
	default:
		return false;
	}
}
/* Does this register contain a constant zero? */
static bool register_is_null(struct bpf_reg_state *reg)
{
	return reg->type == SCALAR_VALUE && tnum_equals_const(reg->var_off, 0);
}

static bool register_is_const(struct bpf_reg_state *reg)
{
	return reg->type == SCALAR_VALUE && tnum_is_const(reg->var_off);
}

static bool __is_scalar_unbounded(struct bpf_reg_state *reg)
{
	return tnum_is_unknown(reg->var_off) &&
	       reg->smin_value == S64_MIN && reg->smax_value == S64_MAX &&
	       reg->umin_value == 0 && reg->umax_value == U64_MAX &&
	       reg->s32_min_value == S32_MIN && reg->s32_max_value == S32_MAX &&
	       reg->u32_min_value == 0 && reg->u32_max_value == U32_MAX;
}

static bool register_is_bounded(struct bpf_reg_state *reg)
{
	return reg->type == SCALAR_VALUE && !__is_scalar_unbounded(reg);
}

static bool __is_pointer_value(bool allow_ptr_leaks,
			       const struct bpf_reg_state *reg)
{
	if (allow_ptr_leaks)
		return false;

	return reg->type != SCALAR_VALUE;
}
/* Copy src state preserving dst->parent and dst->live fields */
static void copy_register_state(struct bpf_reg_state *dst, const struct bpf_reg_state *src)
{
	struct bpf_reg_state *parent = dst->parent;
	enum bpf_reg_liveness live = dst->live;

	*dst = *src;
	dst->parent = parent;
	dst->live = live;
}

static void save_register_state(struct bpf_func_state *state,
				int spi, struct bpf_reg_state *reg,
				int size)
{
	int i;

	copy_register_state(&state->stack[spi].spilled_ptr, reg);
	if (size == BPF_REG_SIZE)
		state->stack[spi].spilled_ptr.live |= REG_LIVE_WRITTEN;

	for (i = BPF_REG_SIZE; i > BPF_REG_SIZE - size; i--)
		state->stack[spi].slot_type[i - 1] = STACK_SPILL;

	/* size < 8 bytes spill */
	for (; i; i--)
		scrub_spilled_slot(&state->stack[spi].slot_type[i - 1]);
}

static bool is_bpf_st_mem(struct bpf_insn *insn)
{
	return BPF_CLASS(insn->code) == BPF_ST && BPF_MODE(insn->code) == BPF_MEM;
}
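/* For reference, is_bpf_st_mem() matches store-immediate instructions such as
 * (illustrative encoding):
 *
 *   BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 42)   // *(u64 *)(r10 - 8) = 42
 *
 * as opposed to BPF_STX register stores like *(u64 *)(r10 - 8) = r1.
 */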
/* check_stack_{read,write}_fixed_off functions track spill/fill of registers,
 * stack boundary and alignment are checked in check_mem_access()
 */
static int check_stack_write_fixed_off(struct bpf_verifier_env *env,
				       /* stack frame we're writing to */
				       struct bpf_func_state *state,
				       int off, int size, int value_regno,
				       int insn_idx)
{
	struct bpf_func_state *cur; /* state of the current function */
	int i, slot = -off - 1, spi = slot / BPF_REG_SIZE, err;
	struct bpf_insn *insn = &env->prog->insnsi[insn_idx];
	struct bpf_reg_state *reg = NULL;
	u32 dst_reg = insn->dst_reg;

	err = grow_stack_state(state, round_up(slot + 1, BPF_REG_SIZE));
	if (err)
		return err;
	/* caller checked that off % size == 0 and -MAX_BPF_STACK <= off < 0,
	 * so it's aligned access and [off, off + size) are within stack limits
	 */
	if (!env->allow_ptr_leaks &&
	    state->stack[spi].slot_type[0] == STACK_SPILL &&
	    size != BPF_REG_SIZE) {
		verbose(env, "attempt to corrupt spilled pointer on stack\n");
		return -EACCES;
	}

	cur = env->cur_state->frame[env->cur_state->curframe];
	if (value_regno >= 0)
		reg = &cur->regs[value_regno];
	if (!env->bypass_spec_v4) {
		bool sanitize = reg && is_spillable_regtype(reg->type);

		for (i = 0; i < size; i++) {
			u8 type = state->stack[spi].slot_type[i];

			if (type != STACK_MISC && type != STACK_ZERO) {
				sanitize = true;
				break;
			}
		}

		if (sanitize)
			env->insn_aux_data[insn_idx].sanitize_stack_spill = true;
	}

	err = destroy_if_dynptr_stack_slot(env, state, spi);
	if (err)
		return err;

	mark_stack_slot_scratched(env, spi);
	if (reg && !(off % BPF_REG_SIZE) && register_is_bounded(reg) &&
	    !register_is_null(reg) && env->bpf_capable) {
		if (dst_reg != BPF_REG_FP) {
			/* The backtracking logic can only recognize explicit
			 * stack slot address like [fp - 8]. Other spill of
			 * scalar via different register has to be conservative.
			 * Backtrack from here and mark all registers as precise
			 * that contributed into 'reg' being a constant.
			 */
			err = mark_chain_precision(env, value_regno);
			if (err)
				return err;
		}
		save_register_state(state, spi, reg, size);
		/* Break the relation on a narrowing spill. */
		if (fls64(reg->umax_value) > BITS_PER_BYTE * size)
			state->stack[spi].spilled_ptr.id = 0;
	} else if (!reg && !(off % BPF_REG_SIZE) && is_bpf_st_mem(insn) &&
		   insn->imm != 0 && env->bpf_capable) {
		struct bpf_reg_state fake_reg = {};

		__mark_reg_known(&fake_reg, insn->imm);
		fake_reg.type = SCALAR_VALUE;
		save_register_state(state, spi, &fake_reg, size);
	} else if (reg && is_spillable_regtype(reg->type)) {
		/* register containing pointer is being spilled into stack */
		if (size != BPF_REG_SIZE) {
			verbose_linfo(env, insn_idx, "; ");
			verbose(env, "invalid size of register spill\n");
			return -EACCES;
		}
		if (state != cur && reg->type == PTR_TO_STACK) {
			verbose(env, "cannot spill pointers to stack into stack frame of the caller\n");
			return -EINVAL;
		}
		save_register_state(state, spi, reg, size);
	} else {
		u8 type = STACK_MISC;

		/* regular write of data into stack destroys any spilled ptr */
		state->stack[spi].spilled_ptr.type = NOT_INIT;
		/* Mark slots as STACK_MISC if they belonged to spilled ptr/dynptr/iter. */
		if (is_stack_slot_special(&state->stack[spi]))
			for (i = 0; i < BPF_REG_SIZE; i++)
				scrub_spilled_slot(&state->stack[spi].slot_type[i]);

		/* only mark the slot as written if all 8 bytes were written
		 * otherwise read propagation may incorrectly stop too soon
		 * when stack slots are partially written.
		 * This heuristic means that read propagation will be
		 * conservative, since it will add reg_live_read marks
		 * to stack slots all the way to first state when programs
		 * writes+reads less than 8 bytes
		 */
		if (size == BPF_REG_SIZE)
			state->stack[spi].spilled_ptr.live |= REG_LIVE_WRITTEN;

		/* when we zero initialize stack slots mark them as such */
		if ((reg && register_is_null(reg)) ||
		    (!reg && is_bpf_st_mem(insn) && insn->imm == 0)) {
			/* backtracking doesn't work for STACK_ZERO yet. */
			err = mark_chain_precision(env, value_regno);
			if (err)
				return err;
			type = STACK_ZERO;
		}

		/* Mark slots affected by this stack write. */
		for (i = 0; i < size; i++)
			state->stack[spi].slot_type[(slot - i) % BPF_REG_SIZE] =
				type;
	}
	return 0;
}
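/* Illustrative spills handled by the fixed-offset path above (assuming a
 * privileged, bpf_capable program):
 *
 *   r6 = 42
 *   *(u64 *)(r10 - 8) = r6    ; spill via fp: bounded scalar state is saved
 *   r2 = r10
 *   r2 += -16
 *   *(u64 *)(r2 + 0) = r6     ; spill via r2: still saved, but 'r6' is first
 *                             ; marked precise since backtracking only
 *                             ; recognizes [fp - off] style addresses
 */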
/* Write the stack: 'stack[ptr_regno + off] = value_regno'. 'ptr_regno' is
 * known to contain a variable offset.
 * This function checks whether the write is permitted and conservatively
 * tracks the effects of the write, considering that each stack slot in the
 * dynamic range is potentially written to.
 *
 * 'off' includes 'regno->off'.
 * 'value_regno' can be -1, meaning that an unknown value is being written to
 * the stack.
 *
 * Spilled pointers in range are not marked as written because we don't know
 * what's going to be actually written. This means that read propagation for
 * future reads cannot be terminated by this write.
 *
 * For privileged programs, uninitialized stack slots are considered
 * initialized by this write (even though we don't know exactly what offsets
 * are going to be written to). The idea is that we don't want the verifier to
 * reject future reads that access slots written to through variable offsets.
 */
static int check_stack_write_var_off(struct bpf_verifier_env *env,
				     /* func where register points to */
				     struct bpf_func_state *state,
				     int ptr_regno, int off, int size,
				     int value_regno, int insn_idx)
{
	struct bpf_func_state *cur; /* state of the current function */
	int min_off, max_off;
	int i, err;
	struct bpf_reg_state *ptr_reg = NULL, *value_reg = NULL;
	struct bpf_insn *insn = &env->prog->insnsi[insn_idx];
	bool writing_zero = false;
	/* set if the fact that we're writing a zero is used to let any
	 * stack slots remain STACK_ZERO
	 */
	bool zero_used = false;

	cur = env->cur_state->frame[env->cur_state->curframe];
	ptr_reg = &cur->regs[ptr_regno];
	min_off = ptr_reg->smin_value + off;
	max_off = ptr_reg->smax_value + off + size;
	if (value_regno >= 0)
		value_reg = &cur->regs[value_regno];
	if ((value_reg && register_is_null(value_reg)) ||
	    (!value_reg && is_bpf_st_mem(insn) && insn->imm == 0))
		writing_zero = true;

	err = grow_stack_state(state, round_up(-min_off, BPF_REG_SIZE));
	if (err)
		return err;

	for (i = min_off; i < max_off; i++) {
		int spi;

		spi = __get_spi(i);
		err = destroy_if_dynptr_stack_slot(env, state, spi);
		if (err)
			return err;
	}

	/* Variable offset writes destroy any spilled pointers in range. */
	for (i = min_off; i < max_off; i++) {
		u8 new_type, *stype;
		int slot, spi;

		slot = -i - 1;
		spi = slot / BPF_REG_SIZE;
		stype = &state->stack[spi].slot_type[slot % BPF_REG_SIZE];
		mark_stack_slot_scratched(env, spi);

		if (!env->allow_ptr_leaks && *stype != STACK_MISC && *stype != STACK_ZERO) {
			/* Reject the write if range we may write to has not
			 * been initialized beforehand. If we didn't reject
			 * here, the ptr status would be erased below (even
			 * though not all slots are actually overwritten),
			 * possibly opening the door to leaks.
			 *
			 * We do however catch STACK_INVALID case below, and
			 * only allow reading possibly uninitialized memory
			 * later for CAP_PERFMON, as the write may not happen to
			 * that slot.
			 */
			verbose(env, "spilled ptr in range of var-offset stack write; insn %d, ptr off: %d",
				insn_idx, i);
			return -EINVAL;
		}

		/* Erase all spilled pointers. */
		state->stack[spi].spilled_ptr.type = NOT_INIT;

		/* Update the slot type. */
		new_type = STACK_MISC;
		if (writing_zero && *stype == STACK_ZERO) {
			new_type = STACK_ZERO;
			zero_used = true;
		}
		/* If the slot is STACK_INVALID, we check whether it's OK to
		 * pretend that it will be initialized by this write. The slot
		 * might not actually be written to, and so if we mark it as
		 * initialized future reads might leak uninitialized memory.
		 * For privileged programs, we will accept such reads to slots
		 * that may or may not be written because, if we reject
		 * them, the error would be too confusing.
		 */
		if (*stype == STACK_INVALID && !env->allow_uninit_stack) {
			verbose(env, "uninit stack in range of var-offset write prohibited for !root; insn %d, off: %d",
				insn_idx, i);
			return -EINVAL;
		}
		*stype = new_type;
	}
	if (zero_used) {
		/* backtracking doesn't work for STACK_ZERO yet. */
		err = mark_chain_precision(env, value_regno);
		if (err)
			return err;
	}
	return 0;
}
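/* Illustrative variable-offset write covered by the function above:
 *
 *   r2 = r10
 *   r2 += r3                  ; r3 is a bounded scalar, so r2 has var_off
 *   *(u8 *)(r2 - 1) = 0       ; every slot in [min_off, max_off) is treated
 *                             ; as potentially written
 *
 * If that range may cover a spilled pointer, the write is rejected unless
 * the program is allowed to leak pointers (env->allow_ptr_leaks).
 */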
/* When register 'dst_regno' is assigned some values from stack[min_off,
 * max_off), we set the register's type according to the types of the
 * respective stack slots. If all the stack values are known to be zeros, then
 * so is the destination reg. Otherwise, the register is considered to be
 * SCALAR. This function does not deal with register filling; the caller must
 * ensure that all spilled registers in the stack range have been marked as
 * read.
 */
static void mark_reg_stack_read(struct bpf_verifier_env *env,
				/* func where src register points to */
				struct bpf_func_state *ptr_state,
				int min_off, int max_off, int dst_regno)
{
	struct bpf_verifier_state *vstate = env->cur_state;
	struct bpf_func_state *state = vstate->frame[vstate->curframe];
	int i, slot, spi;
	u8 *stype;
	int zeros = 0;

	for (i = min_off; i < max_off; i++) {
		slot = -i - 1;
		spi = slot / BPF_REG_SIZE;
		mark_stack_slot_scratched(env, spi);
		stype = ptr_state->stack[spi].slot_type;
		if (stype[slot % BPF_REG_SIZE] != STACK_ZERO)
			break;
		zeros++;
	}
	if (zeros == max_off - min_off) {
		/* any access_size read into register is zero extended,
		 * so the whole register == const_zero
		 */
		__mark_reg_const_zero(&state->regs[dst_regno]);
		/* backtracking doesn't support STACK_ZERO yet,
		 * so mark it precise here, so that later
		 * backtracking can stop here.
		 * Backtracking may not need this if this register
		 * doesn't participate in pointer adjustment.
		 * Forward propagation of precise flag is not
		 * necessary either. This mark is only to stop
		 * backtracking. Any register that contributed
		 * to const 0 was marked precise before spill.
		 */
		state->regs[dst_regno].precise = true;
	} else {
		/* have read misc data from the stack */
		mark_reg_unknown(env, state->regs, dst_regno);
	}
	state->regs[dst_regno].live |= REG_LIVE_WRITTEN;
}
/* Read the stack at 'off' and put the results into the register indicated by
 * 'dst_regno'. It handles reg filling if the addressed stack slot is a
 * spilled register.
 *
 * 'dst_regno' can be -1, meaning that the read value is not going to a
 * register.
 *
 * The access is assumed to be within the current stack bounds.
 */
static int check_stack_read_fixed_off(struct bpf_verifier_env *env,
				      /* func where src register points to */
				      struct bpf_func_state *reg_state,
				      int off, int size, int dst_regno)
{
	struct bpf_verifier_state *vstate = env->cur_state;
	struct bpf_func_state *state = vstate->frame[vstate->curframe];
	int i, slot = -off - 1, spi = slot / BPF_REG_SIZE;
	struct bpf_reg_state *reg;
	u8 *stype, type;

	stype = reg_state->stack[spi].slot_type;
	reg = &reg_state->stack[spi].spilled_ptr;

	mark_stack_slot_scratched(env, spi);

	if (is_spilled_reg(&reg_state->stack[spi])) {
		u8 spill_size = 1;

		for (i = BPF_REG_SIZE - 1; i > 0 && stype[i - 1] == STACK_SPILL; i--)
			spill_size++;

		if (size != BPF_REG_SIZE || spill_size != BPF_REG_SIZE) {
			if (reg->type != SCALAR_VALUE) {
				verbose_linfo(env, env->insn_idx, "; ");
				verbose(env, "invalid size of register fill\n");
				return -EACCES;
			}

			mark_reg_read(env, reg, reg->parent, REG_LIVE_READ64);
			if (dst_regno < 0)
				return 0;

			if (!(off % BPF_REG_SIZE) && size == spill_size) {
				/* The earlier check_reg_arg() has decided the
				 * subreg_def for this insn. Save it first.
				 */
				s32 subreg_def = state->regs[dst_regno].subreg_def;

				copy_register_state(&state->regs[dst_regno], reg);
				state->regs[dst_regno].subreg_def = subreg_def;
			} else {
				for (i = 0; i < size; i++) {
					type = stype[(slot - i) % BPF_REG_SIZE];
					if (type == STACK_SPILL)
						continue;
					if (type == STACK_MISC)
						continue;
					if (type == STACK_INVALID && env->allow_uninit_stack)
						continue;
					verbose(env, "invalid read from stack off %d+%d size %d\n",
						off, i, size);
					return -EACCES;
				}
				mark_reg_unknown(env, state->regs, dst_regno);
			}
			state->regs[dst_regno].live |= REG_LIVE_WRITTEN;
			return 0;
		}

		if (dst_regno >= 0) {
			/* restore register state from stack */
			copy_register_state(&state->regs[dst_regno], reg);
			/* mark reg as written since spilled pointer state likely
			 * has its liveness marks cleared by is_state_visited()
			 * which resets stack/reg liveness for state transitions
			 */
			state->regs[dst_regno].live |= REG_LIVE_WRITTEN;
		} else if (__is_pointer_value(env->allow_ptr_leaks, reg)) {
			/* If dst_regno==-1, the caller is asking us whether
			 * it is acceptable to use this value as a SCALAR_VALUE
			 * (e.g. for XADD).
			 * We must not allow unprivileged callers to do that
			 * with spilled pointers.
			 */
			verbose(env, "leaking pointer from stack off %d\n",
				off);
			return -EACCES;
		}
		mark_reg_read(env, reg, reg->parent, REG_LIVE_READ64);
	} else {
		for (i = 0; i < size; i++) {
			type = stype[(slot - i) % BPF_REG_SIZE];
			if (type == STACK_MISC)
				continue;
			if (type == STACK_ZERO)
				continue;
			if (type == STACK_INVALID && env->allow_uninit_stack)
				continue;
			verbose(env, "invalid read from stack off %d+%d size %d\n",
				off, i, size);
			return -EACCES;
		}
		mark_reg_read(env, reg, reg->parent, REG_LIVE_READ64);
		if (dst_regno >= 0)
			mark_reg_stack_read(env, reg_state, off, off + size, dst_regno);
	}
	return 0;
}
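/* Illustrative fills handled above:
 *
 *   *(u64 *)(r10 - 8) = r1    ; spill
 *   r2 = *(u64 *)(r10 - 8)    ; full-width fill: r1's state is restored to r2
 *   r3 = *(u32 *)(r10 - 8)    ; narrow fill: only allowed for scalar spills,
 *                             ; r3 becomes an unknown SCALAR_VALUE
 */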
enum bpf_access_src {
	ACCESS_DIRECT = 1,  /* the access is performed by an instruction */
	ACCESS_HELPER = 2,  /* the access is performed by a helper */
};

static int check_stack_range_initialized(struct bpf_verifier_env *env,
					 int regno, int off, int access_size,
					 bool zero_size_allowed,
					 enum bpf_access_src type,
					 struct bpf_call_arg_meta *meta);

static struct bpf_reg_state *reg_state(struct bpf_verifier_env *env, int regno)
{
	return cur_regs(env) + regno;
}
/* Read the stack at 'ptr_regno + off' and put the result into the register
 * 'dst_regno'.
 * 'off' includes the pointer register's fixed offset (i.e. 'ptr_regno.off'),
 * but not its variable offset.
 * 'size' is assumed to be <= reg size and the access is assumed to be aligned.
 *
 * As opposed to check_stack_read_fixed_off, this function doesn't deal with
 * filling registers (i.e. reads of spilled register cannot be detected when
 * the offset is not fixed). We conservatively mark 'dst_regno' as containing
 * SCALAR_VALUE. That's why we assert that the 'ptr_regno' has a variable
 * offset; for a fixed offset check_stack_read_fixed_off should be used
 * instead.
 */
static int check_stack_read_var_off(struct bpf_verifier_env *env,
				    int ptr_regno, int off, int size, int dst_regno)
{
	/* The state of the source register. */
	struct bpf_reg_state *reg = reg_state(env, ptr_regno);
	struct bpf_func_state *ptr_state = func(env, reg);
	int err;
	int min_off, max_off;

	/* Note that we pass a NULL meta, so raw access will not be permitted.
	 */
	err = check_stack_range_initialized(env, ptr_regno, off, size,
					    false, ACCESS_DIRECT, NULL);
	if (err)
		return err;

	min_off = reg->smin_value + off;
	max_off = reg->smax_value + off;
	mark_reg_stack_read(env, ptr_state, min_off, max_off + size, dst_regno);
	return 0;
}
/* check_stack_read dispatches to check_stack_read_fixed_off or
 * check_stack_read_var_off.
 *
 * The caller must ensure that the offset falls within the allocated stack
 * size.
 *
 * 'dst_regno' is a register which will receive the value from the stack. It
 * can be -1, meaning that the read value is not going to a register.
 */
static int check_stack_read(struct bpf_verifier_env *env,
			    int ptr_regno, int off, int size,
			    int dst_regno)
{
	struct bpf_reg_state *reg = reg_state(env, ptr_regno);
	struct bpf_func_state *state = func(env, reg);
	int err;
	/* Some accesses are only permitted with a static offset. */
	bool var_off = !tnum_is_const(reg->var_off);

	/* The offset is required to be static when reads don't go to a
	 * register, in order to not leak pointers (see
	 * check_stack_read_fixed_off).
	 */
	if (dst_regno < 0 && var_off) {
		char tn_buf[48];

		tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
		verbose(env, "variable offset stack pointer cannot be passed into helper function; var_off=%s off=%d size=%d\n",
			tn_buf, off, size);
		return -EACCES;
	}
	/* Variable offset is prohibited for unprivileged mode for simplicity
	 * since it requires corresponding support in Spectre masking for stack
	 * ALU. See also retrieve_ptr_limit(). The check in
	 * check_stack_access_for_ptr_arithmetic() called by
	 * adjust_ptr_min_max_vals() prevents users from creating stack pointers
	 * with variable offsets, therefore no check is required here. Further,
	 * just checking it here would be insufficient as speculative stack
	 * writes could still lead to unsafe speculative behaviour.
	 */
	if (!var_off) {
		off += reg->var_off.value;
		err = check_stack_read_fixed_off(env, state, off, size,
						 dst_regno);
	} else {
		/* Variable offset stack reads need more conservative handling
		 * than fixed offset ones. Note that dst_regno >= 0 on this
		 * branch.
		 */
		err = check_stack_read_var_off(env, ptr_regno, off, size,
					       dst_regno);
	}
	return err;
}
/* check_stack_write dispatches to check_stack_write_fixed_off or
 * check_stack_write_var_off.
 *
 * 'ptr_regno' is the register used as a pointer into the stack.
 * 'off' includes 'ptr_regno->off', but not its variable offset (if any).
 * 'value_regno' is the register whose value we're writing to the stack. It can
 * be -1, meaning that we're not writing from a register.
 *
 * The caller must ensure that the offset falls within the maximum stack size.
 */
static int check_stack_write(struct bpf_verifier_env *env,
			     int ptr_regno, int off, int size,
			     int value_regno, int insn_idx)
{
	struct bpf_reg_state *reg = reg_state(env, ptr_regno);
	struct bpf_func_state *state = func(env, reg);
	int err;

	if (tnum_is_const(reg->var_off)) {
		off += reg->var_off.value;
		err = check_stack_write_fixed_off(env, state, off, size,
						  value_regno, insn_idx);
	} else {
		/* Variable offset stack writes need more conservative handling
		 * than fixed offset ones.
		 */
		err = check_stack_write_var_off(env, state,
						ptr_regno, off, size,
						value_regno, insn_idx);
	}
	return err;
}
static int check_map_access_type(struct bpf_verifier_env *env, u32 regno,
				 int off, int size, enum bpf_access_type type)
{
	struct bpf_reg_state *regs = cur_regs(env);
	struct bpf_map *map = regs[regno].map_ptr;
	u32 cap = bpf_map_flags_to_cap(map);

	if (type == BPF_WRITE && !(cap & BPF_MAP_CAN_WRITE)) {
		verbose(env, "write into map forbidden, value_size=%d off=%d size=%d\n",
			map->value_size, off, size);
		return -EACCES;
	}

	if (type == BPF_READ && !(cap & BPF_MAP_CAN_READ)) {
		verbose(env, "read from map forbidden, value_size=%d off=%d size=%d\n",
			map->value_size, off, size);
		return -EACCES;
	}

	return 0;
}
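/* Example of what the check above rejects (assumed map definition): a map
 * created with BPF_F_RDONLY_PROG loses BPF_MAP_CAN_WRITE in its capability
 * mask, so
 *
 *   val = bpf_map_lookup_elem(&ro_map, &key);
 *   if (val)
 *           *val = 1;          // "write into map forbidden"
 *
 * fails verification, while plain reads of *val are accepted.
 */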
/* check read/write into memory region (e.g., map value, ringbuf sample, etc) */
static int __check_mem_access(struct bpf_verifier_env *env, int regno,
			      int off, int size, u32 mem_size,
			      bool zero_size_allowed)
{
	bool size_ok = size > 0 || (size == 0 && zero_size_allowed);
	struct bpf_reg_state *reg;

	if (off >= 0 && size_ok && (u64)off + size <= mem_size)
		return 0;

	reg = &cur_regs(env)[regno];
	switch (reg->type) {
	case PTR_TO_MAP_KEY:
		verbose(env, "invalid access to map key, key_size=%d off=%d size=%d\n",
			mem_size, off, size);
		break;
	case PTR_TO_MAP_VALUE:
		verbose(env, "invalid access to map value, value_size=%d off=%d size=%d\n",
			mem_size, off, size);
		break;
	case PTR_TO_PACKET:
	case PTR_TO_PACKET_META:
	case PTR_TO_PACKET_END:
		verbose(env, "invalid access to packet, off=%d size=%d, R%d(id=%d,off=%d,r=%d)\n",
			off, size, regno, reg->id, off, mem_size);
		break;
	default:
		verbose(env, "invalid access to memory, mem_size=%u off=%d size=%d\n",
			mem_size, off, size);
	}

	return -EACCES;
}
/* check read/write into a memory region with possible variable offset */
static int check_mem_region_access(struct bpf_verifier_env *env, u32 regno,
				   int off, int size, u32 mem_size,
				   bool zero_size_allowed)
{
	struct bpf_verifier_state *vstate = env->cur_state;
	struct bpf_func_state *state = vstate->frame[vstate->curframe];
	struct bpf_reg_state *reg = &state->regs[regno];
	int err;

	/* We may have adjusted the register pointing to memory region, so we
	 * need to try adding each of min_value and max_value to off
	 * to make sure our theoretical access will be safe.
	 *
	 * The minimum value is only important with signed
	 * comparisons where we can't assume the floor of a
	 * value is 0. If we are using signed variables for our
	 * indexes we need to make sure that whatever we use
	 * will have a set floor within our range.
	 */
	if (reg->smin_value < 0 &&
	    (reg->smin_value == S64_MIN ||
	     (off + reg->smin_value != (s64)(s32)(off + reg->smin_value)) ||
	      reg->smin_value + off < 0)) {
		verbose(env, "R%d min value is negative, either use unsigned index or do a if (index >=0) check.\n",
			regno);
		return -EACCES;
	}
	err = __check_mem_access(env, regno, reg->smin_value + off, size,
				 mem_size, zero_size_allowed);
	if (err) {
		verbose(env, "R%d min value is outside of the allowed memory range\n",
			regno);
		return err;
	}

	/* If we haven't set a max value then we need to bail since we can't be
	 * sure we won't do bad things.
	 * If reg->umax_value + off could overflow, treat that as unbounded too.
	 */
	if (reg->umax_value >= BPF_MAX_VAR_OFF) {
		verbose(env, "R%d unbounded memory access, make sure to bounds check any such access\n",
			regno);
		return -EACCES;
	}
	err = __check_mem_access(env, regno, reg->umax_value + off, size,
				 mem_size, zero_size_allowed);
	if (err) {
		verbose(env, "R%d max value is outside of the allowed memory range\n",
			regno);
		return err;
	}

	return 0;
}
static int __check_ptr_off_reg(struct bpf_verifier_env *env,
			       const struct bpf_reg_state *reg, int regno,
			       bool fixed_off_ok)
{
	/* Access to this pointer-typed register or passing it to a helper
	 * is only allowed in its original, unmodified form.
	 */

	if (reg->off < 0) {
		verbose(env, "negative offset %s ptr R%d off=%d disallowed\n",
			reg_type_str(env, reg->type), regno, reg->off);
		return -EACCES;
	}

	if (!fixed_off_ok && reg->off) {
		verbose(env, "dereference of modified %s ptr R%d off=%d disallowed\n",
			reg_type_str(env, reg->type), regno, reg->off);
		return -EACCES;
	}

	if (!tnum_is_const(reg->var_off) || reg->var_off.value) {
		char tn_buf[48];

		tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
		verbose(env, "variable %s access var_off=%s disallowed\n",
			reg_type_str(env, reg->type), tn_buf);
		return -EACCES;
	}

	return 0;
}

int check_ptr_off_reg(struct bpf_verifier_env *env,
		      const struct bpf_reg_state *reg, int regno)
{
	return __check_ptr_off_reg(env, reg, regno, false);
}
5355 static int map_kptr_match_type(struct bpf_verifier_env
*env
,
5356 struct btf_field
*kptr_field
,
5357 struct bpf_reg_state
*reg
, u32 regno
)
5359 const char *targ_name
= btf_type_name(kptr_field
->kptr
.btf
, kptr_field
->kptr
.btf_id
);
5361 const char *reg_name
= "";
5363 if (btf_is_kernel(reg
->btf
)) {
5364 perm_flags
= PTR_MAYBE_NULL
| PTR_TRUSTED
| MEM_RCU
;
5366 /* Only unreferenced case accepts untrusted pointers */
5367 if (kptr_field
->type
== BPF_KPTR_UNREF
)
5368 perm_flags
|= PTR_UNTRUSTED
;
5370 perm_flags
= PTR_MAYBE_NULL
| MEM_ALLOC
;
5371 if (kptr_field
->type
== BPF_KPTR_PERCPU
)
5372 perm_flags
|= MEM_PERCPU
;
5375 if (base_type(reg
->type
) != PTR_TO_BTF_ID
|| (type_flag(reg
->type
) & ~perm_flags
))
5378 /* We need to verify reg->type and reg->btf, before accessing reg->btf */
5379 reg_name
= btf_type_name(reg
->btf
, reg
->btf_id
);
5381 /* For ref_ptr case, release function check should ensure we get one
5382 * referenced PTR_TO_BTF_ID, and that its fixed offset is 0. For the
5383 * normal store of unreferenced kptr, we must ensure var_off is zero.
5384 * Since ref_ptr cannot be accessed directly by BPF insns, checks for
5385 * reg->off and reg->ref_obj_id are not needed here.
5387 if (__check_ptr_off_reg(env
, reg
, regno
, true))
5390 /* A full type match is needed, as BTF can be vmlinux, module or prog BTF, and
5391 * we also need to take into account the reg->off.
5393 * We want to support cases like:
5401 * v = func(); // PTR_TO_BTF_ID
5402 * val->foo = v; // reg->off is zero, btf and btf_id match type
5403 * val->bar = &v->br; // reg->off is still zero, but we need to retry with
5404 * // first member type of struct after comparison fails
5405 * val->baz = &v->bz; // reg->off is non-zero, so struct needs to be walked
5408 * In the kptr_ref case, check_func_arg_reg_off already ensures reg->off
5409 * is zero. We must also ensure that btf_struct_ids_match does not walk
5410 * the struct to match type against first member of struct, i.e. reject
5411 * second case from above. Hence, when type is BPF_KPTR_REF, we set
5412 * strict mode to true for type match.
5414 if (!btf_struct_ids_match(&env
->log
, reg
->btf
, reg
->btf_id
, reg
->off
,
5415 kptr_field
->kptr
.btf
, kptr_field
->kptr
.btf_id
,
5416 kptr_field
->type
!= BPF_KPTR_UNREF
))
5420 verbose(env
, "invalid kptr access, R%d type=%s%s ", regno
,
5421 reg_type_str(env
, reg
->type
), reg_name
);
5422 verbose(env
, "expected=%s%s", reg_type_str(env
, PTR_TO_BTF_ID
), targ_name
);
5423 if (kptr_field
->type
== BPF_KPTR_UNREF
)
5424 verbose(env
, " or %s%s\n", reg_type_str(env
, PTR_TO_BTF_ID
| PTR_UNTRUSTED
),
5431 /* The non-sleepable programs and sleepable programs with explicit bpf_rcu_read_lock()
5432 * can dereference RCU protected pointers and result is PTR_TRUSTED.
5434 static bool in_rcu_cs(struct bpf_verifier_env
*env
)
5436 return env
->cur_state
->active_rcu_lock
||
5437 env
->cur_state
->active_lock
.ptr
||
5438 !env
->prog
->aux
->sleepable
;
5441 /* Once GCC supports btf_type_tag the following mechanism will be replaced with tag check */
5442 BTF_SET_START(rcu_protected_types
)
5443 BTF_ID(struct, prog_test_ref_kfunc
)
5444 #ifdef CONFIG_CGROUPS
5445 BTF_ID(struct, cgroup
)
5447 BTF_ID(struct, bpf_cpumask
)
5448 BTF_ID(struct, task_struct
)
5449 BTF_SET_END(rcu_protected_types
)
5451 static bool rcu_protected_object(const struct btf
*btf
, u32 btf_id
)
5453 if (!btf_is_kernel(btf
))
5455 return btf_id_set_contains(&rcu_protected_types
, btf_id
);
5458 static bool rcu_safe_kptr(const struct btf_field
*field
)
5460 const struct btf_field_kptr
*kptr
= &field
->kptr
;
5462 return field
->type
== BPF_KPTR_PERCPU
||
5463 (field
->type
== BPF_KPTR_REF
&& rcu_protected_object(kptr
->btf
, kptr
->btf_id
));
5466 static u32
btf_ld_kptr_type(struct bpf_verifier_env
*env
, struct btf_field
*kptr_field
)
5468 if (rcu_safe_kptr(kptr_field
) && in_rcu_cs(env
)) {
5469 if (kptr_field
->type
!= BPF_KPTR_PERCPU
)
5470 return PTR_MAYBE_NULL
| MEM_RCU
;
5471 return PTR_MAYBE_NULL
| MEM_RCU
| MEM_PERCPU
;
5473 return PTR_MAYBE_NULL
| PTR_UNTRUSTED
;
5476 static int check_map_kptr_access(struct bpf_verifier_env
*env
, u32 regno
,
5477 int value_regno
, int insn_idx
,
5478 struct btf_field
*kptr_field
)
5480 struct bpf_insn
*insn
= &env
->prog
->insnsi
[insn_idx
];
5481 int class = BPF_CLASS(insn
->code
);
5482 struct bpf_reg_state
*val_reg
;
5484 /* Things we already checked for in check_map_access and caller:
5485 * - Reject cases where variable offset may touch kptr
5486 * - size of access (must be BPF_DW)
5487 * - tnum_is_const(reg->var_off)
5488 * - kptr_field->offset == off + reg->var_off.value
5490 /* Only BPF_[LDX,STX,ST] | BPF_MEM | BPF_DW is supported */
5491 if (BPF_MODE(insn
->code
) != BPF_MEM
) {
5492 verbose(env
, "kptr in map can only be accessed using BPF_MEM instruction mode\n");
5496 /* We only allow loading referenced kptr, since it will be marked as
5497 * untrusted, similar to unreferenced kptr.
5499 if (class != BPF_LDX
&&
5500 (kptr_field
->type
== BPF_KPTR_REF
|| kptr_field
->type
== BPF_KPTR_PERCPU
)) {
5501 verbose(env
, "store to referenced kptr disallowed\n");
5505 if (class == BPF_LDX
) {
5506 val_reg
= reg_state(env
, value_regno
);
5507 /* We can simply mark the value_regno receiving the pointer
5508 * value from map as PTR_TO_BTF_ID, with the correct type.
5510 mark_btf_ld_reg(env
, cur_regs(env
), value_regno
, PTR_TO_BTF_ID
, kptr_field
->kptr
.btf
,
5511 kptr_field
->kptr
.btf_id
, btf_ld_kptr_type(env
, kptr_field
));
5512 /* For mark_ptr_or_null_reg */
5513 val_reg
->id
= ++env
->id_gen
;
5514 } else if (class == BPF_STX
) {
5515 val_reg
= reg_state(env
, value_regno
);
5516 if (!register_is_null(val_reg
) &&
5517 map_kptr_match_type(env
, kptr_field
, val_reg
, value_regno
))
5519 } else if (class == BPF_ST
) {
5521 verbose(env
, "BPF_ST imm must be 0 when storing to kptr at off=%u\n",
5522 kptr_field
->offset
);
5526 verbose(env
, "kptr in map can only be accessed using BPF_LDX/BPF_STX/BPF_ST\n");
5532 /* check read/write into a map element with possible variable offset */
5533 static int check_map_access(struct bpf_verifier_env
*env
, u32 regno
,
5534 int off
, int size
, bool zero_size_allowed
,
5535 enum bpf_access_src src
)
5537 struct bpf_verifier_state
*vstate
= env
->cur_state
;
5538 struct bpf_func_state
*state
= vstate
->frame
[vstate
->curframe
];
5539 struct bpf_reg_state
*reg
= &state
->regs
[regno
];
5540 struct bpf_map
*map
= reg
->map_ptr
;
5541 struct btf_record
*rec
;
5544 err
= check_mem_region_access(env
, regno
, off
, size
, map
->value_size
,
5549 if (IS_ERR_OR_NULL(map
->record
))
5552 for (i
= 0; i
< rec
->cnt
; i
++) {
5553 struct btf_field
*field
= &rec
->fields
[i
];
5554 u32 p
= field
->offset
;
5556 /* If any part of a field can be touched by load/store, reject
5557 * this program. To check that [x1, x2) overlaps with [y1, y2),
5558 * it is sufficient to check x1 < y2 && y1 < x2.
5560 if (reg
->smin_value
+ off
< p
+ btf_field_type_size(field
->type
) &&
5561 p
< reg
->umax_value
+ off
+ size
) {
5562 switch (field
->type
) {
5563 case BPF_KPTR_UNREF
:
5565 case BPF_KPTR_PERCPU
:
5566 if (src
!= ACCESS_DIRECT
) {
5567 verbose(env
, "kptr cannot be accessed indirectly by helper\n");
5570 if (!tnum_is_const(reg
->var_off
)) {
5571 verbose(env
, "kptr access cannot have variable offset\n");
5574 if (p
!= off
+ reg
->var_off
.value
) {
5575 verbose(env
, "kptr access misaligned expected=%u off=%llu\n",
5576 p
, off
+ reg
->var_off
.value
);
5579 if (size
!= bpf_size_to_bytes(BPF_DW
)) {
5580 verbose(env
, "kptr access size must be BPF_DW\n");
5585 verbose(env
, "%s cannot be accessed directly by load/store\n",
5586 btf_field_type_name(field
->type
));
5594 #define MAX_PACKET_OFF 0xffff
5596 static bool may_access_direct_pkt_data(struct bpf_verifier_env
*env
,
5597 const struct bpf_call_arg_meta
*meta
,
5598 enum bpf_access_type t
)
5600 enum bpf_prog_type prog_type
= resolve_prog_type(env
->prog
);
5602 switch (prog_type
) {
5603 /* Program types only with direct read access go here! */
5604 case BPF_PROG_TYPE_LWT_IN
:
5605 case BPF_PROG_TYPE_LWT_OUT
:
5606 case BPF_PROG_TYPE_LWT_SEG6LOCAL
:
5607 case BPF_PROG_TYPE_SK_REUSEPORT
:
5608 case BPF_PROG_TYPE_FLOW_DISSECTOR
:
5609 case BPF_PROG_TYPE_CGROUP_SKB
:
5614 /* Program types with direct read + write access go here! */
5615 case BPF_PROG_TYPE_SCHED_CLS
:
5616 case BPF_PROG_TYPE_SCHED_ACT
:
5617 case BPF_PROG_TYPE_XDP
:
5618 case BPF_PROG_TYPE_LWT_XMIT
:
5619 case BPF_PROG_TYPE_SK_SKB
:
5620 case BPF_PROG_TYPE_SK_MSG
:
5622 return meta
->pkt_access
;
5624 env
->seen_direct_write
= true;
5627 case BPF_PROG_TYPE_CGROUP_SOCKOPT
:
5629 env
->seen_direct_write
= true;
5638 static int check_packet_access(struct bpf_verifier_env
*env
, u32 regno
, int off
,
5639 int size
, bool zero_size_allowed
)
5641 struct bpf_reg_state
*regs
= cur_regs(env
);
5642 struct bpf_reg_state
*reg
= ®s
[regno
];
5645 /* We may have added a variable offset to the packet pointer; but any
5646 * reg->range we have comes after that. We are only checking the fixed
5650 /* We don't allow negative numbers, because we aren't tracking enough
5651 * detail to prove they're safe.
5653 if (reg
->smin_value
< 0) {
5654 verbose(env
, "R%d min value is negative, either use unsigned index or do a if (index >=0) check.\n",
5659 err
= reg
->range
< 0 ? -EINVAL
:
5660 __check_mem_access(env
, regno
, off
, size
, reg
->range
,
5663 verbose(env
, "R%d offset is outside of the packet\n", regno
);
5667 /* __check_mem_access has made sure "off + size - 1" is within u16.
5668 * reg->umax_value can't be bigger than MAX_PACKET_OFF which is 0xffff,
5669 * otherwise find_good_pkt_pointers would have refused to set range info
5670 * that __check_mem_access would have rejected this pkt access.
5671 * Therefore, "off + reg->umax_value + size - 1" won't overflow u32.
5673 env
->prog
->aux
->max_pkt_offset
=
5674 max_t(u32
, env
->prog
->aux
->max_pkt_offset
,
5675 off
+ reg
->umax_value
+ size
- 1);
5680 /* check access to 'struct bpf_context' fields. Supports fixed offsets only */
5681 static int check_ctx_access(struct bpf_verifier_env
*env
, int insn_idx
, int off
, int size
,
5682 enum bpf_access_type t
, enum bpf_reg_type
*reg_type
,
5683 struct btf
**btf
, u32
*btf_id
)
5685 struct bpf_insn_access_aux info
= {
5686 .reg_type
= *reg_type
,
5690 if (env
->ops
->is_valid_access
&&
5691 env
->ops
->is_valid_access(off
, size
, t
, env
->prog
, &info
)) {
5692 /* A non zero info.ctx_field_size indicates that this field is a
5693 * candidate for later verifier transformation to load the whole
5694 * field and then apply a mask when accessed with a narrower
5695 * access than actual ctx access size. A zero info.ctx_field_size
5696 * will only allow for whole field access and rejects any other
5697 * type of narrower access.
5699 *reg_type
= info
.reg_type
;
5701 if (base_type(*reg_type
) == PTR_TO_BTF_ID
) {
5703 *btf_id
= info
.btf_id
;
5705 env
->insn_aux_data
[insn_idx
].ctx_field_size
= info
.ctx_field_size
;
5707 /* remember the offset of last byte accessed in ctx */
5708 if (env
->prog
->aux
->max_ctx_offset
< off
+ size
)
5709 env
->prog
->aux
->max_ctx_offset
= off
+ size
;
5713 verbose(env
, "invalid bpf_context access off=%d size=%d\n", off
, size
);
static int check_flow_keys_access(struct bpf_verifier_env *env, int off,
				  int size)
{
	if (size < 0 || off < 0 ||
	    (u64)off + size > sizeof(struct bpf_flow_keys)) {
		verbose(env, "invalid access to flow keys off=%d size=%d\n",
			off, size);
		return -EACCES;
	}
	return 0;
}
5729 static int check_sock_access(struct bpf_verifier_env
*env
, int insn_idx
,
5730 u32 regno
, int off
, int size
,
5731 enum bpf_access_type t
)
5733 struct bpf_reg_state
*regs
= cur_regs(env
);
5734 struct bpf_reg_state
*reg
= ®s
[regno
];
5735 struct bpf_insn_access_aux info
= {};
5738 if (reg
->smin_value
< 0) {
5739 verbose(env
, "R%d min value is negative, either use unsigned index or do a if (index >=0) check.\n",
5744 switch (reg
->type
) {
5745 case PTR_TO_SOCK_COMMON
:
5746 valid
= bpf_sock_common_is_valid_access(off
, size
, t
, &info
);
5749 valid
= bpf_sock_is_valid_access(off
, size
, t
, &info
);
5751 case PTR_TO_TCP_SOCK
:
5752 valid
= bpf_tcp_sock_is_valid_access(off
, size
, t
, &info
);
5754 case PTR_TO_XDP_SOCK
:
5755 valid
= bpf_xdp_sock_is_valid_access(off
, size
, t
, &info
);
5763 env
->insn_aux_data
[insn_idx
].ctx_field_size
=
5764 info
.ctx_field_size
;
5768 verbose(env
, "R%d invalid %s access off=%d size=%d\n",
5769 regno
, reg_type_str(env
, reg
->type
), off
, size
);
static bool is_pointer_value(struct bpf_verifier_env *env, int regno)
{
	return __is_pointer_value(env->allow_ptr_leaks, reg_state(env, regno));
}

static bool is_ctx_reg(struct bpf_verifier_env *env, int regno)
{
	const struct bpf_reg_state *reg = reg_state(env, regno);

	return reg->type == PTR_TO_CTX;
}

static bool is_sk_reg(struct bpf_verifier_env *env, int regno)
{
	const struct bpf_reg_state *reg = reg_state(env, regno);

	return type_is_sk_pointer(reg->type);
}

static bool is_pkt_reg(struct bpf_verifier_env *env, int regno)
{
	const struct bpf_reg_state *reg = reg_state(env, regno);

	return type_is_pkt_pointer(reg->type);
}

static bool is_flow_key_reg(struct bpf_verifier_env *env, int regno)
{
	const struct bpf_reg_state *reg = reg_state(env, regno);

	/* Separate to is_ctx_reg() since we still want to allow BPF_ST here. */
	return reg->type == PTR_TO_FLOW_KEYS;
}
static u32 *reg2btf_ids[__BPF_REG_TYPE_MAX] = {
#ifdef CONFIG_NET
	[PTR_TO_SOCKET] = &btf_sock_ids[BTF_SOCK_TYPE_SOCK],
	[PTR_TO_SOCK_COMMON] = &btf_sock_ids[BTF_SOCK_TYPE_SOCK_COMMON],
	[PTR_TO_TCP_SOCK] = &btf_sock_ids[BTF_SOCK_TYPE_TCP],
#endif
	[CONST_PTR_TO_MAP] = btf_bpf_map_id,
};

static bool is_trusted_reg(const struct bpf_reg_state *reg)
{
	/* A referenced register is always trusted. */
	if (reg->ref_obj_id)
		return true;

	/* Types listed in the reg2btf_ids are always trusted */
	if (reg2btf_ids[base_type(reg->type)])
		return true;

	/* If a register is not referenced, it is trusted if it has the
	 * MEM_ALLOC or PTR_TRUSTED type modifiers, and no others. Some of the
	 * other type modifiers may be safe, but we elect to take an opt-in
	 * approach here as some (e.g. PTR_UNTRUSTED and PTR_MAYBE_NULL) are
	 * not.
	 *
	 * Eventually, we should make PTR_TRUSTED the single source of truth
	 * for whether a register is trusted.
	 */
	return type_flag(reg->type) & BPF_REG_TRUSTED_MODIFIERS &&
	       !bpf_type_has_unsafe_modifiers(reg->type);
}

static bool is_rcu_reg(const struct bpf_reg_state *reg)
{
	return reg->type & MEM_RCU;
}

static void clear_trusted_flags(enum bpf_type_flag *flag)
{
	*flag &= ~(BPF_REG_TRUSTED_MODIFIERS | MEM_RCU);
}
5850 static int check_pkt_ptr_alignment(struct bpf_verifier_env
*env
,
5851 const struct bpf_reg_state
*reg
,
5852 int off
, int size
, bool strict
)
5854 struct tnum reg_off
;
5857 /* Byte size accesses are always allowed. */
5858 if (!strict
|| size
== 1)
5861 /* For platforms that do not have a Kconfig enabling
5862 * CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS the value of
5863 * NET_IP_ALIGN is universally set to '2'. And on platforms
5864 * that do set CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS, we get
5865 * to this code only in strict mode where we want to emulate
5866 * the NET_IP_ALIGN==2 checking. Therefore use an
5867 * unconditional IP align value of '2'.
5871 reg_off
= tnum_add(reg
->var_off
, tnum_const(ip_align
+ reg
->off
+ off
));
5872 if (!tnum_is_aligned(reg_off
, size
)) {
5875 tnum_strn(tn_buf
, sizeof(tn_buf
), reg
->var_off
);
5877 "misaligned packet access off %d+%s+%d+%d size %d\n",
5878 ip_align
, tn_buf
, reg
->off
, off
, size
);
5885 static int check_generic_ptr_alignment(struct bpf_verifier_env
*env
,
5886 const struct bpf_reg_state
*reg
,
5887 const char *pointer_desc
,
5888 int off
, int size
, bool strict
)
5890 struct tnum reg_off
;
5892 /* Byte size accesses are always allowed. */
5893 if (!strict
|| size
== 1)
5896 reg_off
= tnum_add(reg
->var_off
, tnum_const(reg
->off
+ off
));
5897 if (!tnum_is_aligned(reg_off
, size
)) {
5900 tnum_strn(tn_buf
, sizeof(tn_buf
), reg
->var_off
);
5901 verbose(env
, "misaligned %saccess off %s+%d+%d size %d\n",
5902 pointer_desc
, tn_buf
, reg
->off
, off
, size
);
static int check_ptr_alignment(struct bpf_verifier_env *env,
			       const struct bpf_reg_state *reg, int off,
			       int size, bool strict_alignment_once)
{
	bool strict = env->strict_alignment || strict_alignment_once;
	const char *pointer_desc = "";

	switch (reg->type) {
	case PTR_TO_PACKET:
	case PTR_TO_PACKET_META:
		/* Special case, because of NET_IP_ALIGN. Given metadata sits
		 * right in front, treat it the very same way.
		 */
		return check_pkt_ptr_alignment(env, reg, off, size, strict);
	case PTR_TO_FLOW_KEYS:
		pointer_desc = "flow keys ";
		break;
	case PTR_TO_MAP_KEY:
		pointer_desc = "key ";
		break;
	case PTR_TO_MAP_VALUE:
		pointer_desc = "value ";
		break;
	case PTR_TO_CTX:
		pointer_desc = "context ";
		break;
	case PTR_TO_STACK:
		pointer_desc = "stack ";
		/* The stack spill tracking logic in check_stack_write_fixed_off()
		 * and check_stack_read_fixed_off() relies on stack accesses being
		 * aligned.
		 */
		strict = true;
		break;
	case PTR_TO_SOCKET:
		pointer_desc = "sock ";
		break;
	case PTR_TO_SOCK_COMMON:
		pointer_desc = "sock_common ";
		break;
	case PTR_TO_TCP_SOCK:
		pointer_desc = "tcp_sock ";
		break;
	case PTR_TO_XDP_SOCK:
		pointer_desc = "xdp_sock ";
		break;
	default:
		break;
	}
	return check_generic_ptr_alignment(env, reg, pointer_desc, off, size,
					   strict);
}
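
/* Illustrative sketch of what the alignment checks above reject. Assuming a
 * program loaded with strict alignment in effect (BPF_F_STRICT_ALIGNMENT, or
 * an arch without efficient unaligned access) and r1 holding a packet
 * pointer, the following pseudo BPF assembly (not code from this file) shows
 * the NET_IP_ALIGN==2 accounting:
 *
 *   r2 = *(u16 *)(r1 + 0);   // 2 (ip_align) + 0 is 2-byte aligned, allowed
 *   r2 = *(u16 *)(r1 + 1);   // 2 + 1 = 3 is not 2-byte aligned, rejected
 *   r2 = *(u8 *)(r1 + 1);    // size == 1 accesses are always allowed
 */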
static int update_stack_depth(struct bpf_verifier_env *env,
			      const struct bpf_func_state *func,
			      int off)
{
	u16 stack = env->subprog_info[func->subprogno].stack_depth;

	if (stack >= -off)
		return 0;

	/* update known max for given subprogram */
	env->subprog_info[func->subprogno].stack_depth = -off;
	return 0;
}
/* starting from main bpf function walk all instructions of the function
 * and recursively walk all callees that given function can call.
 * Ignore jump and exit insns.
 * Since recursion is prevented by check_cfg() this algorithm
 * only needs a local stack of MAX_CALL_FRAMES to remember callsites
 */
5982 static int check_max_stack_depth_subprog(struct bpf_verifier_env
*env
, int idx
)
5984 struct bpf_subprog_info
*subprog
= env
->subprog_info
;
5985 struct bpf_insn
*insn
= env
->prog
->insnsi
;
5986 int depth
= 0, frame
= 0, i
, subprog_end
;
5987 bool tail_call_reachable
= false;
5988 int ret_insn
[MAX_CALL_FRAMES
];
5989 int ret_prog
[MAX_CALL_FRAMES
];
5992 i
= subprog
[idx
].start
;
	/* protect against potential stack overflow that might happen when
	 * bpf2bpf calls get combined with tailcalls. Limit the caller's stack
	 * depth for such case down to 256 so that the worst case scenario
	 * would result in 8k stack size (32 which is tailcall limit * 256 =
	 * 8k).
	 *
	 * To get the idea what might happen, see an example:
	 * func1 -> sub rsp, 128
	 *  subfunc1 -> sub rsp, 256
	 *  tailcall1 -> add rsp, 256
	 *   func2 -> sub rsp, 192 (total stack size = 128 + 192 = 320)
	 *   subfunc2 -> sub rsp, 64
	 *   subfunc22 -> sub rsp, 128
	 *   tailcall2 -> add rsp, 128
	 *    func3 -> sub rsp, 32 (total stack size 128 + 192 + 64 + 32 = 416)
	 *
	 * tailcall will unwind the current stack frame but it will not get rid
	 * of caller's stack as shown on the example above.
	 */
6013 if (idx
&& subprog
[idx
].has_tail_call
&& depth
>= 256) {
6015 "tail_calls are not allowed when call stack of previous frames is %d bytes. Too large\n",
6019 /* round up to 32-bytes, since this is granularity
6020 * of interpreter stack size
6022 depth
+= round_up(max_t(u32
, subprog
[idx
].stack_depth
, 1), 32);
6023 if (depth
> MAX_BPF_STACK
) {
6024 verbose(env
, "combined stack size of %d calls is %d. Too large\n",
6029 subprog_end
= subprog
[idx
+ 1].start
;
6030 for (; i
< subprog_end
; i
++) {
6031 int next_insn
, sidx
;
6033 if (bpf_pseudo_kfunc_call(insn
+ i
) && !insn
[i
].off
) {
6036 if (!is_bpf_throw_kfunc(insn
+ i
))
6038 if (subprog
[idx
].is_cb
)
6040 for (int c
= 0; c
< frame
&& !err
; c
++) {
6041 if (subprog
[ret_prog
[c
]].is_cb
) {
6049 "bpf_throw kfunc (insn %d) cannot be called from callback subprog %d\n",
6054 if (!bpf_pseudo_call(insn
+ i
) && !bpf_pseudo_func(insn
+ i
))
6056 /* remember insn and function to return to */
6057 ret_insn
[frame
] = i
+ 1;
6058 ret_prog
[frame
] = idx
;
6060 /* find the callee */
6061 next_insn
= i
+ insn
[i
].imm
+ 1;
6062 sidx
= find_subprog(env
, next_insn
);
6064 WARN_ONCE(1, "verifier bug. No program starts at insn %d\n",
6068 if (subprog
[sidx
].is_async_cb
) {
6069 if (subprog
[sidx
].has_tail_call
) {
6070 verbose(env
, "verifier bug. subprog has tail_call and async cb\n");
6073 /* async callbacks don't increase bpf prog stack size unless called directly */
6074 if (!bpf_pseudo_call(insn
+ i
))
6076 if (subprog
[sidx
].is_exception_cb
) {
6077 verbose(env
, "insn %d cannot call exception cb directly\n", i
);
6084 if (subprog
[idx
].has_tail_call
)
6085 tail_call_reachable
= true;
6088 if (frame
>= MAX_CALL_FRAMES
) {
6089 verbose(env
, "the call stack of %d frames is too deep !\n",
	/* if tail call got detected across bpf2bpf calls then mark each of the
	 * currently present subprog frames as tail call reachable subprogs;
	 * this info will be utilized by JIT so that we will be preserving the
	 * tail call counter throughout bpf2bpf calls combined with tailcalls
	 */
6100 if (tail_call_reachable
)
6101 for (j
= 0; j
< frame
; j
++) {
6102 if (subprog
[ret_prog
[j
]].is_exception_cb
) {
6103 verbose(env
, "cannot tail call within exception cb\n");
6106 subprog
[ret_prog
[j
]].tail_call_reachable
= true;
6108 if (subprog
[0].tail_call_reachable
)
6109 env
->prog
->aux
->tail_call_reachable
= true;
6111 /* end of for() loop means the last insn of the 'subprog'
6112 * was reached. Doesn't matter whether it was JA or EXIT
6116 depth
-= round_up(max_t(u32
, subprog
[idx
].stack_depth
, 1), 32);
6118 i
= ret_insn
[frame
];
6119 idx
= ret_prog
[frame
];
static int check_max_stack_depth(struct bpf_verifier_env *env)
{
	struct bpf_subprog_info *si = env->subprog_info;
	int ret;

	for (int i = 0; i < env->subprog_cnt; i++) {
		if (!i || si[i].is_async_cb) {
			ret = check_max_stack_depth_subprog(env, i);
			if (ret < 0)
				return ret;
		}
	}
	return 0;
}
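
/* Illustrative sketch of the accounting done above (numbers assumed, not
 * taken from this file): with MAX_BPF_STACK == 512, a main function that
 * uses 384 bytes of stack and calls a subprog using 192 bytes reaches a
 * combined depth of 384 + 192 = 576 > 512, so the program is rejected with
 * "combined stack size of 2 calls is 576. Too large", even though each
 * function individually fits within the per-function limit.
 */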
#ifndef CONFIG_BPF_JIT_ALWAYS_ON
static int get_callee_stack_depth(struct bpf_verifier_env *env,
				  const struct bpf_insn *insn, int idx)
{
	int start = idx + insn->imm + 1, subprog;

	subprog = find_subprog(env, start);
	if (subprog < 0) {
		WARN_ONCE(1, "verifier bug. No program starts at insn %d\n",
			  start);
		return -EFAULT;
	}
	return env->subprog_info[subprog].stack_depth;
}
#endif
static int __check_buffer_access(struct bpf_verifier_env *env,
				 const char *buf_info,
				 const struct bpf_reg_state *reg,
				 int regno, int off, int size)
{
	if (off < 0) {
		verbose(env,
			"R%d invalid %s buffer access: off=%d, size=%d\n",
			regno, buf_info, off, size);
		return -EACCES;
	}
	if (!tnum_is_const(reg->var_off) || reg->var_off.value) {
		char tn_buf[48];

		tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
		verbose(env,
			"R%d invalid variable buffer offset: off=%d, var_off=%s\n",
			regno, off, tn_buf);
		return -EACCES;
	}

	return 0;
}
static int check_tp_buffer_access(struct bpf_verifier_env *env,
				  const struct bpf_reg_state *reg,
				  int regno, int off, int size)
{
	int err;

	err = __check_buffer_access(env, "tracepoint", reg, regno, off, size);
	if (err)
		return err;

	if (off + size > env->prog->aux->max_tp_access)
		env->prog->aux->max_tp_access = off + size;

	return 0;
}
static int check_buffer_access(struct bpf_verifier_env *env,
			       const struct bpf_reg_state *reg,
			       int regno, int off, int size,
			       bool zero_size_allowed,
			       u32 *max_access)
{
	const char *buf_info = type_is_rdonly_mem(reg->type) ? "rdonly" : "rdwr";
	int err;

	err = __check_buffer_access(env, buf_info, reg, regno, off, size);
	if (err)
		return err;

	if (off + size > *max_access)
		*max_access = off + size;

	return 0;
}
/* BPF architecture zero extends alu32 ops into 64-bit registers */
static void zext_32_to_64(struct bpf_reg_state *reg)
{
	reg->var_off = tnum_subreg(reg->var_off);
	__reg_assign_32_into_64(reg);
}

/* truncate register to smaller size (in bytes)
 * must be called with size < BPF_REG_SIZE
 */
static void coerce_reg_to_size(struct bpf_reg_state *reg, int size)
{
	u64 mask;

	/* clear high bits in bit representation */
	reg->var_off = tnum_cast(reg->var_off, size);

	/* fix arithmetic bounds */
	mask = ((u64)1 << (size * 8)) - 1;
	if ((reg->umin_value & ~mask) == (reg->umax_value & ~mask)) {
		reg->umin_value &= mask;
		reg->umax_value &= mask;
	} else {
		reg->umin_value = 0;
		reg->umax_value = mask;
	}
	reg->smin_value = reg->umin_value;
	reg->smax_value = reg->umax_value;

	/* If size is smaller than 32bit register the 32bit register
	 * values are also truncated so we push 64-bit bounds into
	 * 32-bit bounds. Above were truncated < 32-bits already.
	 */
	if (size < 4)
		__reg_combine_64_into_32(reg);
}
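
/* Worked example for coerce_reg_to_size() (values assumed): truncating a
 * register known to be in [0, 300] to size == 1 gives mask == 0xff. Since
 * 0 & ~0xff differs from 300 & ~0xff, the old bounds cannot be kept and the
 * result is the conservative [0, 255]. Had the range been [256, 300], both
 * ends share the same upper bits and the bounds simply become [0, 44] after
 * masking.
 */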
static void set_sext64_default_val(struct bpf_reg_state *reg, int size)
{
	if (size == 1) {
		reg->smin_value = reg->s32_min_value = S8_MIN;
		reg->smax_value = reg->s32_max_value = S8_MAX;
	} else if (size == 2) {
		reg->smin_value = reg->s32_min_value = S16_MIN;
		reg->smax_value = reg->s32_max_value = S16_MAX;
	} else {
		reg->smin_value = reg->s32_min_value = S32_MIN;
		reg->smax_value = reg->s32_max_value = S32_MAX;
	}
	reg->umin_value = reg->u32_min_value = 0;
	reg->umax_value = U64_MAX;
	reg->u32_max_value = U32_MAX;
	reg->var_off = tnum_unknown;
}
6271 static void coerce_reg_to_size_sx(struct bpf_reg_state
*reg
, int size
)
6273 s64 init_s64_max
, init_s64_min
, s64_max
, s64_min
, u64_cval
;
6274 u64 top_smax_value
, top_smin_value
;
6275 u64 num_bits
= size
* 8;
6277 if (tnum_is_const(reg
->var_off
)) {
6278 u64_cval
= reg
->var_off
.value
;
6280 reg
->var_off
= tnum_const((s8
)u64_cval
);
6282 reg
->var_off
= tnum_const((s16
)u64_cval
);
6285 reg
->var_off
= tnum_const((s32
)u64_cval
);
6287 u64_cval
= reg
->var_off
.value
;
6288 reg
->smax_value
= reg
->smin_value
= u64_cval
;
6289 reg
->umax_value
= reg
->umin_value
= u64_cval
;
6290 reg
->s32_max_value
= reg
->s32_min_value
= u64_cval
;
6291 reg
->u32_max_value
= reg
->u32_min_value
= u64_cval
;
6295 top_smax_value
= ((u64
)reg
->smax_value
>> num_bits
) << num_bits
;
6296 top_smin_value
= ((u64
)reg
->smin_value
>> num_bits
) << num_bits
;
6298 if (top_smax_value
!= top_smin_value
)
	/* find the s64_max and s64_min after sign extension */
6303 init_s64_max
= (s8
)reg
->smax_value
;
6304 init_s64_min
= (s8
)reg
->smin_value
;
6305 } else if (size
== 2) {
6306 init_s64_max
= (s16
)reg
->smax_value
;
6307 init_s64_min
= (s16
)reg
->smin_value
;
6309 init_s64_max
= (s32
)reg
->smax_value
;
6310 init_s64_min
= (s32
)reg
->smin_value
;
6313 s64_max
= max(init_s64_max
, init_s64_min
);
6314 s64_min
= min(init_s64_max
, init_s64_min
);
6316 /* both of s64_max/s64_min positive or negative */
6317 if ((s64_max
>= 0) == (s64_min
>= 0)) {
6318 reg
->smin_value
= reg
->s32_min_value
= s64_min
;
6319 reg
->smax_value
= reg
->s32_max_value
= s64_max
;
6320 reg
->umin_value
= reg
->u32_min_value
= s64_min
;
6321 reg
->umax_value
= reg
->u32_max_value
= s64_max
;
6322 reg
->var_off
= tnum_range(s64_min
, s64_max
);
6327 set_sext64_default_val(reg
, size
);
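
/* Worked example for coerce_reg_to_size_sx() (values assumed): for an S8
 * sign extension of a register bounded by [0x10, 0x20], both bounds remain
 * non-negative after the (s8) cast, so the result is simply [16, 32]. For
 * [0x70, 0x90] the casts give 0x70 -> 112 but 0x90 -> -112, the signs
 * disagree, and set_sext64_default_val() falls back to the full S8 range.
 */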
static void set_sext32_default_val(struct bpf_reg_state *reg, int size)
{
	if (size == 1) {
		reg->s32_min_value = S8_MIN;
		reg->s32_max_value = S8_MAX;
	} else {
		reg->s32_min_value = S16_MIN;
		reg->s32_max_value = S16_MAX;
	}
	reg->u32_min_value = 0;
	reg->u32_max_value = U32_MAX;
}
6344 static void coerce_subreg_to_size_sx(struct bpf_reg_state
*reg
, int size
)
6346 s32 init_s32_max
, init_s32_min
, s32_max
, s32_min
, u32_val
;
6347 u32 top_smax_value
, top_smin_value
;
6348 u32 num_bits
= size
* 8;
6350 if (tnum_is_const(reg
->var_off
)) {
6351 u32_val
= reg
->var_off
.value
;
6353 reg
->var_off
= tnum_const((s8
)u32_val
);
6355 reg
->var_off
= tnum_const((s16
)u32_val
);
6357 u32_val
= reg
->var_off
.value
;
6358 reg
->s32_min_value
= reg
->s32_max_value
= u32_val
;
6359 reg
->u32_min_value
= reg
->u32_max_value
= u32_val
;
6363 top_smax_value
= ((u32
)reg
->s32_max_value
>> num_bits
) << num_bits
;
6364 top_smin_value
= ((u32
)reg
->s32_min_value
>> num_bits
) << num_bits
;
6366 if (top_smax_value
!= top_smin_value
)
	/* find the s32_max and s32_min after sign extension */
6371 init_s32_max
= (s8
)reg
->s32_max_value
;
6372 init_s32_min
= (s8
)reg
->s32_min_value
;
6375 init_s32_max
= (s16
)reg
->s32_max_value
;
6376 init_s32_min
= (s16
)reg
->s32_min_value
;
6378 s32_max
= max(init_s32_max
, init_s32_min
);
6379 s32_min
= min(init_s32_max
, init_s32_min
);
6381 if ((s32_min
>= 0) == (s32_max
>= 0)) {
6382 reg
->s32_min_value
= s32_min
;
6383 reg
->s32_max_value
= s32_max
;
6384 reg
->u32_min_value
= (u32
)s32_min
;
6385 reg
->u32_max_value
= (u32
)s32_max
;
6390 set_sext32_default_val(reg
, size
);
static bool bpf_map_is_rdonly(const struct bpf_map *map)
{
	/* A map is considered read-only if the following conditions are true:
	 *
	 * 1) BPF program side cannot change any of the map content. The
	 *    BPF_F_RDONLY_PROG flag is set at map creation time and stays
	 *    in effect throughout the lifetime of the map.
	 * 2) The map value(s) have been initialized from user space by a
	 *    loader and then "frozen", such that no new map update/delete
	 *    operations from syscall side are possible for the rest of
	 *    the map's lifetime from that point onwards.
	 * 3) Any parallel/pending map update/delete operations from syscall
	 *    side have been completed. Only after that point, it's safe to
	 *    assume that map value(s) are immutable.
	 */
	return (map->map_flags & BPF_F_RDONLY_PROG) &&
	       READ_ONCE(map->frozen) &&
	       !bpf_map_write_active(map);
}
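
/* Sketch of how user space typically produces a map that satisfies
 * bpf_map_is_rdonly() (libbpf API assumed; error handling omitted):
 *
 *   LIBBPF_OPTS(bpf_map_create_opts, opts, .map_flags = BPF_F_RDONLY_PROG);
 *   int fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, "cfg",
 *                           sizeof(__u32), 64, 1, &opts);
 *   __u32 zero = 0;
 *   bpf_map_update_elem(fd, &zero, value, 0);   // initialize contents
 *   bpf_map_freeze(fd);                         // no further syscall writes
 *
 * Once frozen (and pending writers have drained), loads from the map can be
 * tracked as constant scalars via bpf_map_direct_read() below.
 */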
static int bpf_map_direct_read(struct bpf_map *map, int off, int size, u64 *val,
			       bool is_ldsx)
{
	void *ptr;
	u64 addr;
	int err;

	err = map->ops->map_direct_value_addr(map, &addr, off);
	if (err)
		return err;
	ptr = (void *)(long)addr + off;

	switch (size) {
	case sizeof(u8):
		*val = is_ldsx ? (s64)*(s8 *)ptr : (u64)*(u8 *)ptr;
		break;
	case sizeof(u16):
		*val = is_ldsx ? (s64)*(s16 *)ptr : (u64)*(u16 *)ptr;
		break;
	case sizeof(u32):
		*val = is_ldsx ? (s64)*(s32 *)ptr : (u64)*(u32 *)ptr;
		break;
	case sizeof(u64):
		*val = *(u64 *)ptr;
		break;
	default:
		return -EINVAL;
	}
	return 0;
}
#define BTF_TYPE_SAFE_RCU(__type)  __PASTE(__type, __safe_rcu)
#define BTF_TYPE_SAFE_RCU_OR_NULL(__type)  __PASTE(__type, __safe_rcu_or_null)
#define BTF_TYPE_SAFE_TRUSTED(__type)  __PASTE(__type, __safe_trusted)

/*
 * Allow list few fields as RCU trusted or full trusted.
 * This logic doesn't allow mix tagging and will be removed once GCC supports
 * btf_type_tag.
 */

/* RCU trusted: these fields are trusted in RCU CS and never NULL */
BTF_TYPE_SAFE_RCU(struct task_struct) {
	const cpumask_t *cpus_ptr;
	struct css_set __rcu *cgroups;
	struct task_struct __rcu *real_parent;
	struct task_struct *group_leader;
};

BTF_TYPE_SAFE_RCU(struct cgroup) {
	/* cgrp->kn is always accessible as documented in kernel/cgroup/cgroup.c */
	struct kernfs_node *kn;
};

BTF_TYPE_SAFE_RCU(struct css_set) {
	struct cgroup *dfl_cgrp;
};

/* RCU trusted: these fields are trusted in RCU CS and can be NULL */
BTF_TYPE_SAFE_RCU_OR_NULL(struct mm_struct) {
	struct file __rcu *exe_file;
};

/* skb->sk, req->sk are not RCU protected, but we mark them as such
 * because bpf prog accessible sockets are SOCK_RCU_FREE.
 */
BTF_TYPE_SAFE_RCU_OR_NULL(struct sk_buff) {
	struct sock *sk;
};

BTF_TYPE_SAFE_RCU_OR_NULL(struct request_sock) {
	struct sock *sk;
};

/* full trusted: these fields are trusted even outside of RCU CS and never NULL */
BTF_TYPE_SAFE_TRUSTED(struct bpf_iter_meta) {
	struct seq_file *seq;
};

BTF_TYPE_SAFE_TRUSTED(struct bpf_iter__task) {
	struct bpf_iter_meta *meta;
	struct task_struct *task;
};

BTF_TYPE_SAFE_TRUSTED(struct linux_binprm) {
	struct file *file;
};

BTF_TYPE_SAFE_TRUSTED(struct file) {
	struct inode *f_inode;
};

BTF_TYPE_SAFE_TRUSTED(struct dentry) {
	/* no negative dentry-s in places where bpf can see it */
	struct inode *d_inode;
};

BTF_TYPE_SAFE_TRUSTED(struct socket) {
	struct sock *sk;
};
static bool type_is_rcu(struct bpf_verifier_env *env,
			struct bpf_reg_state *reg,
			const char *field_name, u32 btf_id)
{
	BTF_TYPE_EMIT(BTF_TYPE_SAFE_RCU(struct task_struct));
	BTF_TYPE_EMIT(BTF_TYPE_SAFE_RCU(struct cgroup));
	BTF_TYPE_EMIT(BTF_TYPE_SAFE_RCU(struct css_set));

	return btf_nested_type_is_trusted(&env->log, reg, field_name, btf_id, "__safe_rcu");
}

static bool type_is_rcu_or_null(struct bpf_verifier_env *env,
				struct bpf_reg_state *reg,
				const char *field_name, u32 btf_id)
{
	BTF_TYPE_EMIT(BTF_TYPE_SAFE_RCU_OR_NULL(struct mm_struct));
	BTF_TYPE_EMIT(BTF_TYPE_SAFE_RCU_OR_NULL(struct sk_buff));
	BTF_TYPE_EMIT(BTF_TYPE_SAFE_RCU_OR_NULL(struct request_sock));

	return btf_nested_type_is_trusted(&env->log, reg, field_name, btf_id, "__safe_rcu_or_null");
}

static bool type_is_trusted(struct bpf_verifier_env *env,
			    struct bpf_reg_state *reg,
			    const char *field_name, u32 btf_id)
{
	BTF_TYPE_EMIT(BTF_TYPE_SAFE_TRUSTED(struct bpf_iter_meta));
	BTF_TYPE_EMIT(BTF_TYPE_SAFE_TRUSTED(struct bpf_iter__task));
	BTF_TYPE_EMIT(BTF_TYPE_SAFE_TRUSTED(struct linux_binprm));
	BTF_TYPE_EMIT(BTF_TYPE_SAFE_TRUSTED(struct file));
	BTF_TYPE_EMIT(BTF_TYPE_SAFE_TRUSTED(struct dentry));
	BTF_TYPE_EMIT(BTF_TYPE_SAFE_TRUSTED(struct socket));

	return btf_nested_type_is_trusted(&env->log, reg, field_name, btf_id, "__safe_trusted");
}
6550 static int check_ptr_to_btf_access(struct bpf_verifier_env
*env
,
6551 struct bpf_reg_state
*regs
,
6552 int regno
, int off
, int size
,
6553 enum bpf_access_type atype
,
6556 struct bpf_reg_state
*reg
= regs
+ regno
;
6557 const struct btf_type
*t
= btf_type_by_id(reg
->btf
, reg
->btf_id
);
6558 const char *tname
= btf_name_by_offset(reg
->btf
, t
->name_off
);
6559 const char *field_name
= NULL
;
6560 enum bpf_type_flag flag
= 0;
6564 if (!env
->allow_ptr_leaks
) {
6566 "'struct %s' access is allowed only to CAP_PERFMON and CAP_SYS_ADMIN\n",
6570 if (!env
->prog
->gpl_compatible
&& btf_is_kernel(reg
->btf
)) {
6572 "Cannot access kernel 'struct %s' from non-GPL compatible program\n",
6578 "R%d is ptr_%s invalid negative access: off=%d\n",
6582 if (!tnum_is_const(reg
->var_off
) || reg
->var_off
.value
) {
6585 tnum_strn(tn_buf
, sizeof(tn_buf
), reg
->var_off
);
6587 "R%d is ptr_%s invalid variable offset: off=%d, var_off=%s\n",
6588 regno
, tname
, off
, tn_buf
);
6592 if (reg
->type
& MEM_USER
) {
6594 "R%d is ptr_%s access user memory: off=%d\n",
6599 if (reg
->type
& MEM_PERCPU
) {
6601 "R%d is ptr_%s access percpu memory: off=%d\n",
6606 if (env
->ops
->btf_struct_access
&& !type_is_alloc(reg
->type
) && atype
== BPF_WRITE
) {
6607 if (!btf_is_kernel(reg
->btf
)) {
6608 verbose(env
, "verifier internal error: reg->btf must be kernel btf\n");
6611 ret
= env
->ops
->btf_struct_access(&env
->log
, reg
, off
, size
);
6613 /* Writes are permitted with default btf_struct_access for
6614 * program allocated objects (which always have ref_obj_id > 0),
6615 * but not for untrusted PTR_TO_BTF_ID | MEM_ALLOC.
6617 if (atype
!= BPF_READ
&& !type_is_ptr_alloc_obj(reg
->type
)) {
6618 verbose(env
, "only read is supported\n");
6622 if (type_is_alloc(reg
->type
) && !type_is_non_owning_ref(reg
->type
) &&
6623 !(reg
->type
& MEM_RCU
) && !reg
->ref_obj_id
) {
6624 verbose(env
, "verifier internal error: ref_obj_id for allocated object must be non-zero\n");
6628 ret
= btf_struct_access(&env
->log
, reg
, off
, size
, atype
, &btf_id
, &flag
, &field_name
);
6634 if (ret
!= PTR_TO_BTF_ID
) {
6637 } else if (type_flag(reg
->type
) & PTR_UNTRUSTED
) {
6638 /* If this is an untrusted pointer, all pointers formed by walking it
6639 * also inherit the untrusted flag.
6641 flag
= PTR_UNTRUSTED
;
6643 } else if (is_trusted_reg(reg
) || is_rcu_reg(reg
)) {
		/* By default any pointer obtained from walking a trusted pointer is no
		 * longer trusted, unless the field being accessed has explicitly been
		 * marked as inheriting its parent's state of trust (either full or RCU).
		 * For example:
		 * 'cgroups' pointer is untrusted if task->cgroups dereference
		 * happened in a sleepable program outside of bpf_rcu_read_lock()
		 * section. In a non-sleepable program it's trusted while in RCU CS (aka MEM_RCU).
		 * Note bpf_rcu_read_unlock() converts MEM_RCU pointers to PTR_UNTRUSTED.
		 *
		 * A regular RCU-protected pointer with __rcu tag can also be deemed
		 * trusted if we are in an RCU CS. Such pointer can be NULL.
		 */
6656 if (type_is_trusted(env
, reg
, field_name
, btf_id
)) {
6657 flag
|= PTR_TRUSTED
;
6658 } else if (in_rcu_cs(env
) && !type_may_be_null(reg
->type
)) {
6659 if (type_is_rcu(env
, reg
, field_name
, btf_id
)) {
6660 /* ignore __rcu tag and mark it MEM_RCU */
6662 } else if (flag
& MEM_RCU
||
6663 type_is_rcu_or_null(env
, reg
, field_name
, btf_id
)) {
6664 /* __rcu tagged pointers can be NULL */
6665 flag
|= MEM_RCU
| PTR_MAYBE_NULL
;
6667 /* We always trust them */
6668 if (type_is_rcu_or_null(env
, reg
, field_name
, btf_id
) &&
6669 flag
& PTR_UNTRUSTED
)
6670 flag
&= ~PTR_UNTRUSTED
;
6671 } else if (flag
& (MEM_PERCPU
| MEM_USER
)) {
6674 /* walking unknown pointers yields old deprecated PTR_TO_BTF_ID */
6675 clear_trusted_flags(&flag
);
6679 * If not in RCU CS or MEM_RCU pointer can be NULL then
6680 * aggressively mark as untrusted otherwise such
6681 * pointers will be plain PTR_TO_BTF_ID without flags
6682 * and will be allowed to be passed into helpers for
6685 flag
= PTR_UNTRUSTED
;
6688 /* Old compat. Deprecated */
6689 clear_trusted_flags(&flag
);
6692 if (atype
== BPF_READ
&& value_regno
>= 0)
6693 mark_btf_ld_reg(env
, regs
, value_regno
, ret
, reg
->btf
, btf_id
, flag
);
6698 static int check_ptr_to_map_access(struct bpf_verifier_env
*env
,
6699 struct bpf_reg_state
*regs
,
6700 int regno
, int off
, int size
,
6701 enum bpf_access_type atype
,
6704 struct bpf_reg_state
*reg
= regs
+ regno
;
6705 struct bpf_map
*map
= reg
->map_ptr
;
6706 struct bpf_reg_state map_reg
;
6707 enum bpf_type_flag flag
= 0;
6708 const struct btf_type
*t
;
6714 verbose(env
, "map_ptr access not supported without CONFIG_DEBUG_INFO_BTF\n");
6718 if (!map
->ops
->map_btf_id
|| !*map
->ops
->map_btf_id
) {
6719 verbose(env
, "map_ptr access not supported for map type %d\n",
6724 t
= btf_type_by_id(btf_vmlinux
, *map
->ops
->map_btf_id
);
6725 tname
= btf_name_by_offset(btf_vmlinux
, t
->name_off
);
6727 if (!env
->allow_ptr_leaks
) {
6729 "'struct %s' access is allowed only to CAP_PERFMON and CAP_SYS_ADMIN\n",
6735 verbose(env
, "R%d is %s invalid negative access: off=%d\n",
6740 if (atype
!= BPF_READ
) {
6741 verbose(env
, "only read from %s is supported\n", tname
);
6745 /* Simulate access to a PTR_TO_BTF_ID */
6746 memset(&map_reg
, 0, sizeof(map_reg
));
6747 mark_btf_ld_reg(env
, &map_reg
, 0, PTR_TO_BTF_ID
, btf_vmlinux
, *map
->ops
->map_btf_id
, 0);
6748 ret
= btf_struct_access(&env
->log
, &map_reg
, off
, size
, atype
, &btf_id
, &flag
, NULL
);
6752 if (value_regno
>= 0)
6753 mark_btf_ld_reg(env
, regs
, value_regno
, ret
, btf_vmlinux
, btf_id
, flag
);
/* Check that the stack access at the given offset is within bounds. The
 * maximum valid offset is -1.
 *
 * The minimum valid offset is -MAX_BPF_STACK for writes, and
 * -state->allocated_stack for reads.
 */
static int check_stack_slot_within_bounds(int off,
					  struct bpf_func_state *state,
					  enum bpf_access_type t)
{
	int min_valid_off;

	if (t == BPF_WRITE)
		min_valid_off = -MAX_BPF_STACK;
	else
		min_valid_off = -state->allocated_stack;

	if (off < min_valid_off || off > -1)
		return -EACCES;
	return 0;
}
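
/* Illustrative accesses relative to the frame pointer r10 (pseudo BPF
 * assembly; offsets assumed, MAX_BPF_STACK == 512):
 *
 *   *(u64 *)(r10 - 8) = r1;     // off -8 is within [-512, -1], allowed
 *   *(u64 *)(r10 - 520) = r1;   // off -520 < -512, rejected
 *   r1 = *(u64 *)(r10 + 0);     // off 0 > -1, above the frame, rejected
 */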
/* Check that the stack access at 'regno + off' falls within the maximum stack
 * bounds.
 *
 * 'off' includes 'regno->off', but not its dynamic part (if any).
 */
6785 static int check_stack_access_within_bounds(
6786 struct bpf_verifier_env
*env
,
6787 int regno
, int off
, int access_size
,
6788 enum bpf_access_src src
, enum bpf_access_type type
)
6790 struct bpf_reg_state
*regs
= cur_regs(env
);
6791 struct bpf_reg_state
*reg
= regs
+ regno
;
6792 struct bpf_func_state
*state
= func(env
, reg
);
6793 int min_off
, max_off
;
6797 if (src
== ACCESS_HELPER
)
6798 /* We don't know if helpers are reading or writing (or both). */
6799 err_extra
= " indirect access to";
6800 else if (type
== BPF_READ
)
6801 err_extra
= " read from";
6803 err_extra
= " write to";
6805 if (tnum_is_const(reg
->var_off
)) {
6806 min_off
= reg
->var_off
.value
+ off
;
6807 if (access_size
> 0)
6808 max_off
= min_off
+ access_size
- 1;
6812 if (reg
->smax_value
>= BPF_MAX_VAR_OFF
||
6813 reg
->smin_value
<= -BPF_MAX_VAR_OFF
) {
6814 verbose(env
, "invalid unbounded variable-offset%s stack R%d\n",
6818 min_off
= reg
->smin_value
+ off
;
6819 if (access_size
> 0)
6820 max_off
= reg
->smax_value
+ off
+ access_size
- 1;
6825 err
= check_stack_slot_within_bounds(min_off
, state
, type
);
6827 err
= check_stack_slot_within_bounds(max_off
, state
, type
);
6830 if (tnum_is_const(reg
->var_off
)) {
6831 verbose(env
, "invalid%s stack R%d off=%d size=%d\n",
6832 err_extra
, regno
, off
, access_size
);
6836 tnum_strn(tn_buf
, sizeof(tn_buf
), reg
->var_off
);
6837 verbose(env
, "invalid variable-offset%s stack R%d var_off=%s size=%d\n",
6838 err_extra
, regno
, tn_buf
, access_size
);
/* check whether memory at (regno + off) is accessible for t = (read | write)
 * if t==write, value_regno is a register whose value is stored into memory
 * if t==read, value_regno is a register which will receive the value from memory
 * if t==write && value_regno==-1, some unknown value is stored into memory
 * if t==read && value_regno==-1, don't care what we read from memory
 */
6850 static int check_mem_access(struct bpf_verifier_env
*env
, int insn_idx
, u32 regno
,
6851 int off
, int bpf_size
, enum bpf_access_type t
,
6852 int value_regno
, bool strict_alignment_once
, bool is_ldsx
)
6854 struct bpf_reg_state
*regs
= cur_regs(env
);
6855 struct bpf_reg_state
*reg
= regs
+ regno
;
6856 struct bpf_func_state
*state
;
6859 size
= bpf_size_to_bytes(bpf_size
);
6863 /* alignment checks will add in reg->off themselves */
6864 err
= check_ptr_alignment(env
, reg
, off
, size
, strict_alignment_once
);
6868 /* for access checks, reg->off is just part of off */
6871 if (reg
->type
== PTR_TO_MAP_KEY
) {
6872 if (t
== BPF_WRITE
) {
6873 verbose(env
, "write to change key R%d not allowed\n", regno
);
6877 err
= check_mem_region_access(env
, regno
, off
, size
,
6878 reg
->map_ptr
->key_size
, false);
6881 if (value_regno
>= 0)
6882 mark_reg_unknown(env
, regs
, value_regno
);
6883 } else if (reg
->type
== PTR_TO_MAP_VALUE
) {
6884 struct btf_field
*kptr_field
= NULL
;
6886 if (t
== BPF_WRITE
&& value_regno
>= 0 &&
6887 is_pointer_value(env
, value_regno
)) {
6888 verbose(env
, "R%d leaks addr into map\n", value_regno
);
6891 err
= check_map_access_type(env
, regno
, off
, size
, t
);
6894 err
= check_map_access(env
, regno
, off
, size
, false, ACCESS_DIRECT
);
6897 if (tnum_is_const(reg
->var_off
))
6898 kptr_field
= btf_record_find(reg
->map_ptr
->record
,
6899 off
+ reg
->var_off
.value
, BPF_KPTR
);
6901 err
= check_map_kptr_access(env
, regno
, value_regno
, insn_idx
, kptr_field
);
6902 } else if (t
== BPF_READ
&& value_regno
>= 0) {
6903 struct bpf_map
*map
= reg
->map_ptr
;
6905 /* if map is read-only, track its contents as scalars */
6906 if (tnum_is_const(reg
->var_off
) &&
6907 bpf_map_is_rdonly(map
) &&
6908 map
->ops
->map_direct_value_addr
) {
6909 int map_off
= off
+ reg
->var_off
.value
;
6912 err
= bpf_map_direct_read(map
, map_off
, size
,
6917 regs
[value_regno
].type
= SCALAR_VALUE
;
6918 __mark_reg_known(®s
[value_regno
], val
);
6920 mark_reg_unknown(env
, regs
, value_regno
);
6923 } else if (base_type(reg
->type
) == PTR_TO_MEM
) {
6924 bool rdonly_mem
= type_is_rdonly_mem(reg
->type
);
6926 if (type_may_be_null(reg
->type
)) {
6927 verbose(env
, "R%d invalid mem access '%s'\n", regno
,
6928 reg_type_str(env
, reg
->type
));
6932 if (t
== BPF_WRITE
&& rdonly_mem
) {
6933 verbose(env
, "R%d cannot write into %s\n",
6934 regno
, reg_type_str(env
, reg
->type
));
6938 if (t
== BPF_WRITE
&& value_regno
>= 0 &&
6939 is_pointer_value(env
, value_regno
)) {
6940 verbose(env
, "R%d leaks addr into mem\n", value_regno
);
6944 err
= check_mem_region_access(env
, regno
, off
, size
,
6945 reg
->mem_size
, false);
6946 if (!err
&& value_regno
>= 0 && (t
== BPF_READ
|| rdonly_mem
))
6947 mark_reg_unknown(env
, regs
, value_regno
);
6948 } else if (reg
->type
== PTR_TO_CTX
) {
6949 enum bpf_reg_type reg_type
= SCALAR_VALUE
;
6950 struct btf
*btf
= NULL
;
6953 if (t
== BPF_WRITE
&& value_regno
>= 0 &&
6954 is_pointer_value(env
, value_regno
)) {
6955 verbose(env
, "R%d leaks addr into ctx\n", value_regno
);
6959 err
= check_ptr_off_reg(env
, reg
, regno
);
6963 err
= check_ctx_access(env
, insn_idx
, off
, size
, t
, ®_type
, &btf
,
6966 verbose_linfo(env
, insn_idx
, "; ");
6967 if (!err
&& t
== BPF_READ
&& value_regno
>= 0) {
6968 /* ctx access returns either a scalar, or a
6969 * PTR_TO_PACKET[_META,_END]. In the latter
6970 * case, we know the offset is zero.
6972 if (reg_type
== SCALAR_VALUE
) {
6973 mark_reg_unknown(env
, regs
, value_regno
);
6975 mark_reg_known_zero(env
, regs
,
6977 if (type_may_be_null(reg_type
))
6978 regs
[value_regno
].id
= ++env
->id_gen
;
6979 /* A load of ctx field could have different
6980 * actual load size with the one encoded in the
6981 * insn. When the dst is PTR, it is for sure not
6984 regs
[value_regno
].subreg_def
= DEF_NOT_SUBREG
;
6985 if (base_type(reg_type
) == PTR_TO_BTF_ID
) {
6986 regs
[value_regno
].btf
= btf
;
6987 regs
[value_regno
].btf_id
= btf_id
;
6990 regs
[value_regno
].type
= reg_type
;
6993 } else if (reg
->type
== PTR_TO_STACK
) {
6994 /* Basic bounds checks. */
6995 err
= check_stack_access_within_bounds(env
, regno
, off
, size
, ACCESS_DIRECT
, t
);
6999 state
= func(env
, reg
);
7000 err
= update_stack_depth(env
, state
, off
);
7005 err
= check_stack_read(env
, regno
, off
, size
,
7008 err
= check_stack_write(env
, regno
, off
, size
,
7009 value_regno
, insn_idx
);
7010 } else if (reg_is_pkt_pointer(reg
)) {
7011 if (t
== BPF_WRITE
&& !may_access_direct_pkt_data(env
, NULL
, t
)) {
7012 verbose(env
, "cannot write into packet\n");
7015 if (t
== BPF_WRITE
&& value_regno
>= 0 &&
7016 is_pointer_value(env
, value_regno
)) {
7017 verbose(env
, "R%d leaks addr into packet\n",
7021 err
= check_packet_access(env
, regno
, off
, size
, false);
7022 if (!err
&& t
== BPF_READ
&& value_regno
>= 0)
7023 mark_reg_unknown(env
, regs
, value_regno
);
7024 } else if (reg
->type
== PTR_TO_FLOW_KEYS
) {
7025 if (t
== BPF_WRITE
&& value_regno
>= 0 &&
7026 is_pointer_value(env
, value_regno
)) {
7027 verbose(env
, "R%d leaks addr into flow keys\n",
7032 err
= check_flow_keys_access(env
, off
, size
);
7033 if (!err
&& t
== BPF_READ
&& value_regno
>= 0)
7034 mark_reg_unknown(env
, regs
, value_regno
);
7035 } else if (type_is_sk_pointer(reg
->type
)) {
7036 if (t
== BPF_WRITE
) {
7037 verbose(env
, "R%d cannot write into %s\n",
7038 regno
, reg_type_str(env
, reg
->type
));
7041 err
= check_sock_access(env
, insn_idx
, regno
, off
, size
, t
);
7042 if (!err
&& value_regno
>= 0)
7043 mark_reg_unknown(env
, regs
, value_regno
);
7044 } else if (reg
->type
== PTR_TO_TP_BUFFER
) {
7045 err
= check_tp_buffer_access(env
, reg
, regno
, off
, size
);
7046 if (!err
&& t
== BPF_READ
&& value_regno
>= 0)
7047 mark_reg_unknown(env
, regs
, value_regno
);
7048 } else if (base_type(reg
->type
) == PTR_TO_BTF_ID
&&
7049 !type_may_be_null(reg
->type
)) {
7050 err
= check_ptr_to_btf_access(env
, regs
, regno
, off
, size
, t
,
7052 } else if (reg
->type
== CONST_PTR_TO_MAP
) {
7053 err
= check_ptr_to_map_access(env
, regs
, regno
, off
, size
, t
,
7055 } else if (base_type(reg
->type
) == PTR_TO_BUF
) {
7056 bool rdonly_mem
= type_is_rdonly_mem(reg
->type
);
7060 if (t
== BPF_WRITE
) {
7061 verbose(env
, "R%d cannot write into %s\n",
7062 regno
, reg_type_str(env
, reg
->type
));
7065 max_access
= &env
->prog
->aux
->max_rdonly_access
;
7067 max_access
= &env
->prog
->aux
->max_rdwr_access
;
7070 err
= check_buffer_access(env
, reg
, regno
, off
, size
, false,
7073 if (!err
&& value_regno
>= 0 && (rdonly_mem
|| t
== BPF_READ
))
7074 mark_reg_unknown(env
, regs
, value_regno
);
7076 verbose(env
, "R%d invalid mem access '%s'\n", regno
,
7077 reg_type_str(env
, reg
->type
));
7081 if (!err
&& size
< BPF_REG_SIZE
&& value_regno
>= 0 && t
== BPF_READ
&&
7082 regs
[value_regno
].type
== SCALAR_VALUE
) {
7084 /* b/h/w load zero-extends, mark upper bits as known 0 */
7085 coerce_reg_to_size(®s
[value_regno
], size
);
7087 coerce_reg_to_size_sx(®s
[value_regno
], size
);
7092 static int check_atomic(struct bpf_verifier_env
*env
, int insn_idx
, struct bpf_insn
*insn
)
7097 switch (insn
->imm
) {
7099 case BPF_ADD
| BPF_FETCH
:
7101 case BPF_AND
| BPF_FETCH
:
7103 case BPF_OR
| BPF_FETCH
:
7105 case BPF_XOR
| BPF_FETCH
:
7110 verbose(env
, "BPF_ATOMIC uses invalid atomic opcode %02x\n", insn
->imm
);
7114 if (BPF_SIZE(insn
->code
) != BPF_W
&& BPF_SIZE(insn
->code
) != BPF_DW
) {
7115 verbose(env
, "invalid atomic operand size\n");
7119 /* check src1 operand */
7120 err
= check_reg_arg(env
, insn
->src_reg
, SRC_OP
);
7124 /* check src2 operand */
7125 err
= check_reg_arg(env
, insn
->dst_reg
, SRC_OP
);
7129 if (insn
->imm
== BPF_CMPXCHG
) {
7130 /* Check comparison of R0 with memory location */
7131 const u32 aux_reg
= BPF_REG_0
;
7133 err
= check_reg_arg(env
, aux_reg
, SRC_OP
);
7137 if (is_pointer_value(env
, aux_reg
)) {
7138 verbose(env
, "R%d leaks addr into mem\n", aux_reg
);
7143 if (is_pointer_value(env
, insn
->src_reg
)) {
7144 verbose(env
, "R%d leaks addr into mem\n", insn
->src_reg
);
7148 if (is_ctx_reg(env
, insn
->dst_reg
) ||
7149 is_pkt_reg(env
, insn
->dst_reg
) ||
7150 is_flow_key_reg(env
, insn
->dst_reg
) ||
7151 is_sk_reg(env
, insn
->dst_reg
)) {
7152 verbose(env
, "BPF_ATOMIC stores into R%d %s is not allowed\n",
7154 reg_type_str(env
, reg_state(env
, insn
->dst_reg
)->type
));
7158 if (insn
->imm
& BPF_FETCH
) {
7159 if (insn
->imm
== BPF_CMPXCHG
)
7160 load_reg
= BPF_REG_0
;
7162 load_reg
= insn
->src_reg
;
7164 /* check and record load of old value */
7165 err
= check_reg_arg(env
, load_reg
, DST_OP
);
7169 /* This instruction accesses a memory location but doesn't
7170 * actually load it into a register.
7175 /* Check whether we can read the memory, with second call for fetch
7176 * case to simulate the register fill.
7178 err
= check_mem_access(env
, insn_idx
, insn
->dst_reg
, insn
->off
,
7179 BPF_SIZE(insn
->code
), BPF_READ
, -1, true, false);
7180 if (!err
&& load_reg
>= 0)
7181 err
= check_mem_access(env
, insn_idx
, insn
->dst_reg
, insn
->off
,
7182 BPF_SIZE(insn
->code
), BPF_READ
, load_reg
,
7187 /* Check whether we can write into the same memory. */
7188 err
= check_mem_access(env
, insn_idx
, insn
->dst_reg
, insn
->off
,
7189 BPF_SIZE(insn
->code
), BPF_WRITE
, -1, true, false);
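
/* Sketch of operations that lower to the BPF_ATOMIC instructions handled
 * above (BPF C builtins assumed; 'val' points into map value memory):
 *
 *   __sync_fetch_and_add(&val->counter, 1);          // atomic add (BPF_ADD)
 *   old = __sync_val_compare_and_swap(&val->state,   // BPF_CMPXCHG, compares
 *                                     OLD, NEW);     // against and loads R0
 *
 * The same operations with a ctx, packet, flow-keys or socket pointer as the
 * destination are rejected with "BPF_ATOMIC stores into R%d ... not allowed".
 */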
/* When register 'regno' is used to read the stack (either directly or through
 * a helper function) make sure that it's within stack boundary and, depending
 * on the access type, that all elements of the stack are initialized.
 *
 * 'off' includes 'regno->off', but not its dynamic part (if any).
 *
 * All registers that have been spilled on the stack in the slots within the
 * read offsets are marked as read.
 */
7205 static int check_stack_range_initialized(
7206 struct bpf_verifier_env
*env
, int regno
, int off
,
7207 int access_size
, bool zero_size_allowed
,
7208 enum bpf_access_src type
, struct bpf_call_arg_meta
*meta
)
7210 struct bpf_reg_state
*reg
= reg_state(env
, regno
);
7211 struct bpf_func_state
*state
= func(env
, reg
);
7212 int err
, min_off
, max_off
, i
, j
, slot
, spi
;
7213 char *err_extra
= type
== ACCESS_HELPER
? " indirect" : "";
7214 enum bpf_access_type bounds_check_type
;
	/* Some accesses can write anything into the stack, others are
	 * read-only.
	 */
	bool clobber = false;
7220 if (access_size
== 0 && !zero_size_allowed
) {
7221 verbose(env
, "invalid zero-sized read\n");
7225 if (type
== ACCESS_HELPER
) {
7226 /* The bounds checks for writes are more permissive than for
7227 * reads. However, if raw_mode is not set, we'll do extra
7230 bounds_check_type
= BPF_WRITE
;
7233 bounds_check_type
= BPF_READ
;
7235 err
= check_stack_access_within_bounds(env
, regno
, off
, access_size
,
7236 type
, bounds_check_type
);
7241 if (tnum_is_const(reg
->var_off
)) {
7242 min_off
= max_off
= reg
->var_off
.value
+ off
;
7244 /* Variable offset is prohibited for unprivileged mode for
7245 * simplicity since it requires corresponding support in
7246 * Spectre masking for stack ALU.
7247 * See also retrieve_ptr_limit().
7249 if (!env
->bypass_spec_v1
) {
7252 tnum_strn(tn_buf
, sizeof(tn_buf
), reg
->var_off
);
7253 verbose(env
, "R%d%s variable offset stack access prohibited for !root, var_off=%s\n",
7254 regno
, err_extra
, tn_buf
);
7257 /* Only initialized buffer on stack is allowed to be accessed
7258 * with variable offset. With uninitialized buffer it's hard to
7259 * guarantee that whole memory is marked as initialized on
7260 * helper return since specific bounds are unknown what may
7261 * cause uninitialized stack leaking.
7263 if (meta
&& meta
->raw_mode
)
7266 min_off
= reg
->smin_value
+ off
;
7267 max_off
= reg
->smax_value
+ off
;
7270 if (meta
&& meta
->raw_mode
) {
		/* Ensure we won't be overwriting dynptrs when simulating byte
		 * by byte access in check_helper_call using meta.access_size.
		 * This would be a problem if we have a helper in the future
		 * which takes:
		 *
		 *	helper(uninit_mem, len, dynptr)
		 *
		 * Now, uninit_mem may overlap with dynptr pointer. Hence, it
		 * may end up writing to dynptr itself when touching memory from
		 * arg 1. This can be relaxed on a case by case basis for known
		 * safe cases, but reject due to the possibility of aliasing by
		 * default.
		 */
7284 for (i
= min_off
; i
< max_off
+ access_size
; i
++) {
7285 int stack_off
= -i
- 1;
7288 /* raw_mode may write past allocated_stack */
7289 if (state
->allocated_stack
<= stack_off
)
7291 if (state
->stack
[spi
].slot_type
[stack_off
% BPF_REG_SIZE
] == STACK_DYNPTR
) {
7292 verbose(env
, "potential write to dynptr at off=%d disallowed\n", i
);
7296 meta
->access_size
= access_size
;
7297 meta
->regno
= regno
;
7301 for (i
= min_off
; i
< max_off
+ access_size
; i
++) {
7305 spi
= slot
/ BPF_REG_SIZE
;
7306 if (state
->allocated_stack
<= slot
)
7308 stype
= &state
->stack
[spi
].slot_type
[slot
% BPF_REG_SIZE
];
7309 if (*stype
== STACK_MISC
)
7311 if ((*stype
== STACK_ZERO
) ||
7312 (*stype
== STACK_INVALID
&& env
->allow_uninit_stack
)) {
7314 /* helper can write anything into the stack */
7315 *stype
= STACK_MISC
;
7320 if (is_spilled_reg(&state
->stack
[spi
]) &&
7321 (state
->stack
[spi
].spilled_ptr
.type
== SCALAR_VALUE
||
7322 env
->allow_ptr_leaks
)) {
7324 __mark_reg_unknown(env
, &state
->stack
[spi
].spilled_ptr
);
7325 for (j
= 0; j
< BPF_REG_SIZE
; j
++)
7326 scrub_spilled_slot(&state
->stack
[spi
].slot_type
[j
]);
7332 if (tnum_is_const(reg
->var_off
)) {
7333 verbose(env
, "invalid%s read from stack R%d off %d+%d size %d\n",
7334 err_extra
, regno
, min_off
, i
- min_off
, access_size
);
7338 tnum_strn(tn_buf
, sizeof(tn_buf
), reg
->var_off
);
7339 verbose(env
, "invalid%s read from stack R%d var_off %s+%d size %d\n",
7340 err_extra
, regno
, tn_buf
, i
- min_off
, access_size
);
7344 /* reading any byte out of 8-byte 'spill_slot' will cause
7345 * the whole slot to be marked as 'read'
7347 mark_reg_read(env
, &state
->stack
[spi
].spilled_ptr
,
7348 state
->stack
[spi
].spilled_ptr
.parent
,
7350 /* We do not set REG_LIVE_WRITTEN for stack slot, as we can not
7351 * be sure that whether stack slot is written to or not. Hence,
7352 * we must still conservatively propagate reads upwards even if
7353 * helper may write to the entire memory range.
7356 return update_stack_depth(env
, state
, min_off
);
7359 static int check_helper_mem_access(struct bpf_verifier_env
*env
, int regno
,
7360 int access_size
, bool zero_size_allowed
,
7361 struct bpf_call_arg_meta
*meta
)
7363 struct bpf_reg_state
*regs
= cur_regs(env
), *reg
= ®s
[regno
];
7366 switch (base_type(reg
->type
)) {
7368 case PTR_TO_PACKET_META
:
7369 return check_packet_access(env
, regno
, reg
->off
, access_size
,
7371 case PTR_TO_MAP_KEY
:
7372 if (meta
&& meta
->raw_mode
) {
7373 verbose(env
, "R%d cannot write into %s\n", regno
,
7374 reg_type_str(env
, reg
->type
));
7377 return check_mem_region_access(env
, regno
, reg
->off
, access_size
,
7378 reg
->map_ptr
->key_size
, false);
7379 case PTR_TO_MAP_VALUE
:
7380 if (check_map_access_type(env
, regno
, reg
->off
, access_size
,
7381 meta
&& meta
->raw_mode
? BPF_WRITE
:
7384 return check_map_access(env
, regno
, reg
->off
, access_size
,
7385 zero_size_allowed
, ACCESS_HELPER
);
7387 if (type_is_rdonly_mem(reg
->type
)) {
7388 if (meta
&& meta
->raw_mode
) {
7389 verbose(env
, "R%d cannot write into %s\n", regno
,
7390 reg_type_str(env
, reg
->type
));
7394 return check_mem_region_access(env
, regno
, reg
->off
,
7395 access_size
, reg
->mem_size
,
7398 if (type_is_rdonly_mem(reg
->type
)) {
7399 if (meta
&& meta
->raw_mode
) {
7400 verbose(env
, "R%d cannot write into %s\n", regno
,
7401 reg_type_str(env
, reg
->type
));
7405 max_access
= &env
->prog
->aux
->max_rdonly_access
;
7407 max_access
= &env
->prog
->aux
->max_rdwr_access
;
7409 return check_buffer_access(env
, reg
, regno
, reg
->off
,
7410 access_size
, zero_size_allowed
,
7413 return check_stack_range_initialized(
7415 regno
, reg
->off
, access_size
,
7416 zero_size_allowed
, ACCESS_HELPER
, meta
);
7418 return check_ptr_to_btf_access(env
, regs
, regno
, reg
->off
,
7419 access_size
, BPF_READ
, -1);
7421 /* in case the function doesn't know how to access the context,
7422 * (because we are in a program of type SYSCALL for example), we
7423 * can not statically check its size.
7424 * Dynamically check it now.
7426 if (!env
->ops
->convert_ctx_access
) {
7427 enum bpf_access_type atype
= meta
&& meta
->raw_mode
? BPF_WRITE
: BPF_READ
;
7428 int offset
= access_size
- 1;
7430 /* Allow zero-byte read from PTR_TO_CTX */
7431 if (access_size
== 0)
7432 return zero_size_allowed
? 0 : -EACCES
;
7434 return check_mem_access(env
, env
->insn_idx
, regno
, offset
, BPF_B
,
7435 atype
, -1, false, false);
7439 default: /* scalar_value or invalid ptr */
7440 /* Allow zero-byte read from NULL, regardless of pointer type */
7441 if (zero_size_allowed
&& access_size
== 0 &&
7442 register_is_null(reg
))
7445 verbose(env
, "R%d type=%s ", regno
,
7446 reg_type_str(env
, reg
->type
));
7447 verbose(env
, "expected=%s\n", reg_type_str(env
, PTR_TO_STACK
));
7452 static int check_mem_size_reg(struct bpf_verifier_env
*env
,
7453 struct bpf_reg_state
*reg
, u32 regno
,
7454 bool zero_size_allowed
,
7455 struct bpf_call_arg_meta
*meta
)
	/* This is used to refine r0 return value bounds for helpers
	 * that enforce this value as an upper bound on return values.
	 * See do_refine_retval_range() for helpers that can refine
	 * the return value. C type of helper is u32 so we pull register
	 * bound from umax_value however, if negative verifier errors
	 * out. Only upper bounds can be learned because retval is an
	 * int type and negative retvals are allowed.
	 */
	meta->msize_max_value = reg->umax_value;
7469 /* The register is SCALAR_VALUE; the access check
7470 * happens using its boundaries.
7472 if (!tnum_is_const(reg
->var_off
))
7473 /* For unprivileged variable accesses, disable raw
7474 * mode so that the program is required to
7475 * initialize all the memory that the helper could
7476 * just partially fill up.
7480 if (reg
->smin_value
< 0) {
7481 verbose(env
, "R%d min value is negative, either use unsigned or 'var &= const'\n",
7486 if (reg
->umin_value
== 0) {
7487 err
= check_helper_mem_access(env
, regno
- 1, 0,
7494 if (reg
->umax_value
>= BPF_MAX_VAR_SIZ
) {
7495 verbose(env
, "R%d unbounded memory access, use 'var &= const' or 'if (var < const)'\n",
7499 err
= check_helper_mem_access(env
, regno
- 1,
7501 zero_size_allowed
, meta
);
7503 err
= mark_chain_precision(env
, regno
);
7507 int check_mem_reg(struct bpf_verifier_env
*env
, struct bpf_reg_state
*reg
,
7508 u32 regno
, u32 mem_size
)
7510 bool may_be_null
= type_may_be_null(reg
->type
);
7511 struct bpf_reg_state saved_reg
;
7512 struct bpf_call_arg_meta meta
;
7515 if (register_is_null(reg
))
7518 memset(&meta
, 0, sizeof(meta
));
7519 /* Assuming that the register contains a value check if the memory
7520 * access is safe. Temporarily save and restore the register's state as
7521 * the conversion shouldn't be visible to a caller.
7525 mark_ptr_not_null_reg(reg
);
7528 err
= check_helper_mem_access(env
, regno
, mem_size
, true, &meta
);
7529 /* Check access for BPF_WRITE */
7530 meta
.raw_mode
= true;
7531 err
= err
?: check_helper_mem_access(env
, regno
, mem_size
, true, &meta
);
7539 static int check_kfunc_mem_size_reg(struct bpf_verifier_env
*env
, struct bpf_reg_state
*reg
,
7542 struct bpf_reg_state
*mem_reg
= &cur_regs(env
)[regno
- 1];
7543 bool may_be_null
= type_may_be_null(mem_reg
->type
);
7544 struct bpf_reg_state saved_reg
;
7545 struct bpf_call_arg_meta meta
;
7548 WARN_ON_ONCE(regno
< BPF_REG_2
|| regno
> BPF_REG_5
);
7550 memset(&meta
, 0, sizeof(meta
));
7553 saved_reg
= *mem_reg
;
7554 mark_ptr_not_null_reg(mem_reg
);
7557 err
= check_mem_size_reg(env
, reg
, regno
, true, &meta
);
7558 /* Check access for BPF_WRITE */
7559 meta
.raw_mode
= true;
7560 err
= err
?: check_mem_size_reg(env
, reg
, regno
, true, &meta
);
7563 *mem_reg
= saved_reg
;
/* Implementation details:
 * bpf_map_lookup returns PTR_TO_MAP_VALUE_OR_NULL.
 * bpf_obj_new returns PTR_TO_BTF_ID | MEM_ALLOC | PTR_MAYBE_NULL.
 * Two bpf_map_lookups (even with the same key) will have different reg->id.
 * Two separate bpf_obj_new will also have different reg->id.
 * For traditional PTR_TO_MAP_VALUE or PTR_TO_BTF_ID | MEM_ALLOC, the verifier
 * clears reg->id after value_or_null->value transition, since the verifier only
 * cares about the range of access to valid map value pointer and doesn't care
 * about actual address of the map element.
 * For maps with 'struct bpf_spin_lock' inside map value the verifier keeps
 * reg->id > 0 after value_or_null->value transition. By doing so
 * two bpf_map_lookups will be considered two different pointers that
 * point to different bpf_spin_locks. Likewise for pointers to allocated objects
 * returned from bpf_obj_new.
 * The verifier allows taking only one bpf_spin_lock at a time to avoid
 * dead-locks.
 * Since only one bpf_spin_lock is allowed the checks are simpler than
 * reg_is_refcounted() logic. The verifier needs to remember only
 * one spin_lock instead of array of acquired_refs.
 * cur_state->active_lock remembers which map value element or allocated
 * object got locked and clears it after bpf_spin_unlock.
 */
7589 static int process_spin_lock(struct bpf_verifier_env
*env
, int regno
,
7592 struct bpf_reg_state
*regs
= cur_regs(env
), *reg
= ®s
[regno
];
7593 struct bpf_verifier_state
*cur
= env
->cur_state
;
7594 bool is_const
= tnum_is_const(reg
->var_off
);
7595 u64 val
= reg
->var_off
.value
;
7596 struct bpf_map
*map
= NULL
;
7597 struct btf
*btf
= NULL
;
7598 struct btf_record
*rec
;
7602 "R%d doesn't have constant offset. bpf_spin_lock has to be at the constant offset\n",
7606 if (reg
->type
== PTR_TO_MAP_VALUE
) {
7610 "map '%s' has to have BTF in order to use bpf_spin_lock\n",
7618 rec
= reg_btf_record(reg
);
7619 if (!btf_record_has_field(rec
, BPF_SPIN_LOCK
)) {
7620 verbose(env
, "%s '%s' has no valid bpf_spin_lock\n", map
? "map" : "local",
7621 map
? map
->name
: "kptr");
7624 if (rec
->spin_lock_off
!= val
+ reg
->off
) {
7625 verbose(env
, "off %lld doesn't point to 'struct bpf_spin_lock' that is at %d\n",
7626 val
+ reg
->off
, rec
->spin_lock_off
);
7630 if (cur
->active_lock
.ptr
) {
7632 "Locking two bpf_spin_locks are not allowed\n");
7636 cur
->active_lock
.ptr
= map
;
7638 cur
->active_lock
.ptr
= btf
;
7639 cur
->active_lock
.id
= reg
->id
;
7648 if (!cur
->active_lock
.ptr
) {
7649 verbose(env
, "bpf_spin_unlock without taking a lock\n");
7652 if (cur
->active_lock
.ptr
!= ptr
||
7653 cur
->active_lock
.id
!= reg
->id
) {
7654 verbose(env
, "bpf_spin_unlock of different lock\n");
7658 invalidate_non_owning_refs(env
);
7660 cur
->active_lock
.ptr
= NULL
;
7661 cur
->active_lock
.id
= 0;
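
/* Sketch of the usage being verified (BPF program side; struct and map names
 * assumed, not taken from this file):
 *
 *   struct val_t {
 *           struct bpf_spin_lock lock;
 *           int data;
 *   };
 *
 *   struct val_t *val = bpf_map_lookup_elem(&my_map, &key);
 *   if (!val)
 *           return 0;
 *   bpf_spin_lock(&val->lock);
 *   val->data++;
 *   bpf_spin_unlock(&val->lock);
 *
 * Taking a second lock before unlocking the first, or unlocking a lock from a
 * different lookup (different reg->id), is rejected by the checks above.
 */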
static int process_timer_func(struct bpf_verifier_env *env, int regno,
			      struct bpf_call_arg_meta *meta)
{
	struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
	bool is_const = tnum_is_const(reg->var_off);
	struct bpf_map *map = reg->map_ptr;
	u64 val = reg->var_off.value;

	if (!is_const) {
		verbose(env,
			"R%d doesn't have constant offset. bpf_timer has to be at the constant offset\n",
			regno);
		return -EINVAL;
	}
	if (!map->btf) {
		verbose(env, "map '%s' has to have BTF in order to use bpf_timer\n",
			map->name);
		return -EINVAL;
	}
	if (!btf_record_has_field(map->record, BPF_TIMER)) {
		verbose(env, "map '%s' has no valid bpf_timer\n", map->name);
		return -EINVAL;
	}
	if (map->record->timer_off != val + reg->off) {
		verbose(env, "off %lld doesn't point to 'struct bpf_timer' that is at %d\n",
			val + reg->off, map->record->timer_off);
		return -EINVAL;
	}
	if (meta->map_ptr) {
		verbose(env, "verifier bug. Two map pointers in a timer helper\n");
		return -EFAULT;
	}
	meta->map_uid = reg->map_uid;
	meta->map_ptr = map;
	return 0;
}
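
/* Sketch of the map layout and calls this validates (BPF program side; names
 * assumed, error handling omitted):
 *
 *   struct elem {
 *           struct bpf_timer t;
 *   };
 *
 *   struct elem *val = bpf_map_lookup_elem(&timer_map, &key);
 *   if (val) {
 *           bpf_timer_init(&val->t, &timer_map, CLOCK_MONOTONIC);
 *           bpf_timer_set_callback(&val->t, timer_cb);
 *           bpf_timer_start(&val->t, 1000000000, 0);   // 1s in ns
 *   }
 *
 * The register argument must point exactly at the bpf_timer field's offset
 * within the map value; any other offset is rejected above.
 */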
static int process_kptr_func(struct bpf_verifier_env *env, int regno,
			     struct bpf_call_arg_meta *meta)
{
	struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
	struct bpf_map *map_ptr = reg->map_ptr;
	struct btf_field *kptr_field;
	u32 kptr_off;

	if (!tnum_is_const(reg->var_off)) {
		verbose(env,
			"R%d doesn't have constant offset. kptr has to be at the constant offset\n",
			regno);
		return -EINVAL;
	}
	if (!map_ptr->btf) {
		verbose(env, "map '%s' has to have BTF in order to use bpf_kptr_xchg\n",
			map_ptr->name);
		return -EINVAL;
	}
	if (!btf_record_has_field(map_ptr->record, BPF_KPTR)) {
		verbose(env, "map '%s' has no valid kptr\n", map_ptr->name);
		return -EINVAL;
	}

	meta->map_ptr = map_ptr;
	kptr_off = reg->off + reg->var_off.value;
	kptr_field = btf_record_find(map_ptr->record, kptr_off, BPF_KPTR);
	if (!kptr_field) {
		verbose(env, "off=%d doesn't point to kptr\n", kptr_off);
		return -EACCES;
	}
	if (kptr_field->type != BPF_KPTR_REF && kptr_field->type != BPF_KPTR_PERCPU) {
		verbose(env, "off=%d kptr isn't referenced kptr\n", kptr_off);
		return -EACCES;
	}
	meta->kptr_field = kptr_field;
	return 0;
}
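
/* Sketch of a referenced kptr field and the exchange this validates (BPF
 * program side; names assumed):
 *
 *   struct elem {
 *           struct task_struct __kptr *task;
 *   };
 *
 *   struct elem *val = bpf_map_lookup_elem(&kptr_map, &key);
 *   if (val) {
 *           struct task_struct *old;
 *
 *           old = bpf_kptr_xchg(&val->task, acquired_task);
 *           if (old)
 *                   bpf_task_release(old);
 *   }
 *
 * The first argument must land exactly on a referenced (or percpu) kptr field
 * recorded in the map's btf_record, which is what the offset checks above
 * enforce.
 */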
/* There are two register types representing a bpf_dynptr, one is PTR_TO_STACK
 * which points to a stack slot, and the other is CONST_PTR_TO_DYNPTR.
 *
 * In both cases we deal with the first 8 bytes, but need to mark the next 8
 * bytes as STACK_DYNPTR in case of PTR_TO_STACK. In case of
 * CONST_PTR_TO_DYNPTR, we are guaranteed to get the beginning of the object.
 *
 * Mutability of bpf_dynptr is at two levels, one is at the level of struct
 * bpf_dynptr itself, i.e. whether the helper is receiving a pointer to struct
 * bpf_dynptr or pointer to const struct bpf_dynptr. In the former case, it can
 * mutate the view of the dynptr and also possibly destroy it. In the latter
 * case, it cannot mutate the bpf_dynptr itself but it can still mutate the
 * memory that dynptr points to.
 *
 * The verifier will keep track of both levels of mutation (bpf_dynptr's in
 * reg->type and the memory's in reg->dynptr.type), but there is no support for
 * readonly dynptr view yet, hence only the first case is tracked and checked.
 *
 * This is consistent with how C applies the const modifier to a struct object,
 * where the pointer itself inside bpf_dynptr becomes const but not what it
 * points to.
 *
 * Helpers which do not mutate the bpf_dynptr set MEM_RDONLY in their argument
 * type, and declare it as 'const struct bpf_dynptr *' in their prototype.
 */
7767 static int process_dynptr_func(struct bpf_verifier_env
*env
, int regno
, int insn_idx
,
7768 enum bpf_arg_type arg_type
, int clone_ref_obj_id
)
7770 struct bpf_reg_state
*regs
= cur_regs(env
), *reg
= ®s
[regno
];
7773 /* MEM_UNINIT and MEM_RDONLY are exclusive, when applied to an
7774 * ARG_PTR_TO_DYNPTR (or ARG_PTR_TO_DYNPTR | DYNPTR_TYPE_*):
7776 if ((arg_type
& (MEM_UNINIT
| MEM_RDONLY
)) == (MEM_UNINIT
| MEM_RDONLY
)) {
7777 verbose(env
, "verifier internal error: misconfigured dynptr helper type flags\n");
7781 /* MEM_UNINIT - Points to memory that is an appropriate candidate for
7782 * constructing a mutable bpf_dynptr object.
7784 * Currently, this is only possible with PTR_TO_STACK
7785 * pointing to a region of at least 16 bytes which doesn't
7786 * contain an existing bpf_dynptr.
7788 * MEM_RDONLY - Points to a initialized bpf_dynptr that will not be
7789 * mutated or destroyed. However, the memory it points to
7792 * None - Points to a initialized dynptr that can be mutated and
7793 * destroyed, including mutation of the memory it points
7796 if (arg_type
& MEM_UNINIT
) {
7799 if (!is_dynptr_reg_valid_uninit(env
, reg
)) {
7800 verbose(env
, "Dynptr has to be an uninitialized dynptr\n");
7804 /* we write BPF_DW bits (8 bytes) at a time */
7805 for (i
= 0; i
< BPF_DYNPTR_SIZE
; i
+= 8) {
7806 err
= check_mem_access(env
, insn_idx
, regno
,
7807 i
, BPF_DW
, BPF_WRITE
, -1, false, false);
7812 err
= mark_stack_slots_dynptr(env
, reg
, arg_type
, insn_idx
, clone_ref_obj_id
);
7813 } else /* MEM_RDONLY and None case from above */ {
7814 /* For the reg->type == PTR_TO_STACK case, bpf_dynptr is never const */
7815 if (reg
->type
== CONST_PTR_TO_DYNPTR
&& !(arg_type
& MEM_RDONLY
)) {
7816 verbose(env
, "cannot pass pointer to const bpf_dynptr, the helper mutates it\n");
7820 if (!is_dynptr_reg_valid_init(env
, reg
)) {
7822 "Expected an initialized dynptr as arg #%d\n",
7827 /* Fold modifiers (in this case, MEM_RDONLY) when checking expected type */
7828 if (!is_dynptr_type_expected(env
, reg
, arg_type
& ~MEM_RDONLY
)) {
7830 "Expected a dynptr of type %s as arg #%d\n",
7831 dynptr_type_str(arg_to_dynptr_type(arg_type
)), regno
);
7835 err
= mark_dynptr_read(env
, reg
);
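
/* Sketch of the two dynptr flavours handled above (helper names assumed,
 * error handling omitted):
 *
 *   struct bpf_dynptr dptr;                              // PTR_TO_STACK case
 *
 *   bpf_ringbuf_reserve_dynptr(&ringbuf, 64, 0, &dptr);  // MEM_UNINIT arg
 *   bpf_dynptr_write(&dptr, 0, buf, sizeof(buf), 0);     // initialized arg
 *   bpf_ringbuf_submit_dynptr(&dptr, 0);
 *
 * A helper declared with 'const struct bpf_dynptr *' (MEM_RDONLY), such as
 * bpf_dynptr_read(), may also take a CONST_PTR_TO_DYNPTR, e.g. the dynptr
 * handed to a bpf_user_ringbuf_drain() callback, since it never mutates the
 * dynptr view itself.
 */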
static u32 iter_ref_obj_id(struct bpf_verifier_env *env, struct bpf_reg_state *reg, int spi)
{
	struct bpf_func_state *state = func(env, reg);

	return state->stack[spi].spilled_ptr.ref_obj_id;
}

static bool is_iter_kfunc(struct bpf_kfunc_call_arg_meta *meta)
{
	return meta->kfunc_flags & (KF_ITER_NEW | KF_ITER_NEXT | KF_ITER_DESTROY);
}

static bool is_iter_new_kfunc(struct bpf_kfunc_call_arg_meta *meta)
{
	return meta->kfunc_flags & KF_ITER_NEW;
}

static bool is_iter_next_kfunc(struct bpf_kfunc_call_arg_meta *meta)
{
	return meta->kfunc_flags & KF_ITER_NEXT;
}

static bool is_iter_destroy_kfunc(struct bpf_kfunc_call_arg_meta *meta)
{
	return meta->kfunc_flags & KF_ITER_DESTROY;
}

static bool is_kfunc_arg_iter(struct bpf_kfunc_call_arg_meta *meta, int arg)
{
	/* btf_check_iter_kfuncs() guarantees that first argument of any iter
	 * kfunc is iter state pointer
	 */
	return arg == 0 && is_iter_kfunc(meta);
}
*env
, int regno
, int insn_idx
,
7876 struct bpf_kfunc_call_arg_meta
*meta
)
7878 struct bpf_reg_state
*regs
= cur_regs(env
), *reg
= ®s
[regno
];
7879 const struct btf_type
*t
;
7880 const struct btf_param
*arg
;
7881 int spi
, err
, i
, nr_slots
;
7884 /* btf_check_iter_kfuncs() ensures we don't need to validate anything here */
7885 arg
= &btf_params(meta
->func_proto
)[0];
7886 t
= btf_type_skip_modifiers(meta
->btf
, arg
->type
, NULL
); /* PTR */
7887 t
= btf_type_skip_modifiers(meta
->btf
, t
->type
, &btf_id
); /* STRUCT */
7888 nr_slots
= t
->size
/ BPF_REG_SIZE
;
7890 if (is_iter_new_kfunc(meta
)) {
7891 /* bpf_iter_<type>_new() expects pointer to uninit iter state */
7892 if (!is_iter_reg_valid_uninit(env
, reg
, nr_slots
)) {
7893 verbose(env
, "expected uninitialized iter_%s as arg #%d\n",
7894 iter_type_str(meta
->btf
, btf_id
), regno
);
7898 for (i
= 0; i
< nr_slots
* 8; i
+= BPF_REG_SIZE
) {
7899 err
= check_mem_access(env
, insn_idx
, regno
,
7900 i
, BPF_DW
, BPF_WRITE
, -1, false, false);
7905 err
= mark_stack_slots_iter(env
, meta
, reg
, insn_idx
, meta
->btf
, btf_id
, nr_slots
);
7909 /* iter_next() or iter_destroy() expect initialized iter state*/
7910 err
= is_iter_reg_valid_init(env
, reg
, meta
->btf
, btf_id
, nr_slots
);
7915 verbose(env
, "expected an initialized iter_%s as arg #%d\n",
7916 iter_type_str(meta
->btf
, btf_id
), regno
);
7919 verbose(env
, "expected an RCU CS when using %s\n", meta
->func_name
);
7925 spi
= iter_get_spi(env
, reg
, nr_slots
);
7929 err
= mark_iter_read(env
, reg
, spi
, nr_slots
);
7933 /* remember meta->iter info for process_iter_next_call() */
7934 meta
->iter
.spi
= spi
;
7935 meta
->iter
.frameno
= reg
->frameno
;
7936 meta
->ref_obj_id
= iter_ref_obj_id(env
, reg
, spi
);
7938 if (is_iter_destroy_kfunc(meta
)) {
7939 err
= unmark_stack_slots_iter(env
, reg
, nr_slots
);
/* Look for a previous loop entry at insn_idx: nearest parent state
 * stopped at insn_idx with callsites matching those in cur->frame.
 */
static struct bpf_verifier_state *find_prev_entry(struct bpf_verifier_env *env,
						  struct bpf_verifier_state *cur,
						  int insn_idx)
{
	struct bpf_verifier_state_list *sl;
	struct bpf_verifier_state *st;

	/* Explored states are pushed in stack order, most recent states come first */
	sl = *explored_state(env, insn_idx);
	for (; sl; sl = sl->next) {
		/* If st->branches != 0 state is a part of current DFS verification path,
		 * hence cur & st for a loop.
		 */
		st = &sl->state;
		if (st->insn_idx == insn_idx && st->branches && same_callsites(st, cur) &&
		    st->dfs_depth < cur->dfs_depth)
			return st;
	}

	return NULL;
}
static void reset_idmap_scratch(struct bpf_verifier_env *env);
static bool regs_exact(const struct bpf_reg_state *rold,
		       const struct bpf_reg_state *rcur,
		       struct bpf_idmap *idmap);

static void maybe_widen_reg(struct bpf_verifier_env *env,
			    struct bpf_reg_state *rold, struct bpf_reg_state *rcur,
			    struct bpf_idmap *idmap)
{
	if (rold->type != SCALAR_VALUE)
		return;
	if (rold->type != rcur->type)
		return;
	if (rold->precise || rcur->precise || regs_exact(rold, rcur, idmap))
		return;
	__mark_reg_unknown(env, rcur);
}

static int widen_imprecise_scalars(struct bpf_verifier_env *env,
				   struct bpf_verifier_state *old,
				   struct bpf_verifier_state *cur)
{
	struct bpf_func_state *fold, *fcur;
	int i, fr;

	reset_idmap_scratch(env);
	for (fr = old->curframe; fr >= 0; fr--) {
		fold = old->frame[fr];
		fcur = cur->frame[fr];

		for (i = 0; i < MAX_BPF_REG; i++)
			maybe_widen_reg(env,
					&fold->regs[i],
					&fcur->regs[i],
					&env->idmap_scratch);

		for (i = 0; i < fold->allocated_stack / BPF_REG_SIZE; i++) {
			if (!is_spilled_reg(&fold->stack[i]) ||
			    !is_spilled_reg(&fcur->stack[i]))
				continue;

			maybe_widen_reg(env,
					&fold->stack[i].spilled_ptr,
					&fcur->stack[i].spilled_ptr,
					&env->idmap_scratch);
		}
	}
	return 0;
}
/* process_iter_next_call() is called when verifier gets to iterator's next
 * "method" (e.g., bpf_iter_num_next() for numbers iterator) call. We'll refer
 * to it as just "iter_next()" in comments below.
 *
 * BPF verifier relies on a crucial contract for any iter_next()
 * implementation: it should *eventually* return NULL, and once that happens
 * it should keep returning NULL. That is, once iterator exhausts elements to
 * iterate, it should never reset or spuriously return new elements.
 *
 * With the assumption of such contract, process_iter_next_call() simulates
 * a fork in the verifier state to validate loop logic correctness and safety
 * without having to simulate infinite amount of iterations.
 *
 * In current state, we first assume that iter_next() returned NULL and
 * iterator state is set to DRAINED (BPF_ITER_STATE_DRAINED). In such
 * conditions we should not form an infinite loop and should eventually reach
 * exit.
 *
 * Besides that, we also fork current state and enqueue it for later
 * verification. In a forked state we keep iterator state as ACTIVE
 * (BPF_ITER_STATE_ACTIVE) and assume non-NULL return from iter_next(). We
 * also bump iteration depth to prevent erroneous infinite loop detection
 * later on (see iter_active_depths_differ() comment for details). In this
 * state we assume that we'll eventually loop back to another iter_next()
 * calls (it could be in exactly same location or in some other instruction,
 * it doesn't matter, we don't make any unnecessary assumptions about this,
 * everything revolves around iterator state in a stack slot, not which
 * instruction is calling iter_next()). When that happens, we either will come
 * to iter_next() with equivalent state and can conclude that next iteration
 * will proceed in exactly the same way as we just verified, so it's safe to
 * assume that loop converges. If not, we'll go on another iteration
 * simulation with a different input state, until all possible starting states
 * are validated or we reach maximum number of instructions limit.
 *
 * This way, we will either exhaustively discover all possible input states
 * that iterator loop can start with and eventually will converge, or we'll
 * effectively regress into bounded loop simulation logic and either reach
 * maximum number of instructions if loop is not provably convergent, or there
 * is some statically known limit on number of iterations (e.g., if there is
 * an explicit `if n > 100 then break;` statement somewhere in the loop).
 *
 * Iteration convergence logic in is_state_visited() relies on exact
 * states comparison, which ignores read and precision marks.
 * This is necessary because read and precision marks are not finalized
 * while in the loop. Exact comparison might preclude convergence for
 * simple programs like below:
 *
 *     i = 0;
 *     while(iter_next(&it))
 *       i++;
 *
 * At each iteration step i++ would produce a new distinct state and
 * eventually instruction processing limit would be reached.
 *
 * To avoid such behavior speculatively forget (widen) range for
 * imprecise scalar registers, if those registers were not precise at the
 * end of the previous iteration and do not match exactly.
 *
 * This is a conservative heuristic that allows to verify wide range of programs,
 * however it precludes verification of programs that conjure an
 * imprecise value on the first loop iteration and use it as precise on a second.
 * For example, the following safe program would fail to verify:
 *
 *     struct bpf_num_iter it;
 *     int arr[10];
 *     int i = 0;
 *
 *     bpf_iter_num_new(&it, 0, 10);
 *     while (bpf_iter_num_next(&it)) {
 *       if (bpf_get_prandom_u32() & 1)
 *         i = 7; // Because i changed verifier would forget
 *                // its range on second loop entry.
 *       arr[i] = 42; // This would fail to verify.
 *     }
 *     bpf_iter_num_destroy(&it);
 */
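/* For contrast with the failing example above, an open-coded iterator loop of
 * the shape below does converge under the widening heuristic, because the
 * loop body never requires the induction variable to stay precise.
 * Illustrative sketch only, assuming the bpf_iter_num_* kfuncs exposed to BPF
 * programs:
 *
 *	struct bpf_iter_num it;
 *	int *v, sum = 0;
 *
 *	bpf_iter_num_new(&it, 0, 10);
 *	while ((v = bpf_iter_num_next(&it)))
 *		sum += *v;
 *	bpf_iter_num_destroy(&it);
 */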
static int process_iter_next_call(struct bpf_verifier_env *env, int insn_idx,
				  struct bpf_kfunc_call_arg_meta *meta)
{
	struct bpf_verifier_state *cur_st = env->cur_state, *queued_st, *prev_st;
	struct bpf_func_state *cur_fr = cur_st->frame[cur_st->curframe], *queued_fr;
	struct bpf_reg_state *cur_iter, *queued_iter;
	int iter_frameno = meta->iter.frameno;
	int iter_spi = meta->iter.spi;

	BTF_TYPE_EMIT(struct bpf_iter);

	cur_iter = &env->cur_state->frame[iter_frameno]->stack[iter_spi].spilled_ptr;

	if (cur_iter->iter.state != BPF_ITER_STATE_ACTIVE &&
	    cur_iter->iter.state != BPF_ITER_STATE_DRAINED) {
		verbose(env, "verifier internal error: unexpected iterator state %d (%s)\n",
			cur_iter->iter.state, iter_state_str(cur_iter->iter.state));
		return -EFAULT;
	}

	if (cur_iter->iter.state == BPF_ITER_STATE_ACTIVE) {
		/* Because iter_next() call is a checkpoint is_state_visitied()
		 * should guarantee parent state with same call sites and insn_idx.
		 */
		if (!cur_st->parent || cur_st->parent->insn_idx != insn_idx ||
		    !same_callsites(cur_st->parent, cur_st)) {
			verbose(env, "bug: bad parent state for iter next call");
			return -EFAULT;
		}
		/* Note cur_st->parent in the call below, it is necessary to skip
		 * checkpoint created for cur_st by is_state_visited()
		 * right at this instruction.
		 */
		prev_st = find_prev_entry(env, cur_st->parent, insn_idx);
		/* branch out active iter state */
		queued_st = push_stack(env, insn_idx + 1, insn_idx, false);
		if (!queued_st)
			return -ENOMEM;

		queued_iter = &queued_st->frame[iter_frameno]->stack[iter_spi].spilled_ptr;
		queued_iter->iter.state = BPF_ITER_STATE_ACTIVE;
		queued_iter->iter.depth++;
		if (prev_st)
			widen_imprecise_scalars(env, prev_st, queued_st);

		queued_fr = queued_st->frame[queued_st->curframe];
		mark_ptr_not_null_reg(&queued_fr->regs[BPF_REG_0]);
	}

	/* switch to DRAINED state, but keep the depth unchanged */
	/* mark current iter state as drained and assume returned NULL */
	cur_iter->iter.state = BPF_ITER_STATE_DRAINED;
	__mark_reg_const_zero(&cur_fr->regs[BPF_REG_0]);

	return 0;
}
static bool arg_type_is_mem_size(enum bpf_arg_type type)
{
	return type == ARG_CONST_SIZE ||
	       type == ARG_CONST_SIZE_OR_ZERO;
}

static bool arg_type_is_release(enum bpf_arg_type type)
{
	return type & OBJ_RELEASE;
}

static bool arg_type_is_dynptr(enum bpf_arg_type type)
{
	return base_type(type) == ARG_PTR_TO_DYNPTR;
}

static int int_ptr_type_to_size(enum bpf_arg_type type)
{
	if (type == ARG_PTR_TO_INT)
		return sizeof(u32);
	else if (type == ARG_PTR_TO_LONG)
		return sizeof(u64);

	return -EINVAL;
}

static int resolve_map_arg_type(struct bpf_verifier_env *env,
				const struct bpf_call_arg_meta *meta,
				enum bpf_arg_type *arg_type)
{
	if (!meta->map_ptr) {
		/* kernel subsystem misconfigured verifier */
		verbose(env, "invalid map_ptr to access map->type\n");
		return -EACCES;
	}

	switch (meta->map_ptr->map_type) {
	case BPF_MAP_TYPE_SOCKMAP:
	case BPF_MAP_TYPE_SOCKHASH:
		if (*arg_type == ARG_PTR_TO_MAP_VALUE) {
			*arg_type = ARG_PTR_TO_BTF_ID_SOCK_COMMON;
		} else {
			verbose(env, "invalid arg_type for sockmap/sockhash\n");
			return -EINVAL;
		}
		break;
	case BPF_MAP_TYPE_BLOOM_FILTER:
		if (meta->func_id == BPF_FUNC_map_peek_elem)
			*arg_type = ARG_PTR_TO_MAP_VALUE;
		break;
	default:
		break;
	}
	return 0;
}
struct bpf_reg_types {
	const enum bpf_reg_type types[10];
	u32 *btf_id;
};

static const struct bpf_reg_types sock_types = {
	.types = {
		PTR_TO_SOCK_COMMON,
		PTR_TO_SOCKET,
		PTR_TO_TCP_SOCK,
		PTR_TO_XDP_SOCK,
	},
};

static const struct bpf_reg_types btf_id_sock_common_types = {
	.types = {
		PTR_TO_SOCK_COMMON,
		PTR_TO_SOCKET,
		PTR_TO_TCP_SOCK,
		PTR_TO_XDP_SOCK,
		PTR_TO_BTF_ID,
		PTR_TO_BTF_ID | PTR_TRUSTED,
	},
	.btf_id = &btf_sock_ids[BTF_SOCK_TYPE_SOCK_COMMON],
};

static const struct bpf_reg_types mem_types = {
	.types = {
		PTR_TO_STACK,
		PTR_TO_PACKET,
		PTR_TO_PACKET_META,
		PTR_TO_MAP_KEY,
		PTR_TO_MAP_VALUE,
		PTR_TO_MEM,
		PTR_TO_MEM | MEM_RINGBUF,
		PTR_TO_BUF,
		PTR_TO_BTF_ID | PTR_TRUSTED,
	},
};

static const struct bpf_reg_types int_ptr_types = {
	.types = {
		PTR_TO_STACK,
		PTR_TO_PACKET,
		PTR_TO_PACKET_META,
		PTR_TO_MAP_KEY,
		PTR_TO_MAP_VALUE,
	},
};

static const struct bpf_reg_types spin_lock_types = {
	.types = {
		PTR_TO_MAP_VALUE,
		PTR_TO_BTF_ID | MEM_ALLOC,
	}
};

static const struct bpf_reg_types fullsock_types = { .types = { PTR_TO_SOCKET } };
static const struct bpf_reg_types scalar_types = { .types = { SCALAR_VALUE } };
static const struct bpf_reg_types context_types = { .types = { PTR_TO_CTX } };
static const struct bpf_reg_types ringbuf_mem_types = { .types = { PTR_TO_MEM | MEM_RINGBUF } };
static const struct bpf_reg_types const_map_ptr_types = { .types = { CONST_PTR_TO_MAP } };
static const struct bpf_reg_types btf_ptr_types = {
	.types = {
		PTR_TO_BTF_ID,
		PTR_TO_BTF_ID | PTR_TRUSTED,
		PTR_TO_BTF_ID | MEM_RCU,
	},
};
static const struct bpf_reg_types percpu_btf_ptr_types = {
	.types = {
		PTR_TO_BTF_ID | MEM_PERCPU,
		PTR_TO_BTF_ID | MEM_PERCPU | MEM_RCU,
		PTR_TO_BTF_ID | MEM_PERCPU | PTR_TRUSTED,
	}
};
static const struct bpf_reg_types func_ptr_types = { .types = { PTR_TO_FUNC } };
static const struct bpf_reg_types stack_ptr_types = { .types = { PTR_TO_STACK } };
static const struct bpf_reg_types const_str_ptr_types = { .types = { PTR_TO_MAP_VALUE } };
static const struct bpf_reg_types timer_types = { .types = { PTR_TO_MAP_VALUE } };
static const struct bpf_reg_types kptr_types = { .types = { PTR_TO_MAP_VALUE } };
static const struct bpf_reg_types dynptr_types = {
	.types = {
		PTR_TO_STACK,
		CONST_PTR_TO_DYNPTR,
	}
};

static const struct bpf_reg_types *compatible_reg_types[__BPF_ARG_TYPE_MAX] = {
	[ARG_PTR_TO_MAP_KEY]		= &mem_types,
	[ARG_PTR_TO_MAP_VALUE]		= &mem_types,
	[ARG_CONST_SIZE]		= &scalar_types,
	[ARG_CONST_SIZE_OR_ZERO]	= &scalar_types,
	[ARG_CONST_ALLOC_SIZE_OR_ZERO]	= &scalar_types,
	[ARG_CONST_MAP_PTR]		= &const_map_ptr_types,
	[ARG_PTR_TO_CTX]		= &context_types,
	[ARG_PTR_TO_SOCK_COMMON]	= &sock_types,
#ifdef CONFIG_NET
	[ARG_PTR_TO_BTF_ID_SOCK_COMMON]	= &btf_id_sock_common_types,
#endif
	[ARG_PTR_TO_SOCKET]		= &fullsock_types,
	[ARG_PTR_TO_BTF_ID]		= &btf_ptr_types,
	[ARG_PTR_TO_SPIN_LOCK]		= &spin_lock_types,
	[ARG_PTR_TO_MEM]		= &mem_types,
	[ARG_PTR_TO_RINGBUF_MEM]	= &ringbuf_mem_types,
	[ARG_PTR_TO_INT]		= &int_ptr_types,
	[ARG_PTR_TO_LONG]		= &int_ptr_types,
	[ARG_PTR_TO_PERCPU_BTF_ID]	= &percpu_btf_ptr_types,
	[ARG_PTR_TO_FUNC]		= &func_ptr_types,
	[ARG_PTR_TO_STACK]		= &stack_ptr_types,
	[ARG_PTR_TO_CONST_STR]		= &const_str_ptr_types,
	[ARG_PTR_TO_TIMER]		= &timer_types,
	[ARG_PTR_TO_KPTR]		= &kptr_types,
	[ARG_PTR_TO_DYNPTR]		= &dynptr_types,
};
8332 static int check_reg_type(struct bpf_verifier_env
*env
, u32 regno
,
8333 enum bpf_arg_type arg_type
,
8334 const u32
*arg_btf_id
,
8335 struct bpf_call_arg_meta
*meta
)
8337 struct bpf_reg_state
*regs
= cur_regs(env
), *reg
= ®s
[regno
];
8338 enum bpf_reg_type expected
, type
= reg
->type
;
8339 const struct bpf_reg_types
*compatible
;
8342 compatible
= compatible_reg_types
[base_type(arg_type
)];
8344 verbose(env
, "verifier internal error: unsupported arg type %d\n", arg_type
);
8348 /* ARG_PTR_TO_MEM + RDONLY is compatible with PTR_TO_MEM and PTR_TO_MEM + RDONLY,
8349 * but ARG_PTR_TO_MEM is compatible only with PTR_TO_MEM and NOT with PTR_TO_MEM + RDONLY
8351 * Same for MAYBE_NULL:
8353 * ARG_PTR_TO_MEM + MAYBE_NULL is compatible with PTR_TO_MEM and PTR_TO_MEM + MAYBE_NULL,
8354 * but ARG_PTR_TO_MEM is compatible only with PTR_TO_MEM but NOT with PTR_TO_MEM + MAYBE_NULL
8356 * ARG_PTR_TO_MEM is compatible with PTR_TO_MEM that is tagged with a dynptr type.
8358 * Therefore we fold these flags depending on the arg_type before comparison.
8360 if (arg_type
& MEM_RDONLY
)
8361 type
&= ~MEM_RDONLY
;
8362 if (arg_type
& PTR_MAYBE_NULL
)
8363 type
&= ~PTR_MAYBE_NULL
;
8364 if (base_type(arg_type
) == ARG_PTR_TO_MEM
)
8365 type
&= ~DYNPTR_TYPE_FLAG_MASK
;
8367 if (meta
->func_id
== BPF_FUNC_kptr_xchg
&& type_is_alloc(type
)) {
8369 type
&= ~MEM_PERCPU
;
8372 for (i
= 0; i
< ARRAY_SIZE(compatible
->types
); i
++) {
8373 expected
= compatible
->types
[i
];
8374 if (expected
== NOT_INIT
)
8377 if (type
== expected
)
8381 verbose(env
, "R%d type=%s expected=", regno
, reg_type_str(env
, reg
->type
));
8382 for (j
= 0; j
+ 1 < i
; j
++)
8383 verbose(env
, "%s, ", reg_type_str(env
, compatible
->types
[j
]));
8384 verbose(env
, "%s\n", reg_type_str(env
, compatible
->types
[j
]));
8388 if (base_type(reg
->type
) != PTR_TO_BTF_ID
)
8391 if (compatible
== &mem_types
) {
8392 if (!(arg_type
& MEM_RDONLY
)) {
8394 "%s() may write into memory pointed by R%d type=%s\n",
8395 func_id_name(meta
->func_id
),
8396 regno
, reg_type_str(env
, reg
->type
));
8402 switch ((int)reg
->type
) {
8404 case PTR_TO_BTF_ID
| PTR_TRUSTED
:
8405 case PTR_TO_BTF_ID
| MEM_RCU
:
8406 case PTR_TO_BTF_ID
| PTR_MAYBE_NULL
:
8407 case PTR_TO_BTF_ID
| PTR_MAYBE_NULL
| MEM_RCU
:
8409 /* For bpf_sk_release, it needs to match against first member
8410 * 'struct sock_common', hence make an exception for it. This
8411 * allows bpf_sk_release to work for multiple socket types.
8413 bool strict_type_match
= arg_type_is_release(arg_type
) &&
8414 meta
->func_id
!= BPF_FUNC_sk_release
;
8416 if (type_may_be_null(reg
->type
) &&
8417 (!type_may_be_null(arg_type
) || arg_type_is_release(arg_type
))) {
8418 verbose(env
, "Possibly NULL pointer passed to helper arg%d\n", regno
);
8423 if (!compatible
->btf_id
) {
8424 verbose(env
, "verifier internal error: missing arg compatible BTF ID\n");
8427 arg_btf_id
= compatible
->btf_id
;
8430 if (meta
->func_id
== BPF_FUNC_kptr_xchg
) {
8431 if (map_kptr_match_type(env
, meta
->kptr_field
, reg
, regno
))
8434 if (arg_btf_id
== BPF_PTR_POISON
) {
8435 verbose(env
, "verifier internal error:");
8436 verbose(env
, "R%d has non-overwritten BPF_PTR_POISON type\n",
8441 if (!btf_struct_ids_match(&env
->log
, reg
->btf
, reg
->btf_id
, reg
->off
,
8442 btf_vmlinux
, *arg_btf_id
,
8443 strict_type_match
)) {
8444 verbose(env
, "R%d is of type %s but %s is expected\n",
8445 regno
, btf_type_name(reg
->btf
, reg
->btf_id
),
8446 btf_type_name(btf_vmlinux
, *arg_btf_id
));
8452 case PTR_TO_BTF_ID
| MEM_ALLOC
:
8453 case PTR_TO_BTF_ID
| MEM_PERCPU
| MEM_ALLOC
:
8454 if (meta
->func_id
!= BPF_FUNC_spin_lock
&& meta
->func_id
!= BPF_FUNC_spin_unlock
&&
8455 meta
->func_id
!= BPF_FUNC_kptr_xchg
) {
8456 verbose(env
, "verifier internal error: unimplemented handling of MEM_ALLOC\n");
8459 if (meta
->func_id
== BPF_FUNC_kptr_xchg
) {
8460 if (map_kptr_match_type(env
, meta
->kptr_field
, reg
, regno
))
8464 case PTR_TO_BTF_ID
| MEM_PERCPU
:
8465 case PTR_TO_BTF_ID
| MEM_PERCPU
| MEM_RCU
:
8466 case PTR_TO_BTF_ID
| MEM_PERCPU
| PTR_TRUSTED
:
8467 /* Handled by helper specific checks */
8470 verbose(env
, "verifier internal error: invalid PTR_TO_BTF_ID register for type match\n");
static struct btf_field *
reg_find_field_offset(const struct bpf_reg_state *reg, s32 off, u32 fields)
{
	struct btf_field *field;
	struct btf_record *rec;

	rec = reg_btf_record(reg);
	if (!rec)
		return NULL;

	field = btf_record_find(rec, off, fields);
	if (!field)
		return NULL;

	return field;
}

int check_func_arg_reg_off(struct bpf_verifier_env *env,
			   const struct bpf_reg_state *reg, int regno,
			   enum bpf_arg_type arg_type)
{
	u32 type = reg->type;

	/* When referenced register is passed to release function, its fixed
	 * offset must be 0.
	 *
	 * We will check arg_type_is_release reg has ref_obj_id when storing
	 * meta->release_regno.
	 */
	if (arg_type_is_release(arg_type)) {
		/* ARG_PTR_TO_DYNPTR with OBJ_RELEASE is a bit special, as it
		 * may not directly point to the object being released, but to
		 * dynptr pointing to such object, which might be at some offset
		 * on the stack. In that case, we simply to fallback to the
		 * default handling.
		 */
		if (arg_type_is_dynptr(arg_type) && type == PTR_TO_STACK)
			return 0;

		/* Doing check_ptr_off_reg check for the offset will catch this
		 * because fixed_off_ok is false, but checking here allows us
		 * to give the user a better error message.
		 */
		if (reg->off) {
			verbose(env, "R%d must have zero offset when passed to release func or trusted arg to kfunc\n",
				regno);
			return -EINVAL;
		}
		return __check_ptr_off_reg(env, reg, regno, false);
	}

	switch (type) {
	/* Pointer types where both fixed and variable offset is explicitly allowed: */
	case PTR_TO_STACK:
	case PTR_TO_PACKET:
	case PTR_TO_PACKET_META:
	case PTR_TO_MAP_KEY:
	case PTR_TO_MAP_VALUE:
	case PTR_TO_MEM:
	case PTR_TO_MEM | MEM_RDONLY:
	case PTR_TO_MEM | MEM_RINGBUF:
	case PTR_TO_BUF:
	case PTR_TO_BUF | MEM_RDONLY:
	case SCALAR_VALUE:
		return 0;
	/* All the rest must be rejected, except PTR_TO_BTF_ID which allows
	 * fixed offset.
	 */
	case PTR_TO_BTF_ID:
	case PTR_TO_BTF_ID | MEM_ALLOC:
	case PTR_TO_BTF_ID | PTR_TRUSTED:
	case PTR_TO_BTF_ID | MEM_RCU:
	case PTR_TO_BTF_ID | MEM_ALLOC | NON_OWN_REF:
	case PTR_TO_BTF_ID | MEM_ALLOC | NON_OWN_REF | MEM_RCU:
		/* When referenced PTR_TO_BTF_ID is passed to release function,
		 * its fixed offset must be 0. In the other cases, fixed offset
		 * can be non-zero. This was already checked above. So pass
		 * fixed_off_ok as true to allow fixed offset for all other
		 * cases. var_off always must be 0 for PTR_TO_BTF_ID, hence we
		 * still need to do checks instead of returning.
		 */
		return __check_ptr_off_reg(env, reg, regno, true);
	default:
		return __check_ptr_off_reg(env, reg, regno, false);
	}
}
static struct bpf_reg_state *get_dynptr_arg_reg(struct bpf_verifier_env *env,
						const struct bpf_func_proto *fn,
						struct bpf_reg_state *regs)
{
	struct bpf_reg_state *state = NULL;
	int i;

	for (i = 0; i < MAX_BPF_FUNC_REG_ARGS; i++)
		if (arg_type_is_dynptr(fn->arg_type[i])) {
			if (state) {
				verbose(env, "verifier internal error: multiple dynptr args\n");
				return NULL;
			}
			state = &regs[BPF_REG_1 + i];
		}

	if (!state)
		verbose(env, "verifier internal error: no dynptr arg found\n");

	return state;
}

static int dynptr_id(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
{
	struct bpf_func_state *state = func(env, reg);
	int spi;

	if (reg->type == CONST_PTR_TO_DYNPTR)
		return reg->id;
	spi = dynptr_get_spi(env, reg);
	if (spi < 0)
		return spi;
	return state->stack[spi].spilled_ptr.id;
}

static int dynptr_ref_obj_id(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
{
	struct bpf_func_state *state = func(env, reg);
	int spi;

	if (reg->type == CONST_PTR_TO_DYNPTR)
		return reg->ref_obj_id;
	spi = dynptr_get_spi(env, reg);
	if (spi < 0)
		return spi;
	return state->stack[spi].spilled_ptr.ref_obj_id;
}

static enum bpf_dynptr_type dynptr_get_type(struct bpf_verifier_env *env,
					    struct bpf_reg_state *reg)
{
	struct bpf_func_state *state = func(env, reg);
	int spi;

	if (reg->type == CONST_PTR_TO_DYNPTR)
		return reg->dynptr.type;

	spi = __get_spi(reg->off);
	if (spi < 0) {
		verbose(env, "verifier internal error: invalid spi when querying dynptr type\n");
		return BPF_DYNPTR_TYPE_INVALID;
	}

	return state->stack[spi].spilled_ptr.dynptr.type;
}
8629 static int check_func_arg(struct bpf_verifier_env
*env
, u32 arg
,
8630 struct bpf_call_arg_meta
*meta
,
8631 const struct bpf_func_proto
*fn
,
8634 u32 regno
= BPF_REG_1
+ arg
;
8635 struct bpf_reg_state
*regs
= cur_regs(env
), *reg
= ®s
[regno
];
8636 enum bpf_arg_type arg_type
= fn
->arg_type
[arg
];
8637 enum bpf_reg_type type
= reg
->type
;
8638 u32
*arg_btf_id
= NULL
;
8641 if (arg_type
== ARG_DONTCARE
)
8644 err
= check_reg_arg(env
, regno
, SRC_OP
);
8648 if (arg_type
== ARG_ANYTHING
) {
8649 if (is_pointer_value(env
, regno
)) {
8650 verbose(env
, "R%d leaks addr into helper function\n",
8657 if (type_is_pkt_pointer(type
) &&
8658 !may_access_direct_pkt_data(env
, meta
, BPF_READ
)) {
8659 verbose(env
, "helper access to the packet is not allowed\n");
8663 if (base_type(arg_type
) == ARG_PTR_TO_MAP_VALUE
) {
8664 err
= resolve_map_arg_type(env
, meta
, &arg_type
);
8669 if (register_is_null(reg
) && type_may_be_null(arg_type
))
8670 /* A NULL register has a SCALAR_VALUE type, so skip
8673 goto skip_type_check
;
8675 /* arg_btf_id and arg_size are in a union. */
8676 if (base_type(arg_type
) == ARG_PTR_TO_BTF_ID
||
8677 base_type(arg_type
) == ARG_PTR_TO_SPIN_LOCK
)
8678 arg_btf_id
= fn
->arg_btf_id
[arg
];
8680 err
= check_reg_type(env
, regno
, arg_type
, arg_btf_id
, meta
);
8684 err
= check_func_arg_reg_off(env
, reg
, regno
, arg_type
);
8689 if (arg_type_is_release(arg_type
)) {
8690 if (arg_type_is_dynptr(arg_type
)) {
8691 struct bpf_func_state
*state
= func(env
, reg
);
8694 /* Only dynptr created on stack can be released, thus
8695 * the get_spi and stack state checks for spilled_ptr
8696 * should only be done before process_dynptr_func for
8699 if (reg
->type
== PTR_TO_STACK
) {
8700 spi
= dynptr_get_spi(env
, reg
);
8701 if (spi
< 0 || !state
->stack
[spi
].spilled_ptr
.ref_obj_id
) {
8702 verbose(env
, "arg %d is an unacquired reference\n", regno
);
8706 verbose(env
, "cannot release unowned const bpf_dynptr\n");
8709 } else if (!reg
->ref_obj_id
&& !register_is_null(reg
)) {
8710 verbose(env
, "R%d must be referenced when passed to release function\n",
8714 if (meta
->release_regno
) {
8715 verbose(env
, "verifier internal error: more than one release argument\n");
8718 meta
->release_regno
= regno
;
8721 if (reg
->ref_obj_id
) {
8722 if (meta
->ref_obj_id
) {
8723 verbose(env
, "verifier internal error: more than one arg with ref_obj_id R%d %u %u\n",
8724 regno
, reg
->ref_obj_id
,
8728 meta
->ref_obj_id
= reg
->ref_obj_id
;
	switch (base_type(arg_type)) {
	case ARG_CONST_MAP_PTR:
		/* bpf_map_xxx(map_ptr) call: remember that map_ptr */
		if (meta->map_ptr) {
			/* Use map_uid (which is unique id of inner map) to reject:
			 * inner_map1 = bpf_map_lookup_elem(outer_map, key1)
			 * inner_map2 = bpf_map_lookup_elem(outer_map, key2)
			 * if (inner_map1 && inner_map2) {
			 *     timer = bpf_map_lookup_elem(inner_map1);
			 *     if (timer)
			 *         // mismatch would have been allowed
			 *         bpf_timer_init(timer, inner_map2);
			 * }
			 *
			 * Comparing map_ptr is enough to distinguish normal and outer maps.
			 */
			if (meta->map_ptr != reg->map_ptr ||
			    meta->map_uid != reg->map_uid) {
				verbose(env,
					"timer pointer in R1 map_uid=%d doesn't match map pointer in R2 map_uid=%d\n",
					meta->map_uid, reg->map_uid);
				return -EINVAL;
			}
		}
		meta->map_ptr = reg->map_ptr;
		meta->map_uid = reg->map_uid;
		break;
8758 case ARG_PTR_TO_MAP_KEY
:
8759 /* bpf_map_xxx(..., map_ptr, ..., key) call:
8760 * check that [key, key + map->key_size) are within
8761 * stack limits and initialized
8763 if (!meta
->map_ptr
) {
8764 /* in function declaration map_ptr must come before
8765 * map_key, so that it's verified and known before
8766 * we have to check map_key here. Otherwise it means
8767 * that kernel subsystem misconfigured verifier
8769 verbose(env
, "invalid map_ptr to access map->key\n");
8772 err
= check_helper_mem_access(env
, regno
,
8773 meta
->map_ptr
->key_size
, false,
8776 case ARG_PTR_TO_MAP_VALUE
:
8777 if (type_may_be_null(arg_type
) && register_is_null(reg
))
8780 /* bpf_map_xxx(..., map_ptr, ..., value) call:
8781 * check [value, value + map->value_size) validity
8783 if (!meta
->map_ptr
) {
8784 /* kernel subsystem misconfigured verifier */
8785 verbose(env
, "invalid map_ptr to access map->value\n");
8788 meta
->raw_mode
= arg_type
& MEM_UNINIT
;
8789 err
= check_helper_mem_access(env
, regno
,
8790 meta
->map_ptr
->value_size
, false,
8793 case ARG_PTR_TO_PERCPU_BTF_ID
:
8795 verbose(env
, "Helper has invalid btf_id in R%d\n", regno
);
8798 meta
->ret_btf
= reg
->btf
;
8799 meta
->ret_btf_id
= reg
->btf_id
;
8801 case ARG_PTR_TO_SPIN_LOCK
:
8802 if (in_rbtree_lock_required_cb(env
)) {
8803 verbose(env
, "can't spin_{lock,unlock} in rbtree cb\n");
8806 if (meta
->func_id
== BPF_FUNC_spin_lock
) {
8807 err
= process_spin_lock(env
, regno
, true);
8810 } else if (meta
->func_id
== BPF_FUNC_spin_unlock
) {
8811 err
= process_spin_lock(env
, regno
, false);
8815 verbose(env
, "verifier internal error\n");
8819 case ARG_PTR_TO_TIMER
:
8820 err
= process_timer_func(env
, regno
, meta
);
8824 case ARG_PTR_TO_FUNC
:
8825 meta
->subprogno
= reg
->subprogno
;
8827 case ARG_PTR_TO_MEM
:
8828 /* The access to this pointer is only checked when we hit the
8829 * next is_mem_size argument below.
8831 meta
->raw_mode
= arg_type
& MEM_UNINIT
;
8832 if (arg_type
& MEM_FIXED_SIZE
) {
8833 err
= check_helper_mem_access(env
, regno
,
8834 fn
->arg_size
[arg
], false,
8838 case ARG_CONST_SIZE
:
8839 err
= check_mem_size_reg(env
, reg
, regno
, false, meta
);
8841 case ARG_CONST_SIZE_OR_ZERO
:
8842 err
= check_mem_size_reg(env
, reg
, regno
, true, meta
);
8844 case ARG_PTR_TO_DYNPTR
:
8845 err
= process_dynptr_func(env
, regno
, insn_idx
, arg_type
, 0);
8849 case ARG_CONST_ALLOC_SIZE_OR_ZERO
:
8850 if (!tnum_is_const(reg
->var_off
)) {
8851 verbose(env
, "R%d is not a known constant'\n",
8855 meta
->mem_size
= reg
->var_off
.value
;
8856 err
= mark_chain_precision(env
, regno
);
8860 case ARG_PTR_TO_INT
:
8861 case ARG_PTR_TO_LONG
:
8863 int size
= int_ptr_type_to_size(arg_type
);
8865 err
= check_helper_mem_access(env
, regno
, size
, false, meta
);
8868 err
= check_ptr_alignment(env
, reg
, 0, size
, true);
8871 case ARG_PTR_TO_CONST_STR
:
8873 struct bpf_map
*map
= reg
->map_ptr
;
8878 if (!bpf_map_is_rdonly(map
)) {
8879 verbose(env
, "R%d does not point to a readonly map'\n", regno
);
8883 if (!tnum_is_const(reg
->var_off
)) {
8884 verbose(env
, "R%d is not a constant address'\n", regno
);
8888 if (!map
->ops
->map_direct_value_addr
) {
8889 verbose(env
, "no direct value access support for this map type\n");
8893 err
= check_map_access(env
, regno
, reg
->off
,
8894 map
->value_size
- reg
->off
, false,
8899 map_off
= reg
->off
+ reg
->var_off
.value
;
8900 err
= map
->ops
->map_direct_value_addr(map
, &map_addr
, map_off
);
8902 verbose(env
, "direct value access on string failed\n");
8906 str_ptr
= (char *)(long)(map_addr
);
8907 if (!strnchr(str_ptr
+ map_off
, map
->value_size
- map_off
, 0)) {
8908 verbose(env
, "string is not zero-terminated\n");
8913 case ARG_PTR_TO_KPTR
:
8914 err
= process_kptr_func(env
, regno
, meta
);
8923 static bool may_update_sockmap(struct bpf_verifier_env
*env
, int func_id
)
8925 enum bpf_attach_type eatype
= env
->prog
->expected_attach_type
;
8926 enum bpf_prog_type type
= resolve_prog_type(env
->prog
);
8928 if (func_id
!= BPF_FUNC_map_update_elem
)
8931 /* It's not possible to get access to a locked struct sock in these
8932 * contexts, so updating is safe.
8935 case BPF_PROG_TYPE_TRACING
:
8936 if (eatype
== BPF_TRACE_ITER
)
8939 case BPF_PROG_TYPE_SOCKET_FILTER
:
8940 case BPF_PROG_TYPE_SCHED_CLS
:
8941 case BPF_PROG_TYPE_SCHED_ACT
:
8942 case BPF_PROG_TYPE_XDP
:
8943 case BPF_PROG_TYPE_SK_REUSEPORT
:
8944 case BPF_PROG_TYPE_FLOW_DISSECTOR
:
8945 case BPF_PROG_TYPE_SK_LOOKUP
:
8951 verbose(env
, "cannot update sockmap in this context\n");
8955 static bool allow_tail_call_in_subprogs(struct bpf_verifier_env
*env
)
8957 return env
->prog
->jit_requested
&&
8958 bpf_jit_supports_subprog_tailcalls();
8961 static int check_map_func_compatibility(struct bpf_verifier_env
*env
,
8962 struct bpf_map
*map
, int func_id
)
8967 /* We need a two way check, first is from map perspective ... */
8968 switch (map
->map_type
) {
8969 case BPF_MAP_TYPE_PROG_ARRAY
:
8970 if (func_id
!= BPF_FUNC_tail_call
)
8973 case BPF_MAP_TYPE_PERF_EVENT_ARRAY
:
8974 if (func_id
!= BPF_FUNC_perf_event_read
&&
8975 func_id
!= BPF_FUNC_perf_event_output
&&
8976 func_id
!= BPF_FUNC_skb_output
&&
8977 func_id
!= BPF_FUNC_perf_event_read_value
&&
8978 func_id
!= BPF_FUNC_xdp_output
)
8981 case BPF_MAP_TYPE_RINGBUF
:
8982 if (func_id
!= BPF_FUNC_ringbuf_output
&&
8983 func_id
!= BPF_FUNC_ringbuf_reserve
&&
8984 func_id
!= BPF_FUNC_ringbuf_query
&&
8985 func_id
!= BPF_FUNC_ringbuf_reserve_dynptr
&&
8986 func_id
!= BPF_FUNC_ringbuf_submit_dynptr
&&
8987 func_id
!= BPF_FUNC_ringbuf_discard_dynptr
)
8990 case BPF_MAP_TYPE_USER_RINGBUF
:
8991 if (func_id
!= BPF_FUNC_user_ringbuf_drain
)
8994 case BPF_MAP_TYPE_STACK_TRACE
:
8995 if (func_id
!= BPF_FUNC_get_stackid
)
8998 case BPF_MAP_TYPE_CGROUP_ARRAY
:
8999 if (func_id
!= BPF_FUNC_skb_under_cgroup
&&
9000 func_id
!= BPF_FUNC_current_task_under_cgroup
)
9003 case BPF_MAP_TYPE_CGROUP_STORAGE
:
9004 case BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE
:
9005 if (func_id
!= BPF_FUNC_get_local_storage
)
9008 case BPF_MAP_TYPE_DEVMAP
:
9009 case BPF_MAP_TYPE_DEVMAP_HASH
:
9010 if (func_id
!= BPF_FUNC_redirect_map
&&
9011 func_id
!= BPF_FUNC_map_lookup_elem
)
9014 /* Restrict bpf side of cpumap and xskmap, open when use-cases
9017 case BPF_MAP_TYPE_CPUMAP
:
9018 if (func_id
!= BPF_FUNC_redirect_map
)
9021 case BPF_MAP_TYPE_XSKMAP
:
9022 if (func_id
!= BPF_FUNC_redirect_map
&&
9023 func_id
!= BPF_FUNC_map_lookup_elem
)
9026 case BPF_MAP_TYPE_ARRAY_OF_MAPS
:
9027 case BPF_MAP_TYPE_HASH_OF_MAPS
:
9028 if (func_id
!= BPF_FUNC_map_lookup_elem
)
9031 case BPF_MAP_TYPE_SOCKMAP
:
9032 if (func_id
!= BPF_FUNC_sk_redirect_map
&&
9033 func_id
!= BPF_FUNC_sock_map_update
&&
9034 func_id
!= BPF_FUNC_map_delete_elem
&&
9035 func_id
!= BPF_FUNC_msg_redirect_map
&&
9036 func_id
!= BPF_FUNC_sk_select_reuseport
&&
9037 func_id
!= BPF_FUNC_map_lookup_elem
&&
9038 !may_update_sockmap(env
, func_id
))
9041 case BPF_MAP_TYPE_SOCKHASH
:
9042 if (func_id
!= BPF_FUNC_sk_redirect_hash
&&
9043 func_id
!= BPF_FUNC_sock_hash_update
&&
9044 func_id
!= BPF_FUNC_map_delete_elem
&&
9045 func_id
!= BPF_FUNC_msg_redirect_hash
&&
9046 func_id
!= BPF_FUNC_sk_select_reuseport
&&
9047 func_id
!= BPF_FUNC_map_lookup_elem
&&
9048 !may_update_sockmap(env
, func_id
))
9051 case BPF_MAP_TYPE_REUSEPORT_SOCKARRAY
:
9052 if (func_id
!= BPF_FUNC_sk_select_reuseport
)
9055 case BPF_MAP_TYPE_QUEUE
:
9056 case BPF_MAP_TYPE_STACK
:
9057 if (func_id
!= BPF_FUNC_map_peek_elem
&&
9058 func_id
!= BPF_FUNC_map_pop_elem
&&
9059 func_id
!= BPF_FUNC_map_push_elem
)
9062 case BPF_MAP_TYPE_SK_STORAGE
:
9063 if (func_id
!= BPF_FUNC_sk_storage_get
&&
9064 func_id
!= BPF_FUNC_sk_storage_delete
&&
9065 func_id
!= BPF_FUNC_kptr_xchg
)
9068 case BPF_MAP_TYPE_INODE_STORAGE
:
9069 if (func_id
!= BPF_FUNC_inode_storage_get
&&
9070 func_id
!= BPF_FUNC_inode_storage_delete
&&
9071 func_id
!= BPF_FUNC_kptr_xchg
)
9074 case BPF_MAP_TYPE_TASK_STORAGE
:
9075 if (func_id
!= BPF_FUNC_task_storage_get
&&
9076 func_id
!= BPF_FUNC_task_storage_delete
&&
9077 func_id
!= BPF_FUNC_kptr_xchg
)
9080 case BPF_MAP_TYPE_CGRP_STORAGE
:
9081 if (func_id
!= BPF_FUNC_cgrp_storage_get
&&
9082 func_id
!= BPF_FUNC_cgrp_storage_delete
&&
9083 func_id
!= BPF_FUNC_kptr_xchg
)
9086 case BPF_MAP_TYPE_BLOOM_FILTER
:
9087 if (func_id
!= BPF_FUNC_map_peek_elem
&&
9088 func_id
!= BPF_FUNC_map_push_elem
)
9095 /* ... and second from the function itself. */
9097 case BPF_FUNC_tail_call
:
9098 if (map
->map_type
!= BPF_MAP_TYPE_PROG_ARRAY
)
9100 if (env
->subprog_cnt
> 1 && !allow_tail_call_in_subprogs(env
)) {
9101 verbose(env
, "tail_calls are not allowed in non-JITed programs with bpf-to-bpf calls\n");
9105 case BPF_FUNC_perf_event_read
:
9106 case BPF_FUNC_perf_event_output
:
9107 case BPF_FUNC_perf_event_read_value
:
9108 case BPF_FUNC_skb_output
:
9109 case BPF_FUNC_xdp_output
:
9110 if (map
->map_type
!= BPF_MAP_TYPE_PERF_EVENT_ARRAY
)
9113 case BPF_FUNC_ringbuf_output
:
9114 case BPF_FUNC_ringbuf_reserve
:
9115 case BPF_FUNC_ringbuf_query
:
9116 case BPF_FUNC_ringbuf_reserve_dynptr
:
9117 case BPF_FUNC_ringbuf_submit_dynptr
:
9118 case BPF_FUNC_ringbuf_discard_dynptr
:
9119 if (map
->map_type
!= BPF_MAP_TYPE_RINGBUF
)
9122 case BPF_FUNC_user_ringbuf_drain
:
9123 if (map
->map_type
!= BPF_MAP_TYPE_USER_RINGBUF
)
9126 case BPF_FUNC_get_stackid
:
9127 if (map
->map_type
!= BPF_MAP_TYPE_STACK_TRACE
)
9130 case BPF_FUNC_current_task_under_cgroup
:
9131 case BPF_FUNC_skb_under_cgroup
:
9132 if (map
->map_type
!= BPF_MAP_TYPE_CGROUP_ARRAY
)
9135 case BPF_FUNC_redirect_map
:
9136 if (map
->map_type
!= BPF_MAP_TYPE_DEVMAP
&&
9137 map
->map_type
!= BPF_MAP_TYPE_DEVMAP_HASH
&&
9138 map
->map_type
!= BPF_MAP_TYPE_CPUMAP
&&
9139 map
->map_type
!= BPF_MAP_TYPE_XSKMAP
)
9142 case BPF_FUNC_sk_redirect_map
:
9143 case BPF_FUNC_msg_redirect_map
:
9144 case BPF_FUNC_sock_map_update
:
9145 if (map
->map_type
!= BPF_MAP_TYPE_SOCKMAP
)
9148 case BPF_FUNC_sk_redirect_hash
:
9149 case BPF_FUNC_msg_redirect_hash
:
9150 case BPF_FUNC_sock_hash_update
:
9151 if (map
->map_type
!= BPF_MAP_TYPE_SOCKHASH
)
9154 case BPF_FUNC_get_local_storage
:
9155 if (map
->map_type
!= BPF_MAP_TYPE_CGROUP_STORAGE
&&
9156 map
->map_type
!= BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE
)
9159 case BPF_FUNC_sk_select_reuseport
:
9160 if (map
->map_type
!= BPF_MAP_TYPE_REUSEPORT_SOCKARRAY
&&
9161 map
->map_type
!= BPF_MAP_TYPE_SOCKMAP
&&
9162 map
->map_type
!= BPF_MAP_TYPE_SOCKHASH
)
9165 case BPF_FUNC_map_pop_elem
:
9166 if (map
->map_type
!= BPF_MAP_TYPE_QUEUE
&&
9167 map
->map_type
!= BPF_MAP_TYPE_STACK
)
9170 case BPF_FUNC_map_peek_elem
:
9171 case BPF_FUNC_map_push_elem
:
9172 if (map
->map_type
!= BPF_MAP_TYPE_QUEUE
&&
9173 map
->map_type
!= BPF_MAP_TYPE_STACK
&&
9174 map
->map_type
!= BPF_MAP_TYPE_BLOOM_FILTER
)
9177 case BPF_FUNC_map_lookup_percpu_elem
:
9178 if (map
->map_type
!= BPF_MAP_TYPE_PERCPU_ARRAY
&&
9179 map
->map_type
!= BPF_MAP_TYPE_PERCPU_HASH
&&
9180 map
->map_type
!= BPF_MAP_TYPE_LRU_PERCPU_HASH
)
9183 case BPF_FUNC_sk_storage_get
:
9184 case BPF_FUNC_sk_storage_delete
:
9185 if (map
->map_type
!= BPF_MAP_TYPE_SK_STORAGE
)
9188 case BPF_FUNC_inode_storage_get
:
9189 case BPF_FUNC_inode_storage_delete
:
9190 if (map
->map_type
!= BPF_MAP_TYPE_INODE_STORAGE
)
9193 case BPF_FUNC_task_storage_get
:
9194 case BPF_FUNC_task_storage_delete
:
9195 if (map
->map_type
!= BPF_MAP_TYPE_TASK_STORAGE
)
9198 case BPF_FUNC_cgrp_storage_get
:
9199 case BPF_FUNC_cgrp_storage_delete
:
9200 if (map
->map_type
!= BPF_MAP_TYPE_CGRP_STORAGE
)
	verbose(env, "cannot pass map_type %d into func %s#%d\n",
		map->map_type, func_id_name(func_id), func_id);
	return -EINVAL;
}
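/* Illustrative only: the two-way compatibility check above is what ties, for
 * example, bpf_redirect_map() to DEVMAP/DEVMAP_HASH/CPUMAP/XSKMAP. A minimal
 * XDP sketch that satisfies both directions of the check (assumes libbpf map
 * definition conventions):
 *
 *	struct {
 *		__uint(type, BPF_MAP_TYPE_DEVMAP);
 *		__uint(max_entries, 8);
 *		__type(key, __u32);
 *		__type(value, __u32);
 *	} tx_ports SEC(".maps");
 *
 *	SEC("xdp")
 *	int xdp_redirect(struct xdp_md *ctx)
 *	{
 *		return bpf_redirect_map(&tx_ports, 0, XDP_PASS);
 *	}
 */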
static bool check_raw_mode_ok(const struct bpf_func_proto *fn)
{
	int count = 0;

	if (fn->arg1_type == ARG_PTR_TO_UNINIT_MEM)
		count++;
	if (fn->arg2_type == ARG_PTR_TO_UNINIT_MEM)
		count++;
	if (fn->arg3_type == ARG_PTR_TO_UNINIT_MEM)
		count++;
	if (fn->arg4_type == ARG_PTR_TO_UNINIT_MEM)
		count++;
	if (fn->arg5_type == ARG_PTR_TO_UNINIT_MEM)
		count++;

	/* We only support one arg being in raw mode at the moment,
	 * which is sufficient for the helper functions we have
	 * right now.
	 */
	return count <= 1;
}

static bool check_args_pair_invalid(const struct bpf_func_proto *fn, int arg)
{
	bool is_fixed = fn->arg_type[arg] & MEM_FIXED_SIZE;
	bool has_size = fn->arg_size[arg] != 0;
	bool is_next_size = false;

	if (arg + 1 < ARRAY_SIZE(fn->arg_type))
		is_next_size = arg_type_is_mem_size(fn->arg_type[arg + 1]);

	if (base_type(fn->arg_type[arg]) != ARG_PTR_TO_MEM)
		return is_next_size;

	return has_size == is_next_size || is_next_size == is_fixed;
}
static bool check_arg_pair_ok(const struct bpf_func_proto *fn)
{
	/* bpf_xxx(..., buf, len) call will access 'len'
	 * bytes from memory 'buf'. Both arg types need
	 * to be paired, so make sure there's no buggy
	 * helper function specification.
	 */
	if (arg_type_is_mem_size(fn->arg1_type) ||
	    check_args_pair_invalid(fn, 0) ||
	    check_args_pair_invalid(fn, 1) ||
	    check_args_pair_invalid(fn, 2) ||
	    check_args_pair_invalid(fn, 3) ||
	    check_args_pair_invalid(fn, 4))
		return false;

	return true;
}
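/* Illustrative only: the pairing rule above is about helper *prototypes*. A
 * proto of the following shape is well-formed because the ARG_CONST_SIZE
 * argument immediately follows the memory argument it sizes. The helper name
 * and function are hypothetical; this is only a sketch of the convention:
 *
 *	static const struct bpf_func_proto bpf_example_copy_proto = {
 *		.func		= bpf_example_copy,
 *		.ret_type	= RET_INTEGER,
 *		.arg1_type	= ARG_PTR_TO_UNINIT_MEM,
 *		.arg2_type	= ARG_CONST_SIZE,
 *		.arg3_type	= ARG_ANYTHING,
 *	};
 */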
static bool check_btf_id_ok(const struct bpf_func_proto *fn)
{
	int i;

	for (i = 0; i < ARRAY_SIZE(fn->arg_type); i++) {
		if (base_type(fn->arg_type[i]) == ARG_PTR_TO_BTF_ID)
			return !!fn->arg_btf_id[i];
		if (base_type(fn->arg_type[i]) == ARG_PTR_TO_SPIN_LOCK)
			return fn->arg_btf_id[i] == BPF_PTR_POISON;
		if (base_type(fn->arg_type[i]) != ARG_PTR_TO_BTF_ID && fn->arg_btf_id[i] &&
		    /* arg_btf_id and arg_size are in a union. */
		    (base_type(fn->arg_type[i]) != ARG_PTR_TO_MEM ||
		     !(fn->arg_type[i] & MEM_FIXED_SIZE)))
			return false;
	}

	return true;
}

static int check_func_proto(const struct bpf_func_proto *fn, int func_id)
{
	return check_raw_mode_ok(fn) &&
	       check_arg_pair_ok(fn) &&
	       check_btf_id_ok(fn) ? 0 : -EINVAL;
}
9295 /* Packet data might have moved, any old PTR_TO_PACKET[_META,_END]
9296 * are now invalid, so turn them into unknown SCALAR_VALUE.
9298 * This also applies to dynptr slices belonging to skb and xdp dynptrs,
9299 * since these slices point to packet data.
9301 static void clear_all_pkt_pointers(struct bpf_verifier_env
*env
)
9303 struct bpf_func_state
*state
;
9304 struct bpf_reg_state
*reg
;
9306 bpf_for_each_reg_in_vstate(env
->cur_state
, state
, reg
, ({
9307 if (reg_is_pkt_pointer_any(reg
) || reg_is_dynptr_slice_pkt(reg
))
9308 mark_reg_invalid(env
, reg
);
9314 BEYOND_PKT_END
= -2,
9317 static void mark_pkt_end(struct bpf_verifier_state
*vstate
, int regn
, bool range_open
)
9319 struct bpf_func_state
*state
= vstate
->frame
[vstate
->curframe
];
9320 struct bpf_reg_state
*reg
= &state
->regs
[regn
];
9322 if (reg
->type
!= PTR_TO_PACKET
)
9323 /* PTR_TO_PACKET_META is not supported yet */
9326 /* The 'reg' is pkt > pkt_end or pkt >= pkt_end.
9327 * How far beyond pkt_end it goes is unknown.
9328 * if (!range_open) it's the case of pkt >= pkt_end
9329 * if (range_open) it's the case of pkt > pkt_end
9330 * hence this pointer is at least 1 byte bigger than pkt_end
9333 reg
->range
= BEYOND_PKT_END
;
9335 reg
->range
= AT_PKT_END
;
9338 /* The pointer with the specified id has released its reference to kernel
9339 * resources. Identify all copies of the same pointer and clear the reference.
9341 static int release_reference(struct bpf_verifier_env
*env
,
9344 struct bpf_func_state
*state
;
9345 struct bpf_reg_state
*reg
;
9348 err
= release_reference_state(cur_func(env
), ref_obj_id
);
9352 bpf_for_each_reg_in_vstate(env
->cur_state
, state
, reg
, ({
9353 if (reg
->ref_obj_id
== ref_obj_id
)
9354 mark_reg_invalid(env
, reg
);
9360 static void invalidate_non_owning_refs(struct bpf_verifier_env
*env
)
9362 struct bpf_func_state
*unused
;
9363 struct bpf_reg_state
*reg
;
9365 bpf_for_each_reg_in_vstate(env
->cur_state
, unused
, reg
, ({
9366 if (type_is_non_owning_ref(reg
->type
))
9367 mark_reg_invalid(env
, reg
);
9371 static void clear_caller_saved_regs(struct bpf_verifier_env
*env
,
9372 struct bpf_reg_state
*regs
)
9376 /* after the call registers r0 - r5 were scratched */
9377 for (i
= 0; i
< CALLER_SAVED_REGS
; i
++) {
9378 mark_reg_not_init(env
, regs
, caller_saved
[i
]);
9379 __check_reg_arg(env
, regs
, caller_saved
[i
], DST_OP_NO_MARK
);
9383 typedef int (*set_callee_state_fn
)(struct bpf_verifier_env
*env
,
9384 struct bpf_func_state
*caller
,
9385 struct bpf_func_state
*callee
,
9388 static int set_callee_state(struct bpf_verifier_env
*env
,
9389 struct bpf_func_state
*caller
,
9390 struct bpf_func_state
*callee
, int insn_idx
);
9392 static int setup_func_entry(struct bpf_verifier_env
*env
, int subprog
, int callsite
,
9393 set_callee_state_fn set_callee_state_cb
,
9394 struct bpf_verifier_state
*state
)
9396 struct bpf_func_state
*caller
, *callee
;
9399 if (state
->curframe
+ 1 >= MAX_CALL_FRAMES
) {
9400 verbose(env
, "the call stack of %d frames is too deep\n",
9401 state
->curframe
+ 2);
9405 if (state
->frame
[state
->curframe
+ 1]) {
9406 verbose(env
, "verifier bug. Frame %d already allocated\n",
9407 state
->curframe
+ 1);
9411 caller
= state
->frame
[state
->curframe
];
9412 callee
= kzalloc(sizeof(*callee
), GFP_KERNEL
);
9415 state
->frame
[state
->curframe
+ 1] = callee
;
9417 /* callee cannot access r0, r6 - r9 for reading and has to write
9418 * into its own stack before reading from it.
9419 * callee can read/write into caller's stack
9421 init_func_state(env
, callee
,
9422 /* remember the callsite, it will be used by bpf_exit */
9424 state
->curframe
+ 1 /* frameno within this callchain */,
9425 subprog
/* subprog number within this prog */);
9426 /* Transfer references to the callee */
9427 err
= copy_reference_state(callee
, caller
);
9428 err
= err
?: set_callee_state_cb(env
, caller
, callee
, callsite
);
9432 /* only increment it after check_reg_arg() finished */
9438 free_func_state(callee
);
9439 state
->frame
[state
->curframe
+ 1] = NULL
;
9443 static int push_callback_call(struct bpf_verifier_env
*env
, struct bpf_insn
*insn
,
9444 int insn_idx
, int subprog
,
9445 set_callee_state_fn set_callee_state_cb
)
9447 struct bpf_verifier_state
*state
= env
->cur_state
, *callback_state
;
9448 struct bpf_func_state
*caller
, *callee
;
9451 caller
= state
->frame
[state
->curframe
];
9452 err
= btf_check_subprog_call(env
, subprog
, caller
->regs
);
9456 /* set_callee_state is used for direct subprog calls, but we are
9457 * interested in validating only BPF helpers that can call subprogs as
9460 env
->subprog_info
[subprog
].is_cb
= true;
9461 if (bpf_pseudo_kfunc_call(insn
) &&
9462 !is_sync_callback_calling_kfunc(insn
->imm
)) {
9463 verbose(env
, "verifier bug: kfunc %s#%d not marked as callback-calling\n",
9464 func_id_name(insn
->imm
), insn
->imm
);
9466 } else if (!bpf_pseudo_kfunc_call(insn
) &&
9467 !is_callback_calling_function(insn
->imm
)) { /* helper */
9468 verbose(env
, "verifier bug: helper %s#%d not marked as callback-calling\n",
9469 func_id_name(insn
->imm
), insn
->imm
);
9473 if (insn
->code
== (BPF_JMP
| BPF_CALL
) &&
9474 insn
->src_reg
== 0 &&
9475 insn
->imm
== BPF_FUNC_timer_set_callback
) {
9476 struct bpf_verifier_state
*async_cb
;
9478 /* there is no real recursion here. timer callbacks are async */
9479 env
->subprog_info
[subprog
].is_async_cb
= true;
9480 async_cb
= push_async_cb(env
, env
->subprog_info
[subprog
].start
,
9484 callee
= async_cb
->frame
[0];
9485 callee
->async_entry_cnt
= caller
->async_entry_cnt
+ 1;
9487 /* Convert bpf_timer_set_callback() args into timer callback args */
9488 err
= set_callee_state_cb(env
, caller
, callee
, insn_idx
);
9495 /* for callback functions enqueue entry to callback and
9496 * proceed with next instruction within current frame.
9498 callback_state
= push_stack(env
, env
->subprog_info
[subprog
].start
, insn_idx
, false);
9499 if (!callback_state
)
9502 err
= setup_func_entry(env
, subprog
, insn_idx
, set_callee_state_cb
,
9507 callback_state
->callback_unroll_depth
++;
9508 callback_state
->frame
[callback_state
->curframe
- 1]->callback_depth
++;
9509 caller
->callback_depth
= 0;
9513 static int check_func_call(struct bpf_verifier_env
*env
, struct bpf_insn
*insn
,
9516 struct bpf_verifier_state
*state
= env
->cur_state
;
9517 struct bpf_func_state
*caller
;
9518 int err
, subprog
, target_insn
;
9520 target_insn
= *insn_idx
+ insn
->imm
+ 1;
9521 subprog
= find_subprog(env
, target_insn
);
9523 verbose(env
, "verifier bug. No program starts at insn %d\n", target_insn
);
9527 caller
= state
->frame
[state
->curframe
];
9528 err
= btf_check_subprog_call(env
, subprog
, caller
->regs
);
9531 if (subprog_is_global(env
, subprog
)) {
9533 verbose(env
, "Caller passes invalid args into func#%d\n", subprog
);
9537 if (env
->log
.level
& BPF_LOG_LEVEL
)
9538 verbose(env
, "Func#%d is global and valid. Skipping.\n", subprog
);
9539 clear_caller_saved_regs(env
, caller
->regs
);
9541 /* All global functions return a 64-bit SCALAR_VALUE */
9542 mark_reg_unknown(env
, caller
->regs
, BPF_REG_0
);
9543 caller
->regs
[BPF_REG_0
].subreg_def
= DEF_NOT_SUBREG
;
9545 /* continue with next insn after call */
9549 /* for regular function entry setup new frame and continue
9552 err
= setup_func_entry(env
, subprog
, *insn_idx
, set_callee_state
, state
);
9556 clear_caller_saved_regs(env
, caller
->regs
);
9558 /* and go analyze first insn of the callee */
9559 *insn_idx
= env
->subprog_info
[subprog
].start
- 1;
9561 if (env
->log
.level
& BPF_LOG_LEVEL
) {
9562 verbose(env
, "caller:\n");
9563 print_verifier_state(env
, caller
, true);
9564 verbose(env
, "callee:\n");
9565 print_verifier_state(env
, state
->frame
[state
->curframe
], true);
int map_set_for_each_callback_args(struct bpf_verifier_env *env,
				   struct bpf_func_state *caller,
				   struct bpf_func_state *callee)
{
	/* bpf_for_each_map_elem(struct bpf_map *map, void *callback_fn,
	 *	void *callback_ctx, u64 flags);
	 * callback_fn(struct bpf_map *map, void *key, void *value,
	 *	void *callback_ctx);
	 */
	callee->regs[BPF_REG_1] = caller->regs[BPF_REG_1];

	callee->regs[BPF_REG_2].type = PTR_TO_MAP_KEY;
	__mark_reg_known_zero(&callee->regs[BPF_REG_2]);
	callee->regs[BPF_REG_2].map_ptr = caller->regs[BPF_REG_1].map_ptr;

	callee->regs[BPF_REG_3].type = PTR_TO_MAP_VALUE;
	__mark_reg_known_zero(&callee->regs[BPF_REG_3]);
	callee->regs[BPF_REG_3].map_ptr = caller->regs[BPF_REG_1].map_ptr;

	/* pointer to stack or null */
	callee->regs[BPF_REG_4] = caller->regs[BPF_REG_3];

	/* unused */
	__mark_reg_not_init(env, &callee->regs[BPF_REG_5]);
	return 0;
}
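/* Illustrative only: the register setup above corresponds to a BPF-side use
 * of bpf_for_each_map_elem() such as the sketch below (assumes an array map
 * named counts with __u64 values and libbpf conventions):
 *
 *	static long count_cb(struct bpf_map *map, __u32 *key, __u64 *val, void *ctx)
 *	{
 *		*(__u64 *)ctx += *val;
 *		return 0;	// 0 = continue, 1 = stop iterating
 *	}
 *
 *	__u64 sum = 0;
 *
 *	bpf_for_each_map_elem(&counts, count_cb, &sum, 0);
 */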
9598 static int set_callee_state(struct bpf_verifier_env
*env
,
9599 struct bpf_func_state
*caller
,
9600 struct bpf_func_state
*callee
, int insn_idx
)
9604 /* copy r1 - r5 args that callee can access. The copy includes parent
9605 * pointers, which connects us up to the liveness chain
9607 for (i
= BPF_REG_1
; i
<= BPF_REG_5
; i
++)
9608 callee
->regs
[i
] = caller
->regs
[i
];
9612 static int set_map_elem_callback_state(struct bpf_verifier_env
*env
,
9613 struct bpf_func_state
*caller
,
9614 struct bpf_func_state
*callee
,
9617 struct bpf_insn_aux_data
*insn_aux
= &env
->insn_aux_data
[insn_idx
];
9618 struct bpf_map
*map
;
9621 if (bpf_map_ptr_poisoned(insn_aux
)) {
9622 verbose(env
, "tail_call abusing map_ptr\n");
9626 map
= BPF_MAP_PTR(insn_aux
->map_ptr_state
);
9627 if (!map
->ops
->map_set_for_each_callback_args
||
9628 !map
->ops
->map_for_each_callback
) {
9629 verbose(env
, "callback function not allowed for map\n");
9633 err
= map
->ops
->map_set_for_each_callback_args(env
, caller
, callee
);
9637 callee
->in_callback_fn
= true;
9638 callee
->callback_ret_range
= tnum_range(0, 1);
static int set_loop_callback_state(struct bpf_verifier_env *env,
				   struct bpf_func_state *caller,
				   struct bpf_func_state *callee,
				   int insn_idx)
{
	/* bpf_loop(u32 nr_loops, void *callback_fn, void *callback_ctx,
	 *	    u64 flags);
	 * callback_fn(u32 index, void *callback_ctx);
	 */
	callee->regs[BPF_REG_1].type = SCALAR_VALUE;
	callee->regs[BPF_REG_2] = caller->regs[BPF_REG_3];

	/* unused */
	__mark_reg_not_init(env, &callee->regs[BPF_REG_3]);
	__mark_reg_not_init(env, &callee->regs[BPF_REG_4]);
	__mark_reg_not_init(env, &callee->regs[BPF_REG_5]);

	callee->in_callback_fn = true;
	callee->callback_ret_range = tnum_range(0, 1);
	return 0;
}
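/* Illustrative only: a matching BPF-side bpf_loop() use for the callback
 * contract set up above (sketch):
 *
 *	static long loop_cb(__u32 index, void *ctx)
 *	{
 *		*(__u64 *)ctx += index;
 *		return 0;	// 0 = continue, 1 = break out of the loop
 *	}
 *
 *	__u64 acc = 0;
 *
 *	bpf_loop(100, loop_cb, &acc, 0);
 */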
static int set_timer_callback_state(struct bpf_verifier_env *env,
				    struct bpf_func_state *caller,
				    struct bpf_func_state *callee,
				    int insn_idx)
{
	struct bpf_map *map_ptr = caller->regs[BPF_REG_1].map_ptr;

	/* bpf_timer_set_callback(struct bpf_timer *timer, void *callback_fn);
	 * callback_fn(struct bpf_map *map, void *key, void *value);
	 */
	callee->regs[BPF_REG_1].type = CONST_PTR_TO_MAP;
	__mark_reg_known_zero(&callee->regs[BPF_REG_1]);
	callee->regs[BPF_REG_1].map_ptr = map_ptr;

	callee->regs[BPF_REG_2].type = PTR_TO_MAP_KEY;
	__mark_reg_known_zero(&callee->regs[BPF_REG_2]);
	callee->regs[BPF_REG_2].map_ptr = map_ptr;

	callee->regs[BPF_REG_3].type = PTR_TO_MAP_VALUE;
	__mark_reg_known_zero(&callee->regs[BPF_REG_3]);
	callee->regs[BPF_REG_3].map_ptr = map_ptr;

	/* unused */
	__mark_reg_not_init(env, &callee->regs[BPF_REG_4]);
	__mark_reg_not_init(env, &callee->regs[BPF_REG_5]);
	callee->in_async_callback_fn = true;
	callee->callback_ret_range = tnum_range(0, 1);
	return 0;
}
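/* Illustrative only: the async callback arranged above is what a program
 * installs with bpf_timer_set_callback(); the callback receives the map, key
 * and value of the element embedding the timer. Sketch, assuming a
 * hypothetical map value type struct map_val that contains a struct bpf_timer:
 *
 *	static int timer_cb(void *map, __u32 *key, struct map_val *val)
 *	{
 *		// runs asynchronously, possibly long after the installer returned
 *		return 0;
 *	}
 *
 *	bpf_timer_init(&val->t, &array_map, CLOCK_MONOTONIC);
 *	bpf_timer_set_callback(&val->t, timer_cb);
 *	bpf_timer_start(&val->t, 1000000, 0);	// 1 ms, in nanoseconds
 */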
static int set_find_vma_callback_state(struct bpf_verifier_env *env,
				       struct bpf_func_state *caller,
				       struct bpf_func_state *callee,
				       int insn_idx)
{
	/* bpf_find_vma(struct task_struct *task, u64 addr,
	 *               void *callback_fn, void *callback_ctx, u64 flags)
	 * (callback_fn)(struct task_struct *task,
	 *               struct vm_area_struct *vma, void *callback_ctx);
	 */
	callee->regs[BPF_REG_1] = caller->regs[BPF_REG_1];

	callee->regs[BPF_REG_2].type = PTR_TO_BTF_ID;
	__mark_reg_known_zero(&callee->regs[BPF_REG_2]);
	callee->regs[BPF_REG_2].btf = btf_vmlinux;
	callee->regs[BPF_REG_2].btf_id = btf_tracing_ids[BTF_TRACING_TYPE_VMA];

	/* pointer to stack or null */
	callee->regs[BPF_REG_3] = caller->regs[BPF_REG_4];

	/* unused */
	__mark_reg_not_init(env, &callee->regs[BPF_REG_4]);
	__mark_reg_not_init(env, &callee->regs[BPF_REG_5]);
	callee->in_callback_fn = true;
	callee->callback_ret_range = tnum_range(0, 1);
	return 0;
}
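/* Illustrative only: BPF-side counterpart of the setup above (sketch for a
 * tracing program; some_addr stands for any address of interest and is not a
 * real symbol):
 *
 *	static long vma_cb(struct task_struct *task, struct vm_area_struct *vma,
 *			   void *ctx)
 *	{
 *		*(__u64 *)ctx = vma->vm_start;
 *		return 0;
 *	}
 *
 *	__u64 start = 0;
 *	struct task_struct *task = bpf_get_current_task_btf();
 *
 *	bpf_find_vma(task, some_addr, vma_cb, &start, 0);
 */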
static int set_user_ringbuf_callback_state(struct bpf_verifier_env *env,
					   struct bpf_func_state *caller,
					   struct bpf_func_state *callee,
					   int insn_idx)
{
	/* bpf_user_ringbuf_drain(struct bpf_map *map, void *callback_fn, void
	 *			  callback_ctx, u64 flags);
	 * callback_fn(const struct bpf_dynptr_t* dynptr, void *callback_ctx);
	 */
	__mark_reg_not_init(env, &callee->regs[BPF_REG_0]);
	mark_dynptr_cb_reg(env, &callee->regs[BPF_REG_1], BPF_DYNPTR_TYPE_LOCAL);
	callee->regs[BPF_REG_2] = caller->regs[BPF_REG_3];

	/* unused */
	__mark_reg_not_init(env, &callee->regs[BPF_REG_3]);
	__mark_reg_not_init(env, &callee->regs[BPF_REG_4]);
	__mark_reg_not_init(env, &callee->regs[BPF_REG_5]);

	callee->in_callback_fn = true;
	callee->callback_ret_range = tnum_range(0, 1);
	return 0;
}
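/* Illustrative only: the dynptr callback shaped above matches a BPF-side
 * drain loop like the sketch below (assumes a BPF_MAP_TYPE_USER_RINGBUF map
 * named user_rb):
 *
 *	static long drain_cb(struct bpf_dynptr *dynptr, void *ctx)
 *	{
 *		__u64 sample;
 *
 *		if (bpf_dynptr_read(&sample, sizeof(sample), dynptr, 0, 0))
 *			return 0;	// skip malformed sample, keep draining
 *		(*(long *)ctx)++;
 *		return 0;		// returning 1 would stop the drain early
 *	}
 *
 *	long handled = 0;
 *
 *	bpf_user_ringbuf_drain(&user_rb, drain_cb, &handled, 0);
 */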
9745 static int set_rbtree_add_callback_state(struct bpf_verifier_env
*env
,
9746 struct bpf_func_state
*caller
,
9747 struct bpf_func_state
*callee
,
9750 /* void bpf_rbtree_add_impl(struct bpf_rb_root *root, struct bpf_rb_node *node,
9751 * bool (less)(struct bpf_rb_node *a, const struct bpf_rb_node *b));
9753 * 'struct bpf_rb_node *node' arg to bpf_rbtree_add_impl is the same PTR_TO_BTF_ID w/ offset
9754 * that 'less' callback args will be receiving. However, 'node' arg was release_reference'd
9755 * by this point, so look at 'root'
9757 struct btf_field
*field
;
9759 field
= reg_find_field_offset(&caller
->regs
[BPF_REG_1
], caller
->regs
[BPF_REG_1
].off
,
9761 if (!field
|| !field
->graph_root
.value_btf_id
)
9764 mark_reg_graph_node(callee
->regs
, BPF_REG_1
, &field
->graph_root
);
9765 ref_set_non_owning(env
, &callee
->regs
[BPF_REG_1
]);
9766 mark_reg_graph_node(callee
->regs
, BPF_REG_2
, &field
->graph_root
);
9767 ref_set_non_owning(env
, &callee
->regs
[BPF_REG_2
]);
9769 __mark_reg_not_init(env
, &callee
->regs
[BPF_REG_3
]);
9770 __mark_reg_not_init(env
, &callee
->regs
[BPF_REG_4
]);
9771 __mark_reg_not_init(env
, &callee
->regs
[BPF_REG_5
]);
9772 callee
->in_callback_fn
= true;
9773 callee
->callback_ret_range
= tnum_range(0, 1);
9777 static bool is_rbtree_lock_required_kfunc(u32 btf_id
);
9779 /* Are we currently verifying the callback for a rbtree helper that must
9780 * be called with lock held? If so, no need to complain about unreleased
9783 static bool in_rbtree_lock_required_cb(struct bpf_verifier_env
*env
)
9785 struct bpf_verifier_state
*state
= env
->cur_state
;
9786 struct bpf_insn
*insn
= env
->prog
->insnsi
;
9787 struct bpf_func_state
*callee
;
9790 if (!state
->curframe
)
9793 callee
= state
->frame
[state
->curframe
];
9795 if (!callee
->in_callback_fn
)
9798 kfunc_btf_id
= insn
[callee
->callsite
].imm
;
9799 return is_rbtree_lock_required_kfunc(kfunc_btf_id
);
9802 static int prepare_func_exit(struct bpf_verifier_env
*env
, int *insn_idx
)
9804 struct bpf_verifier_state
*state
= env
->cur_state
, *prev_st
;
9805 struct bpf_func_state
*caller
, *callee
;
9806 struct bpf_reg_state
*r0
;
9807 bool in_callback_fn
;
9810 callee
= state
->frame
[state
->curframe
];
9811 r0
= &callee
->regs
[BPF_REG_0
];
9812 if (r0
->type
== PTR_TO_STACK
) {
9813 /* technically it's ok to return caller's stack pointer
9814 * (or caller's caller's pointer) back to the caller,
9815 * since these pointers are valid. Only current stack
9816 * pointer will be invalid as soon as function exits,
9817 * but let's be conservative
9819 verbose(env
, "cannot return stack pointer to the caller\n");
9823 caller
= state
->frame
[state
->curframe
- 1];
9824 if (callee
->in_callback_fn
) {
9825 /* enforce R0 return value range [0, 1]. */
9826 struct tnum range
= callee
->callback_ret_range
;
9828 if (r0
->type
!= SCALAR_VALUE
) {
9829 verbose(env
, "R0 not a scalar value\n");
9832 if (!tnum_in(range
, r0
->var_off
)) {
9833 verbose_invalid_scalar(env
, r0
, &range
, "callback return", "R0");
9836 if (!calls_callback(env
, callee
->callsite
)) {
9837 verbose(env
, "BUG: in callback at %d, callsite %d !calls_callback\n",
9838 *insn_idx
, callee
->callsite
);
9842 /* return to the caller whatever r0 had in the callee */
9843 caller
->regs
[BPF_REG_0
] = *r0
;
9846 /* callback_fn frame should have released its own additions to parent's
9847 * reference state at this point, or check_reference_leak would
9848 * complain, hence it must be the same as the caller. There is no need
9851 if (!callee
->in_callback_fn
) {
9852 /* Transfer references to the caller */
9853 err
= copy_reference_state(caller
, callee
);
9858 /* for callbacks like bpf_loop or bpf_for_each_map_elem go back to callsite,
9859 * there function call logic would reschedule callback visit. If iteration
9860 * converges is_state_visited() would prune that visit eventually.
9862 in_callback_fn
= callee
->in_callback_fn
;
9864 *insn_idx
= callee
->callsite
;
9866 *insn_idx
= callee
->callsite
+ 1;
9868 if (env
->log
.level
& BPF_LOG_LEVEL
) {
9869 verbose(env
, "returning from callee:\n");
9870 print_verifier_state(env
, callee
, true);
9871 verbose(env
, "to caller at %d:\n", *insn_idx
);
9872 print_verifier_state(env
, caller
, true);
9874 /* clear everything in the callee. In case of exceptional exits using
9875 * bpf_throw, this will be done by copy_verifier_state for extra frames. */
9876 free_func_state(callee
);
9877 state
->frame
[state
->curframe
--] = NULL
;
9879 /* for callbacks widen imprecise scalars to make programs like below verify:
9881 * struct ctx { int i; }
9882 * void cb(int idx, struct ctx *ctx) { ctx->i++; ... }
9884 * struct ctx = { .i = 0; }
9885 * bpf_loop(100, cb, &ctx, 0);
9887 * This is similar to what is done in process_iter_next_call() for open
9890 prev_st
= in_callback_fn
? find_prev_entry(env
, state
, *insn_idx
) : NULL
;
9892 err
= widen_imprecise_scalars(env
, prev_st
, state
);
static void do_refine_retval_range(struct bpf_reg_state *regs, int ret_type,
				   int func_id,
				   struct bpf_call_arg_meta *meta)
{
	struct bpf_reg_state *ret_reg = &regs[BPF_REG_0];

	if (ret_type != RET_INTEGER)
		return;

	switch (func_id) {
	case BPF_FUNC_get_stack:
	case BPF_FUNC_get_task_stack:
	case BPF_FUNC_probe_read_str:
	case BPF_FUNC_probe_read_kernel_str:
	case BPF_FUNC_probe_read_user_str:
		ret_reg->smax_value = meta->msize_max_value;
		ret_reg->s32_max_value = meta->msize_max_value;
		ret_reg->smin_value = -MAX_ERRNO;
		ret_reg->s32_min_value = -MAX_ERRNO;
		reg_bounds_sync(ret_reg);
		break;
	case BPF_FUNC_get_smp_processor_id:
		ret_reg->umax_value = nr_cpu_ids - 1;
		ret_reg->u32_max_value = nr_cpu_ids - 1;
		ret_reg->smax_value = nr_cpu_ids - 1;
		ret_reg->s32_max_value = nr_cpu_ids - 1;
		ret_reg->umin_value = 0;
		ret_reg->u32_min_value = 0;
		ret_reg->smin_value = 0;
		ret_reg->s32_min_value = 0;
		reg_bounds_sync(ret_reg);
		break;
	}
}
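/* Illustrative sketch (not part of the original source): with the refinement
 * above, a BPF program fragment such as
 *
 *	int cpu = bpf_get_smp_processor_id();
 *	val = per_cpu_slots[cpu];	// hypothetical array with nr_cpu_ids entries
 *
 * can pass verification without an explicit bounds check, because R0 is known
 * to stay within [0, nr_cpu_ids - 1] after the helper returns.
 */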
static int
record_func_map(struct bpf_verifier_env *env, struct bpf_call_arg_meta *meta,
		int func_id, int insn_idx)
{
	struct bpf_insn_aux_data *aux = &env->insn_aux_data[insn_idx];
	struct bpf_map *map = meta->map_ptr;

	if (func_id != BPF_FUNC_tail_call &&
	    func_id != BPF_FUNC_map_lookup_elem &&
	    func_id != BPF_FUNC_map_update_elem &&
	    func_id != BPF_FUNC_map_delete_elem &&
	    func_id != BPF_FUNC_map_push_elem &&
	    func_id != BPF_FUNC_map_pop_elem &&
	    func_id != BPF_FUNC_map_peek_elem &&
	    func_id != BPF_FUNC_for_each_map_elem &&
	    func_id != BPF_FUNC_redirect_map &&
	    func_id != BPF_FUNC_map_lookup_percpu_elem)
		return 0;

	if (map == NULL) {
		verbose(env, "kernel subsystem misconfigured verifier\n");
		return -EINVAL;
	}

	/* In case of read-only, some additional restrictions
	 * need to be applied in order to prevent altering the
	 * state of the map from program side.
	 */
	if ((map->map_flags & BPF_F_RDONLY_PROG) &&
	    (func_id == BPF_FUNC_map_delete_elem ||
	     func_id == BPF_FUNC_map_update_elem ||
	     func_id == BPF_FUNC_map_push_elem ||
	     func_id == BPF_FUNC_map_pop_elem)) {
		verbose(env, "write into map forbidden\n");
		return -EACCES;
	}

	if (!BPF_MAP_PTR(aux->map_ptr_state))
		bpf_map_ptr_store(aux, meta->map_ptr,
				  !meta->map_ptr->bypass_spec_v1);
	else if (BPF_MAP_PTR(aux->map_ptr_state) != meta->map_ptr)
		bpf_map_ptr_store(aux, BPF_MAP_PTR_POISON,
				  !meta->map_ptr->bypass_spec_v1);
	return 0;
}
static int
record_func_key(struct bpf_verifier_env *env, struct bpf_call_arg_meta *meta,
		int func_id, int insn_idx)
{
	struct bpf_insn_aux_data *aux = &env->insn_aux_data[insn_idx];
	struct bpf_reg_state *regs = cur_regs(env), *reg;
	struct bpf_map *map = meta->map_ptr;
	u64 val, max;
	int err;

	if (func_id != BPF_FUNC_tail_call)
		return 0;
	if (!map || map->map_type != BPF_MAP_TYPE_PROG_ARRAY) {
		verbose(env, "kernel subsystem misconfigured verifier\n");
		return -EINVAL;
	}

	reg = &regs[BPF_REG_3];
	val = reg->var_off.value;
	max = map->max_entries;

	if (!(register_is_const(reg) && val < max)) {
		bpf_map_key_store(aux, BPF_MAP_KEY_POISON);
		return 0;
	}

	err = mark_chain_precision(env, BPF_REG_3);
	if (err)
		return err;

	if (bpf_map_key_unseen(aux))
		bpf_map_key_store(aux, val);
	else if (!bpf_map_key_poisoned(aux) &&
		 bpf_map_key_immediate(aux) != val)
		bpf_map_key_store(aux, BPF_MAP_KEY_POISON);
	return 0;
}
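/* Illustrative sketch (not part of the original source): the key tracking above
 * lets a call shaped like
 *
 *	bpf_tail_call(ctx, &prog_array, 3);
 *
 * be optimized into a direct jump later, because the index (3) is a constant
 * below max_entries on every verified path reaching this instruction. A
 * variable or out-of-range index poisons the recorded key instead.
 */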
static int check_reference_leak(struct bpf_verifier_env *env, bool exception_exit)
{
	struct bpf_func_state *state = cur_func(env);
	bool refs_lingering = false;
	int i;

	if (!exception_exit && state->frameno && !state->in_callback_fn)
		return 0;

	for (i = 0; i < state->acquired_refs; i++) {
		if (!exception_exit && state->in_callback_fn && state->refs[i].callback_ref != state->frameno)
			continue;
		verbose(env, "Unreleased reference id=%d alloc_insn=%d\n",
			state->refs[i].id, state->refs[i].insn_idx);
		refs_lingering = true;
	}
	return refs_lingering ? -EINVAL : 0;
}

static int check_bpf_snprintf_call(struct bpf_verifier_env *env,
				   struct bpf_reg_state *regs)
{
	struct bpf_reg_state *fmt_reg = &regs[BPF_REG_3];
	struct bpf_reg_state *data_len_reg = &regs[BPF_REG_5];
	struct bpf_map *fmt_map = fmt_reg->map_ptr;
	struct bpf_bprintf_data data = {};
	int err, fmt_map_off, num_args;
	u64 fmt_addr;
	char *fmt;

	/* data must be an array of u64 */
	if (data_len_reg->var_off.value % 8)
		return -EINVAL;
	num_args = data_len_reg->var_off.value / 8;

	/* fmt being ARG_PTR_TO_CONST_STR guarantees that var_off is const
	 * and map_direct_value_addr is set.
	 */
	fmt_map_off = fmt_reg->off + fmt_reg->var_off.value;
	err = fmt_map->ops->map_direct_value_addr(fmt_map, &fmt_addr,
						  fmt_map_off);
	if (err) {
		verbose(env, "verifier bug\n");
		return -EFAULT;
	}
	fmt = (char *)(long)fmt_addr + fmt_map_off;

	/* We are also guaranteed that fmt+fmt_map_off is NULL terminated, we
	 * can focus on validating the format specifiers.
	 */
	err = bpf_bprintf_prepare(fmt, UINT_MAX, NULL, num_args, &data);
	if (err < 0)
		verbose(env, "Invalid format string\n");

	return err;
}
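/* Illustrative sketch (not part of the original source): on the BPF program
 * side the checks above correspond to a call shaped like
 *
 *	u64 args[] = { (u64)a, (u64)b };
 *	bpf_snprintf(buf, sizeof(buf), "a=%d b=%d", args, sizeof(args));
 *
 * where the data length must be a multiple of 8 and the format string must
 * live in read-only map memory, so its specifiers can be validated here at
 * verification time rather than at run time.
 */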
static int check_get_func_ip(struct bpf_verifier_env *env)
{
	enum bpf_prog_type type = resolve_prog_type(env->prog);
	int func_id = BPF_FUNC_get_func_ip;

	if (type == BPF_PROG_TYPE_TRACING) {
		if (!bpf_prog_has_trampoline(env->prog)) {
			verbose(env, "func %s#%d supported only for fentry/fexit/fmod_ret programs\n",
				func_id_name(func_id), func_id);
			return -ENOTSUPP;
		}
		return 0;
	} else if (type == BPF_PROG_TYPE_KPROBE) {
		return 0;
	}

	verbose(env, "func %s#%d not supported for program type %d\n",
		func_id_name(func_id), func_id, type);
	return -ENOTSUPP;
}

static struct bpf_insn_aux_data *cur_aux(struct bpf_verifier_env *env)
{
	return &env->insn_aux_data[env->insn_idx];
}

static bool loop_flag_is_zero(struct bpf_verifier_env *env)
{
	struct bpf_reg_state *regs = cur_regs(env);
	struct bpf_reg_state *reg = &regs[BPF_REG_4];
	bool reg_is_null = register_is_null(reg);

	if (reg_is_null)
		mark_chain_precision(env, BPF_REG_4);

	return reg_is_null;
}

static void update_loop_inline_state(struct bpf_verifier_env *env, u32 subprogno)
{
	struct bpf_loop_inline_state *state = &cur_aux(env)->loop_inline_state;

	if (!state->initialized) {
		state->initialized = 1;
		state->fit_for_inline = loop_flag_is_zero(env);
		state->callback_subprogno = subprogno;
		return;
	}

	if (!state->fit_for_inline)
		return;

	state->fit_for_inline = (loop_flag_is_zero(env) &&
				 state->callback_subprogno == subprogno);
}
static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
			     int *insn_idx_p)
{
	enum bpf_prog_type prog_type = resolve_prog_type(env->prog);
	bool returns_cpu_specific_alloc_ptr = false;
	const struct bpf_func_proto *fn = NULL;
	enum bpf_return_type ret_type;
	enum bpf_type_flag ret_flag;
	struct bpf_reg_state *regs;
	struct bpf_call_arg_meta meta;
	int insn_idx = *insn_idx_p;
	bool changes_data;
	int i, err, func_id;

	/* find function prototype */
	func_id = insn->imm;
	if (func_id < 0 || func_id >= __BPF_FUNC_MAX_ID) {
		verbose(env, "invalid func %s#%d\n", func_id_name(func_id),
			func_id);
		return -EINVAL;
	}

	if (env->ops->get_func_proto)
		fn = env->ops->get_func_proto(func_id, env->prog);
	if (!fn) {
		verbose(env, "unknown func %s#%d\n", func_id_name(func_id),
			func_id);
		return -EINVAL;
	}

	/* eBPF programs must be GPL compatible to use GPL-ed functions */
	if (!env->prog->gpl_compatible && fn->gpl_only) {
		verbose(env, "cannot call GPL-restricted function from non-GPL compatible program\n");
		return -EINVAL;
	}

	if (fn->allowed && !fn->allowed(env->prog)) {
		verbose(env, "helper call is not allowed in probe\n");
		return -EINVAL;
	}

	if (!env->prog->aux->sleepable && fn->might_sleep) {
		verbose(env, "helper call might sleep in a non-sleepable prog\n");
		return -EINVAL;
	}

	/* With LD_ABS/IND some JITs save/restore skb from r1. */
	changes_data = bpf_helper_changes_pkt_data(fn->func);
	if (changes_data && fn->arg1_type != ARG_PTR_TO_CTX) {
		verbose(env, "kernel subsystem misconfigured func %s#%d: r1 != ctx\n",
			func_id_name(func_id), func_id);
		return -EINVAL;
	}

	memset(&meta, 0, sizeof(meta));
	meta.pkt_access = fn->pkt_access;

	err = check_func_proto(fn, func_id);
	if (err) {
		verbose(env, "kernel subsystem misconfigured func %s#%d\n",
			func_id_name(func_id), func_id);
		return err;
	}

	if (env->cur_state->active_rcu_lock) {
		if (fn->might_sleep) {
			verbose(env, "sleepable helper %s#%d in rcu_read_lock region\n",
				func_id_name(func_id), func_id);
			return -EINVAL;
		}

		if (env->prog->aux->sleepable && is_storage_get_function(func_id))
			env->insn_aux_data[insn_idx].storage_get_func_atomic = true;
	}

	meta.func_id = func_id;
	/* check args */
	for (i = 0; i < MAX_BPF_FUNC_REG_ARGS; i++) {
		err = check_func_arg(env, i, &meta, fn, insn_idx);
		if (err)
			return err;
	}

	err = record_func_map(env, &meta, func_id, insn_idx);
	if (err)
		return err;

	err = record_func_key(env, &meta, func_id, insn_idx);
	if (err)
		return err;

	/* Mark slots with STACK_MISC in case of raw mode, stack offset
	 * is inferred from register state.
	 */
	for (i = 0; i < meta.access_size; i++) {
		err = check_mem_access(env, insn_idx, meta.regno, i, BPF_B,
				       BPF_WRITE, -1, false, false);
		if (err)
			return err;
	}

	regs = cur_regs(env);

	if (meta.release_regno) {
		err = -EINVAL;
		/* This can only be set for PTR_TO_STACK, as CONST_PTR_TO_DYNPTR cannot
		 * be released by any dynptr helper. Hence, unmark_stack_slots_dynptr
		 * is safe to do directly.
		 */
		if (arg_type_is_dynptr(fn->arg_type[meta.release_regno - BPF_REG_1])) {
			if (regs[meta.release_regno].type == CONST_PTR_TO_DYNPTR) {
				verbose(env, "verifier internal error: CONST_PTR_TO_DYNPTR cannot be released\n");
				return -EFAULT;
			}
			err = unmark_stack_slots_dynptr(env, &regs[meta.release_regno]);
		} else if (func_id == BPF_FUNC_kptr_xchg && meta.ref_obj_id) {
			u32 ref_obj_id = meta.ref_obj_id;
			bool in_rcu = in_rcu_cs(env);
			struct bpf_func_state *state;
			struct bpf_reg_state *reg;

			err = release_reference_state(cur_func(env), ref_obj_id);
			if (!err) {
				bpf_for_each_reg_in_vstate(env->cur_state, state, reg, ({
					if (reg->ref_obj_id == ref_obj_id) {
						if (in_rcu && (reg->type & MEM_ALLOC) && (reg->type & MEM_PERCPU)) {
							reg->ref_obj_id = 0;
							reg->type &= ~MEM_ALLOC;
							reg->type |= MEM_RCU;
						} else {
							mark_reg_invalid(env, reg);
						}
					}
				}));
			}
		} else if (meta.ref_obj_id) {
			err = release_reference(env, meta.ref_obj_id);
		} else if (register_is_null(&regs[meta.release_regno])) {
			/* meta.ref_obj_id can only be 0 if register that is meant to be
			 * released is NULL, which must be > R0.
			 */
			err = 0;
		}
		if (err) {
			verbose(env, "func %s#%d reference has not been acquired before\n",
				func_id_name(func_id), func_id);
			return err;
		}
	}

	switch (func_id) {
	case BPF_FUNC_tail_call:
		err = check_reference_leak(env, false);
		if (err) {
			verbose(env, "tail_call would lead to reference leak\n");
			return err;
		}
		break;
	case BPF_FUNC_get_local_storage:
		/* check that flags argument in get_local_storage(map, flags) is 0,
		 * this is required because get_local_storage() can't return an error.
		 */
		if (!register_is_null(&regs[BPF_REG_2])) {
			verbose(env, "get_local_storage() doesn't support non-zero flags\n");
			return -EINVAL;
		}
		break;
	case BPF_FUNC_for_each_map_elem:
		err = push_callback_call(env, insn, insn_idx, meta.subprogno,
					 set_map_elem_callback_state);
		break;
	case BPF_FUNC_timer_set_callback:
		err = push_callback_call(env, insn, insn_idx, meta.subprogno,
					 set_timer_callback_state);
		break;
	case BPF_FUNC_find_vma:
		err = push_callback_call(env, insn, insn_idx, meta.subprogno,
					 set_find_vma_callback_state);
		break;
	case BPF_FUNC_snprintf:
		err = check_bpf_snprintf_call(env, regs);
		break;
	case BPF_FUNC_loop:
		update_loop_inline_state(env, meta.subprogno);
		/* Verifier relies on R1 value to determine if bpf_loop() iteration
		 * is finished, thus mark it precise.
		 */
		err = mark_chain_precision(env, BPF_REG_1);
		if (err)
			return err;
		if (cur_func(env)->callback_depth < regs[BPF_REG_1].umax_value) {
			err = push_callback_call(env, insn, insn_idx, meta.subprogno,
						 set_loop_callback_state);
		} else {
			cur_func(env)->callback_depth = 0;
			if (env->log.level & BPF_LOG_LEVEL2)
				verbose(env, "frame%d bpf_loop iteration limit reached\n",
					env->cur_state->curframe);
		}
		break;
	case BPF_FUNC_dynptr_from_mem:
		if (regs[BPF_REG_1].type != PTR_TO_MAP_VALUE) {
			verbose(env, "Unsupported reg type %s for bpf_dynptr_from_mem data\n",
				reg_type_str(env, regs[BPF_REG_1].type));
			return -EACCES;
		}
		break;
	case BPF_FUNC_set_retval:
		if (prog_type == BPF_PROG_TYPE_LSM &&
		    env->prog->expected_attach_type == BPF_LSM_CGROUP) {
			if (!env->prog->aux->attach_func_proto->type) {
				/* Make sure programs that attach to void
				 * hooks don't try to modify return value.
				 */
				verbose(env, "BPF_LSM_CGROUP that attach to void LSM hooks can't modify return value!\n");
				return -EINVAL;
			}
		}
		break;
	case BPF_FUNC_dynptr_data:
	{
		struct bpf_reg_state *reg;
		int id, ref_obj_id;

		reg = get_dynptr_arg_reg(env, fn, regs);
		if (!reg)
			return -EFAULT;

		if (meta.dynptr_id) {
			verbose(env, "verifier internal error: meta.dynptr_id already set\n");
			return -EFAULT;
		}
		if (meta.ref_obj_id) {
			verbose(env, "verifier internal error: meta.ref_obj_id already set\n");
			return -EFAULT;
		}

		id = dynptr_id(env, reg);
		if (id < 0) {
			verbose(env, "verifier internal error: failed to obtain dynptr id\n");
			return id;
		}

		ref_obj_id = dynptr_ref_obj_id(env, reg);
		if (ref_obj_id < 0) {
			verbose(env, "verifier internal error: failed to obtain dynptr ref_obj_id\n");
			return ref_obj_id;
		}

		meta.dynptr_id = id;
		meta.ref_obj_id = ref_obj_id;

		break;
	}
	case BPF_FUNC_dynptr_write:
	{
		enum bpf_dynptr_type dynptr_type;
		struct bpf_reg_state *reg;

		reg = get_dynptr_arg_reg(env, fn, regs);
		if (!reg)
			return -EFAULT;

		dynptr_type = dynptr_get_type(env, reg);
		if (dynptr_type == BPF_DYNPTR_TYPE_INVALID)
			return -EFAULT;

		if (dynptr_type == BPF_DYNPTR_TYPE_SKB)
			/* this will trigger clear_all_pkt_pointers(), which will
			 * invalidate all dynptr slices associated with the skb
			 */
			changes_data = true;

		break;
	}
	case BPF_FUNC_per_cpu_ptr:
	case BPF_FUNC_this_cpu_ptr:
	{
		struct bpf_reg_state *reg = &regs[BPF_REG_1];
		const struct btf_type *type;

		if (reg->type & MEM_RCU) {
			type = btf_type_by_id(reg->btf, reg->btf_id);
			if (!type || !btf_type_is_struct(type)) {
				verbose(env, "Helper has invalid btf/btf_id in R1\n");
				return -EFAULT;
			}
			returns_cpu_specific_alloc_ptr = true;
			env->insn_aux_data[insn_idx].call_with_percpu_alloc_ptr = true;
		}
		break;
	}
	case BPF_FUNC_user_ringbuf_drain:
		err = push_callback_call(env, insn, insn_idx, meta.subprogno,
					 set_user_ringbuf_callback_state);
		break;
	}

	if (err)
		return err;

	/* reset caller saved regs */
	for (i = 0; i < CALLER_SAVED_REGS; i++) {
		mark_reg_not_init(env, regs, caller_saved[i]);
		check_reg_arg(env, caller_saved[i], DST_OP_NO_MARK);
	}

	/* helper call returns 64-bit value. */
	regs[BPF_REG_0].subreg_def = DEF_NOT_SUBREG;

	/* update return register (already marked as written above) */
	ret_type = fn->ret_type;
	ret_flag = type_flag(ret_type);

	switch (base_type(ret_type)) {
	case RET_INTEGER:
		/* sets type to SCALAR_VALUE */
		mark_reg_unknown(env, regs, BPF_REG_0);
		break;
	case RET_VOID:
		regs[BPF_REG_0].type = NOT_INIT;
		break;
	case RET_PTR_TO_MAP_VALUE:
		/* There is no offset yet applied, variable or fixed */
		mark_reg_known_zero(env, regs, BPF_REG_0);
		/* remember map_ptr, so that check_map_access()
		 * can check 'value_size' boundary of memory access
		 * to map element returned from bpf_map_lookup_elem()
		 */
		if (meta.map_ptr == NULL) {
			verbose(env,
				"kernel subsystem misconfigured verifier\n");
			return -EINVAL;
		}
		regs[BPF_REG_0].map_ptr = meta.map_ptr;
		regs[BPF_REG_0].map_uid = meta.map_uid;
		regs[BPF_REG_0].type = PTR_TO_MAP_VALUE | ret_flag;
		if (!type_may_be_null(ret_type) &&
		    btf_record_has_field(meta.map_ptr->record, BPF_SPIN_LOCK)) {
			regs[BPF_REG_0].id = ++env->id_gen;
		}
		break;
	case RET_PTR_TO_SOCKET:
		mark_reg_known_zero(env, regs, BPF_REG_0);
		regs[BPF_REG_0].type = PTR_TO_SOCKET | ret_flag;
		break;
	case RET_PTR_TO_SOCK_COMMON:
		mark_reg_known_zero(env, regs, BPF_REG_0);
		regs[BPF_REG_0].type = PTR_TO_SOCK_COMMON | ret_flag;
		break;
	case RET_PTR_TO_TCP_SOCK:
		mark_reg_known_zero(env, regs, BPF_REG_0);
		regs[BPF_REG_0].type = PTR_TO_TCP_SOCK | ret_flag;
		break;
	case RET_PTR_TO_MEM:
		mark_reg_known_zero(env, regs, BPF_REG_0);
		regs[BPF_REG_0].type = PTR_TO_MEM | ret_flag;
		regs[BPF_REG_0].mem_size = meta.mem_size;
		break;
	case RET_PTR_TO_MEM_OR_BTF_ID:
	{
		const struct btf_type *t;

		mark_reg_known_zero(env, regs, BPF_REG_0);
		t = btf_type_skip_modifiers(meta.ret_btf, meta.ret_btf_id, NULL);
		if (!btf_type_is_struct(t)) {
			u32 tsize;
			const struct btf_type *ret;
			const char *tname;

			/* resolve the type size of ksym. */
			ret = btf_resolve_size(meta.ret_btf, t, &tsize);
			if (IS_ERR(ret)) {
				tname = btf_name_by_offset(meta.ret_btf, t->name_off);
				verbose(env, "unable to resolve the size of type '%s': %ld\n",
					tname, PTR_ERR(ret));
				return -EINVAL;
			}
			regs[BPF_REG_0].type = PTR_TO_MEM | ret_flag;
			regs[BPF_REG_0].mem_size = tsize;
		} else {
			if (returns_cpu_specific_alloc_ptr) {
				regs[BPF_REG_0].type = PTR_TO_BTF_ID | MEM_ALLOC | MEM_RCU;
			} else {
				/* MEM_RDONLY may be carried from ret_flag, but it
				 * doesn't apply on PTR_TO_BTF_ID. Fold it, otherwise
				 * it will confuse the check of PTR_TO_BTF_ID in
				 * check_mem_access().
				 */
				ret_flag &= ~MEM_RDONLY;
				regs[BPF_REG_0].type = PTR_TO_BTF_ID | ret_flag;
			}

			regs[BPF_REG_0].btf = meta.ret_btf;
			regs[BPF_REG_0].btf_id = meta.ret_btf_id;
		}
		break;
	}
	case RET_PTR_TO_BTF_ID:
	{
		struct btf *ret_btf;
		int ret_btf_id;

		mark_reg_known_zero(env, regs, BPF_REG_0);
		regs[BPF_REG_0].type = PTR_TO_BTF_ID | ret_flag;
		if (func_id == BPF_FUNC_kptr_xchg) {
			ret_btf = meta.kptr_field->kptr.btf;
			ret_btf_id = meta.kptr_field->kptr.btf_id;
			if (!btf_is_kernel(ret_btf)) {
				regs[BPF_REG_0].type |= MEM_ALLOC;
				if (meta.kptr_field->type == BPF_KPTR_PERCPU)
					regs[BPF_REG_0].type |= MEM_PERCPU;
			}
		} else {
			if (fn->ret_btf_id == BPF_PTR_POISON) {
				verbose(env, "verifier internal error:");
				verbose(env, "func %s has non-overwritten BPF_PTR_POISON return type\n",
					func_id_name(func_id));
				return -EINVAL;
			}
			ret_btf = btf_vmlinux;
			ret_btf_id = *fn->ret_btf_id;
		}
		if (ret_btf_id == 0) {
			verbose(env, "invalid return type %u of func %s#%d\n",
				base_type(ret_type), func_id_name(func_id),
				func_id);
			return -EINVAL;
		}
		regs[BPF_REG_0].btf = ret_btf;
		regs[BPF_REG_0].btf_id = ret_btf_id;
		break;
	}
	default:
		verbose(env, "unknown return type %u of func %s#%d\n",
			base_type(ret_type), func_id_name(func_id), func_id);
		return -EINVAL;
	}

	if (type_may_be_null(regs[BPF_REG_0].type))
		regs[BPF_REG_0].id = ++env->id_gen;

	if (helper_multiple_ref_obj_use(func_id, meta.map_ptr)) {
		verbose(env, "verifier internal error: func %s#%d sets ref_obj_id more than once\n",
			func_id_name(func_id), func_id);
		return -EFAULT;
	}

	if (is_dynptr_ref_function(func_id))
		regs[BPF_REG_0].dynptr_id = meta.dynptr_id;

	if (is_ptr_cast_function(func_id) || is_dynptr_ref_function(func_id)) {
		/* For release_reference() */
		regs[BPF_REG_0].ref_obj_id = meta.ref_obj_id;
	} else if (is_acquire_function(func_id, meta.map_ptr)) {
		int id = acquire_reference_state(env, insn_idx);

		if (id < 0)
			return id;
		/* For mark_ptr_or_null_reg() */
		regs[BPF_REG_0].id = id;
		/* For release_reference() */
		regs[BPF_REG_0].ref_obj_id = id;
	}

	do_refine_retval_range(regs, fn->ret_type, func_id, &meta);

	err = check_map_func_compatibility(env, meta.map_ptr, func_id);
	if (err)
		return err;

	if ((func_id == BPF_FUNC_get_stack ||
	     func_id == BPF_FUNC_get_task_stack) &&
	    !env->prog->has_callchain_buf) {
		const char *err_str;

#ifdef CONFIG_PERF_EVENTS
		err = get_callchain_buffers(sysctl_perf_event_max_stack);
		err_str = "cannot get callchain buffer for func %s#%d\n";
#else
		err = -ENOTSUPP;
		err_str = "func %s#%d not supported without CONFIG_PERF_EVENTS\n";
#endif
		if (err) {
			verbose(env, err_str, func_id_name(func_id), func_id);
			return err;
		}

		env->prog->has_callchain_buf = true;
	}

	if (func_id == BPF_FUNC_get_stackid || func_id == BPF_FUNC_get_stack)
		env->prog->call_get_stack = true;

	if (func_id == BPF_FUNC_get_func_ip) {
		if (check_get_func_ip(env))
			return -ENOTSUPP;
		env->prog->call_get_func_ip = true;
	}

	if (changes_data)
		clear_all_pkt_pointers(env);
	return 0;
}
/* mark_btf_func_reg_size() is used when the reg size is determined by
 * the BTF func_proto's return value size and argument.
 */
static void mark_btf_func_reg_size(struct bpf_verifier_env *env, u32 regno,
				   size_t reg_size)
{
	struct bpf_reg_state *reg = &cur_regs(env)[regno];

	if (regno == BPF_REG_0) {
		/* Function return value */
		reg->live |= REG_LIVE_WRITTEN;
		reg->subreg_def = reg_size == sizeof(u64) ?
			DEF_NOT_SUBREG : env->insn_idx + 1;
	} else {
		/* Function argument */
		if (reg_size == sizeof(u64)) {
			mark_insn_zext(env, reg);
			mark_reg_read(env, reg, reg->parent, REG_LIVE_READ64);
		} else {
			mark_reg_read(env, reg, reg->parent, REG_LIVE_READ32);
		}
	}
}

static bool is_kfunc_acquire(struct bpf_kfunc_call_arg_meta *meta)
{
	return meta->kfunc_flags & KF_ACQUIRE;
}

static bool is_kfunc_release(struct bpf_kfunc_call_arg_meta *meta)
{
	return meta->kfunc_flags & KF_RELEASE;
}

static bool is_kfunc_trusted_args(struct bpf_kfunc_call_arg_meta *meta)
{
	return (meta->kfunc_flags & KF_TRUSTED_ARGS) || is_kfunc_release(meta);
}

static bool is_kfunc_sleepable(struct bpf_kfunc_call_arg_meta *meta)
{
	return meta->kfunc_flags & KF_SLEEPABLE;
}

static bool is_kfunc_destructive(struct bpf_kfunc_call_arg_meta *meta)
{
	return meta->kfunc_flags & KF_DESTRUCTIVE;
}

static bool is_kfunc_rcu(struct bpf_kfunc_call_arg_meta *meta)
{
	return meta->kfunc_flags & KF_RCU;
}

static bool is_kfunc_rcu_protected(struct bpf_kfunc_call_arg_meta *meta)
{
	return meta->kfunc_flags & KF_RCU_PROTECTED;
}
static bool __kfunc_param_match_suffix(const struct btf *btf,
				       const struct btf_param *arg,
				       const char *suffix)
{
	int suffix_len = strlen(suffix), len;
	const char *param_name;

	/* In the future, this can be ported to use BTF tagging */
	param_name = btf_name_by_offset(btf, arg->name_off);
	if (str_is_empty(param_name))
		return false;
	len = strlen(param_name);
	if (len < suffix_len)
		return false;
	param_name += len - suffix_len;
	return !strncmp(param_name, suffix, suffix_len);
}
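/* Illustrative sketch (not part of the original source): the suffix convention
 * matched above is what gives a kfunc prototype such as
 *
 *	void *bpf_example_kfunc(void *data, u32 data__sz);	// hypothetical
 *
 * its verifier semantics: the "__sz" suffix on the second parameter name marks
 * it as the byte size of the preceding memory argument.
 */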
static bool is_kfunc_arg_mem_size(const struct btf *btf,
				  const struct btf_param *arg,
				  const struct bpf_reg_state *reg)
{
	const struct btf_type *t;

	t = btf_type_skip_modifiers(btf, arg->type, NULL);
	if (!btf_type_is_scalar(t) || reg->type != SCALAR_VALUE)
		return false;

	return __kfunc_param_match_suffix(btf, arg, "__sz");
}

static bool is_kfunc_arg_const_mem_size(const struct btf *btf,
					const struct btf_param *arg,
					const struct bpf_reg_state *reg)
{
	const struct btf_type *t;

	t = btf_type_skip_modifiers(btf, arg->type, NULL);
	if (!btf_type_is_scalar(t) || reg->type != SCALAR_VALUE)
		return false;

	return __kfunc_param_match_suffix(btf, arg, "__szk");
}

static bool is_kfunc_arg_optional(const struct btf *btf, const struct btf_param *arg)
{
	return __kfunc_param_match_suffix(btf, arg, "__opt");
}

static bool is_kfunc_arg_constant(const struct btf *btf, const struct btf_param *arg)
{
	return __kfunc_param_match_suffix(btf, arg, "__k");
}

static bool is_kfunc_arg_ignore(const struct btf *btf, const struct btf_param *arg)
{
	return __kfunc_param_match_suffix(btf, arg, "__ign");
}

static bool is_kfunc_arg_alloc_obj(const struct btf *btf, const struct btf_param *arg)
{
	return __kfunc_param_match_suffix(btf, arg, "__alloc");
}

static bool is_kfunc_arg_uninit(const struct btf *btf, const struct btf_param *arg)
{
	return __kfunc_param_match_suffix(btf, arg, "__uninit");
}

static bool is_kfunc_arg_refcounted_kptr(const struct btf *btf, const struct btf_param *arg)
{
	return __kfunc_param_match_suffix(btf, arg, "__refcounted_kptr");
}

static bool is_kfunc_arg_nullable(const struct btf *btf, const struct btf_param *arg)
{
	return __kfunc_param_match_suffix(btf, arg, "__nullable");
}

static bool is_kfunc_arg_scalar_with_name(const struct btf *btf,
					  const struct btf_param *arg,
					  const char *name)
{
	int len, target_len = strlen(name);
	const char *param_name;

	param_name = btf_name_by_offset(btf, arg->name_off);
	if (str_is_empty(param_name))
		return false;
	len = strlen(param_name);
	if (len != target_len)
		return false;
	if (strcmp(param_name, name))
		return false;

	return true;
}

enum {
	KF_ARG_DYNPTR_ID,
	KF_ARG_LIST_HEAD_ID,
	KF_ARG_LIST_NODE_ID,
	KF_ARG_RB_ROOT_ID,
	KF_ARG_RB_NODE_ID,
};

BTF_ID_LIST(kf_arg_btf_ids)
BTF_ID(struct, bpf_dynptr_kern)
BTF_ID(struct, bpf_list_head)
BTF_ID(struct, bpf_list_node)
BTF_ID(struct, bpf_rb_root)
BTF_ID(struct, bpf_rb_node)

static bool __is_kfunc_ptr_arg_type(const struct btf *btf,
				    const struct btf_param *arg, int type)
{
	const struct btf_type *t;
	u32 res_id;

	t = btf_type_skip_modifiers(btf, arg->type, NULL);
	if (!t)
		return false;
	if (!btf_type_is_ptr(t))
		return false;
	t = btf_type_skip_modifiers(btf, t->type, &res_id);
	if (!t)
		return false;
	return btf_types_are_same(btf, res_id, btf_vmlinux, kf_arg_btf_ids[type]);
}

static bool is_kfunc_arg_dynptr(const struct btf *btf, const struct btf_param *arg)
{
	return __is_kfunc_ptr_arg_type(btf, arg, KF_ARG_DYNPTR_ID);
}

static bool is_kfunc_arg_list_head(const struct btf *btf, const struct btf_param *arg)
{
	return __is_kfunc_ptr_arg_type(btf, arg, KF_ARG_LIST_HEAD_ID);
}

static bool is_kfunc_arg_list_node(const struct btf *btf, const struct btf_param *arg)
{
	return __is_kfunc_ptr_arg_type(btf, arg, KF_ARG_LIST_NODE_ID);
}

static bool is_kfunc_arg_rbtree_root(const struct btf *btf, const struct btf_param *arg)
{
	return __is_kfunc_ptr_arg_type(btf, arg, KF_ARG_RB_ROOT_ID);
}

static bool is_kfunc_arg_rbtree_node(const struct btf *btf, const struct btf_param *arg)
{
	return __is_kfunc_ptr_arg_type(btf, arg, KF_ARG_RB_NODE_ID);
}

static bool is_kfunc_arg_callback(struct bpf_verifier_env *env, const struct btf *btf,
				  const struct btf_param *arg)
{
	const struct btf_type *t;

	t = btf_type_resolve_func_ptr(btf, arg->type, NULL);
	if (!t)
		return false;

	return true;
}

/* Returns true if struct is composed of scalars, 4 levels of nesting allowed */
static bool __btf_type_is_scalar_struct(struct bpf_verifier_env *env,
					const struct btf *btf,
					const struct btf_type *t, int rec)
{
	const struct btf_type *member_type;
	const struct btf_member *member;
	u32 i;

	if (!btf_type_is_struct(t))
		return false;

	for_each_member(i, t, member) {
		const struct btf_array *array;

		member_type = btf_type_skip_modifiers(btf, member->type, NULL);
		if (btf_type_is_struct(member_type)) {
			if (rec >= 3) {
				verbose(env, "max struct nesting depth exceeded\n");
				return false;
			}
			if (!__btf_type_is_scalar_struct(env, btf, member_type, rec + 1))
				return false;
			continue;
		}
		if (btf_type_is_array(member_type)) {
			array = btf_array(member_type);
			if (!array->nelems)
				return false;
			member_type = btf_type_skip_modifiers(btf, array->type, NULL);
			if (!btf_type_is_scalar(member_type))
				return false;
			continue;
		}
		if (!btf_type_is_scalar(member_type))
			return false;
	}
	return true;
}
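/* Illustrative sketch (not part of the original source): a kfunc taking a
 * pointer-to-mem argument can accept a struct like
 *
 *	struct sample { u32 a; u64 vals[4]; struct { u16 x, y; } inner; };
 *
 * because every member is a scalar (or an array/struct of scalars) within the
 * nesting limit checked above, whereas a struct containing a pointer member
 * would be rejected.
 */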
enum kfunc_ptr_arg_type {
	KF_ARG_PTR_TO_CTX,
	KF_ARG_PTR_TO_ALLOC_BTF_ID,    /* Allocated object */
	KF_ARG_PTR_TO_REFCOUNTED_KPTR, /* Refcounted local kptr */
	KF_ARG_PTR_TO_DYNPTR,
	KF_ARG_PTR_TO_ITER,
	KF_ARG_PTR_TO_LIST_HEAD,
	KF_ARG_PTR_TO_LIST_NODE,
	KF_ARG_PTR_TO_BTF_ID,	       /* Also covers reg2btf_ids conversions */
	KF_ARG_PTR_TO_MEM,
	KF_ARG_PTR_TO_MEM_SIZE,	       /* Size derived from next argument, skip it */
	KF_ARG_PTR_TO_CALLBACK,
	KF_ARG_PTR_TO_RB_ROOT,
	KF_ARG_PTR_TO_RB_NODE,
	KF_ARG_PTR_TO_NULL,
};

enum special_kfunc_type {
	KF_bpf_obj_new_impl,
	KF_bpf_obj_drop_impl,
	KF_bpf_refcount_acquire_impl,
	KF_bpf_list_push_front_impl,
	KF_bpf_list_push_back_impl,
	KF_bpf_list_pop_front,
	KF_bpf_list_pop_back,
	KF_bpf_cast_to_kern_ctx,
	KF_bpf_rdonly_cast,
	KF_bpf_rcu_read_lock,
	KF_bpf_rcu_read_unlock,
	KF_bpf_rbtree_remove,
	KF_bpf_rbtree_add_impl,
	KF_bpf_rbtree_first,
	KF_bpf_dynptr_from_skb,
	KF_bpf_dynptr_from_xdp,
	KF_bpf_dynptr_slice,
	KF_bpf_dynptr_slice_rdwr,
	KF_bpf_dynptr_clone,
	KF_bpf_percpu_obj_new_impl,
	KF_bpf_percpu_obj_drop_impl,
	KF_bpf_throw,
	KF_bpf_iter_css_task_new,
};

BTF_SET_START(special_kfunc_set)
BTF_ID(func, bpf_obj_new_impl)
BTF_ID(func, bpf_obj_drop_impl)
BTF_ID(func, bpf_refcount_acquire_impl)
BTF_ID(func, bpf_list_push_front_impl)
BTF_ID(func, bpf_list_push_back_impl)
BTF_ID(func, bpf_list_pop_front)
BTF_ID(func, bpf_list_pop_back)
BTF_ID(func, bpf_cast_to_kern_ctx)
BTF_ID(func, bpf_rdonly_cast)
BTF_ID(func, bpf_rbtree_remove)
BTF_ID(func, bpf_rbtree_add_impl)
BTF_ID(func, bpf_rbtree_first)
BTF_ID(func, bpf_dynptr_from_skb)
BTF_ID(func, bpf_dynptr_from_xdp)
BTF_ID(func, bpf_dynptr_slice)
BTF_ID(func, bpf_dynptr_slice_rdwr)
BTF_ID(func, bpf_dynptr_clone)
BTF_ID(func, bpf_percpu_obj_new_impl)
BTF_ID(func, bpf_percpu_obj_drop_impl)
BTF_ID(func, bpf_throw)
#ifdef CONFIG_CGROUPS
BTF_ID(func, bpf_iter_css_task_new)
#endif
BTF_SET_END(special_kfunc_set)

BTF_ID_LIST(special_kfunc_list)
BTF_ID(func, bpf_obj_new_impl)
BTF_ID(func, bpf_obj_drop_impl)
BTF_ID(func, bpf_refcount_acquire_impl)
BTF_ID(func, bpf_list_push_front_impl)
BTF_ID(func, bpf_list_push_back_impl)
BTF_ID(func, bpf_list_pop_front)
BTF_ID(func, bpf_list_pop_back)
BTF_ID(func, bpf_cast_to_kern_ctx)
BTF_ID(func, bpf_rdonly_cast)
BTF_ID(func, bpf_rcu_read_lock)
BTF_ID(func, bpf_rcu_read_unlock)
BTF_ID(func, bpf_rbtree_remove)
BTF_ID(func, bpf_rbtree_add_impl)
BTF_ID(func, bpf_rbtree_first)
BTF_ID(func, bpf_dynptr_from_skb)
BTF_ID(func, bpf_dynptr_from_xdp)
BTF_ID(func, bpf_dynptr_slice)
BTF_ID(func, bpf_dynptr_slice_rdwr)
BTF_ID(func, bpf_dynptr_clone)
BTF_ID(func, bpf_percpu_obj_new_impl)
BTF_ID(func, bpf_percpu_obj_drop_impl)
BTF_ID(func, bpf_throw)
#ifdef CONFIG_CGROUPS
BTF_ID(func, bpf_iter_css_task_new)
#else
BTF_ID_UNUSED
#endif
static bool is_kfunc_ret_null(struct bpf_kfunc_call_arg_meta *meta)
{
	if (meta->func_id == special_kfunc_list[KF_bpf_refcount_acquire_impl] &&
	    meta->arg_owning_ref) {
		return false;
	}

	return meta->kfunc_flags & KF_RET_NULL;
}

static bool is_kfunc_bpf_rcu_read_lock(struct bpf_kfunc_call_arg_meta *meta)
{
	return meta->func_id == special_kfunc_list[KF_bpf_rcu_read_lock];
}

static bool is_kfunc_bpf_rcu_read_unlock(struct bpf_kfunc_call_arg_meta *meta)
{
	return meta->func_id == special_kfunc_list[KF_bpf_rcu_read_unlock];
}

static enum kfunc_ptr_arg_type
get_kfunc_ptr_arg_type(struct bpf_verifier_env *env,
		       struct bpf_kfunc_call_arg_meta *meta,
		       const struct btf_type *t, const struct btf_type *ref_t,
		       const char *ref_tname, const struct btf_param *args,
		       int argno, int nargs)
{
	u32 regno = argno + 1;
	struct bpf_reg_state *regs = cur_regs(env);
	struct bpf_reg_state *reg = &regs[regno];
	bool arg_mem_size = false;

	if (meta->func_id == special_kfunc_list[KF_bpf_cast_to_kern_ctx])
		return KF_ARG_PTR_TO_CTX;

	/* In this function, we verify the kfunc's BTF as per the argument type,
	 * leaving the rest of the verification with respect to the register
	 * type to our caller. When a set of conditions hold in the BTF type of
	 * arguments, we resolve it to a known kfunc_ptr_arg_type.
	 */
	if (btf_get_prog_ctx_type(&env->log, meta->btf, t, resolve_prog_type(env->prog), argno))
		return KF_ARG_PTR_TO_CTX;

	if (is_kfunc_arg_alloc_obj(meta->btf, &args[argno]))
		return KF_ARG_PTR_TO_ALLOC_BTF_ID;

	if (is_kfunc_arg_refcounted_kptr(meta->btf, &args[argno]))
		return KF_ARG_PTR_TO_REFCOUNTED_KPTR;

	if (is_kfunc_arg_dynptr(meta->btf, &args[argno]))
		return KF_ARG_PTR_TO_DYNPTR;

	if (is_kfunc_arg_iter(meta, argno))
		return KF_ARG_PTR_TO_ITER;

	if (is_kfunc_arg_list_head(meta->btf, &args[argno]))
		return KF_ARG_PTR_TO_LIST_HEAD;

	if (is_kfunc_arg_list_node(meta->btf, &args[argno]))
		return KF_ARG_PTR_TO_LIST_NODE;

	if (is_kfunc_arg_rbtree_root(meta->btf, &args[argno]))
		return KF_ARG_PTR_TO_RB_ROOT;

	if (is_kfunc_arg_rbtree_node(meta->btf, &args[argno]))
		return KF_ARG_PTR_TO_RB_NODE;

	if ((base_type(reg->type) == PTR_TO_BTF_ID || reg2btf_ids[base_type(reg->type)])) {
		if (!btf_type_is_struct(ref_t)) {
			verbose(env, "kernel function %s args#%d pointer type %s %s is not supported\n",
				meta->func_name, argno, btf_type_str(ref_t), ref_tname);
			return -EINVAL;
		}
		return KF_ARG_PTR_TO_BTF_ID;
	}

	if (is_kfunc_arg_callback(env, meta->btf, &args[argno]))
		return KF_ARG_PTR_TO_CALLBACK;

	if (is_kfunc_arg_nullable(meta->btf, &args[argno]) && register_is_null(reg))
		return KF_ARG_PTR_TO_NULL;

	if (argno + 1 < nargs &&
	    (is_kfunc_arg_mem_size(meta->btf, &args[argno + 1], &regs[regno + 1]) ||
	     is_kfunc_arg_const_mem_size(meta->btf, &args[argno + 1], &regs[regno + 1])))
		arg_mem_size = true;

	/* This is the catch all argument type of register types supported by
	 * check_helper_mem_access. However, we only allow when argument type is
	 * pointer to scalar, or struct composed (recursively) of scalars. When
	 * arg_mem_size is true, the pointer can be void *.
	 */
	if (!btf_type_is_scalar(ref_t) && !__btf_type_is_scalar_struct(env, meta->btf, ref_t, 0) &&
	    (arg_mem_size ? !btf_type_is_void(ref_t) : 1)) {
		verbose(env, "arg#%d pointer type %s %s must point to %sscalar, or struct with scalar\n",
			argno, btf_type_str(ref_t), ref_tname, arg_mem_size ? "void, " : "");
		return -EINVAL;
	}
	return arg_mem_size ? KF_ARG_PTR_TO_MEM_SIZE : KF_ARG_PTR_TO_MEM;
}

static int process_kf_arg_ptr_to_btf_id(struct bpf_verifier_env *env,
					struct bpf_reg_state *reg,
					const struct btf_type *ref_t,
					const char *ref_tname, u32 ref_id,
					struct bpf_kfunc_call_arg_meta *meta,
					int argno)
{
	const struct btf_type *reg_ref_t;
	bool strict_type_match = false;
	const struct btf *reg_btf;
	const char *reg_ref_tname;
	u32 reg_ref_id;

	if (base_type(reg->type) == PTR_TO_BTF_ID) {
		reg_btf = reg->btf;
		reg_ref_id = reg->btf_id;
	} else {
		reg_btf = btf_vmlinux;
		reg_ref_id = *reg2btf_ids[base_type(reg->type)];
	}

	/* Enforce strict type matching for calls to kfuncs that are acquiring
	 * or releasing a reference, or are no-cast aliases. We do _not_
	 * enforce strict matching for plain KF_TRUSTED_ARGS kfuncs by default,
	 * as we want to enable BPF programs to pass types that are bitwise
	 * equivalent without forcing them to explicitly cast with something
	 * like bpf_cast_to_kern_ctx().
	 *
	 * For example, say we had a type like the following:
	 *
	 * struct bpf_cpumask {
	 *	cpumask_t cpumask;
	 *	refcount_t usage;
	 * };
	 *
	 * Note that as specified in <linux/cpumask.h>, cpumask_t is typedef'ed
	 * to a struct cpumask, so it would be safe to pass a struct
	 * bpf_cpumask * to a kfunc expecting a struct cpumask *.
	 *
	 * The philosophy here is similar to how we allow scalars of different
	 * types to be passed to kfuncs as long as the size is the same. The
	 * only difference here is that we're simply allowing
	 * btf_struct_ids_match() to walk the struct at the 0th offset, and
	 * resolve types.
	 */
	if (is_kfunc_acquire(meta) ||
	    (is_kfunc_release(meta) && reg->ref_obj_id) ||
	    btf_type_ids_nocast_alias(&env->log, reg_btf, reg_ref_id, meta->btf, ref_id))
		strict_type_match = true;

	WARN_ON_ONCE(is_kfunc_trusted_args(meta) && reg->off);

	reg_ref_t = btf_type_skip_modifiers(reg_btf, reg_ref_id, &reg_ref_id);
	reg_ref_tname = btf_name_by_offset(reg_btf, reg_ref_t->name_off);
	if (!btf_struct_ids_match(&env->log, reg_btf, reg_ref_id, reg->off, meta->btf, ref_id, strict_type_match)) {
		verbose(env, "kernel function %s args#%d expected pointer to %s %s but R%d has a pointer to %s %s\n",
			meta->func_name, argno, btf_type_str(ref_t), ref_tname, argno + 1,
			btf_type_str(reg_ref_t), reg_ref_tname);
		return -EINVAL;
	}
	return 0;
}

static int ref_set_non_owning(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
{
	struct bpf_verifier_state *state = env->cur_state;
	struct btf_record *rec = reg_btf_record(reg);

	if (!state->active_lock.ptr) {
		verbose(env, "verifier internal error: ref_set_non_owning w/o active lock\n");
		return -EFAULT;
	}

	if (type_flag(reg->type) & NON_OWN_REF) {
		verbose(env, "verifier internal error: NON_OWN_REF already set\n");
		return -EFAULT;
	}

	reg->type |= NON_OWN_REF;
	if (rec->refcount_off >= 0)
		reg->type |= MEM_RCU;

	return 0;
}

static int ref_convert_owning_non_owning(struct bpf_verifier_env *env, u32 ref_obj_id)
{
	struct bpf_func_state *state, *unused;
	struct bpf_reg_state *reg;
	int i;

	state = cur_func(env);

	if (!ref_obj_id) {
		verbose(env, "verifier internal error: ref_obj_id is zero for "
			     "owning -> non-owning conversion\n");
		return -EFAULT;
	}

	for (i = 0; i < state->acquired_refs; i++) {
		if (state->refs[i].id != ref_obj_id)
			continue;

		/* Clear ref_obj_id here so release_reference doesn't clobber
		 * the whole reg
		 */
		bpf_for_each_reg_in_vstate(env->cur_state, unused, reg, ({
			if (reg->ref_obj_id == ref_obj_id) {
				reg->ref_obj_id = 0;
				ref_set_non_owning(env, reg);
			}
		}));
		return 0;
	}

	verbose(env, "verifier internal error: ref state missing for ref_obj_id\n");
	return -EFAULT;
}
/* Implementation details:
 *
 * Each register points to some region of memory, which we define as an
 * allocation. Each allocation may embed a bpf_spin_lock which protects any
 * special BPF objects (bpf_list_head, bpf_rb_root, etc.) part of the same
 * allocation. The lock and the data it protects are colocated in the same
 * allocation.
 *
 * Hence, everytime a register holds a pointer value pointing to such
 * allocation, the verifier preserves a unique reg->id for it.
 *
 * The verifier remembers the lock 'ptr' and the lock 'id' whenever
 * bpf_spin_lock is called.
 *
 * To enable this, lock state in the verifier captures two values:
 *	active_lock.ptr = Register's type specific pointer
 *	active_lock.id  = A unique ID for each register pointer value
 *
 * Currently, PTR_TO_MAP_VALUE and PTR_TO_BTF_ID | MEM_ALLOC are the two
 * supported register types.
 *
 * The active_lock.ptr in case of map values is the reg->map_ptr, and in case of
 * allocated objects is the reg->btf pointer.
 *
 * The active_lock.id is non-unique for maps supporting direct_value_addr, as we
 * can establish the provenance of the map value statically for each distinct
 * lookup into such maps. They always contain a single map value hence unique
 * IDs for each pseudo load pessimizes the algorithm and rejects valid programs.
 *
 * So, in case of global variables, they use array maps with max_entries = 1,
 * hence their active_lock.ptr becomes map_ptr and id = 0 (since they all point
 * into the same map value as max_entries is 1, as described above).
 *
 * In case of inner map lookups, the inner map pointer has same map_ptr as the
 * outer map pointer (in verifier context), but each lookup into an inner map
 * assigns a fresh reg->id to the lookup, so while lookups into distinct inner
 * maps from the same outer map share the same map_ptr as active_lock.ptr, they
 * will get different reg->id assigned to each lookup, hence different
 * active_lock.id.
 *
 * In case of allocated objects, active_lock.ptr is the reg->btf, and the
 * reg->id is a unique ID preserved after the NULL pointer check on the pointer
 * returned from bpf_obj_new. Each allocation receives a new reg->id.
 */
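/* Illustrative sketch (not part of the original source): the (ptr, id) pairing
 * described above is what rejects a program that locks one allocation and then
 * operates on another, e.g.
 *
 *	bpf_spin_lock(&elem_a->lock);
 *	bpf_list_push_front(&elem_b->head, &node->node);	// different allocation
 *
 * because elem_b's (ptr, id) does not match the recorded active_lock.
 */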
static int check_reg_allocation_locked(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
{
	void *ptr;
	u32 id;

	switch ((int)reg->type) {
	case PTR_TO_MAP_VALUE:
		ptr = reg->map_ptr;
		break;
	case PTR_TO_BTF_ID | MEM_ALLOC:
		ptr = reg->btf;
		break;
	default:
		verbose(env, "verifier internal error: unknown reg type for lock check\n");
		return -EFAULT;
	}
	id = reg->id;

	if (!env->cur_state->active_lock.ptr)
		return -EINVAL;
	if (env->cur_state->active_lock.ptr != ptr ||
	    env->cur_state->active_lock.id != id) {
		verbose(env, "held lock and object are not in the same allocation\n");
		return -EINVAL;
	}
	return 0;
}

static bool is_bpf_list_api_kfunc(u32 btf_id)
{
	return btf_id == special_kfunc_list[KF_bpf_list_push_front_impl] ||
	       btf_id == special_kfunc_list[KF_bpf_list_push_back_impl] ||
	       btf_id == special_kfunc_list[KF_bpf_list_pop_front] ||
	       btf_id == special_kfunc_list[KF_bpf_list_pop_back];
}

static bool is_bpf_rbtree_api_kfunc(u32 btf_id)
{
	return btf_id == special_kfunc_list[KF_bpf_rbtree_add_impl] ||
	       btf_id == special_kfunc_list[KF_bpf_rbtree_remove] ||
	       btf_id == special_kfunc_list[KF_bpf_rbtree_first];
}

static bool is_bpf_graph_api_kfunc(u32 btf_id)
{
	return is_bpf_list_api_kfunc(btf_id) || is_bpf_rbtree_api_kfunc(btf_id) ||
	       btf_id == special_kfunc_list[KF_bpf_refcount_acquire_impl];
}

static bool is_sync_callback_calling_kfunc(u32 btf_id)
{
	return btf_id == special_kfunc_list[KF_bpf_rbtree_add_impl];
}

static bool is_bpf_throw_kfunc(struct bpf_insn *insn)
{
	return bpf_pseudo_kfunc_call(insn) && insn->off == 0 &&
	       insn->imm == special_kfunc_list[KF_bpf_throw];
}

static bool is_rbtree_lock_required_kfunc(u32 btf_id)
{
	return is_bpf_rbtree_api_kfunc(btf_id);
}
static bool check_kfunc_is_graph_root_api(struct bpf_verifier_env *env,
					  enum btf_field_type head_field_type,
					  u32 kfunc_btf_id)
{
	bool ret;

	switch (head_field_type) {
	case BPF_LIST_HEAD:
		ret = is_bpf_list_api_kfunc(kfunc_btf_id);
		break;
	case BPF_RB_ROOT:
		ret = is_bpf_rbtree_api_kfunc(kfunc_btf_id);
		break;
	default:
		verbose(env, "verifier internal error: unexpected graph root argument type %s\n",
			btf_field_type_name(head_field_type));
		return false;
	}

	if (!ret)
		verbose(env, "verifier internal error: %s head arg for unknown kfunc\n",
			btf_field_type_name(head_field_type));
	return ret;
}

static bool check_kfunc_is_graph_node_api(struct bpf_verifier_env *env,
					  enum btf_field_type node_field_type,
					  u32 kfunc_btf_id)
{
	bool ret;

	switch (node_field_type) {
	case BPF_LIST_NODE:
		ret = (kfunc_btf_id == special_kfunc_list[KF_bpf_list_push_front_impl] ||
		       kfunc_btf_id == special_kfunc_list[KF_bpf_list_push_back_impl]);
		break;
	case BPF_RB_NODE:
		ret = (kfunc_btf_id == special_kfunc_list[KF_bpf_rbtree_remove] ||
		       kfunc_btf_id == special_kfunc_list[KF_bpf_rbtree_add_impl]);
		break;
	default:
		verbose(env, "verifier internal error: unexpected graph node argument type %s\n",
			btf_field_type_name(node_field_type));
		return false;
	}

	if (!ret)
		verbose(env, "verifier internal error: %s node arg for unknown kfunc\n",
			btf_field_type_name(node_field_type));
	return ret;
}

static int
__process_kf_arg_ptr_to_graph_root(struct bpf_verifier_env *env,
				   struct bpf_reg_state *reg, u32 regno,
				   struct bpf_kfunc_call_arg_meta *meta,
				   enum btf_field_type head_field_type,
				   struct btf_field **head_field)
{
	const char *head_type_name;
	struct btf_field *field;
	struct btf_record *rec;
	u32 head_off;

	if (meta->btf != btf_vmlinux) {
		verbose(env, "verifier internal error: unexpected btf mismatch in kfunc call\n");
		return -EFAULT;
	}

	if (!check_kfunc_is_graph_root_api(env, head_field_type, meta->func_id))
		return -EFAULT;

	head_type_name = btf_field_type_name(head_field_type);
	if (!tnum_is_const(reg->var_off)) {
		verbose(env,
			"R%d doesn't have constant offset. %s has to be at the constant offset\n",
			regno, head_type_name);
		return -EINVAL;
	}

	rec = reg_btf_record(reg);
	head_off = reg->off + reg->var_off.value;
	field = btf_record_find(rec, head_off, head_field_type);
	if (!field) {
		verbose(env, "%s not found at offset=%u\n", head_type_name, head_off);
		return -EINVAL;
	}

	/* All functions require bpf_list_head to be protected using a bpf_spin_lock */
	if (check_reg_allocation_locked(env, reg)) {
		verbose(env, "bpf_spin_lock at off=%d must be held for %s\n",
			rec->spin_lock_off, head_type_name);
		return -EINVAL;
	}

	if (*head_field) {
		verbose(env, "verifier internal error: repeating %s arg\n", head_type_name);
		return -EFAULT;
	}
	*head_field = field;
	return 0;
}

static int process_kf_arg_ptr_to_list_head(struct bpf_verifier_env *env,
					   struct bpf_reg_state *reg, u32 regno,
					   struct bpf_kfunc_call_arg_meta *meta)
{
	return __process_kf_arg_ptr_to_graph_root(env, reg, regno, meta, BPF_LIST_HEAD,
						  &meta->arg_list_head.field);
}

static int process_kf_arg_ptr_to_rbtree_root(struct bpf_verifier_env *env,
					     struct bpf_reg_state *reg, u32 regno,
					     struct bpf_kfunc_call_arg_meta *meta)
{
	return __process_kf_arg_ptr_to_graph_root(env, reg, regno, meta, BPF_RB_ROOT,
						  &meta->arg_rbtree_root.field);
}

static int
__process_kf_arg_ptr_to_graph_node(struct bpf_verifier_env *env,
				   struct bpf_reg_state *reg, u32 regno,
				   struct bpf_kfunc_call_arg_meta *meta,
				   enum btf_field_type head_field_type,
				   enum btf_field_type node_field_type,
				   struct btf_field **node_field)
{
	const char *node_type_name;
	const struct btf_type *et, *t;
	struct btf_field *field;
	u32 node_off;

	if (meta->btf != btf_vmlinux) {
		verbose(env, "verifier internal error: unexpected btf mismatch in kfunc call\n");
		return -EFAULT;
	}

	if (!check_kfunc_is_graph_node_api(env, node_field_type, meta->func_id))
		return -EFAULT;

	node_type_name = btf_field_type_name(node_field_type);
	if (!tnum_is_const(reg->var_off)) {
		verbose(env,
			"R%d doesn't have constant offset. %s has to be at the constant offset\n",
			regno, node_type_name);
		return -EINVAL;
	}

	node_off = reg->off + reg->var_off.value;
	field = reg_find_field_offset(reg, node_off, node_field_type);
	if (!field || field->offset != node_off) {
		verbose(env, "%s not found at offset=%u\n", node_type_name, node_off);
		return -EINVAL;
	}

	field = *node_field;

	et = btf_type_by_id(field->graph_root.btf, field->graph_root.value_btf_id);
	t = btf_type_by_id(reg->btf, reg->btf_id);
	if (!btf_struct_ids_match(&env->log, reg->btf, reg->btf_id, 0, field->graph_root.btf,
				  field->graph_root.value_btf_id, true)) {
		verbose(env, "operation on %s expects arg#1 %s at offset=%d "
			     "in struct %s, but arg is at offset=%d in struct %s\n",
			btf_field_type_name(head_field_type),
			btf_field_type_name(node_field_type),
			field->graph_root.node_offset,
			btf_name_by_offset(field->graph_root.btf, et->name_off),
			node_off, btf_name_by_offset(reg->btf, t->name_off));
		return -EINVAL;
	}
	meta->arg_btf = reg->btf;
	meta->arg_btf_id = reg->btf_id;

	if (node_off != field->graph_root.node_offset) {
		verbose(env, "arg#1 offset=%d, but expected %s at offset=%d in struct %s\n",
			node_off, btf_field_type_name(node_field_type),
			field->graph_root.node_offset,
			btf_name_by_offset(field->graph_root.btf, et->name_off));
		return -EINVAL;
	}

	return 0;
}
static int process_kf_arg_ptr_to_list_node(struct bpf_verifier_env *env,
					   struct bpf_reg_state *reg, u32 regno,
					   struct bpf_kfunc_call_arg_meta *meta)
{
	return __process_kf_arg_ptr_to_graph_node(env, reg, regno, meta,
						  BPF_LIST_HEAD, BPF_LIST_NODE,
						  &meta->arg_list_head.field);
}

static int process_kf_arg_ptr_to_rbtree_node(struct bpf_verifier_env *env,
					     struct bpf_reg_state *reg, u32 regno,
					     struct bpf_kfunc_call_arg_meta *meta)
{
	return __process_kf_arg_ptr_to_graph_node(env, reg, regno, meta,
						  BPF_RB_ROOT, BPF_RB_NODE,
						  &meta->arg_rbtree_root.field);
}

/*
 * css_task iter allowlist is needed to avoid dead locking on css_set_lock.
 * LSM hooks and iters (both sleepable and non-sleepable) are safe.
 * Any sleepable progs are also safe since bpf_check_attach_target() enforce
 * them can only be attached to some specific hook points.
 */
static bool check_css_task_iter_allowlist(struct bpf_verifier_env *env)
{
	enum bpf_prog_type prog_type = resolve_prog_type(env->prog);

	switch (prog_type) {
	case BPF_PROG_TYPE_LSM:
		return true;
	case BPF_PROG_TYPE_TRACING:
		if (env->prog->expected_attach_type == BPF_TRACE_ITER)
			return true;
		fallthrough;
	default:
		return env->prog->aux->sleepable;
	}
}
11553 static int check_kfunc_args(struct bpf_verifier_env
*env
, struct bpf_kfunc_call_arg_meta
*meta
,
11556 const char *func_name
= meta
->func_name
, *ref_tname
;
11557 const struct btf
*btf
= meta
->btf
;
11558 const struct btf_param
*args
;
11559 struct btf_record
*rec
;
11563 args
= (const struct btf_param
*)(meta
->func_proto
+ 1);
11564 nargs
= btf_type_vlen(meta
->func_proto
);
11565 if (nargs
> MAX_BPF_FUNC_REG_ARGS
) {
11566 verbose(env
, "Function %s has %d > %d args\n", func_name
, nargs
,
11567 MAX_BPF_FUNC_REG_ARGS
);
11571 /* Check that BTF function arguments match actual types that the
11574 for (i
= 0; i
< nargs
; i
++) {
11575 struct bpf_reg_state
*regs
= cur_regs(env
), *reg
= ®s
[i
+ 1];
11576 const struct btf_type
*t
, *ref_t
, *resolve_ret
;
11577 enum bpf_arg_type arg_type
= ARG_DONTCARE
;
11578 u32 regno
= i
+ 1, ref_id
, type_size
;
11579 bool is_ret_buf_sz
= false;
11582 t
= btf_type_skip_modifiers(btf
, args
[i
].type
, NULL
);
11584 if (is_kfunc_arg_ignore(btf
, &args
[i
]))
11587 if (btf_type_is_scalar(t
)) {
11588 if (reg
->type
!= SCALAR_VALUE
) {
11589 verbose(env
, "R%d is not a scalar\n", regno
);
11593 if (is_kfunc_arg_constant(meta
->btf
, &args
[i
])) {
11594 if (meta
->arg_constant
.found
) {
11595 verbose(env
, "verifier internal error: only one constant argument permitted\n");
11598 if (!tnum_is_const(reg
->var_off
)) {
11599 verbose(env
, "R%d must be a known constant\n", regno
);
11602 ret
= mark_chain_precision(env
, regno
);
11605 meta
->arg_constant
.found
= true;
11606 meta
->arg_constant
.value
= reg
->var_off
.value
;
11607 } else if (is_kfunc_arg_scalar_with_name(btf
, &args
[i
], "rdonly_buf_size")) {
11608 meta
->r0_rdonly
= true;
11609 is_ret_buf_sz
= true;
11610 } else if (is_kfunc_arg_scalar_with_name(btf
, &args
[i
], "rdwr_buf_size")) {
11611 is_ret_buf_sz
= true;
11614 if (is_ret_buf_sz
) {
11615 if (meta
->r0_size
) {
11616 verbose(env
, "2 or more rdonly/rdwr_buf_size parameters for kfunc");
11620 if (!tnum_is_const(reg
->var_off
)) {
11621 verbose(env
, "R%d is not a const\n", regno
);
11625 meta
->r0_size
= reg
->var_off
.value
;
11626 ret
= mark_chain_precision(env
, regno
);
11633 if (!btf_type_is_ptr(t
)) {
11634 verbose(env
, "Unrecognized arg#%d type %s\n", i
, btf_type_str(t
));
11638 if ((is_kfunc_trusted_args(meta
) || is_kfunc_rcu(meta
)) &&
11639 (register_is_null(reg
) || type_may_be_null(reg
->type
)) &&
11640 !is_kfunc_arg_nullable(meta
->btf
, &args
[i
])) {
11641 verbose(env
, "Possibly NULL pointer passed to trusted arg%d\n", i
);
11645 if (reg
->ref_obj_id
) {
11646 if (is_kfunc_release(meta
) && meta
->ref_obj_id
) {
11647 verbose(env
, "verifier internal error: more than one arg with ref_obj_id R%d %u %u\n",
11648 regno
, reg
->ref_obj_id
,
11652 meta
->ref_obj_id
= reg
->ref_obj_id
;
11653 if (is_kfunc_release(meta
))
11654 meta
->release_regno
= regno
;
11657 ref_t
= btf_type_skip_modifiers(btf
, t
->type
, &ref_id
);
11658 ref_tname
= btf_name_by_offset(btf
, ref_t
->name_off
);
11660 kf_arg_type
= get_kfunc_ptr_arg_type(env
, meta
, t
, ref_t
, ref_tname
, args
, i
, nargs
);
11661 if (kf_arg_type
< 0)
11662 return kf_arg_type
;
11664 switch (kf_arg_type
) {
11665 case KF_ARG_PTR_TO_NULL
:
11667 case KF_ARG_PTR_TO_ALLOC_BTF_ID
:
11668 case KF_ARG_PTR_TO_BTF_ID
:
11669 if (!is_kfunc_trusted_args(meta
) && !is_kfunc_rcu(meta
))
11672 if (!is_trusted_reg(reg
)) {
11673 if (!is_kfunc_rcu(meta
)) {
11674 verbose(env
, "R%d must be referenced or trusted\n", regno
);
11677 if (!is_rcu_reg(reg
)) {
11678 verbose(env
, "R%d must be a rcu pointer\n", regno
);
11684 case KF_ARG_PTR_TO_CTX
:
11685 /* Trusted arguments have the same offset checks as release arguments */
11686 arg_type
|= OBJ_RELEASE
;
11688 case KF_ARG_PTR_TO_DYNPTR
:
11689 case KF_ARG_PTR_TO_ITER
:
11690 case KF_ARG_PTR_TO_LIST_HEAD
:
11691 case KF_ARG_PTR_TO_LIST_NODE
:
11692 case KF_ARG_PTR_TO_RB_ROOT
:
11693 case KF_ARG_PTR_TO_RB_NODE
:
11694 case KF_ARG_PTR_TO_MEM
:
11695 case KF_ARG_PTR_TO_MEM_SIZE
:
11696 case KF_ARG_PTR_TO_CALLBACK
:
11697 case KF_ARG_PTR_TO_REFCOUNTED_KPTR
:
11698 /* Trusted by default */
11705 if (is_kfunc_release(meta
) && reg
->ref_obj_id
)
11706 arg_type
|= OBJ_RELEASE
;
11707 ret
= check_func_arg_reg_off(env
, reg
, regno
, arg_type
);
11711 switch (kf_arg_type
) {
11712 case KF_ARG_PTR_TO_CTX
:
11713 if (reg
->type
!= PTR_TO_CTX
) {
11714 verbose(env
, "arg#%d expected pointer to ctx, but got %s\n", i
, btf_type_str(t
));
11718 if (meta
->func_id
== special_kfunc_list
[KF_bpf_cast_to_kern_ctx
]) {
11719 ret
= get_kern_ctx_btf_id(&env
->log
, resolve_prog_type(env
->prog
));
11722 meta
->ret_btf_id
= ret
;
11725 case KF_ARG_PTR_TO_ALLOC_BTF_ID
:
11726 if (reg
->type
== (PTR_TO_BTF_ID
| MEM_ALLOC
)) {
11727 if (meta
->func_id
!= special_kfunc_list
[KF_bpf_obj_drop_impl
]) {
11728 verbose(env
, "arg#%d expected for bpf_obj_drop_impl()\n", i
);
11731 } else if (reg
->type
== (PTR_TO_BTF_ID
| MEM_ALLOC
| MEM_PERCPU
)) {
11732 if (meta
->func_id
!= special_kfunc_list
[KF_bpf_percpu_obj_drop_impl
]) {
11733 verbose(env
, "arg#%d expected for bpf_percpu_obj_drop_impl()\n", i
);
11737 verbose(env
, "arg#%d expected pointer to allocated object\n", i
);
11740 if (!reg
->ref_obj_id
) {
11741 verbose(env
, "allocated object must be referenced\n");
11744 if (meta
->btf
== btf_vmlinux
) {
11745 meta
->arg_btf
= reg
->btf
;
11746 meta
->arg_btf_id
= reg
->btf_id
;
11749 case KF_ARG_PTR_TO_DYNPTR
:
11751 enum bpf_arg_type dynptr_arg_type
= ARG_PTR_TO_DYNPTR
;
11752 int clone_ref_obj_id
= 0;
11754 if (reg
->type
!= PTR_TO_STACK
&&
11755 reg
->type
!= CONST_PTR_TO_DYNPTR
) {
11756 verbose(env
, "arg#%d expected pointer to stack or dynptr_ptr\n", i
);
11760 if (reg
->type
== CONST_PTR_TO_DYNPTR
)
11761 dynptr_arg_type
|= MEM_RDONLY
;
11763 if (is_kfunc_arg_uninit(btf
, &args
[i
]))
11764 dynptr_arg_type
|= MEM_UNINIT
;
11766 if (meta
->func_id
== special_kfunc_list
[KF_bpf_dynptr_from_skb
]) {
11767 dynptr_arg_type
|= DYNPTR_TYPE_SKB
;
11768 } else if (meta
->func_id
== special_kfunc_list
[KF_bpf_dynptr_from_xdp
]) {
11769 dynptr_arg_type
|= DYNPTR_TYPE_XDP
;
11770 } else if (meta
->func_id
== special_kfunc_list
[KF_bpf_dynptr_clone
] &&
11771 (dynptr_arg_type
& MEM_UNINIT
)) {
11772 enum bpf_dynptr_type parent_type
= meta
->initialized_dynptr
.type
;
11774 if (parent_type
== BPF_DYNPTR_TYPE_INVALID
) {
11775 verbose(env
, "verifier internal error: no dynptr type for parent of clone\n");
11779 dynptr_arg_type
|= (unsigned int)get_dynptr_type_flag(parent_type
);
11780 clone_ref_obj_id
= meta
->initialized_dynptr
.ref_obj_id
;
11781 if (dynptr_type_refcounted(parent_type
) && !clone_ref_obj_id
) {
11782 verbose(env
, "verifier internal error: missing ref obj id for parent of clone\n");
11787 ret
= process_dynptr_func(env
, regno
, insn_idx
, dynptr_arg_type
, clone_ref_obj_id
);
11791 if (!(dynptr_arg_type
& MEM_UNINIT
)) {
11792 int id
= dynptr_id(env
, reg
);
11795 verbose(env
, "verifier internal error: failed to obtain dynptr id\n");
11798 meta
->initialized_dynptr
.id
= id
;
11799 meta
->initialized_dynptr
.type
= dynptr_get_type(env
, reg
);
11800 meta
->initialized_dynptr
.ref_obj_id
= dynptr_ref_obj_id(env
, reg
);
		case KF_ARG_PTR_TO_ITER:
			if (meta->func_id == special_kfunc_list[KF_bpf_iter_css_task_new]) {
				if (!check_css_task_iter_allowlist(env)) {
					verbose(env, "css_task_iter is only allowed in bpf_lsm, bpf_iter and sleepable progs\n");
					return -EINVAL;
				}
			}
			ret = process_iter_arg(env, regno, insn_idx, meta);
			if (ret < 0)
				return ret;
			break;
		case KF_ARG_PTR_TO_LIST_HEAD:
			if (reg->type != PTR_TO_MAP_VALUE &&
			    reg->type != (PTR_TO_BTF_ID | MEM_ALLOC)) {
				verbose(env, "arg#%d expected pointer to map value or allocated object\n", i);
				return -EINVAL;
			}
			if (reg->type == (PTR_TO_BTF_ID | MEM_ALLOC) && !reg->ref_obj_id) {
				verbose(env, "allocated object must be referenced\n");
				return -EINVAL;
			}
			ret = process_kf_arg_ptr_to_list_head(env, reg, regno, meta);
			if (ret < 0)
				return ret;
			break;
		case KF_ARG_PTR_TO_RB_ROOT:
			if (reg->type != PTR_TO_MAP_VALUE &&
			    reg->type != (PTR_TO_BTF_ID | MEM_ALLOC)) {
				verbose(env, "arg#%d expected pointer to map value or allocated object\n", i);
				return -EINVAL;
			}
			if (reg->type == (PTR_TO_BTF_ID | MEM_ALLOC) && !reg->ref_obj_id) {
				verbose(env, "allocated object must be referenced\n");
				return -EINVAL;
			}
			ret = process_kf_arg_ptr_to_rbtree_root(env, reg, regno, meta);
			if (ret < 0)
				return ret;
			break;
		case KF_ARG_PTR_TO_LIST_NODE:
			if (reg->type != (PTR_TO_BTF_ID | MEM_ALLOC)) {
				verbose(env, "arg#%d expected pointer to allocated object\n", i);
				return -EINVAL;
			}
			if (!reg->ref_obj_id) {
				verbose(env, "allocated object must be referenced\n");
				return -EINVAL;
			}
			ret = process_kf_arg_ptr_to_list_node(env, reg, regno, meta);
			if (ret < 0)
				return ret;
			break;
		case KF_ARG_PTR_TO_RB_NODE:
			if (meta->func_id == special_kfunc_list[KF_bpf_rbtree_remove]) {
				if (!type_is_non_owning_ref(reg->type) || reg->ref_obj_id) {
					verbose(env, "rbtree_remove node input must be non-owning ref\n");
					return -EINVAL;
				}
				if (in_rbtree_lock_required_cb(env)) {
					verbose(env, "rbtree_remove not allowed in rbtree cb\n");
					return -EINVAL;
				}
			} else {
				if (reg->type != (PTR_TO_BTF_ID | MEM_ALLOC)) {
					verbose(env, "arg#%d expected pointer to allocated object\n", i);
					return -EINVAL;
				}
				if (!reg->ref_obj_id) {
					verbose(env, "allocated object must be referenced\n");
					return -EINVAL;
				}
			}
			ret = process_kf_arg_ptr_to_rbtree_node(env, reg, regno, meta);
			if (ret < 0)
				return ret;
			break;
		case KF_ARG_PTR_TO_BTF_ID:
			/* Only base_type is checked, further checks are done here */
			if ((base_type(reg->type) != PTR_TO_BTF_ID ||
			     (bpf_type_has_unsafe_modifiers(reg->type) && !is_rcu_reg(reg))) &&
			    !reg2btf_ids[base_type(reg->type)]) {
				verbose(env, "arg#%d is %s ", i, reg_type_str(env, reg->type));
				verbose(env, "expected %s or socket\n",
					reg_type_str(env, base_type(reg->type) |
							  (type_flag(reg->type) & BPF_REG_TRUSTED_MODIFIERS)));
				return -EINVAL;
			}
			ret = process_kf_arg_ptr_to_btf_id(env, reg, ref_t, ref_tname, ref_id, meta, i);
			if (ret < 0)
				return ret;
			break;
		case KF_ARG_PTR_TO_MEM:
			resolve_ret = btf_resolve_size(btf, ref_t, &type_size);
			if (IS_ERR(resolve_ret)) {
				verbose(env, "arg#%d reference type('%s %s') size cannot be determined: %ld\n",
					i, btf_type_str(ref_t), ref_tname, PTR_ERR(resolve_ret));
				return -EINVAL;
			}
			ret = check_mem_reg(env, reg, regno, type_size);
			if (ret < 0)
				return ret;
			break;
		case KF_ARG_PTR_TO_MEM_SIZE:
		{
			struct bpf_reg_state *buff_reg = &regs[regno];
			const struct btf_param *buff_arg = &args[i];
			struct bpf_reg_state *size_reg = &regs[regno + 1];
			const struct btf_param *size_arg = &args[i + 1];

			if (!register_is_null(buff_reg) || !is_kfunc_arg_optional(meta->btf, buff_arg)) {
				ret = check_kfunc_mem_size_reg(env, size_reg, regno + 1);
				if (ret < 0) {
					verbose(env, "arg#%d arg#%d memory, len pair leads to invalid memory access\n", i, i + 1);
					return ret;
				}
			}

			if (is_kfunc_arg_const_mem_size(meta->btf, size_arg, size_reg)) {
				if (meta->arg_constant.found) {
					verbose(env, "verifier internal error: only one constant argument permitted\n");
					return -EFAULT;
				}
				if (!tnum_is_const(size_reg->var_off)) {
					verbose(env, "R%d must be a known constant\n", regno + 1);
					return -EINVAL;
				}
				meta->arg_constant.found = true;
				meta->arg_constant.value = size_reg->var_off.value;
			}

			/* Skip next '__sz' or '__szk' argument */
			i++;
			break;
		}
		case KF_ARG_PTR_TO_CALLBACK:
			if (reg->type != PTR_TO_FUNC) {
				verbose(env, "arg%d expected pointer to func\n", i);
				return -EINVAL;
			}
			meta->subprogno = reg->subprogno;
			break;
		case KF_ARG_PTR_TO_REFCOUNTED_KPTR:
			if (!type_is_ptr_alloc_obj(reg->type)) {
				verbose(env, "arg#%d is neither owning or non-owning ref\n", i);
				return -EINVAL;
			}
			if (!type_is_non_owning_ref(reg->type))
				meta->arg_owning_ref = true;

			rec = reg_btf_record(reg);
			if (!rec) {
				verbose(env, "verifier internal error: Couldn't find btf_record\n");
				return -EFAULT;
			}

			if (rec->refcount_off < 0) {
				verbose(env, "arg#%d doesn't point to a type with bpf_refcount field\n", i);
				return -EINVAL;
			}

			meta->arg_btf = reg->btf;
			meta->arg_btf_id = reg->btf_id;
			break;
		}
	}

	if (is_kfunc_release(meta) && !meta->release_regno) {
		verbose(env, "release kernel function %s expects refcounted PTR_TO_BTF_ID\n",
			func_name);
		return -EINVAL;
	}

	return 0;
}
static int fetch_kfunc_meta(struct bpf_verifier_env *env,
			    struct bpf_insn *insn,
			    struct bpf_kfunc_call_arg_meta *meta,
			    const char **kfunc_name)
{
	const struct btf_type *func, *func_proto;
	u32 func_id, *kfunc_flags;
	const char *func_name;
	struct btf *desc_btf;

	if (kfunc_name)
		*kfunc_name = NULL;

	if (!insn->imm)
		return -EINVAL;

	desc_btf = find_kfunc_desc_btf(env, insn->off);
	if (IS_ERR(desc_btf))
		return PTR_ERR(desc_btf);

	func_id = insn->imm;
	func = btf_type_by_id(desc_btf, func_id);
	func_name = btf_name_by_offset(desc_btf, func->name_off);
	if (kfunc_name)
		*kfunc_name = func_name;
	func_proto = btf_type_by_id(desc_btf, func->type);

	kfunc_flags = btf_kfunc_id_set_contains(desc_btf, func_id, env->prog);
	if (!kfunc_flags) {
		return -EACCES;
	}

	memset(meta, 0, sizeof(*meta));
	meta->btf = desc_btf;
	meta->func_id = func_id;
	meta->kfunc_flags = *kfunc_flags;
	meta->func_proto = func_proto;
	meta->func_name = func_name;

	return 0;
}

static int check_return_code(struct bpf_verifier_env *env, int regno);
12025 static int check_kfunc_call(struct bpf_verifier_env
*env
, struct bpf_insn
*insn
,
12028 const struct btf_type
*t
, *ptr_type
;
12029 u32 i
, nargs
, ptr_type_id
, release_ref_obj_id
;
12030 struct bpf_reg_state
*regs
= cur_regs(env
);
12031 const char *func_name
, *ptr_type_name
;
12032 bool sleepable
, rcu_lock
, rcu_unlock
;
12033 struct bpf_kfunc_call_arg_meta meta
;
12034 struct bpf_insn_aux_data
*insn_aux
;
12035 int err
, insn_idx
= *insn_idx_p
;
12036 const struct btf_param
*args
;
12037 const struct btf_type
*ret_t
;
12038 struct btf
*desc_btf
;
12040 /* skip for now, but return error when we find this in fixup_kfunc_call */
12044 err
= fetch_kfunc_meta(env
, insn
, &meta
, &func_name
);
12045 if (err
== -EACCES
&& func_name
)
12046 verbose(env
, "calling kernel function %s is not allowed\n", func_name
);
12049 desc_btf
= meta
.btf
;
12050 insn_aux
= &env
->insn_aux_data
[insn_idx
];
12052 insn_aux
->is_iter_next
= is_iter_next_kfunc(&meta
);
12054 if (is_kfunc_destructive(&meta
) && !capable(CAP_SYS_BOOT
)) {
12055 verbose(env
, "destructive kfunc calls require CAP_SYS_BOOT capability\n");
12059 sleepable
= is_kfunc_sleepable(&meta
);
12060 if (sleepable
&& !env
->prog
->aux
->sleepable
) {
12061 verbose(env
, "program must be sleepable to call sleepable kfunc %s\n", func_name
);
12065 /* Check the arguments */
12066 err
= check_kfunc_args(env
, &meta
, insn_idx
);
12070 if (meta
.func_id
== special_kfunc_list
[KF_bpf_rbtree_add_impl
]) {
12071 err
= push_callback_call(env
, insn
, insn_idx
, meta
.subprogno
,
12072 set_rbtree_add_callback_state
);
12074 verbose(env
, "kfunc %s#%d failed callback verification\n",
12075 func_name
, meta
.func_id
);
12080 rcu_lock
= is_kfunc_bpf_rcu_read_lock(&meta
);
12081 rcu_unlock
= is_kfunc_bpf_rcu_read_unlock(&meta
);
12083 if (env
->cur_state
->active_rcu_lock
) {
12084 struct bpf_func_state
*state
;
12085 struct bpf_reg_state
*reg
;
12086 u32 clear_mask
= (1 << STACK_SPILL
) | (1 << STACK_ITER
);
12088 if (in_rbtree_lock_required_cb(env
) && (rcu_lock
|| rcu_unlock
)) {
12089 verbose(env
, "Calling bpf_rcu_read_{lock,unlock} in unnecessary rbtree callback\n");
12094 verbose(env
, "nested rcu read lock (kernel function %s)\n", func_name
);
12096 } else if (rcu_unlock
) {
12097 bpf_for_each_reg_in_vstate_mask(env
->cur_state
, state
, reg
, clear_mask
, ({
12098 if (reg
->type
& MEM_RCU
) {
12099 reg
->type
&= ~(MEM_RCU
| PTR_MAYBE_NULL
);
12100 reg
->type
|= PTR_UNTRUSTED
;
12103 env
->cur_state
->active_rcu_lock
= false;
12104 } else if (sleepable
) {
12105 verbose(env
, "kernel func %s is sleepable within rcu_read_lock region\n", func_name
);
12108 } else if (rcu_lock
) {
12109 env
->cur_state
->active_rcu_lock
= true;
12110 } else if (rcu_unlock
) {
12111 verbose(env
, "unmatched rcu read unlock (kernel function %s)\n", func_name
);
12115 /* In case of release function, we get register number of refcounted
12116 * PTR_TO_BTF_ID in bpf_kfunc_arg_meta, do the release now.
12118 if (meta
.release_regno
) {
12119 err
= release_reference(env
, regs
[meta
.release_regno
].ref_obj_id
);
12121 verbose(env
, "kfunc %s#%d reference has not been acquired before\n",
12122 func_name
, meta
.func_id
);
12127 if (meta
.func_id
== special_kfunc_list
[KF_bpf_list_push_front_impl
] ||
12128 meta
.func_id
== special_kfunc_list
[KF_bpf_list_push_back_impl
] ||
12129 meta
.func_id
== special_kfunc_list
[KF_bpf_rbtree_add_impl
]) {
12130 release_ref_obj_id
= regs
[BPF_REG_2
].ref_obj_id
;
12131 insn_aux
->insert_off
= regs
[BPF_REG_2
].off
;
12132 insn_aux
->kptr_struct_meta
= btf_find_struct_meta(meta
.arg_btf
, meta
.arg_btf_id
);
12133 err
= ref_convert_owning_non_owning(env
, release_ref_obj_id
);
12135 verbose(env
, "kfunc %s#%d conversion of owning ref to non-owning failed\n",
12136 func_name
, meta
.func_id
);
12140 err
= release_reference(env
, release_ref_obj_id
);
12142 verbose(env
, "kfunc %s#%d reference has not been acquired before\n",
12143 func_name
, meta
.func_id
);
12148 if (meta
.func_id
== special_kfunc_list
[KF_bpf_throw
]) {
12149 if (!bpf_jit_supports_exceptions()) {
12150 verbose(env
, "JIT does not support calling kfunc %s#%d\n",
12151 func_name
, meta
.func_id
);
12154 env
->seen_exception
= true;
12156 /* In the case of the default callback, the cookie value passed
12157 * to bpf_throw becomes the return value of the program.
12159 if (!env
->exception_callback_subprog
) {
12160 err
= check_return_code(env
, BPF_REG_1
);
12166 for (i
= 0; i
< CALLER_SAVED_REGS
; i
++)
12167 mark_reg_not_init(env
, regs
, caller_saved
[i
]);
12169 /* Check return type */
12170 t
= btf_type_skip_modifiers(desc_btf
, meta
.func_proto
->type
, NULL
);
12172 if (is_kfunc_acquire(&meta
) && !btf_type_is_struct_ptr(meta
.btf
, t
)) {
12173 /* Only exception is bpf_obj_new_impl */
12174 if (meta
.btf
!= btf_vmlinux
||
12175 (meta
.func_id
!= special_kfunc_list
[KF_bpf_obj_new_impl
] &&
12176 meta
.func_id
!= special_kfunc_list
[KF_bpf_percpu_obj_new_impl
] &&
12177 meta
.func_id
!= special_kfunc_list
[KF_bpf_refcount_acquire_impl
])) {
12178 verbose(env
, "acquire kernel function does not return PTR_TO_BTF_ID\n");
12183 if (btf_type_is_scalar(t
)) {
12184 mark_reg_unknown(env
, regs
, BPF_REG_0
);
12185 mark_btf_func_reg_size(env
, BPF_REG_0
, t
->size
);
12186 } else if (btf_type_is_ptr(t
)) {
12187 ptr_type
= btf_type_skip_modifiers(desc_btf
, t
->type
, &ptr_type_id
);
12189 if (meta
.btf
== btf_vmlinux
&& btf_id_set_contains(&special_kfunc_set
, meta
.func_id
)) {
12190 if (meta
.func_id
== special_kfunc_list
[KF_bpf_obj_new_impl
] ||
12191 meta
.func_id
== special_kfunc_list
[KF_bpf_percpu_obj_new_impl
]) {
12192 struct btf_struct_meta
*struct_meta
;
12193 struct btf
*ret_btf
;
12196 if (meta
.func_id
== special_kfunc_list
[KF_bpf_obj_new_impl
] && !bpf_global_ma_set
)
12199 if (meta
.func_id
== special_kfunc_list
[KF_bpf_percpu_obj_new_impl
]) {
12200 if (!bpf_global_percpu_ma_set
) {
12201 mutex_lock(&bpf_percpu_ma_lock
);
12202 if (!bpf_global_percpu_ma_set
) {
12203 err
= bpf_mem_alloc_init(&bpf_global_percpu_ma
, 0, true);
12205 bpf_global_percpu_ma_set
= true;
12207 mutex_unlock(&bpf_percpu_ma_lock
);
12213 if (((u64
)(u32
)meta
.arg_constant
.value
) != meta
.arg_constant
.value
) {
12214 verbose(env
, "local type ID argument must be in range [0, U32_MAX]\n");
12218 ret_btf
= env
->prog
->aux
->btf
;
12219 ret_btf_id
= meta
.arg_constant
.value
;
12221 /* This may be NULL due to user not supplying a BTF */
12223 verbose(env
, "bpf_obj_new/bpf_percpu_obj_new requires prog BTF\n");
12227 ret_t
= btf_type_by_id(ret_btf
, ret_btf_id
);
12228 if (!ret_t
|| !__btf_type_is_struct(ret_t
)) {
12229 verbose(env
, "bpf_obj_new/bpf_percpu_obj_new type ID argument must be of a struct\n");
12233 struct_meta
= btf_find_struct_meta(ret_btf
, ret_btf_id
);
12234 if (meta
.func_id
== special_kfunc_list
[KF_bpf_percpu_obj_new_impl
]) {
12235 if (!__btf_type_is_scalar_struct(env
, ret_btf
, ret_t
, 0)) {
12236 verbose(env
, "bpf_percpu_obj_new type ID argument must be of a struct of scalars\n");
12241 verbose(env
, "bpf_percpu_obj_new type ID argument must not contain special fields\n");
12246 mark_reg_known_zero(env
, regs
, BPF_REG_0
);
12247 regs
[BPF_REG_0
].type
= PTR_TO_BTF_ID
| MEM_ALLOC
;
12248 regs
[BPF_REG_0
].btf
= ret_btf
;
12249 regs
[BPF_REG_0
].btf_id
= ret_btf_id
;
12250 if (meta
.func_id
== special_kfunc_list
[KF_bpf_percpu_obj_new_impl
])
12251 regs
[BPF_REG_0
].type
|= MEM_PERCPU
;
12253 insn_aux
->obj_new_size
= ret_t
->size
;
12254 insn_aux
->kptr_struct_meta
= struct_meta
;
12255 } else if (meta
.func_id
== special_kfunc_list
[KF_bpf_refcount_acquire_impl
]) {
12256 mark_reg_known_zero(env
, regs
, BPF_REG_0
);
12257 regs
[BPF_REG_0
].type
= PTR_TO_BTF_ID
| MEM_ALLOC
;
12258 regs
[BPF_REG_0
].btf
= meta
.arg_btf
;
12259 regs
[BPF_REG_0
].btf_id
= meta
.arg_btf_id
;
12261 insn_aux
->kptr_struct_meta
=
12262 btf_find_struct_meta(meta
.arg_btf
,
12264 } else if (meta
.func_id
== special_kfunc_list
[KF_bpf_list_pop_front
] ||
12265 meta
.func_id
== special_kfunc_list
[KF_bpf_list_pop_back
]) {
12266 struct btf_field
*field
= meta
.arg_list_head
.field
;
12268 mark_reg_graph_node(regs
, BPF_REG_0
, &field
->graph_root
);
12269 } else if (meta
.func_id
== special_kfunc_list
[KF_bpf_rbtree_remove
] ||
12270 meta
.func_id
== special_kfunc_list
[KF_bpf_rbtree_first
]) {
12271 struct btf_field
*field
= meta
.arg_rbtree_root
.field
;
12273 mark_reg_graph_node(regs
, BPF_REG_0
, &field
->graph_root
);
12274 } else if (meta
.func_id
== special_kfunc_list
[KF_bpf_cast_to_kern_ctx
]) {
12275 mark_reg_known_zero(env
, regs
, BPF_REG_0
);
12276 regs
[BPF_REG_0
].type
= PTR_TO_BTF_ID
| PTR_TRUSTED
;
12277 regs
[BPF_REG_0
].btf
= desc_btf
;
12278 regs
[BPF_REG_0
].btf_id
= meta
.ret_btf_id
;
12279 } else if (meta
.func_id
== special_kfunc_list
[KF_bpf_rdonly_cast
]) {
12280 ret_t
= btf_type_by_id(desc_btf
, meta
.arg_constant
.value
);
12281 if (!ret_t
|| !btf_type_is_struct(ret_t
)) {
12283 "kfunc bpf_rdonly_cast type ID argument must be of a struct\n");
12287 mark_reg_known_zero(env
, regs
, BPF_REG_0
);
12288 regs
[BPF_REG_0
].type
= PTR_TO_BTF_ID
| PTR_UNTRUSTED
;
12289 regs
[BPF_REG_0
].btf
= desc_btf
;
12290 regs
[BPF_REG_0
].btf_id
= meta
.arg_constant
.value
;
12291 } else if (meta
.func_id
== special_kfunc_list
[KF_bpf_dynptr_slice
] ||
12292 meta
.func_id
== special_kfunc_list
[KF_bpf_dynptr_slice_rdwr
]) {
12293 enum bpf_type_flag type_flag
= get_dynptr_type_flag(meta
.initialized_dynptr
.type
);
12295 mark_reg_known_zero(env
, regs
, BPF_REG_0
);
12297 if (!meta
.arg_constant
.found
) {
12298 verbose(env
, "verifier internal error: bpf_dynptr_slice(_rdwr) no constant size\n");
12302 regs
[BPF_REG_0
].mem_size
= meta
.arg_constant
.value
;
12304 /* PTR_MAYBE_NULL will be added when is_kfunc_ret_null is checked */
12305 regs
[BPF_REG_0
].type
= PTR_TO_MEM
| type_flag
;
12307 if (meta
.func_id
== special_kfunc_list
[KF_bpf_dynptr_slice
]) {
12308 regs
[BPF_REG_0
].type
|= MEM_RDONLY
;
12310 /* this will set env->seen_direct_write to true */
12311 if (!may_access_direct_pkt_data(env
, NULL
, BPF_WRITE
)) {
12312 verbose(env
, "the prog does not allow writes to packet data\n");
12317 if (!meta
.initialized_dynptr
.id
) {
12318 verbose(env
, "verifier internal error: no dynptr id\n");
12321 regs
[BPF_REG_0
].dynptr_id
= meta
.initialized_dynptr
.id
;
12323 /* we don't need to set BPF_REG_0's ref obj id
12324 * because packet slices are not refcounted (see
12325 * dynptr_type_refcounted)
12328 verbose(env
, "kernel function %s unhandled dynamic return type\n",
12332 } else if (!__btf_type_is_struct(ptr_type
)) {
12333 if (!meta
.r0_size
) {
12336 if (!IS_ERR(btf_resolve_size(desc_btf
, ptr_type
, &sz
))) {
12338 meta
.r0_rdonly
= true;
12341 if (!meta
.r0_size
) {
12342 ptr_type_name
= btf_name_by_offset(desc_btf
,
12343 ptr_type
->name_off
);
12345 "kernel function %s returns pointer type %s %s is not supported\n",
12347 btf_type_str(ptr_type
),
12352 mark_reg_known_zero(env
, regs
, BPF_REG_0
);
12353 regs
[BPF_REG_0
].type
= PTR_TO_MEM
;
12354 regs
[BPF_REG_0
].mem_size
= meta
.r0_size
;
12356 if (meta
.r0_rdonly
)
12357 regs
[BPF_REG_0
].type
|= MEM_RDONLY
;
12359 /* Ensures we don't access the memory after a release_reference() */
12360 if (meta
.ref_obj_id
)
12361 regs
[BPF_REG_0
].ref_obj_id
= meta
.ref_obj_id
;
12363 mark_reg_known_zero(env
, regs
, BPF_REG_0
);
12364 regs
[BPF_REG_0
].btf
= desc_btf
;
12365 regs
[BPF_REG_0
].type
= PTR_TO_BTF_ID
;
12366 regs
[BPF_REG_0
].btf_id
= ptr_type_id
;
12369 if (is_kfunc_ret_null(&meta
)) {
12370 regs
[BPF_REG_0
].type
|= PTR_MAYBE_NULL
;
12371 /* For mark_ptr_or_null_reg, see 93c230e3f5bd6 */
12372 regs
[BPF_REG_0
].id
= ++env
->id_gen
;
12374 mark_btf_func_reg_size(env
, BPF_REG_0
, sizeof(void *));
12375 if (is_kfunc_acquire(&meta
)) {
12376 int id
= acquire_reference_state(env
, insn_idx
);
12380 if (is_kfunc_ret_null(&meta
))
12381 regs
[BPF_REG_0
].id
= id
;
12382 regs
[BPF_REG_0
].ref_obj_id
= id
;
12383 } else if (meta
.func_id
== special_kfunc_list
[KF_bpf_rbtree_first
]) {
12384 ref_set_non_owning(env
, ®s
[BPF_REG_0
]);
12387 if (reg_may_point_to_spin_lock(®s
[BPF_REG_0
]) && !regs
[BPF_REG_0
].id
)
12388 regs
[BPF_REG_0
].id
= ++env
->id_gen
;
12389 } else if (btf_type_is_void(t
)) {
12390 if (meta
.btf
== btf_vmlinux
&& btf_id_set_contains(&special_kfunc_set
, meta
.func_id
)) {
12391 if (meta
.func_id
== special_kfunc_list
[KF_bpf_obj_drop_impl
] ||
12392 meta
.func_id
== special_kfunc_list
[KF_bpf_percpu_obj_drop_impl
]) {
12393 insn_aux
->kptr_struct_meta
=
12394 btf_find_struct_meta(meta
.arg_btf
,
12400 nargs
= btf_type_vlen(meta
.func_proto
);
12401 args
= (const struct btf_param
*)(meta
.func_proto
+ 1);
12402 for (i
= 0; i
< nargs
; i
++) {
12405 t
= btf_type_skip_modifiers(desc_btf
, args
[i
].type
, NULL
);
12406 if (btf_type_is_ptr(t
))
12407 mark_btf_func_reg_size(env
, regno
, sizeof(void *));
12409 /* scalar. ensured by btf_check_kfunc_arg_match() */
12410 mark_btf_func_reg_size(env
, regno
, t
->size
);
12413 if (is_iter_next_kfunc(&meta
)) {
12414 err
= process_iter_next_call(env
, insn_idx
, &meta
);
static bool signed_add_overflows(s64 a, s64 b)
{
	/* Do the add in u64, where overflow is well-defined */
	s64 res = (s64)((u64)a + (u64)b);

	if (b < 0)
		return res > a;
	return res < a;
}

static bool signed_add32_overflows(s32 a, s32 b)
{
	/* Do the add in u32, where overflow is well-defined */
	s32 res = (s32)((u32)a + (u32)b);

	if (b < 0)
		return res > a;
	return res < a;
}

static bool signed_sub_overflows(s64 a, s64 b)
{
	/* Do the sub in u64, where overflow is well-defined */
	s64 res = (s64)((u64)a - (u64)b);

	if (b < 0)
		return res < a;
	return res > a;
}

static bool signed_sub32_overflows(s32 a, s32 b)
{
	/* Do the sub in u32, where overflow is well-defined */
	s32 res = (s32)((u32)a - (u32)b);

	if (b < 0)
		return res < a;
	return res > a;
}
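/* Worked example of the overflow idiom used by the helpers above
 * (illustrative, not part of the original source): signed_add_overflows(S64_MAX, 1)
 * performs the sum in u64, which wraps to 0x8000000000000000 and reads
 * back as S64_MIN; because b >= 0 the "res < a" test fires and the
 * positive overflow is reported. With a = S64_MIN, b = -1 the wrap
 * yields S64_MAX and "res > a" catches the negative overflow. Doing
 * the arithmetic in the unsigned type first keeps the computation free
 * of signed-overflow undefined behaviour.
 */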
static bool check_reg_sane_offset(struct bpf_verifier_env *env,
				  const struct bpf_reg_state *reg,
				  enum bpf_reg_type type)
{
	bool known = tnum_is_const(reg->var_off);
	s64 val = reg->var_off.value;
	s64 smin = reg->smin_value;

	if (known && (val >= BPF_MAX_VAR_OFF || val <= -BPF_MAX_VAR_OFF)) {
		verbose(env, "math between %s pointer and %lld is not allowed\n",
			reg_type_str(env, type), val);
		return false;
	}

	if (reg->off >= BPF_MAX_VAR_OFF || reg->off <= -BPF_MAX_VAR_OFF) {
		verbose(env, "%s pointer offset %d is not allowed\n",
			reg_type_str(env, type), reg->off);
		return false;
	}

	if (smin == S64_MIN) {
		verbose(env, "math between %s pointer and register with unbounded min value is not allowed\n",
			reg_type_str(env, type));
		return false;
	}

	if (smin >= BPF_MAX_VAR_OFF || smin <= -BPF_MAX_VAR_OFF) {
		verbose(env, "value %lld makes %s pointer be out of bounds\n",
			smin, reg_type_str(env, type));
		return false;
	}

	return true;
}

enum {
	REASON_BOUNDS	= -1,
	REASON_TYPE	= -2,
	REASON_PATHS	= -3,
	REASON_LIMIT	= -4,
	REASON_STACK	= -5,
};
static int retrieve_ptr_limit(const struct bpf_reg_state *ptr_reg,
			      u32 *alu_limit, bool mask_to_left)
{
	u32 max = 0, ptr_limit = 0;

	switch (ptr_reg->type) {
	case PTR_TO_STACK:
		/* Offset 0 is out-of-bounds, but acceptable start for the
		 * left direction, see BPF_REG_FP. Also, unknown scalar
		 * offset where we would need to deal with min/max bounds is
		 * currently prohibited for unprivileged.
		 */
		max = MAX_BPF_STACK + mask_to_left;
		ptr_limit = -(ptr_reg->var_off.value + ptr_reg->off);
		break;
	case PTR_TO_MAP_VALUE:
		max = ptr_reg->map_ptr->value_size;
		ptr_limit = (mask_to_left ?
			     ptr_reg->smin_value :
			     ptr_reg->umax_value) + ptr_reg->off;
		break;
	default:
		return REASON_TYPE;
	}

	if (ptr_limit >= max)
		return REASON_LIMIT;
	*alu_limit = ptr_limit;
	return 0;
}
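/* Note (descriptive, not part of the original source): the negative
 * REASON_* codes returned here and by the sanitation helpers below are
 * not reported directly; adjust_ptr_min_max_vals() funnels them into
 * sanitize_err(), which maps each reason to the corresponding
 * user-visible "prohibited for !root" message.
 */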
static bool can_skip_alu_sanitation(const struct bpf_verifier_env *env,
				    const struct bpf_insn *insn)
{
	return env->bypass_spec_v1 || BPF_SRC(insn->code) == BPF_K;
}

static int update_alu_sanitation_state(struct bpf_insn_aux_data *aux,
				       u32 alu_state, u32 alu_limit)
{
	/* If we arrived here from different branches with different
	 * state or limits to sanitize, then this won't work.
	 */
	if (aux->alu_state &&
	    (aux->alu_state != alu_state ||
	     aux->alu_limit != alu_limit))
		return REASON_PATHS;

	/* Corresponding fixup done in do_misc_fixups(). */
	aux->alu_state = alu_state;
	aux->alu_limit = alu_limit;
	return 0;
}

static int sanitize_val_alu(struct bpf_verifier_env *env,
			    struct bpf_insn *insn)
{
	struct bpf_insn_aux_data *aux = cur_aux(env);

	if (can_skip_alu_sanitation(env, insn))
		return 0;

	return update_alu_sanitation_state(aux, BPF_ALU_NON_POINTER, 0);
}

static bool sanitize_needed(u8 opcode)
{
	return opcode == BPF_ADD || opcode == BPF_SUB;
}

struct bpf_sanitize_info {
	struct bpf_insn_aux_data aux;
	bool mask_to_left;
};
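/* Descriptive note (not part of the original source): bpf_sanitize_info
 * carries state between the two sanitize_ptr_alu() passes made for one
 * pointer ALU instruction. The first (pre-commit) pass records the
 * masking direction and a snapshot of the insn aux state here; the
 * later commit pass reuses it to narrow the masking window to the
 * pointer movement that was actually observed.
 */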
static struct bpf_verifier_state *
sanitize_speculative_path(struct bpf_verifier_env *env,
			  const struct bpf_insn *insn,
			  u32 next_idx, u32 curr_idx)
{
	struct bpf_verifier_state *branch;
	struct bpf_reg_state *regs;

	branch = push_stack(env, next_idx, curr_idx, true);
	if (branch && insn) {
		regs = branch->frame[branch->curframe]->regs;
		if (BPF_SRC(insn->code) == BPF_K) {
			mark_reg_unknown(env, regs, insn->dst_reg);
		} else if (BPF_SRC(insn->code) == BPF_X) {
			mark_reg_unknown(env, regs, insn->dst_reg);
			mark_reg_unknown(env, regs, insn->src_reg);
		}
	}
	return branch;
}
static int sanitize_ptr_alu(struct bpf_verifier_env *env,
			    struct bpf_insn *insn,
			    const struct bpf_reg_state *ptr_reg,
			    const struct bpf_reg_state *off_reg,
			    struct bpf_reg_state *dst_reg,
			    struct bpf_sanitize_info *info,
			    const bool commit_window)
{
	struct bpf_insn_aux_data *aux = commit_window ? cur_aux(env) : &info->aux;
	struct bpf_verifier_state *vstate = env->cur_state;
	bool off_is_imm = tnum_is_const(off_reg->var_off);
	bool off_is_neg = off_reg->smin_value < 0;
	bool ptr_is_dst_reg = ptr_reg == dst_reg;
	u8 opcode = BPF_OP(insn->code);
	u32 alu_state, alu_limit;
	struct bpf_reg_state tmp;
	bool ret;
	int err;

	if (can_skip_alu_sanitation(env, insn))
		return 0;

	/* We already marked aux for masking from non-speculative
	 * paths, thus we got here in the first place. We only care
	 * to explore bad access from here.
	 */
	if (vstate->speculative)
		goto do_sim;

	if (!commit_window) {
		if (!tnum_is_const(off_reg->var_off) &&
		    (off_reg->smin_value < 0) != (off_reg->smax_value < 0))
			return REASON_BOUNDS;

		info->mask_to_left = (opcode == BPF_ADD &&  off_is_neg) ||
				     (opcode == BPF_SUB && !off_is_neg);
	}

	err = retrieve_ptr_limit(ptr_reg, &alu_limit, info->mask_to_left);
	if (err < 0)
		return err;

	if (commit_window) {
		/* In commit phase we narrow the masking window based on
		 * the observed pointer move after the simulated operation.
		 */
		alu_state = info->aux.alu_state;
		alu_limit = abs(info->aux.alu_limit - alu_limit);
	} else {
		alu_state = off_is_neg ? BPF_ALU_NEG_VALUE : 0;
		alu_state |= off_is_imm ? BPF_ALU_IMMEDIATE : 0;
		alu_state |= ptr_is_dst_reg ?
			     BPF_ALU_SANITIZE_SRC : BPF_ALU_SANITIZE_DST;

		/* Limit pruning on unknown scalars to enable deep search for
		 * potential masking differences from other program paths.
		 */
		if (!off_is_imm)
			env->explore_alu_limits = true;
	}

	err = update_alu_sanitation_state(aux, alu_state, alu_limit);
	if (err < 0)
		return err;
do_sim:
	/* If we're in commit phase, we're done here given we already
	 * pushed the truncated dst_reg into the speculative verification
	 * stack.
	 *
	 * Also, when register is a known constant, we rewrite register-based
	 * operation to immediate-based, and thus do not need masking (and as
	 * a consequence, do not need to simulate the zero-truncation either).
	 */
	if (commit_window || off_is_imm)
		return 0;

	/* Simulate and find potential out-of-bounds access under
	 * speculative execution from truncation as a result of
	 * masking when off was not within expected range. If off
	 * sits in dst, then we temporarily need to move ptr there
	 * to simulate dst (== 0) +/-= ptr. Needed, for example,
	 * for cases where we use K-based arithmetic in one direction
	 * and truncated reg-based in the other in order to explore
	 * bad access.
	 */
	if (!ptr_is_dst_reg) {
		tmp = *dst_reg;
		copy_register_state(dst_reg, ptr_reg);
	}
	ret = sanitize_speculative_path(env, NULL, env->insn_idx + 1,
					env->insn_idx);
	if (!ptr_is_dst_reg && ret)
		*dst_reg = tmp;
	return !ret ? REASON_STACK : 0;
}
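/* Illustrative numbers for the commit phase above (not part of the
 * original source): if the pre-commit pass recorded alu_limit = 48 and
 * re-evaluating the limit after the simulated pointer move yields 16,
 * the committed limit becomes abs(48 - 16) = 32, i.e. the masking
 * window is narrowed to the distance the pointer was actually observed
 * to move.
 */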
static void sanitize_mark_insn_seen(struct bpf_verifier_env *env)
{
	struct bpf_verifier_state *vstate = env->cur_state;

	/* If we simulate paths under speculation, we don't update the
	 * insn as 'seen' such that when we verify unreachable paths in
	 * the non-speculative domain, sanitize_dead_code() can still
	 * rewrite/sanitize them.
	 */
	if (!vstate->speculative)
		env->insn_aux_data[env->insn_idx].seen = env->pass_cnt;
}
static int sanitize_err(struct bpf_verifier_env *env,
			const struct bpf_insn *insn, int reason,
			const struct bpf_reg_state *off_reg,
			const struct bpf_reg_state *dst_reg)
{
	static const char *err = "pointer arithmetic with it prohibited for !root";
	const char *op = BPF_OP(insn->code) == BPF_ADD ? "add" : "sub";
	u32 dst = insn->dst_reg, src = insn->src_reg;

	switch (reason) {
	case REASON_BOUNDS:
		verbose(env, "R%d has unknown scalar with mixed signed bounds, %s\n",
			off_reg == dst_reg ? dst : src, err);
		break;
	case REASON_TYPE:
		verbose(env, "R%d has pointer with unsupported alu operation, %s\n",
			off_reg == dst_reg ? src : dst, err);
		break;
	case REASON_PATHS:
		verbose(env, "R%d tried to %s from different maps, paths or scalars, %s\n",
			dst, op, err);
		break;
	case REASON_LIMIT:
		verbose(env, "R%d tried to %s beyond pointer bounds, %s\n",
			dst, op, err);
		break;
	case REASON_STACK:
		verbose(env, "R%d could not be pushed for speculative verification, %s\n",
			dst, err);
		break;
	default:
		verbose(env, "verifier internal error: unknown reason (%d)\n",
			reason);
		break;
	}

	return -EACCES;
}
/* check that stack access falls within stack limits and that 'reg' doesn't
 * have a variable offset.
 *
 * Variable offset is prohibited for unprivileged mode for simplicity since it
 * requires corresponding support in Spectre masking for stack ALU. See also
 * retrieve_ptr_limit().
 *
 * 'off' includes 'reg->off'.
 */
static int check_stack_access_for_ptr_arithmetic(
		struct bpf_verifier_env *env,
		int regno,
		const struct bpf_reg_state *reg,
		int off)
{
	if (!tnum_is_const(reg->var_off)) {
		char tn_buf[48];

		tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
		verbose(env, "R%d variable stack access prohibited for !root, var_off=%s off=%d\n",
			regno, tn_buf, off);
		return -EACCES;
	}

	if (off >= 0 || off < -MAX_BPF_STACK) {
		verbose(env, "R%d stack pointer arithmetic goes out of range, "
			"prohibited for !root; off=%d\n", regno, off);
		return -EACCES;
	}

	return 0;
}
static int sanitize_check_bounds(struct bpf_verifier_env *env,
				 const struct bpf_insn *insn,
				 const struct bpf_reg_state *dst_reg)
{
	u32 dst = insn->dst_reg;

	/* For unprivileged we require that resulting offset must be in bounds
	 * in order to be able to sanitize access later on.
	 */
	if (env->bypass_spec_v1)
		return 0;

	switch (dst_reg->type) {
	case PTR_TO_STACK:
		if (check_stack_access_for_ptr_arithmetic(env, dst, dst_reg,
					dst_reg->off + dst_reg->var_off.value))
			return -EACCES;
		break;
	case PTR_TO_MAP_VALUE:
		if (check_map_access(env, dst, dst_reg->off, 1, false, ACCESS_HELPER)) {
			verbose(env, "R%d pointer arithmetic of map value goes out of range, "
				"prohibited for !root\n", dst);
			return -EACCES;
		}
		break;
	default:
		break;
	}

	return 0;
}
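/* Example of what the bounds check above rejects (illustrative, not
 * part of the original source): for a map with value_size = 64, an
 * unprivileged program computing "val_ptr += 100" ends up with
 * dst_reg->off = 100, check_map_access() fails on that offset and the
 * pointer ALU is refused, since an out-of-bounds result could not be
 * sanitized safely later on.
 */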
12815 /* Handles arithmetic on a pointer and a scalar: computes new min/max and var_off.
12816 * Caller should also handle BPF_MOV case separately.
12817 * If we return -EACCES, caller may want to try again treating pointer as a
12818 * scalar. So we only emit a diagnostic if !env->allow_ptr_leaks.
12820 static int adjust_ptr_min_max_vals(struct bpf_verifier_env
*env
,
12821 struct bpf_insn
*insn
,
12822 const struct bpf_reg_state
*ptr_reg
,
12823 const struct bpf_reg_state
*off_reg
)
12825 struct bpf_verifier_state
*vstate
= env
->cur_state
;
12826 struct bpf_func_state
*state
= vstate
->frame
[vstate
->curframe
];
12827 struct bpf_reg_state
*regs
= state
->regs
, *dst_reg
;
12828 bool known
= tnum_is_const(off_reg
->var_off
);
12829 s64 smin_val
= off_reg
->smin_value
, smax_val
= off_reg
->smax_value
,
12830 smin_ptr
= ptr_reg
->smin_value
, smax_ptr
= ptr_reg
->smax_value
;
12831 u64 umin_val
= off_reg
->umin_value
, umax_val
= off_reg
->umax_value
,
12832 umin_ptr
= ptr_reg
->umin_value
, umax_ptr
= ptr_reg
->umax_value
;
12833 struct bpf_sanitize_info info
= {};
12834 u8 opcode
= BPF_OP(insn
->code
);
12835 u32 dst
= insn
->dst_reg
;
12838 dst_reg
= ®s
[dst
];
12840 if ((known
&& (smin_val
!= smax_val
|| umin_val
!= umax_val
)) ||
12841 smin_val
> smax_val
|| umin_val
> umax_val
) {
12842 /* Taint dst register if offset had invalid bounds derived from
12843 * e.g. dead branches.
12845 __mark_reg_unknown(env
, dst_reg
);
12849 if (BPF_CLASS(insn
->code
) != BPF_ALU64
) {
12850 /* 32-bit ALU ops on pointers produce (meaningless) scalars */
12851 if (opcode
== BPF_SUB
&& env
->allow_ptr_leaks
) {
12852 __mark_reg_unknown(env
, dst_reg
);
12857 "R%d 32-bit pointer arithmetic prohibited\n",
12862 if (ptr_reg
->type
& PTR_MAYBE_NULL
) {
12863 verbose(env
, "R%d pointer arithmetic on %s prohibited, null-check it first\n",
12864 dst
, reg_type_str(env
, ptr_reg
->type
));
12868 switch (base_type(ptr_reg
->type
)) {
12869 case CONST_PTR_TO_MAP
:
12870 /* smin_val represents the known value */
12871 if (known
&& smin_val
== 0 && opcode
== BPF_ADD
)
12874 case PTR_TO_PACKET_END
:
12875 case PTR_TO_SOCKET
:
12876 case PTR_TO_SOCK_COMMON
:
12877 case PTR_TO_TCP_SOCK
:
12878 case PTR_TO_XDP_SOCK
:
12879 verbose(env
, "R%d pointer arithmetic on %s prohibited\n",
12880 dst
, reg_type_str(env
, ptr_reg
->type
));
12886 /* In case of 'scalar += pointer', dst_reg inherits pointer type and id.
12887 * The id may be overwritten later if we create a new variable offset.
12889 dst_reg
->type
= ptr_reg
->type
;
12890 dst_reg
->id
= ptr_reg
->id
;
12892 if (!check_reg_sane_offset(env
, off_reg
, ptr_reg
->type
) ||
12893 !check_reg_sane_offset(env
, ptr_reg
, ptr_reg
->type
))
12896 /* pointer types do not carry 32-bit bounds at the moment. */
12897 __mark_reg32_unbounded(dst_reg
);
12899 if (sanitize_needed(opcode
)) {
12900 ret
= sanitize_ptr_alu(env
, insn
, ptr_reg
, off_reg
, dst_reg
,
12903 return sanitize_err(env
, insn
, ret
, off_reg
, dst_reg
);
12908 /* We can take a fixed offset as long as it doesn't overflow
12909 * the s32 'off' field
12911 if (known
&& (ptr_reg
->off
+ smin_val
==
12912 (s64
)(s32
)(ptr_reg
->off
+ smin_val
))) {
12913 /* pointer += K. Accumulate it into fixed offset */
12914 dst_reg
->smin_value
= smin_ptr
;
12915 dst_reg
->smax_value
= smax_ptr
;
12916 dst_reg
->umin_value
= umin_ptr
;
12917 dst_reg
->umax_value
= umax_ptr
;
12918 dst_reg
->var_off
= ptr_reg
->var_off
;
12919 dst_reg
->off
= ptr_reg
->off
+ smin_val
;
12920 dst_reg
->raw
= ptr_reg
->raw
;
12923 /* A new variable offset is created. Note that off_reg->off
12924 * == 0, since it's a scalar.
12925 * dst_reg gets the pointer type and since some positive
12926 * integer value was added to the pointer, give it a new 'id'
12927 * if it's a PTR_TO_PACKET.
12928 * this creates a new 'base' pointer, off_reg (variable) gets
12929 * added into the variable offset, and we copy the fixed offset
12932 if (signed_add_overflows(smin_ptr
, smin_val
) ||
12933 signed_add_overflows(smax_ptr
, smax_val
)) {
12934 dst_reg
->smin_value
= S64_MIN
;
12935 dst_reg
->smax_value
= S64_MAX
;
12937 dst_reg
->smin_value
= smin_ptr
+ smin_val
;
12938 dst_reg
->smax_value
= smax_ptr
+ smax_val
;
12940 if (umin_ptr
+ umin_val
< umin_ptr
||
12941 umax_ptr
+ umax_val
< umax_ptr
) {
12942 dst_reg
->umin_value
= 0;
12943 dst_reg
->umax_value
= U64_MAX
;
12945 dst_reg
->umin_value
= umin_ptr
+ umin_val
;
12946 dst_reg
->umax_value
= umax_ptr
+ umax_val
;
12948 dst_reg
->var_off
= tnum_add(ptr_reg
->var_off
, off_reg
->var_off
);
12949 dst_reg
->off
= ptr_reg
->off
;
12950 dst_reg
->raw
= ptr_reg
->raw
;
12951 if (reg_is_pkt_pointer(ptr_reg
)) {
12952 dst_reg
->id
= ++env
->id_gen
;
12953 /* something was added to pkt_ptr, set range to zero */
12954 memset(&dst_reg
->raw
, 0, sizeof(dst_reg
->raw
));
12958 if (dst_reg
== off_reg
) {
12959 /* scalar -= pointer. Creates an unknown scalar */
12960 verbose(env
, "R%d tried to subtract pointer from scalar\n",
12964 /* We don't allow subtraction from FP, because (according to
12965 * test_verifier.c test "invalid fp arithmetic", JITs might not
12966 * be able to deal with it.
12968 if (ptr_reg
->type
== PTR_TO_STACK
) {
12969 verbose(env
, "R%d subtraction from stack pointer prohibited\n",
12973 if (known
&& (ptr_reg
->off
- smin_val
==
12974 (s64
)(s32
)(ptr_reg
->off
- smin_val
))) {
12975 /* pointer -= K. Subtract it from fixed offset */
12976 dst_reg
->smin_value
= smin_ptr
;
12977 dst_reg
->smax_value
= smax_ptr
;
12978 dst_reg
->umin_value
= umin_ptr
;
12979 dst_reg
->umax_value
= umax_ptr
;
12980 dst_reg
->var_off
= ptr_reg
->var_off
;
12981 dst_reg
->id
= ptr_reg
->id
;
12982 dst_reg
->off
= ptr_reg
->off
- smin_val
;
12983 dst_reg
->raw
= ptr_reg
->raw
;
12986 /* A new variable offset is created. If the subtrahend is known
12987 * nonnegative, then any reg->range we had before is still good.
12989 if (signed_sub_overflows(smin_ptr
, smax_val
) ||
12990 signed_sub_overflows(smax_ptr
, smin_val
)) {
12991 /* Overflow possible, we know nothing */
12992 dst_reg
->smin_value
= S64_MIN
;
12993 dst_reg
->smax_value
= S64_MAX
;
12995 dst_reg
->smin_value
= smin_ptr
- smax_val
;
12996 dst_reg
->smax_value
= smax_ptr
- smin_val
;
12998 if (umin_ptr
< umax_val
) {
12999 /* Overflow possible, we know nothing */
13000 dst_reg
->umin_value
= 0;
13001 dst_reg
->umax_value
= U64_MAX
;
13003 /* Cannot overflow (as long as bounds are consistent) */
13004 dst_reg
->umin_value
= umin_ptr
- umax_val
;
13005 dst_reg
->umax_value
= umax_ptr
- umin_val
;
13007 dst_reg
->var_off
= tnum_sub(ptr_reg
->var_off
, off_reg
->var_off
);
13008 dst_reg
->off
= ptr_reg
->off
;
13009 dst_reg
->raw
= ptr_reg
->raw
;
13010 if (reg_is_pkt_pointer(ptr_reg
)) {
13011 dst_reg
->id
= ++env
->id_gen
;
13012 /* something was added to pkt_ptr, set range to zero */
13014 memset(&dst_reg
->raw
, 0, sizeof(dst_reg
->raw
));
13020 /* bitwise ops on pointers are troublesome, prohibit. */
13021 verbose(env
, "R%d bitwise operator %s on pointer prohibited\n",
13022 dst
, bpf_alu_string
[opcode
>> 4]);
13025 /* other operators (e.g. MUL,LSH) produce non-pointer results */
13026 verbose(env
, "R%d pointer arithmetic with %s operator prohibited\n",
13027 dst
, bpf_alu_string
[opcode
>> 4]);
13031 if (!check_reg_sane_offset(env
, dst_reg
, ptr_reg
->type
))
13033 reg_bounds_sync(dst_reg
);
13034 if (sanitize_check_bounds(env
, insn
, dst_reg
) < 0)
13036 if (sanitize_needed(opcode
)) {
13037 ret
= sanitize_ptr_alu(env
, insn
, dst_reg
, off_reg
, dst_reg
,
13040 return sanitize_err(env
, insn
, ret
, off_reg
, dst_reg
);
13046 static void scalar32_min_max_add(struct bpf_reg_state
*dst_reg
,
13047 struct bpf_reg_state
*src_reg
)
13049 s32 smin_val
= src_reg
->s32_min_value
;
13050 s32 smax_val
= src_reg
->s32_max_value
;
13051 u32 umin_val
= src_reg
->u32_min_value
;
13052 u32 umax_val
= src_reg
->u32_max_value
;
13054 if (signed_add32_overflows(dst_reg
->s32_min_value
, smin_val
) ||
13055 signed_add32_overflows(dst_reg
->s32_max_value
, smax_val
)) {
13056 dst_reg
->s32_min_value
= S32_MIN
;
13057 dst_reg
->s32_max_value
= S32_MAX
;
13059 dst_reg
->s32_min_value
+= smin_val
;
13060 dst_reg
->s32_max_value
+= smax_val
;
13062 if (dst_reg
->u32_min_value
+ umin_val
< umin_val
||
13063 dst_reg
->u32_max_value
+ umax_val
< umax_val
) {
13064 dst_reg
->u32_min_value
= 0;
13065 dst_reg
->u32_max_value
= U32_MAX
;
13067 dst_reg
->u32_min_value
+= umin_val
;
13068 dst_reg
->u32_max_value
+= umax_val
;
13072 static void scalar_min_max_add(struct bpf_reg_state
*dst_reg
,
13073 struct bpf_reg_state
*src_reg
)
13075 s64 smin_val
= src_reg
->smin_value
;
13076 s64 smax_val
= src_reg
->smax_value
;
13077 u64 umin_val
= src_reg
->umin_value
;
13078 u64 umax_val
= src_reg
->umax_value
;
13080 if (signed_add_overflows(dst_reg
->smin_value
, smin_val
) ||
13081 signed_add_overflows(dst_reg
->smax_value
, smax_val
)) {
13082 dst_reg
->smin_value
= S64_MIN
;
13083 dst_reg
->smax_value
= S64_MAX
;
13085 dst_reg
->smin_value
+= smin_val
;
13086 dst_reg
->smax_value
+= smax_val
;
13088 if (dst_reg
->umin_value
+ umin_val
< umin_val
||
13089 dst_reg
->umax_value
+ umax_val
< umax_val
) {
13090 dst_reg
->umin_value
= 0;
13091 dst_reg
->umax_value
= U64_MAX
;
13093 dst_reg
->umin_value
+= umin_val
;
13094 dst_reg
->umax_value
+= umax_val
;
13098 static void scalar32_min_max_sub(struct bpf_reg_state
*dst_reg
,
13099 struct bpf_reg_state
*src_reg
)
13101 s32 smin_val
= src_reg
->s32_min_value
;
13102 s32 smax_val
= src_reg
->s32_max_value
;
13103 u32 umin_val
= src_reg
->u32_min_value
;
13104 u32 umax_val
= src_reg
->u32_max_value
;
13106 if (signed_sub32_overflows(dst_reg
->s32_min_value
, smax_val
) ||
13107 signed_sub32_overflows(dst_reg
->s32_max_value
, smin_val
)) {
13108 /* Overflow possible, we know nothing */
13109 dst_reg
->s32_min_value
= S32_MIN
;
13110 dst_reg
->s32_max_value
= S32_MAX
;
13112 dst_reg
->s32_min_value
-= smax_val
;
13113 dst_reg
->s32_max_value
-= smin_val
;
13115 if (dst_reg
->u32_min_value
< umax_val
) {
13116 /* Overflow possible, we know nothing */
13117 dst_reg
->u32_min_value
= 0;
13118 dst_reg
->u32_max_value
= U32_MAX
;
13120 /* Cannot overflow (as long as bounds are consistent) */
13121 dst_reg
->u32_min_value
-= umax_val
;
13122 dst_reg
->u32_max_value
-= umin_val
;
13126 static void scalar_min_max_sub(struct bpf_reg_state
*dst_reg
,
13127 struct bpf_reg_state
*src_reg
)
13129 s64 smin_val
= src_reg
->smin_value
;
13130 s64 smax_val
= src_reg
->smax_value
;
13131 u64 umin_val
= src_reg
->umin_value
;
13132 u64 umax_val
= src_reg
->umax_value
;
13134 if (signed_sub_overflows(dst_reg
->smin_value
, smax_val
) ||
13135 signed_sub_overflows(dst_reg
->smax_value
, smin_val
)) {
13136 /* Overflow possible, we know nothing */
13137 dst_reg
->smin_value
= S64_MIN
;
13138 dst_reg
->smax_value
= S64_MAX
;
13140 dst_reg
->smin_value
-= smax_val
;
13141 dst_reg
->smax_value
-= smin_val
;
13143 if (dst_reg
->umin_value
< umax_val
) {
13144 /* Overflow possible, we know nothing */
13145 dst_reg
->umin_value
= 0;
13146 dst_reg
->umax_value
= U64_MAX
;
13148 /* Cannot overflow (as long as bounds are consistent) */
13149 dst_reg
->umin_value
-= umax_val
;
13150 dst_reg
->umax_value
-= umin_val
;
13154 static void scalar32_min_max_mul(struct bpf_reg_state
*dst_reg
,
13155 struct bpf_reg_state
*src_reg
)
13157 s32 smin_val
= src_reg
->s32_min_value
;
13158 u32 umin_val
= src_reg
->u32_min_value
;
13159 u32 umax_val
= src_reg
->u32_max_value
;
13161 if (smin_val
< 0 || dst_reg
->s32_min_value
< 0) {
13162 /* Ain't nobody got time to multiply that sign */
13163 __mark_reg32_unbounded(dst_reg
);
13166 /* Both values are positive, so we can work with unsigned and
13167 * copy the result to signed (unless it exceeds S32_MAX).
13169 if (umax_val
> U16_MAX
|| dst_reg
->u32_max_value
> U16_MAX
) {
13170 /* Potential overflow, we know nothing */
13171 __mark_reg32_unbounded(dst_reg
);
13174 dst_reg
->u32_min_value
*= umin_val
;
13175 dst_reg
->u32_max_value
*= umax_val
;
13176 if (dst_reg
->u32_max_value
> S32_MAX
) {
13177 /* Overflow possible, we know nothing */
13178 dst_reg
->s32_min_value
= S32_MIN
;
13179 dst_reg
->s32_max_value
= S32_MAX
;
13181 dst_reg
->s32_min_value
= dst_reg
->u32_min_value
;
13182 dst_reg
->s32_max_value
= dst_reg
->u32_max_value
;
13186 static void scalar_min_max_mul(struct bpf_reg_state
*dst_reg
,
13187 struct bpf_reg_state
*src_reg
)
13189 s64 smin_val
= src_reg
->smin_value
;
13190 u64 umin_val
= src_reg
->umin_value
;
13191 u64 umax_val
= src_reg
->umax_value
;
13193 if (smin_val
< 0 || dst_reg
->smin_value
< 0) {
13194 /* Ain't nobody got time to multiply that sign */
13195 __mark_reg64_unbounded(dst_reg
);
13198 /* Both values are positive, so we can work with unsigned and
13199 * copy the result to signed (unless it exceeds S64_MAX).
13201 if (umax_val
> U32_MAX
|| dst_reg
->umax_value
> U32_MAX
) {
13202 /* Potential overflow, we know nothing */
13203 __mark_reg64_unbounded(dst_reg
);
13206 dst_reg
->umin_value
*= umin_val
;
13207 dst_reg
->umax_value
*= umax_val
;
13208 if (dst_reg
->umax_value
> S64_MAX
) {
13209 /* Overflow possible, we know nothing */
13210 dst_reg
->smin_value
= S64_MIN
;
13211 dst_reg
->smax_value
= S64_MAX
;
13213 dst_reg
->smin_value
= dst_reg
->umin_value
;
13214 dst_reg
->smax_value
= dst_reg
->umax_value
;
13218 static void scalar32_min_max_and(struct bpf_reg_state
*dst_reg
,
13219 struct bpf_reg_state
*src_reg
)
13221 bool src_known
= tnum_subreg_is_const(src_reg
->var_off
);
13222 bool dst_known
= tnum_subreg_is_const(dst_reg
->var_off
);
13223 struct tnum var32_off
= tnum_subreg(dst_reg
->var_off
);
13224 s32 smin_val
= src_reg
->s32_min_value
;
13225 u32 umax_val
= src_reg
->u32_max_value
;
13227 if (src_known
&& dst_known
) {
13228 __mark_reg32_known(dst_reg
, var32_off
.value
);
13232 /* We get our minimum from the var_off, since that's inherently
13233 * bitwise. Our maximum is the minimum of the operands' maxima.
13235 dst_reg
->u32_min_value
= var32_off
.value
;
13236 dst_reg
->u32_max_value
= min(dst_reg
->u32_max_value
, umax_val
);
13237 if (dst_reg
->s32_min_value
< 0 || smin_val
< 0) {
13238 /* Lose signed bounds when ANDing negative numbers,
13239 * ain't nobody got time for that.
13241 dst_reg
->s32_min_value
= S32_MIN
;
13242 dst_reg
->s32_max_value
= S32_MAX
;
13244 /* ANDing two positives gives a positive, so safe to
13245 * cast result into s64.
13247 dst_reg
->s32_min_value
= dst_reg
->u32_min_value
;
13248 dst_reg
->s32_max_value
= dst_reg
->u32_max_value
;
13252 static void scalar_min_max_and(struct bpf_reg_state
*dst_reg
,
13253 struct bpf_reg_state
*src_reg
)
13255 bool src_known
= tnum_is_const(src_reg
->var_off
);
13256 bool dst_known
= tnum_is_const(dst_reg
->var_off
);
13257 s64 smin_val
= src_reg
->smin_value
;
13258 u64 umax_val
= src_reg
->umax_value
;
13260 if (src_known
&& dst_known
) {
13261 __mark_reg_known(dst_reg
, dst_reg
->var_off
.value
);
13265 /* We get our minimum from the var_off, since that's inherently
13266 * bitwise. Our maximum is the minimum of the operands' maxima.
13268 dst_reg
->umin_value
= dst_reg
->var_off
.value
;
13269 dst_reg
->umax_value
= min(dst_reg
->umax_value
, umax_val
);
13270 if (dst_reg
->smin_value
< 0 || smin_val
< 0) {
13271 /* Lose signed bounds when ANDing negative numbers,
13272 * ain't nobody got time for that.
13274 dst_reg
->smin_value
= S64_MIN
;
13275 dst_reg
->smax_value
= S64_MAX
;
13277 /* ANDing two positives gives a positive, so safe to
13278 * cast result into s64.
13280 dst_reg
->smin_value
= dst_reg
->umin_value
;
13281 dst_reg
->smax_value
= dst_reg
->umax_value
;
13283 /* We may learn something more from the var_off */
13284 __update_reg_bounds(dst_reg
);
13287 static void scalar32_min_max_or(struct bpf_reg_state
*dst_reg
,
13288 struct bpf_reg_state
*src_reg
)
13290 bool src_known
= tnum_subreg_is_const(src_reg
->var_off
);
13291 bool dst_known
= tnum_subreg_is_const(dst_reg
->var_off
);
13292 struct tnum var32_off
= tnum_subreg(dst_reg
->var_off
);
13293 s32 smin_val
= src_reg
->s32_min_value
;
13294 u32 umin_val
= src_reg
->u32_min_value
;
13296 if (src_known
&& dst_known
) {
13297 __mark_reg32_known(dst_reg
, var32_off
.value
);
13301 /* We get our maximum from the var_off, and our minimum is the
13302 * maximum of the operands' minima
13304 dst_reg
->u32_min_value
= max(dst_reg
->u32_min_value
, umin_val
);
13305 dst_reg
->u32_max_value
= var32_off
.value
| var32_off
.mask
;
13306 if (dst_reg
->s32_min_value
< 0 || smin_val
< 0) {
13307 /* Lose signed bounds when ORing negative numbers,
13308 * ain't nobody got time for that.
13310 dst_reg
->s32_min_value
= S32_MIN
;
13311 dst_reg
->s32_max_value
= S32_MAX
;
13313 /* ORing two positives gives a positive, so safe to
13314 * cast result into s64.
13316 dst_reg
->s32_min_value
= dst_reg
->u32_min_value
;
13317 dst_reg
->s32_max_value
= dst_reg
->u32_max_value
;
13321 static void scalar_min_max_or(struct bpf_reg_state
*dst_reg
,
13322 struct bpf_reg_state
*src_reg
)
13324 bool src_known
= tnum_is_const(src_reg
->var_off
);
13325 bool dst_known
= tnum_is_const(dst_reg
->var_off
);
13326 s64 smin_val
= src_reg
->smin_value
;
13327 u64 umin_val
= src_reg
->umin_value
;
13329 if (src_known
&& dst_known
) {
13330 __mark_reg_known(dst_reg
, dst_reg
->var_off
.value
);
13334 /* We get our maximum from the var_off, and our minimum is the
13335 * maximum of the operands' minima
13337 dst_reg
->umin_value
= max(dst_reg
->umin_value
, umin_val
);
13338 dst_reg
->umax_value
= dst_reg
->var_off
.value
| dst_reg
->var_off
.mask
;
13339 if (dst_reg
->smin_value
< 0 || smin_val
< 0) {
13340 /* Lose signed bounds when ORing negative numbers,
13341 * ain't nobody got time for that.
13343 dst_reg
->smin_value
= S64_MIN
;
13344 dst_reg
->smax_value
= S64_MAX
;
13346 /* ORing two positives gives a positive, so safe to
13347 * cast result into s64.
13349 dst_reg
->smin_value
= dst_reg
->umin_value
;
13350 dst_reg
->smax_value
= dst_reg
->umax_value
;
13352 /* We may learn something more from the var_off */
13353 __update_reg_bounds(dst_reg
);
13356 static void scalar32_min_max_xor(struct bpf_reg_state
*dst_reg
,
13357 struct bpf_reg_state
*src_reg
)
13359 bool src_known
= tnum_subreg_is_const(src_reg
->var_off
);
13360 bool dst_known
= tnum_subreg_is_const(dst_reg
->var_off
);
13361 struct tnum var32_off
= tnum_subreg(dst_reg
->var_off
);
13362 s32 smin_val
= src_reg
->s32_min_value
;
13364 if (src_known
&& dst_known
) {
13365 __mark_reg32_known(dst_reg
, var32_off
.value
);
13369 /* We get both minimum and maximum from the var32_off. */
13370 dst_reg
->u32_min_value
= var32_off
.value
;
13371 dst_reg
->u32_max_value
= var32_off
.value
| var32_off
.mask
;
13373 if (dst_reg
->s32_min_value
>= 0 && smin_val
>= 0) {
13374 /* XORing two positive sign numbers gives a positive,
13375 * so safe to cast u32 result into s32.
13377 dst_reg
->s32_min_value
= dst_reg
->u32_min_value
;
13378 dst_reg
->s32_max_value
= dst_reg
->u32_max_value
;
13380 dst_reg
->s32_min_value
= S32_MIN
;
13381 dst_reg
->s32_max_value
= S32_MAX
;
13385 static void scalar_min_max_xor(struct bpf_reg_state
*dst_reg
,
13386 struct bpf_reg_state
*src_reg
)
13388 bool src_known
= tnum_is_const(src_reg
->var_off
);
13389 bool dst_known
= tnum_is_const(dst_reg
->var_off
);
13390 s64 smin_val
= src_reg
->smin_value
;
13392 if (src_known
&& dst_known
) {
13393 /* dst_reg->var_off.value has been updated earlier */
13394 __mark_reg_known(dst_reg
, dst_reg
->var_off
.value
);
13398 /* We get both minimum and maximum from the var_off. */
13399 dst_reg
->umin_value
= dst_reg
->var_off
.value
;
13400 dst_reg
->umax_value
= dst_reg
->var_off
.value
| dst_reg
->var_off
.mask
;
13402 if (dst_reg
->smin_value
>= 0 && smin_val
>= 0) {
13403 /* XORing two positive sign numbers gives a positive,
13404 * so safe to cast u64 result into s64.
13406 dst_reg
->smin_value
= dst_reg
->umin_value
;
13407 dst_reg
->smax_value
= dst_reg
->umax_value
;
13409 dst_reg
->smin_value
= S64_MIN
;
13410 dst_reg
->smax_value
= S64_MAX
;
13413 __update_reg_bounds(dst_reg
);
13416 static void __scalar32_min_max_lsh(struct bpf_reg_state
*dst_reg
,
13417 u64 umin_val
, u64 umax_val
)
13419 /* We lose all sign bit information (except what we can pick
13422 dst_reg
->s32_min_value
= S32_MIN
;
13423 dst_reg
->s32_max_value
= S32_MAX
;
13424 /* If we might shift our top bit out, then we know nothing */
13425 if (umax_val
> 31 || dst_reg
->u32_max_value
> 1ULL << (31 - umax_val
)) {
13426 dst_reg
->u32_min_value
= 0;
13427 dst_reg
->u32_max_value
= U32_MAX
;
13429 dst_reg
->u32_min_value
<<= umin_val
;
13430 dst_reg
->u32_max_value
<<= umax_val
;
13434 static void scalar32_min_max_lsh(struct bpf_reg_state
*dst_reg
,
13435 struct bpf_reg_state
*src_reg
)
13437 u32 umax_val
= src_reg
->u32_max_value
;
13438 u32 umin_val
= src_reg
->u32_min_value
;
13439 /* u32 alu operation will zext upper bits */
13440 struct tnum subreg
= tnum_subreg(dst_reg
->var_off
);
13442 __scalar32_min_max_lsh(dst_reg
, umin_val
, umax_val
);
13443 dst_reg
->var_off
= tnum_subreg(tnum_lshift(subreg
, umin_val
));
13444 /* Not required but being careful mark reg64 bounds as unknown so
13445 * that we are forced to pick them up from tnum and zext later and
13446 * if some path skips this step we are still safe.
13448 __mark_reg64_unbounded(dst_reg
);
13449 __update_reg32_bounds(dst_reg
);
13452 static void __scalar64_min_max_lsh(struct bpf_reg_state
*dst_reg
,
13453 u64 umin_val
, u64 umax_val
)
13455 /* Special case <<32 because it is a common compiler pattern to sign
13456 * extend subreg by doing <<32 s>>32. In this case if 32bit bounds are
13457 * positive we know this shift will also be positive so we can track
13458 * bounds correctly. Otherwise we lose all sign bit information except
13459 * what we can pick up from var_off. Perhaps we can generalize this
13460 * later to shifts of any length.
13462 if (umin_val
== 32 && umax_val
== 32 && dst_reg
->s32_max_value
>= 0)
13463 dst_reg
->smax_value
= (s64
)dst_reg
->s32_max_value
<< 32;
13465 dst_reg
->smax_value
= S64_MAX
;
13467 if (umin_val
== 32 && umax_val
== 32 && dst_reg
->s32_min_value
>= 0)
13468 dst_reg
->smin_value
= (s64
)dst_reg
->s32_min_value
<< 32;
13470 dst_reg
->smin_value
= S64_MIN
;
13472 /* If we might shift our top bit out, then we know nothing */
13473 if (dst_reg
->umax_value
> 1ULL << (63 - umax_val
)) {
13474 dst_reg
->umin_value
= 0;
13475 dst_reg
->umax_value
= U64_MAX
;
13477 dst_reg
->umin_value
<<= umin_val
;
13478 dst_reg
->umax_value
<<= umax_val
;
13482 static void scalar_min_max_lsh(struct bpf_reg_state
*dst_reg
,
13483 struct bpf_reg_state
*src_reg
)
13485 u64 umax_val
= src_reg
->umax_value
;
13486 u64 umin_val
= src_reg
->umin_value
;
13488 /* scalar64 calc uses 32bit unshifted bounds so must be called first */
13489 __scalar64_min_max_lsh(dst_reg
, umin_val
, umax_val
);
13490 __scalar32_min_max_lsh(dst_reg
, umin_val
, umax_val
);
13492 dst_reg
->var_off
= tnum_lshift(dst_reg
->var_off
, umin_val
);
13493 /* We may learn something more from the var_off */
13494 __update_reg_bounds(dst_reg
);
13497 static void scalar32_min_max_rsh(struct bpf_reg_state
*dst_reg
,
13498 struct bpf_reg_state
*src_reg
)
13500 struct tnum subreg
= tnum_subreg(dst_reg
->var_off
);
13501 u32 umax_val
= src_reg
->u32_max_value
;
13502 u32 umin_val
= src_reg
->u32_min_value
;
13504 /* BPF_RSH is an unsigned shift. If the value in dst_reg might
13505 * be negative, then either:
13506 * 1) src_reg might be zero, so the sign bit of the result is
13507 * unknown, so we lose our signed bounds
13508 * 2) it's known negative, thus the unsigned bounds capture the
13510 * 3) the signed bounds cross zero, so they tell us nothing
13512 * If the value in dst_reg is known nonnegative, then again the
13513 * unsigned bounds capture the signed bounds.
13514 * Thus, in all cases it suffices to blow away our signed bounds
13515 * and rely on inferring new ones from the unsigned bounds and
13516 * var_off of the result.
13518 dst_reg
->s32_min_value
= S32_MIN
;
13519 dst_reg
->s32_max_value
= S32_MAX
;
13521 dst_reg
->var_off
= tnum_rshift(subreg
, umin_val
);
13522 dst_reg
->u32_min_value
>>= umax_val
;
13523 dst_reg
->u32_max_value
>>= umin_val
;
13525 __mark_reg64_unbounded(dst_reg
);
13526 __update_reg32_bounds(dst_reg
);
13529 static void scalar_min_max_rsh(struct bpf_reg_state
*dst_reg
,
13530 struct bpf_reg_state
*src_reg
)
13532 u64 umax_val
= src_reg
->umax_value
;
13533 u64 umin_val
= src_reg
->umin_value
;
13535 /* BPF_RSH is an unsigned shift. If the value in dst_reg might
13536 * be negative, then either:
13537 * 1) src_reg might be zero, so the sign bit of the result is
13538 * unknown, so we lose our signed bounds
13539 * 2) it's known negative, thus the unsigned bounds capture the
13541 * 3) the signed bounds cross zero, so they tell us nothing
13543 * If the value in dst_reg is known nonnegative, then again the
13544 * unsigned bounds capture the signed bounds.
13545 * Thus, in all cases it suffices to blow away our signed bounds
13546 * and rely on inferring new ones from the unsigned bounds and
13547 * var_off of the result.
13549 dst_reg
->smin_value
= S64_MIN
;
13550 dst_reg
->smax_value
= S64_MAX
;
13551 dst_reg
->var_off
= tnum_rshift(dst_reg
->var_off
, umin_val
);
13552 dst_reg
->umin_value
>>= umax_val
;
13553 dst_reg
->umax_value
>>= umin_val
;
13555 /* Its not easy to operate on alu32 bounds here because it depends
13556 * on bits being shifted in. Take easy way out and mark unbounded
13557 * so we can recalculate later from tnum.
13559 __mark_reg32_unbounded(dst_reg
);
13560 __update_reg_bounds(dst_reg
);
13563 static void scalar32_min_max_arsh(struct bpf_reg_state
*dst_reg
,
13564 struct bpf_reg_state
*src_reg
)
13566 u64 umin_val
= src_reg
->u32_min_value
;
13568 /* Upon reaching here, src_known is true and
13569 * umax_val is equal to umin_val.
13571 dst_reg
->s32_min_value
= (u32
)(((s32
)dst_reg
->s32_min_value
) >> umin_val
);
13572 dst_reg
->s32_max_value
= (u32
)(((s32
)dst_reg
->s32_max_value
) >> umin_val
);
13574 dst_reg
->var_off
= tnum_arshift(tnum_subreg(dst_reg
->var_off
), umin_val
, 32);
13576 /* blow away the dst_reg umin_value/umax_value and rely on
13577 * dst_reg var_off to refine the result.
13579 dst_reg
->u32_min_value
= 0;
13580 dst_reg
->u32_max_value
= U32_MAX
;
13582 __mark_reg64_unbounded(dst_reg
);
13583 __update_reg32_bounds(dst_reg
);
13586 static void scalar_min_max_arsh(struct bpf_reg_state
*dst_reg
,
13587 struct bpf_reg_state
*src_reg
)
13589 u64 umin_val
= src_reg
->umin_value
;
13591 /* Upon reaching here, src_known is true and umax_val is equal
13594 dst_reg
->smin_value
>>= umin_val
;
13595 dst_reg
->smax_value
>>= umin_val
;
13597 dst_reg
->var_off
= tnum_arshift(dst_reg
->var_off
, umin_val
, 64);
13599 /* blow away the dst_reg umin_value/umax_value and rely on
13600 * dst_reg var_off to refine the result.
13602 dst_reg
->umin_value
= 0;
13603 dst_reg
->umax_value
= U64_MAX
;
13605 /* Its not easy to operate on alu32 bounds here because it depends
13606 * on bits being shifted in from upper 32-bits. Take easy way out
13607 * and mark unbounded so we can recalculate later from tnum.
13609 __mark_reg32_unbounded(dst_reg
);
13610 __update_reg_bounds(dst_reg
);
/* WARNING: This function does calculations on 64-bit values, but the actual
 * execution may occur on 32-bit values. Therefore, things like bitshifts
 * need extra checks in the 32-bit case.
 */
static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env,
				      struct bpf_insn *insn,
				      struct bpf_reg_state *dst_reg,
				      struct bpf_reg_state src_reg)
{
	struct bpf_reg_state *regs = cur_regs(env);
	u8 opcode = BPF_OP(insn->code);
	bool src_known;
	s64 smin_val, smax_val;
	u64 umin_val, umax_val;
	s32 s32_min_val, s32_max_val;
	u32 u32_min_val, u32_max_val;
	u64 insn_bitness = (BPF_CLASS(insn->code) == BPF_ALU64) ? 64 : 32;
	bool alu32 = (BPF_CLASS(insn->code) != BPF_ALU64);
	int ret;

	smin_val = src_reg.smin_value;
	smax_val = src_reg.smax_value;
	umin_val = src_reg.umin_value;
	umax_val = src_reg.umax_value;

	s32_min_val = src_reg.s32_min_value;
	s32_max_val = src_reg.s32_max_value;
	u32_min_val = src_reg.u32_min_value;
	u32_max_val = src_reg.u32_max_value;

	if (alu32) {
		src_known = tnum_subreg_is_const(src_reg.var_off);
		if ((src_known &&
		     (s32_min_val != s32_max_val || u32_min_val != u32_max_val)) ||
		    s32_min_val > s32_max_val || u32_min_val > u32_max_val) {
			/* Taint dst register if offset had invalid bounds
			 * derived from e.g. dead branches.
			 */
			__mark_reg_unknown(env, dst_reg);
			return 0;
		}
	} else {
		src_known = tnum_is_const(src_reg.var_off);
		if ((src_known &&
		     (smin_val != smax_val || umin_val != umax_val)) ||
		    smin_val > smax_val || umin_val > umax_val) {
			/* Taint dst register if offset had invalid bounds
			 * derived from e.g. dead branches.
			 */
			__mark_reg_unknown(env, dst_reg);
			return 0;
		}
	}

	if (!src_known &&
	    opcode != BPF_ADD && opcode != BPF_SUB && opcode != BPF_AND) {
		__mark_reg_unknown(env, dst_reg);
		return 0;
	}

	if (sanitize_needed(opcode)) {
		ret = sanitize_val_alu(env, insn);
		if (ret < 0)
			return sanitize_err(env, insn, ret, NULL, NULL);
	}

	/* Calculate sign/unsigned bounds and tnum for alu32 and alu64 bit ops.
	 * There are two classes of instructions: for the first class we track
	 * both alu32 and alu64 sign/unsigned bounds independently; this
	 * provides the greatest amount of precision when alu operations are
	 * mixed with jmp32 operations. These operations are BPF_ADD, BPF_SUB,
	 * BPF_MUL, BPF_AND, and BPF_OR. This is possible because these ops
	 * have fairly easy to understand and calculate behavior in both
	 * 32-bit and 64-bit alu ops. See alu32 verifier tests for examples.
	 * The second class of operations, BPF_LSH, BPF_RSH, and BPF_ARSH,
	 * however, are not so easy with regards to tracking sign/unsigned
	 * bounds because the bits may cross subreg boundaries in the alu64
	 * case. When this happens we mark the reg unbounded in the subreg
	 * bound space and use the resulting tnum to calculate an
	 * approximation of the sign/unsigned bounds.
	 */
	switch (opcode) {
	case BPF_ADD:
		scalar32_min_max_add(dst_reg, &src_reg);
		scalar_min_max_add(dst_reg, &src_reg);
		dst_reg->var_off = tnum_add(dst_reg->var_off, src_reg.var_off);
		break;
	case BPF_SUB:
		scalar32_min_max_sub(dst_reg, &src_reg);
		scalar_min_max_sub(dst_reg, &src_reg);
		dst_reg->var_off = tnum_sub(dst_reg->var_off, src_reg.var_off);
		break;
	case BPF_MUL:
		dst_reg->var_off = tnum_mul(dst_reg->var_off, src_reg.var_off);
		scalar32_min_max_mul(dst_reg, &src_reg);
		scalar_min_max_mul(dst_reg, &src_reg);
		break;
	case BPF_AND:
		dst_reg->var_off = tnum_and(dst_reg->var_off, src_reg.var_off);
		scalar32_min_max_and(dst_reg, &src_reg);
		scalar_min_max_and(dst_reg, &src_reg);
		break;
	case BPF_OR:
		dst_reg->var_off = tnum_or(dst_reg->var_off, src_reg.var_off);
		scalar32_min_max_or(dst_reg, &src_reg);
		scalar_min_max_or(dst_reg, &src_reg);
		break;
	case BPF_XOR:
		dst_reg->var_off = tnum_xor(dst_reg->var_off, src_reg.var_off);
		scalar32_min_max_xor(dst_reg, &src_reg);
		scalar_min_max_xor(dst_reg, &src_reg);
		break;
	case BPF_LSH:
		if (umax_val >= insn_bitness) {
			/* Shifts greater than 31 or 63 are undefined.
			 * This includes shifts by a negative number.
			 */
			mark_reg_unknown(env, regs, insn->dst_reg);
			break;
		}
		if (alu32)
			scalar32_min_max_lsh(dst_reg, &src_reg);
		else
			scalar_min_max_lsh(dst_reg, &src_reg);
		break;
	case BPF_RSH:
		if (umax_val >= insn_bitness) {
			/* Shifts greater than 31 or 63 are undefined.
			 * This includes shifts by a negative number.
			 */
			mark_reg_unknown(env, regs, insn->dst_reg);
			break;
		}
		if (alu32)
			scalar32_min_max_rsh(dst_reg, &src_reg);
		else
			scalar_min_max_rsh(dst_reg, &src_reg);
		break;
	case BPF_ARSH:
		if (umax_val >= insn_bitness) {
			/* Shifts greater than 31 or 63 are undefined.
			 * This includes shifts by a negative number.
			 */
			mark_reg_unknown(env, regs, insn->dst_reg);
			break;
		}
		if (alu32)
			scalar32_min_max_arsh(dst_reg, &src_reg);
		else
			scalar_min_max_arsh(dst_reg, &src_reg);
		break;
	default:
		mark_reg_unknown(env, regs, insn->dst_reg);
		break;
	}

	/* ALU32 ops are zero extended into 64bit register */
	if (alu32)
		zext_32_to_64(dst_reg);
	reg_bounds_sync(dst_reg);
	return 0;
}
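
/* Editor's illustrative sketch (not part of the verifier, never compiled):
 * a 32-bit ALU op writes the low subregister and zeroes the upper half, so
 * after e.g. "w0 += w1" the 64-bit value of r0 is the 32-bit result
 * zero-extended.  That is why the 64-bit bounds can be rebuilt from the
 * 32-bit ones here.  Values are made up.
 */
#if 0
static void example_alu32_zext(void)
{
	u64 r0 = 0xffffffff00000001ULL;	/* upper half about to be discarded */
	u32 w1 = 2;

	r0 = (u32)r0 + w1;		/* 32-bit add: r0 == 0x3, upper bits zero */
}
#endif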
/* Handles ALU ops other than BPF_END, BPF_NEG and BPF_MOV: computes new min/max
 * and var_off.
 */
static int adjust_reg_min_max_vals(struct bpf_verifier_env *env,
				   struct bpf_insn *insn)
{
	struct bpf_verifier_state *vstate = env->cur_state;
	struct bpf_func_state *state = vstate->frame[vstate->curframe];
	struct bpf_reg_state *regs = state->regs, *dst_reg, *src_reg;
	struct bpf_reg_state *ptr_reg = NULL, off_reg = {0};
	u8 opcode = BPF_OP(insn->code);
	int err;

	dst_reg = &regs[insn->dst_reg];
	src_reg = NULL;
	if (dst_reg->type != SCALAR_VALUE)
		ptr_reg = dst_reg;
	else
		/* Make sure ID is cleared otherwise dst_reg min/max could be
		 * incorrectly propagated into other registers by find_equal_scalars()
		 */
		dst_reg->id = 0;
	if (BPF_SRC(insn->code) == BPF_X) {
		src_reg = &regs[insn->src_reg];
		if (src_reg->type != SCALAR_VALUE) {
			if (dst_reg->type != SCALAR_VALUE) {
				/* Combining two pointers by any ALU op yields
				 * an arbitrary scalar. Disallow all math except
				 * pointer subtraction
				 */
				if (opcode == BPF_SUB && env->allow_ptr_leaks) {
					mark_reg_unknown(env, regs, insn->dst_reg);
					return 0;
				}
				verbose(env, "R%d pointer %s pointer prohibited\n",
					insn->dst_reg,
					bpf_alu_string[opcode >> 4]);
				return -EACCES;
			} else {
				/* scalar += pointer
				 * This is legal, but we have to reverse our
				 * src/dest handling in computing the range
				 */
				err = mark_chain_precision(env, insn->dst_reg);
				if (err)
					return err;
				return adjust_ptr_min_max_vals(env, insn,
							       src_reg, dst_reg);
			}
		} else if (ptr_reg) {
			/* pointer += scalar */
			err = mark_chain_precision(env, insn->src_reg);
			if (err)
				return err;
			return adjust_ptr_min_max_vals(env, insn,
						       dst_reg, src_reg);
		} else if (dst_reg->precise) {
			/* if dst_reg is precise, src_reg should be precise as well */
			err = mark_chain_precision(env, insn->src_reg);
			if (err)
				return err;
		}
	} else {
		/* Pretend the src is a reg with a known value, since we only
		 * need to be able to read from this state.
		 */
		off_reg.type = SCALAR_VALUE;
		__mark_reg_known(&off_reg, insn->imm);
		src_reg = &off_reg;
		if (ptr_reg) /* pointer += K */
			return adjust_ptr_min_max_vals(env, insn,
						       ptr_reg, src_reg);
	}

	/* Got here implies adding two SCALAR_VALUEs */
	if (WARN_ON_ONCE(ptr_reg)) {
		print_verifier_state(env, state, true);
		verbose(env, "verifier internal error: unexpected ptr_reg\n");
		return -EINVAL;
	}
	if (WARN_ON(!src_reg)) {
		print_verifier_state(env, state, true);
		verbose(env, "verifier internal error: no src_reg\n");
		return -EINVAL;
	}
	return adjust_scalar_min_max_vals(env, insn, dst_reg, *src_reg);
}
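
/* Editor's illustrative sketch (not part of the verifier, never compiled):
 * the "scalar += pointer" branch above swaps src/dst when calling
 * adjust_ptr_min_max_vals(), because a sequence like the one below has to be
 * tracked as a stack pointer with a known offset, not as a plain scalar.
 */
#if 0
static const struct bpf_insn example_scalar_plus_ptr[] = {
	BPF_MOV64_IMM(BPF_REG_2, -8),			/* r2 = scalar -8	  */
	BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_10),	/* r2 += r10 (frame ptr)  */
	BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),		/* *(u64 *)(r2 + 0) = 0	  */
	BPF_EXIT_INSN(),
};
#endif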
13863 /* check validity of 32-bit and 64-bit arithmetic operations */
13864 static int check_alu_op(struct bpf_verifier_env
*env
, struct bpf_insn
*insn
)
13866 struct bpf_reg_state
*regs
= cur_regs(env
);
13867 u8 opcode
= BPF_OP(insn
->code
);
13870 if (opcode
== BPF_END
|| opcode
== BPF_NEG
) {
13871 if (opcode
== BPF_NEG
) {
13872 if (BPF_SRC(insn
->code
) != BPF_K
||
13873 insn
->src_reg
!= BPF_REG_0
||
13874 insn
->off
!= 0 || insn
->imm
!= 0) {
13875 verbose(env
, "BPF_NEG uses reserved fields\n");
13879 if (insn
->src_reg
!= BPF_REG_0
|| insn
->off
!= 0 ||
13880 (insn
->imm
!= 16 && insn
->imm
!= 32 && insn
->imm
!= 64) ||
13881 (BPF_CLASS(insn
->code
) == BPF_ALU64
&&
13882 BPF_SRC(insn
->code
) != BPF_TO_LE
)) {
13883 verbose(env
, "BPF_END uses reserved fields\n");
13888 /* check src operand */
13889 err
= check_reg_arg(env
, insn
->dst_reg
, SRC_OP
);
13893 if (is_pointer_value(env
, insn
->dst_reg
)) {
13894 verbose(env
, "R%d pointer arithmetic prohibited\n",
13899 /* check dest operand */
13900 err
= check_reg_arg(env
, insn
->dst_reg
, DST_OP
);
13904 } else if (opcode
== BPF_MOV
) {
13906 if (BPF_SRC(insn
->code
) == BPF_X
) {
13907 if (insn
->imm
!= 0) {
13908 verbose(env
, "BPF_MOV uses reserved fields\n");
13912 if (BPF_CLASS(insn
->code
) == BPF_ALU
) {
13913 if (insn
->off
!= 0 && insn
->off
!= 8 && insn
->off
!= 16) {
13914 verbose(env
, "BPF_MOV uses reserved fields\n");
13918 if (insn
->off
!= 0 && insn
->off
!= 8 && insn
->off
!= 16 &&
13920 verbose(env
, "BPF_MOV uses reserved fields\n");
13925 /* check src operand */
13926 err
= check_reg_arg(env
, insn
->src_reg
, SRC_OP
);
13930 if (insn
->src_reg
!= BPF_REG_0
|| insn
->off
!= 0) {
13931 verbose(env
, "BPF_MOV uses reserved fields\n");
13936 /* check dest operand, mark as required later */
13937 err
= check_reg_arg(env
, insn
->dst_reg
, DST_OP_NO_MARK
);
13941 if (BPF_SRC(insn
->code
) == BPF_X
) {
13942 struct bpf_reg_state
*src_reg
= regs
+ insn
->src_reg
;
13943 struct bpf_reg_state
*dst_reg
= regs
+ insn
->dst_reg
;
13944 bool need_id
= src_reg
->type
== SCALAR_VALUE
&& !src_reg
->id
&&
13945 !tnum_is_const(src_reg
->var_off
);
13947 if (BPF_CLASS(insn
->code
) == BPF_ALU64
) {
13948 if (insn
->off
== 0) {
13950 * copy register state to dest reg
13953 /* Assign src and dst registers the same ID
13954 * that will be used by find_equal_scalars()
13955 * to propagate min/max range.
13957 src_reg
->id
= ++env
->id_gen
;
13958 copy_register_state(dst_reg
, src_reg
);
13959 dst_reg
->live
|= REG_LIVE_WRITTEN
;
13960 dst_reg
->subreg_def
= DEF_NOT_SUBREG
;
13962 /* case: R1 = (s8, s16 s32)R2 */
13963 if (is_pointer_value(env
, insn
->src_reg
)) {
13965 "R%d sign-extension part of pointer\n",
13968 } else if (src_reg
->type
== SCALAR_VALUE
) {
13971 no_sext
= src_reg
->umax_value
< (1ULL << (insn
->off
- 1));
13972 if (no_sext
&& need_id
)
13973 src_reg
->id
= ++env
->id_gen
;
13974 copy_register_state(dst_reg
, src_reg
);
13977 coerce_reg_to_size_sx(dst_reg
, insn
->off
>> 3);
13978 dst_reg
->live
|= REG_LIVE_WRITTEN
;
13979 dst_reg
->subreg_def
= DEF_NOT_SUBREG
;
13981 mark_reg_unknown(env
, regs
, insn
->dst_reg
);
13985 /* R1 = (u32) R2 */
13986 if (is_pointer_value(env
, insn
->src_reg
)) {
13988 "R%d partial copy of pointer\n",
13991 } else if (src_reg
->type
== SCALAR_VALUE
) {
13992 if (insn
->off
== 0) {
13993 bool is_src_reg_u32
= src_reg
->umax_value
<= U32_MAX
;
13995 if (is_src_reg_u32
&& need_id
)
13996 src_reg
->id
= ++env
->id_gen
;
13997 copy_register_state(dst_reg
, src_reg
);
13998 /* Make sure ID is cleared if src_reg is not in u32
13999 * range otherwise dst_reg min/max could be incorrectly
14000 * propagated into src_reg by find_equal_scalars()
14002 if (!is_src_reg_u32
)
14004 dst_reg
->live
|= REG_LIVE_WRITTEN
;
14005 dst_reg
->subreg_def
= env
->insn_idx
+ 1;
14007 /* case: W1 = (s8, s16)W2 */
14008 bool no_sext
= src_reg
->umax_value
< (1ULL << (insn
->off
- 1));
14010 if (no_sext
&& need_id
)
14011 src_reg
->id
= ++env
->id_gen
;
14012 copy_register_state(dst_reg
, src_reg
);
14015 dst_reg
->live
|= REG_LIVE_WRITTEN
;
14016 dst_reg
->subreg_def
= env
->insn_idx
+ 1;
14017 coerce_subreg_to_size_sx(dst_reg
, insn
->off
>> 3);
14020 mark_reg_unknown(env
, regs
,
14023 zext_32_to_64(dst_reg
);
14024 reg_bounds_sync(dst_reg
);
14028 * remember the value we stored into this reg
14030 /* clear any state __mark_reg_known doesn't set */
14031 mark_reg_unknown(env
, regs
, insn
->dst_reg
);
14032 regs
[insn
->dst_reg
].type
= SCALAR_VALUE
;
14033 if (BPF_CLASS(insn
->code
) == BPF_ALU64
) {
14034 __mark_reg_known(regs
+ insn
->dst_reg
,
14037 __mark_reg_known(regs
+ insn
->dst_reg
,
14042 } else if (opcode
> BPF_END
) {
14043 verbose(env
, "invalid BPF_ALU opcode %x\n", opcode
);
14046 } else { /* all other ALU ops: and, sub, xor, add, ... */
14048 if (BPF_SRC(insn
->code
) == BPF_X
) {
14049 if (insn
->imm
!= 0 || insn
->off
> 1 ||
14050 (insn
->off
== 1 && opcode
!= BPF_MOD
&& opcode
!= BPF_DIV
)) {
14051 verbose(env
, "BPF_ALU uses reserved fields\n");
14054 /* check src1 operand */
14055 err
= check_reg_arg(env
, insn
->src_reg
, SRC_OP
);
14059 if (insn
->src_reg
!= BPF_REG_0
|| insn
->off
> 1 ||
14060 (insn
->off
== 1 && opcode
!= BPF_MOD
&& opcode
!= BPF_DIV
)) {
14061 verbose(env
, "BPF_ALU uses reserved fields\n");
14066 /* check src2 operand */
14067 err
= check_reg_arg(env
, insn
->dst_reg
, SRC_OP
);
14071 if ((opcode
== BPF_MOD
|| opcode
== BPF_DIV
) &&
14072 BPF_SRC(insn
->code
) == BPF_K
&& insn
->imm
== 0) {
14073 verbose(env
, "div by zero\n");
14077 if ((opcode
== BPF_LSH
|| opcode
== BPF_RSH
||
14078 opcode
== BPF_ARSH
) && BPF_SRC(insn
->code
) == BPF_K
) {
14079 int size
= BPF_CLASS(insn
->code
) == BPF_ALU64
? 64 : 32;
14081 if (insn
->imm
< 0 || insn
->imm
>= size
) {
14082 verbose(env
, "invalid shift %d\n", insn
->imm
);
14087 /* check dest operand */
14088 err
= check_reg_arg(env
, insn
->dst_reg
, DST_OP_NO_MARK
);
14092 return adjust_reg_min_max_vals(env
, insn
);
14098 static void find_good_pkt_pointers(struct bpf_verifier_state
*vstate
,
14099 struct bpf_reg_state
*dst_reg
,
14100 enum bpf_reg_type type
,
14101 bool range_right_open
)
14103 struct bpf_func_state
*state
;
14104 struct bpf_reg_state
*reg
;
14107 if (dst_reg
->off
< 0 ||
14108 (dst_reg
->off
== 0 && range_right_open
))
14109 /* This doesn't give us any range */
14112 if (dst_reg
->umax_value
> MAX_PACKET_OFF
||
14113 dst_reg
->umax_value
+ dst_reg
->off
> MAX_PACKET_OFF
)
14114 /* Risk of overflow. For instance, ptr + (1<<63) may be less
14115 * than pkt_end, but that's because it's also less than pkt.
14119 new_range
= dst_reg
->off
;
14120 if (range_right_open
)
14123 /* Examples for register markings:
14125 * pkt_data in dst register:
14129 * if (r2 > pkt_end) goto <handle exception>
14134 * if (r2 < pkt_end) goto <access okay>
14135 * <handle exception>
14138 * r2 == dst_reg, pkt_end == src_reg
14139 * r2=pkt(id=n,off=8,r=0)
14140 * r3=pkt(id=n,off=0,r=0)
14142 * pkt_data in src register:
14146 * if (pkt_end >= r2) goto <access okay>
14147 * <handle exception>
14151 * if (pkt_end <= r2) goto <handle exception>
14155 * pkt_end == dst_reg, r2 == src_reg
14156 * r2=pkt(id=n,off=8,r=0)
14157 * r3=pkt(id=n,off=0,r=0)
14159 * Find register r3 and mark its range as r3=pkt(id=n,off=0,r=8)
14160 * or r3=pkt(id=n,off=0,r=8-1), so that range of bytes [r3, r3 + 8)
14161 * and [r3, r3 + 8-1) respectively is safe to access depending on
14165 /* If our ids match, then we must have the same max_value. And we
14166 * don't care about the other reg's fixed offset, since if it's too big
14167 * the range won't allow anything.
14168 * dst_reg->off is known < MAX_PACKET_OFF, therefore it fits in a u16.
14170 bpf_for_each_reg_in_vstate(vstate
, state
, reg
, ({
14171 if (reg
->type
== type
&& reg
->id
== dst_reg
->id
)
14172 /* keep the maximum range already checked */
14173 reg
->range
= max(reg
->range
, new_range
);
14177 static int is_branch32_taken(struct bpf_reg_state
*reg
, u32 val
, u8 opcode
)
14179 struct tnum subreg
= tnum_subreg(reg
->var_off
);
14180 s32 sval
= (s32
)val
;
14184 if (tnum_is_const(subreg
))
14185 return !!tnum_equals_const(subreg
, val
);
14186 else if (val
< reg
->u32_min_value
|| val
> reg
->u32_max_value
)
14188 else if (sval
< reg
->s32_min_value
|| sval
> reg
->s32_max_value
)
14192 if (tnum_is_const(subreg
))
14193 return !tnum_equals_const(subreg
, val
);
14194 else if (val
< reg
->u32_min_value
|| val
> reg
->u32_max_value
)
14196 else if (sval
< reg
->s32_min_value
|| sval
> reg
->s32_max_value
)
14200 if ((~subreg
.mask
& subreg
.value
) & val
)
14202 if (!((subreg
.mask
| subreg
.value
) & val
))
14206 if (reg
->u32_min_value
> val
)
14208 else if (reg
->u32_max_value
<= val
)
14212 if (reg
->s32_min_value
> sval
)
14214 else if (reg
->s32_max_value
<= sval
)
14218 if (reg
->u32_max_value
< val
)
14220 else if (reg
->u32_min_value
>= val
)
14224 if (reg
->s32_max_value
< sval
)
14226 else if (reg
->s32_min_value
>= sval
)
14230 if (reg
->u32_min_value
>= val
)
14232 else if (reg
->u32_max_value
< val
)
14236 if (reg
->s32_min_value
>= sval
)
14238 else if (reg
->s32_max_value
< sval
)
14242 if (reg
->u32_max_value
<= val
)
14244 else if (reg
->u32_min_value
> val
)
14248 if (reg
->s32_max_value
<= sval
)
14250 else if (reg
->s32_min_value
> sval
)
14259 static int is_branch64_taken(struct bpf_reg_state
*reg
, u64 val
, u8 opcode
)
14261 s64 sval
= (s64
)val
;
14265 if (tnum_is_const(reg
->var_off
))
14266 return !!tnum_equals_const(reg
->var_off
, val
);
14267 else if (val
< reg
->umin_value
|| val
> reg
->umax_value
)
14269 else if (sval
< reg
->smin_value
|| sval
> reg
->smax_value
)
14273 if (tnum_is_const(reg
->var_off
))
14274 return !tnum_equals_const(reg
->var_off
, val
);
14275 else if (val
< reg
->umin_value
|| val
> reg
->umax_value
)
14277 else if (sval
< reg
->smin_value
|| sval
> reg
->smax_value
)
14281 if ((~reg
->var_off
.mask
& reg
->var_off
.value
) & val
)
14283 if (!((reg
->var_off
.mask
| reg
->var_off
.value
) & val
))
14287 if (reg
->umin_value
> val
)
14289 else if (reg
->umax_value
<= val
)
14293 if (reg
->smin_value
> sval
)
14295 else if (reg
->smax_value
<= sval
)
14299 if (reg
->umax_value
< val
)
14301 else if (reg
->umin_value
>= val
)
14305 if (reg
->smax_value
< sval
)
14307 else if (reg
->smin_value
>= sval
)
14311 if (reg
->umin_value
>= val
)
14313 else if (reg
->umax_value
< val
)
14317 if (reg
->smin_value
>= sval
)
14319 else if (reg
->smax_value
< sval
)
14323 if (reg
->umax_value
<= val
)
14325 else if (reg
->umin_value
> val
)
14329 if (reg
->smax_value
<= sval
)
14331 else if (reg
->smin_value
> sval
)
/* compute branch direction of the expression "if (reg opcode val) goto target;"
 * and return:
 *  1 - branch will be taken and "goto target" will be executed
 *  0 - branch will not be taken and fall-through to next insn
 * -1 - unknown. Example: "if (reg < 5)" is unknown when register value
 *      range [0,10]
 */
static int is_branch_taken(struct bpf_reg_state *reg, u64 val, u8 opcode,
			   bool is_jmp32)
{
	if (__is_pointer_value(false, reg)) {
		if (!reg_not_null(reg))
			return -1;

		/* If pointer is valid tests against zero will fail so we can
		 * use this to direct branch taken.
		 */
		if (val != 0)
			return -1;

		switch (opcode) {
		case BPF_JEQ:
			return 0;
		case BPF_JNE:
			return 1;
		default:
			return -1;
		}
	}

	if (is_jmp32)
		return is_branch32_taken(reg, val, opcode);
	return is_branch64_taken(reg, val, opcode);
}
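
/* Editor's illustrative sketch (not part of the verifier, never compiled):
 * the helpers above decide a branch statically from the tracked bounds, e.g.
 * for "if r1 > 10" a register whose umin is already 11 always takes the
 * branch (1), one whose umax is at most 10 never does (0), anything else is
 * unknown (-1).  This mirrors the BPF_JGT case of is_branch64_taken().
 */
#if 0
static int example_branch_taken_jgt(u64 umin, u64 umax, u64 val)
{
	if (umin > val)
		return 1;	/* always taken */
	if (umax <= val)
		return 0;	/* never taken */
	return -1;		/* unknown at verification time */
}
#endif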
static int flip_opcode(u32 opcode)
{
	/* How can we transform "a <op> b" into "b <op> a"? */
	static const u8 opcode_flip[16] = {
		/* these stay the same */
		[BPF_JEQ  >> 4] = BPF_JEQ,
		[BPF_JNE  >> 4] = BPF_JNE,
		[BPF_JSET >> 4] = BPF_JSET,
		/* these swap "lesser" and "greater" (L and G in the opcodes) */
		[BPF_JGE  >> 4] = BPF_JLE,
		[BPF_JGT  >> 4] = BPF_JLT,
		[BPF_JLE  >> 4] = BPF_JGE,
		[BPF_JLT  >> 4] = BPF_JGT,
		[BPF_JSGE >> 4] = BPF_JSLE,
		[BPF_JSGT >> 4] = BPF_JSLT,
		[BPF_JSLE >> 4] = BPF_JSGE,
		[BPF_JSLT >> 4] = BPF_JSGT
	};
	return opcode_flip[opcode >> 4];
}
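
/* Editor's illustrative sketch (not part of the verifier, never compiled):
 * flipping lets the verifier normalise "const <op> reg" into "reg <op'> const",
 * e.g. "5 > r1" is the same test as "r1 < 5", and symmetric ops are unchanged.
 */
#if 0
static void example_flip_opcode(void)
{
	u32 op = flip_opcode(BPF_JGT);		/* op == BPF_JLT	      */
	u32 same = flip_opcode(BPF_JEQ);	/* equality is symmetric      */
}
#endif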
14395 static int is_pkt_ptr_branch_taken(struct bpf_reg_state
*dst_reg
,
14396 struct bpf_reg_state
*src_reg
,
14399 struct bpf_reg_state
*pkt
;
14401 if (src_reg
->type
== PTR_TO_PACKET_END
) {
14403 } else if (dst_reg
->type
== PTR_TO_PACKET_END
) {
14405 opcode
= flip_opcode(opcode
);
14410 if (pkt
->range
>= 0)
14415 /* pkt <= pkt_end */
14418 /* pkt > pkt_end */
14419 if (pkt
->range
== BEYOND_PKT_END
)
14420 /* pkt has at last one extra byte beyond pkt_end */
14421 return opcode
== BPF_JGT
;
14424 /* pkt < pkt_end */
14427 /* pkt >= pkt_end */
14428 if (pkt
->range
== BEYOND_PKT_END
|| pkt
->range
== AT_PKT_END
)
14429 return opcode
== BPF_JGE
;
14435 /* Adjusts the register min/max values in the case that the dst_reg is the
14436 * variable register that we are working on, and src_reg is a constant or we're
14437 * simply doing a BPF_K check.
14438 * In JEQ/JNE cases we also adjust the var_off values.
14440 static void reg_set_min_max(struct bpf_reg_state
*true_reg
,
14441 struct bpf_reg_state
*false_reg
,
14442 u64 val
, u32 val32
,
14443 u8 opcode
, bool is_jmp32
)
14445 struct tnum false_32off
= tnum_subreg(false_reg
->var_off
);
14446 struct tnum false_64off
= false_reg
->var_off
;
14447 struct tnum true_32off
= tnum_subreg(true_reg
->var_off
);
14448 struct tnum true_64off
= true_reg
->var_off
;
14449 s64 sval
= (s64
)val
;
14450 s32 sval32
= (s32
)val32
;
14452 /* If the dst_reg is a pointer, we can't learn anything about its
14453 * variable offset from the compare (unless src_reg were a pointer into
14454 * the same object, but we don't bother with that.
14455 * Since false_reg and true_reg have the same type by construction, we
14456 * only need to check one of them for pointerness.
14458 if (__is_pointer_value(false, false_reg
))
14462 /* JEQ/JNE comparison doesn't change the register equivalence.
14465 * if (r1 == 42) goto label;
14467 * label: // here both r1 and r2 are known to be 42.
14469 * Hence when marking register as known preserve it's ID.
14473 __mark_reg32_known(true_reg
, val32
);
14474 true_32off
= tnum_subreg(true_reg
->var_off
);
14476 ___mark_reg_known(true_reg
, val
);
14477 true_64off
= true_reg
->var_off
;
14482 __mark_reg32_known(false_reg
, val32
);
14483 false_32off
= tnum_subreg(false_reg
->var_off
);
14485 ___mark_reg_known(false_reg
, val
);
14486 false_64off
= false_reg
->var_off
;
14491 false_32off
= tnum_and(false_32off
, tnum_const(~val32
));
14492 if (is_power_of_2(val32
))
14493 true_32off
= tnum_or(true_32off
,
14494 tnum_const(val32
));
14496 false_64off
= tnum_and(false_64off
, tnum_const(~val
));
14497 if (is_power_of_2(val
))
14498 true_64off
= tnum_or(true_64off
,
14506 u32 false_umax
= opcode
== BPF_JGT
? val32
: val32
- 1;
14507 u32 true_umin
= opcode
== BPF_JGT
? val32
+ 1 : val32
;
14509 false_reg
->u32_max_value
= min(false_reg
->u32_max_value
,
14511 true_reg
->u32_min_value
= max(true_reg
->u32_min_value
,
14514 u64 false_umax
= opcode
== BPF_JGT
? val
: val
- 1;
14515 u64 true_umin
= opcode
== BPF_JGT
? val
+ 1 : val
;
14517 false_reg
->umax_value
= min(false_reg
->umax_value
, false_umax
);
14518 true_reg
->umin_value
= max(true_reg
->umin_value
, true_umin
);
14526 s32 false_smax
= opcode
== BPF_JSGT
? sval32
: sval32
- 1;
14527 s32 true_smin
= opcode
== BPF_JSGT
? sval32
+ 1 : sval32
;
14529 false_reg
->s32_max_value
= min(false_reg
->s32_max_value
, false_smax
);
14530 true_reg
->s32_min_value
= max(true_reg
->s32_min_value
, true_smin
);
14532 s64 false_smax
= opcode
== BPF_JSGT
? sval
: sval
- 1;
14533 s64 true_smin
= opcode
== BPF_JSGT
? sval
+ 1 : sval
;
14535 false_reg
->smax_value
= min(false_reg
->smax_value
, false_smax
);
14536 true_reg
->smin_value
= max(true_reg
->smin_value
, true_smin
);
14544 u32 false_umin
= opcode
== BPF_JLT
? val32
: val32
+ 1;
14545 u32 true_umax
= opcode
== BPF_JLT
? val32
- 1 : val32
;
14547 false_reg
->u32_min_value
= max(false_reg
->u32_min_value
,
14549 true_reg
->u32_max_value
= min(true_reg
->u32_max_value
,
14552 u64 false_umin
= opcode
== BPF_JLT
? val
: val
+ 1;
14553 u64 true_umax
= opcode
== BPF_JLT
? val
- 1 : val
;
14555 false_reg
->umin_value
= max(false_reg
->umin_value
, false_umin
);
14556 true_reg
->umax_value
= min(true_reg
->umax_value
, true_umax
);
14564 s32 false_smin
= opcode
== BPF_JSLT
? sval32
: sval32
+ 1;
14565 s32 true_smax
= opcode
== BPF_JSLT
? sval32
- 1 : sval32
;
14567 false_reg
->s32_min_value
= max(false_reg
->s32_min_value
, false_smin
);
14568 true_reg
->s32_max_value
= min(true_reg
->s32_max_value
, true_smax
);
14570 s64 false_smin
= opcode
== BPF_JSLT
? sval
: sval
+ 1;
14571 s64 true_smax
= opcode
== BPF_JSLT
? sval
- 1 : sval
;
14573 false_reg
->smin_value
= max(false_reg
->smin_value
, false_smin
);
14574 true_reg
->smax_value
= min(true_reg
->smax_value
, true_smax
);
14583 false_reg
->var_off
= tnum_or(tnum_clear_subreg(false_64off
),
14584 tnum_subreg(false_32off
));
14585 true_reg
->var_off
= tnum_or(tnum_clear_subreg(true_64off
),
14586 tnum_subreg(true_32off
));
14587 __reg_combine_32_into_64(false_reg
);
14588 __reg_combine_32_into_64(true_reg
);
14590 false_reg
->var_off
= false_64off
;
14591 true_reg
->var_off
= true_64off
;
14592 __reg_combine_64_into_32(false_reg
);
14593 __reg_combine_64_into_32(true_reg
);
/* Same as above, but for the case that dst_reg holds a constant and src_reg is
 * the variable reg.
 */
static void reg_set_min_max_inv(struct bpf_reg_state *true_reg,
				struct bpf_reg_state *false_reg,
				u64 val, u32 val32,
				u8 opcode, bool is_jmp32)
{
	opcode = flip_opcode(opcode);
	/* This uses zero as "not present in table"; luckily the zero opcode,
	 * BPF_JA, can't get here.
	 */
	if (opcode)
		reg_set_min_max(true_reg, false_reg, val, val32, opcode, is_jmp32);
}
/* Regs are known to be equal, so intersect their min/max/var_off */
static void __reg_combine_min_max(struct bpf_reg_state *src_reg,
				  struct bpf_reg_state *dst_reg)
{
	src_reg->umin_value = dst_reg->umin_value = max(src_reg->umin_value,
							dst_reg->umin_value);
	src_reg->umax_value = dst_reg->umax_value = min(src_reg->umax_value,
							dst_reg->umax_value);
	src_reg->smin_value = dst_reg->smin_value = max(src_reg->smin_value,
							dst_reg->smin_value);
	src_reg->smax_value = dst_reg->smax_value = min(src_reg->smax_value,
							dst_reg->smax_value);
	src_reg->var_off = dst_reg->var_off = tnum_intersect(src_reg->var_off,
							     dst_reg->var_off);
	reg_bounds_sync(src_reg);
	reg_bounds_sync(dst_reg);
}

static void reg_combine_min_max(struct bpf_reg_state *true_src,
				struct bpf_reg_state *true_dst,
				struct bpf_reg_state *false_src,
				struct bpf_reg_state *false_dst,
				u8 opcode)
{
	switch (opcode) {
	case BPF_JEQ:
		__reg_combine_min_max(true_src, true_dst);
		break;
	case BPF_JNE:
		__reg_combine_min_max(false_src, false_dst);
		break;
	}
}
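
/* Editor's illustrative sketch (not part of the verifier, never compiled):
 * after "if r1 == r2" both registers must hold the same value in the taken
 * branch, so their ranges are intersected: [0, 100] and [50, 200] collapse
 * to [50, 100] for both registers.  Values are made up.
 */
#if 0
static void example_combine_on_jeq(void)
{
	u64 r1_min = 0, r1_max = 100;
	u64 r2_min = 50, r2_max = 200;
	u64 both_min = max(r1_min, r2_min);	/* 50  */
	u64 both_max = min(r1_max, r2_max);	/* 100 */
}
#endif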
14647 static void mark_ptr_or_null_reg(struct bpf_func_state
*state
,
14648 struct bpf_reg_state
*reg
, u32 id
,
14651 if (type_may_be_null(reg
->type
) && reg
->id
== id
&&
14652 (is_rcu_reg(reg
) || !WARN_ON_ONCE(!reg
->id
))) {
14653 /* Old offset (both fixed and variable parts) should have been
14654 * known-zero, because we don't allow pointer arithmetic on
14655 * pointers that might be NULL. If we see this happening, don't
14656 * convert the register.
14658 * But in some cases, some helpers that return local kptrs
14659 * advance offset for the returned pointer. In those cases, it
14660 * is fine to expect to see reg->off.
14662 if (WARN_ON_ONCE(reg
->smin_value
|| reg
->smax_value
|| !tnum_equals_const(reg
->var_off
, 0)))
14664 if (!(type_is_ptr_alloc_obj(reg
->type
) || type_is_non_owning_ref(reg
->type
)) &&
14665 WARN_ON_ONCE(reg
->off
))
14669 reg
->type
= SCALAR_VALUE
;
14670 /* We don't need id and ref_obj_id from this point
14671 * onwards anymore, thus we should better reset it,
14672 * so that state pruning has chances to take effect.
14675 reg
->ref_obj_id
= 0;
14680 mark_ptr_not_null_reg(reg
);
14682 if (!reg_may_point_to_spin_lock(reg
)) {
14683 /* For not-NULL ptr, reg->ref_obj_id will be reset
14684 * in release_reference().
14686 * reg->id is still used by spin_lock ptr. Other
14687 * than spin_lock ptr type, reg->id can be reset.
14694 /* The logic is similar to find_good_pkt_pointers(), both could eventually
14695 * be folded together at some point.
14697 static void mark_ptr_or_null_regs(struct bpf_verifier_state
*vstate
, u32 regno
,
14700 struct bpf_func_state
*state
= vstate
->frame
[vstate
->curframe
];
14701 struct bpf_reg_state
*regs
= state
->regs
, *reg
;
14702 u32 ref_obj_id
= regs
[regno
].ref_obj_id
;
14703 u32 id
= regs
[regno
].id
;
14705 if (ref_obj_id
&& ref_obj_id
== id
&& is_null
)
14706 /* regs[regno] is in the " == NULL" branch.
14707 * No one could have freed the reference state before
14708 * doing the NULL check.
14710 WARN_ON_ONCE(release_reference_state(state
, id
));
14712 bpf_for_each_reg_in_vstate(vstate
, state
, reg
, ({
14713 mark_ptr_or_null_reg(state
, reg
, id
, is_null
);
14717 static bool try_match_pkt_pointers(const struct bpf_insn
*insn
,
14718 struct bpf_reg_state
*dst_reg
,
14719 struct bpf_reg_state
*src_reg
,
14720 struct bpf_verifier_state
*this_branch
,
14721 struct bpf_verifier_state
*other_branch
)
14723 if (BPF_SRC(insn
->code
) != BPF_X
)
14726 /* Pointers are always 64-bit. */
14727 if (BPF_CLASS(insn
->code
) == BPF_JMP32
)
14730 switch (BPF_OP(insn
->code
)) {
14732 if ((dst_reg
->type
== PTR_TO_PACKET
&&
14733 src_reg
->type
== PTR_TO_PACKET_END
) ||
14734 (dst_reg
->type
== PTR_TO_PACKET_META
&&
14735 reg_is_init_pkt_pointer(src_reg
, PTR_TO_PACKET
))) {
14736 /* pkt_data' > pkt_end, pkt_meta' > pkt_data */
14737 find_good_pkt_pointers(this_branch
, dst_reg
,
14738 dst_reg
->type
, false);
14739 mark_pkt_end(other_branch
, insn
->dst_reg
, true);
14740 } else if ((dst_reg
->type
== PTR_TO_PACKET_END
&&
14741 src_reg
->type
== PTR_TO_PACKET
) ||
14742 (reg_is_init_pkt_pointer(dst_reg
, PTR_TO_PACKET
) &&
14743 src_reg
->type
== PTR_TO_PACKET_META
)) {
14744 /* pkt_end > pkt_data', pkt_data > pkt_meta' */
14745 find_good_pkt_pointers(other_branch
, src_reg
,
14746 src_reg
->type
, true);
14747 mark_pkt_end(this_branch
, insn
->src_reg
, false);
14753 if ((dst_reg
->type
== PTR_TO_PACKET
&&
14754 src_reg
->type
== PTR_TO_PACKET_END
) ||
14755 (dst_reg
->type
== PTR_TO_PACKET_META
&&
14756 reg_is_init_pkt_pointer(src_reg
, PTR_TO_PACKET
))) {
14757 /* pkt_data' < pkt_end, pkt_meta' < pkt_data */
14758 find_good_pkt_pointers(other_branch
, dst_reg
,
14759 dst_reg
->type
, true);
14760 mark_pkt_end(this_branch
, insn
->dst_reg
, false);
14761 } else if ((dst_reg
->type
== PTR_TO_PACKET_END
&&
14762 src_reg
->type
== PTR_TO_PACKET
) ||
14763 (reg_is_init_pkt_pointer(dst_reg
, PTR_TO_PACKET
) &&
14764 src_reg
->type
== PTR_TO_PACKET_META
)) {
14765 /* pkt_end < pkt_data', pkt_data > pkt_meta' */
14766 find_good_pkt_pointers(this_branch
, src_reg
,
14767 src_reg
->type
, false);
14768 mark_pkt_end(other_branch
, insn
->src_reg
, true);
14774 if ((dst_reg
->type
== PTR_TO_PACKET
&&
14775 src_reg
->type
== PTR_TO_PACKET_END
) ||
14776 (dst_reg
->type
== PTR_TO_PACKET_META
&&
14777 reg_is_init_pkt_pointer(src_reg
, PTR_TO_PACKET
))) {
14778 /* pkt_data' >= pkt_end, pkt_meta' >= pkt_data */
14779 find_good_pkt_pointers(this_branch
, dst_reg
,
14780 dst_reg
->type
, true);
14781 mark_pkt_end(other_branch
, insn
->dst_reg
, false);
14782 } else if ((dst_reg
->type
== PTR_TO_PACKET_END
&&
14783 src_reg
->type
== PTR_TO_PACKET
) ||
14784 (reg_is_init_pkt_pointer(dst_reg
, PTR_TO_PACKET
) &&
14785 src_reg
->type
== PTR_TO_PACKET_META
)) {
14786 /* pkt_end >= pkt_data', pkt_data >= pkt_meta' */
14787 find_good_pkt_pointers(other_branch
, src_reg
,
14788 src_reg
->type
, false);
14789 mark_pkt_end(this_branch
, insn
->src_reg
, true);
14795 if ((dst_reg
->type
== PTR_TO_PACKET
&&
14796 src_reg
->type
== PTR_TO_PACKET_END
) ||
14797 (dst_reg
->type
== PTR_TO_PACKET_META
&&
14798 reg_is_init_pkt_pointer(src_reg
, PTR_TO_PACKET
))) {
14799 /* pkt_data' <= pkt_end, pkt_meta' <= pkt_data */
14800 find_good_pkt_pointers(other_branch
, dst_reg
,
14801 dst_reg
->type
, false);
14802 mark_pkt_end(this_branch
, insn
->dst_reg
, true);
14803 } else if ((dst_reg
->type
== PTR_TO_PACKET_END
&&
14804 src_reg
->type
== PTR_TO_PACKET
) ||
14805 (reg_is_init_pkt_pointer(dst_reg
, PTR_TO_PACKET
) &&
14806 src_reg
->type
== PTR_TO_PACKET_META
)) {
14807 /* pkt_end <= pkt_data', pkt_data <= pkt_meta' */
14808 find_good_pkt_pointers(this_branch
, src_reg
,
14809 src_reg
->type
, true);
14810 mark_pkt_end(other_branch
, insn
->src_reg
, false);
static void find_equal_scalars(struct bpf_verifier_state *vstate,
			       struct bpf_reg_state *known_reg)
{
	struct bpf_func_state *state;
	struct bpf_reg_state *reg;

	bpf_for_each_reg_in_vstate(vstate, state, reg, ({
		if (reg->type == SCALAR_VALUE && reg->id == known_reg->id)
			copy_register_state(reg, known_reg);
	}));
}
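
/* Editor's illustrative sketch (not part of the verifier, never compiled):
 * registers that were copied from one another share an id, so a range learnt
 * about one of them after a branch is propagated to the others by the helper
 * above.  The jump offset below is only a placeholder.
 */
#if 0
static const struct bpf_insn example_equal_scalars[] = {
	BPF_MOV64_REG(BPF_REG_2, BPF_REG_1),	/* r2 = r1: both get the same id */
	BPF_JMP_IMM(BPF_JGT, BPF_REG_1, 10, 1),	/* if r1 > 10 skip the next insn */
	/* fall-through: r1 <= 10, and via the shared id r2 <= 10 as well */
	BPF_MOV64_IMM(BPF_REG_0, 0),
	BPF_EXIT_INSN(),
};
#endif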
14834 static int check_cond_jmp_op(struct bpf_verifier_env
*env
,
14835 struct bpf_insn
*insn
, int *insn_idx
)
14837 struct bpf_verifier_state
*this_branch
= env
->cur_state
;
14838 struct bpf_verifier_state
*other_branch
;
14839 struct bpf_reg_state
*regs
= this_branch
->frame
[this_branch
->curframe
]->regs
;
14840 struct bpf_reg_state
*dst_reg
, *other_branch_regs
, *src_reg
= NULL
;
14841 struct bpf_reg_state
*eq_branch_regs
;
14842 u8 opcode
= BPF_OP(insn
->code
);
14847 /* Only conditional jumps are expected to reach here. */
14848 if (opcode
== BPF_JA
|| opcode
> BPF_JSLE
) {
14849 verbose(env
, "invalid BPF_JMP/JMP32 opcode %x\n", opcode
);
14853 /* check src2 operand */
14854 err
= check_reg_arg(env
, insn
->dst_reg
, SRC_OP
);
14858 dst_reg
= ®s
[insn
->dst_reg
];
14859 if (BPF_SRC(insn
->code
) == BPF_X
) {
14860 if (insn
->imm
!= 0) {
14861 verbose(env
, "BPF_JMP/JMP32 uses reserved fields\n");
14865 /* check src1 operand */
14866 err
= check_reg_arg(env
, insn
->src_reg
, SRC_OP
);
14870 src_reg
= ®s
[insn
->src_reg
];
14871 if (!(reg_is_pkt_pointer_any(dst_reg
) && reg_is_pkt_pointer_any(src_reg
)) &&
14872 is_pointer_value(env
, insn
->src_reg
)) {
14873 verbose(env
, "R%d pointer comparison prohibited\n",
14878 if (insn
->src_reg
!= BPF_REG_0
) {
14879 verbose(env
, "BPF_JMP/JMP32 uses reserved fields\n");
14884 is_jmp32
= BPF_CLASS(insn
->code
) == BPF_JMP32
;
14886 if (BPF_SRC(insn
->code
) == BPF_K
) {
14887 pred
= is_branch_taken(dst_reg
, insn
->imm
, opcode
, is_jmp32
);
14888 } else if (src_reg
->type
== SCALAR_VALUE
&&
14889 is_jmp32
&& tnum_is_const(tnum_subreg(src_reg
->var_off
))) {
14890 pred
= is_branch_taken(dst_reg
,
14891 tnum_subreg(src_reg
->var_off
).value
,
14894 } else if (src_reg
->type
== SCALAR_VALUE
&&
14895 !is_jmp32
&& tnum_is_const(src_reg
->var_off
)) {
14896 pred
= is_branch_taken(dst_reg
,
14897 src_reg
->var_off
.value
,
14900 } else if (dst_reg
->type
== SCALAR_VALUE
&&
14901 is_jmp32
&& tnum_is_const(tnum_subreg(dst_reg
->var_off
))) {
14902 pred
= is_branch_taken(src_reg
,
14903 tnum_subreg(dst_reg
->var_off
).value
,
14904 flip_opcode(opcode
),
14906 } else if (dst_reg
->type
== SCALAR_VALUE
&&
14907 !is_jmp32
&& tnum_is_const(dst_reg
->var_off
)) {
14908 pred
= is_branch_taken(src_reg
,
14909 dst_reg
->var_off
.value
,
14910 flip_opcode(opcode
),
14912 } else if (reg_is_pkt_pointer_any(dst_reg
) &&
14913 reg_is_pkt_pointer_any(src_reg
) &&
14915 pred
= is_pkt_ptr_branch_taken(dst_reg
, src_reg
, opcode
);
14919 /* If we get here with a dst_reg pointer type it is because
14920 * above is_branch_taken() special cased the 0 comparison.
14922 if (!__is_pointer_value(false, dst_reg
))
14923 err
= mark_chain_precision(env
, insn
->dst_reg
);
14924 if (BPF_SRC(insn
->code
) == BPF_X
&& !err
&&
14925 !__is_pointer_value(false, src_reg
))
14926 err
= mark_chain_precision(env
, insn
->src_reg
);
14932 /* Only follow the goto, ignore fall-through. If needed, push
14933 * the fall-through branch for simulation under speculative
14936 if (!env
->bypass_spec_v1
&&
14937 !sanitize_speculative_path(env
, insn
, *insn_idx
+ 1,
14940 if (env
->log
.level
& BPF_LOG_LEVEL
)
14941 print_insn_state(env
, this_branch
->frame
[this_branch
->curframe
]);
14942 *insn_idx
+= insn
->off
;
14944 } else if (pred
== 0) {
14945 /* Only follow the fall-through branch, since that's where the
14946 * program will go. If needed, push the goto branch for
14947 * simulation under speculative execution.
14949 if (!env
->bypass_spec_v1
&&
14950 !sanitize_speculative_path(env
, insn
,
14951 *insn_idx
+ insn
->off
+ 1,
14954 if (env
->log
.level
& BPF_LOG_LEVEL
)
14955 print_insn_state(env
, this_branch
->frame
[this_branch
->curframe
]);
14959 other_branch
= push_stack(env
, *insn_idx
+ insn
->off
+ 1, *insn_idx
,
14963 other_branch_regs
= other_branch
->frame
[other_branch
->curframe
]->regs
;
14965 /* detect if we are comparing against a constant value so we can adjust
14966 * our min/max values for our dst register.
14967 * this is only legit if both are scalars (or pointers to the same
14968 * object, I suppose, see the PTR_MAYBE_NULL related if block below),
14969 * because otherwise the different base pointers mean the offsets aren't
14972 if (BPF_SRC(insn
->code
) == BPF_X
) {
14973 struct bpf_reg_state
*src_reg
= ®s
[insn
->src_reg
];
14975 if (dst_reg
->type
== SCALAR_VALUE
&&
14976 src_reg
->type
== SCALAR_VALUE
) {
14977 if (tnum_is_const(src_reg
->var_off
) ||
14979 tnum_is_const(tnum_subreg(src_reg
->var_off
))))
14980 reg_set_min_max(&other_branch_regs
[insn
->dst_reg
],
14982 src_reg
->var_off
.value
,
14983 tnum_subreg(src_reg
->var_off
).value
,
14985 else if (tnum_is_const(dst_reg
->var_off
) ||
14987 tnum_is_const(tnum_subreg(dst_reg
->var_off
))))
14988 reg_set_min_max_inv(&other_branch_regs
[insn
->src_reg
],
14990 dst_reg
->var_off
.value
,
14991 tnum_subreg(dst_reg
->var_off
).value
,
14993 else if (!is_jmp32
&&
14994 (opcode
== BPF_JEQ
|| opcode
== BPF_JNE
))
14995 /* Comparing for equality, we can combine knowledge */
14996 reg_combine_min_max(&other_branch_regs
[insn
->src_reg
],
14997 &other_branch_regs
[insn
->dst_reg
],
14998 src_reg
, dst_reg
, opcode
);
15000 !WARN_ON_ONCE(src_reg
->id
!= other_branch_regs
[insn
->src_reg
].id
)) {
15001 find_equal_scalars(this_branch
, src_reg
);
15002 find_equal_scalars(other_branch
, &other_branch_regs
[insn
->src_reg
]);
15006 } else if (dst_reg
->type
== SCALAR_VALUE
) {
15007 reg_set_min_max(&other_branch_regs
[insn
->dst_reg
],
15008 dst_reg
, insn
->imm
, (u32
)insn
->imm
,
15012 if (dst_reg
->type
== SCALAR_VALUE
&& dst_reg
->id
&&
15013 !WARN_ON_ONCE(dst_reg
->id
!= other_branch_regs
[insn
->dst_reg
].id
)) {
15014 find_equal_scalars(this_branch
, dst_reg
);
15015 find_equal_scalars(other_branch
, &other_branch_regs
[insn
->dst_reg
]);
15018 /* if one pointer register is compared to another pointer
15019 * register check if PTR_MAYBE_NULL could be lifted.
15020 * E.g. register A - maybe null
15021 * register B - not null
15022 * for JNE A, B, ... - A is not null in the false branch;
15023 * for JEQ A, B, ... - A is not null in the true branch.
15025 * Since PTR_TO_BTF_ID points to a kernel struct that does
15026 * not need to be null checked by the BPF program, i.e.,
15027 * could be null even without PTR_MAYBE_NULL marking, so
15028 * only propagate nullness when neither reg is that type.
15030 if (!is_jmp32
&& BPF_SRC(insn
->code
) == BPF_X
&&
15031 __is_pointer_value(false, src_reg
) && __is_pointer_value(false, dst_reg
) &&
15032 type_may_be_null(src_reg
->type
) != type_may_be_null(dst_reg
->type
) &&
15033 base_type(src_reg
->type
) != PTR_TO_BTF_ID
&&
15034 base_type(dst_reg
->type
) != PTR_TO_BTF_ID
) {
15035 eq_branch_regs
= NULL
;
15038 eq_branch_regs
= other_branch_regs
;
15041 eq_branch_regs
= regs
;
15047 if (eq_branch_regs
) {
15048 if (type_may_be_null(src_reg
->type
))
15049 mark_ptr_not_null_reg(&eq_branch_regs
[insn
->src_reg
]);
15051 mark_ptr_not_null_reg(&eq_branch_regs
[insn
->dst_reg
]);
15055 /* detect if R == 0 where R is returned from bpf_map_lookup_elem().
15056 * NOTE: these optimizations below are related with pointer comparison
15057 * which will never be JMP32.
15059 if (!is_jmp32
&& BPF_SRC(insn
->code
) == BPF_K
&&
15060 insn
->imm
== 0 && (opcode
== BPF_JEQ
|| opcode
== BPF_JNE
) &&
15061 type_may_be_null(dst_reg
->type
)) {
15062 /* Mark all identical registers in each branch as either
15063 * safe or unknown depending R == 0 or R != 0 conditional.
15065 mark_ptr_or_null_regs(this_branch
, insn
->dst_reg
,
15066 opcode
== BPF_JNE
);
15067 mark_ptr_or_null_regs(other_branch
, insn
->dst_reg
,
15068 opcode
== BPF_JEQ
);
15069 } else if (!try_match_pkt_pointers(insn
, dst_reg
, ®s
[insn
->src_reg
],
15070 this_branch
, other_branch
) &&
15071 is_pointer_value(env
, insn
->dst_reg
)) {
15072 verbose(env
, "R%d pointer comparison prohibited\n",
15076 if (env
->log
.level
& BPF_LOG_LEVEL
)
15077 print_insn_state(env
, this_branch
->frame
[this_branch
->curframe
]);
15081 /* verify BPF_LD_IMM64 instruction */
15082 static int check_ld_imm(struct bpf_verifier_env
*env
, struct bpf_insn
*insn
)
15084 struct bpf_insn_aux_data
*aux
= cur_aux(env
);
15085 struct bpf_reg_state
*regs
= cur_regs(env
);
15086 struct bpf_reg_state
*dst_reg
;
15087 struct bpf_map
*map
;
15090 if (BPF_SIZE(insn
->code
) != BPF_DW
) {
15091 verbose(env
, "invalid BPF_LD_IMM insn\n");
15094 if (insn
->off
!= 0) {
15095 verbose(env
, "BPF_LD_IMM64 uses reserved fields\n");
15099 err
= check_reg_arg(env
, insn
->dst_reg
, DST_OP
);
15103 dst_reg
= ®s
[insn
->dst_reg
];
15104 if (insn
->src_reg
== 0) {
15105 u64 imm
= ((u64
)(insn
+ 1)->imm
<< 32) | (u32
)insn
->imm
;
15107 dst_reg
->type
= SCALAR_VALUE
;
15108 __mark_reg_known(®s
[insn
->dst_reg
], imm
);
15112 /* All special src_reg cases are listed below. From this point onwards
15113 * we either succeed and assign a corresponding dst_reg->type after
15114 * zeroing the offset, or fail and reject the program.
15116 mark_reg_known_zero(env
, regs
, insn
->dst_reg
);
15118 if (insn
->src_reg
== BPF_PSEUDO_BTF_ID
) {
15119 dst_reg
->type
= aux
->btf_var
.reg_type
;
15120 switch (base_type(dst_reg
->type
)) {
15122 dst_reg
->mem_size
= aux
->btf_var
.mem_size
;
15124 case PTR_TO_BTF_ID
:
15125 dst_reg
->btf
= aux
->btf_var
.btf
;
15126 dst_reg
->btf_id
= aux
->btf_var
.btf_id
;
15129 verbose(env
, "bpf verifier is misconfigured\n");
15135 if (insn
->src_reg
== BPF_PSEUDO_FUNC
) {
15136 struct bpf_prog_aux
*aux
= env
->prog
->aux
;
15137 u32 subprogno
= find_subprog(env
,
15138 env
->insn_idx
+ insn
->imm
+ 1);
15140 if (!aux
->func_info
) {
15141 verbose(env
, "missing btf func_info\n");
15144 if (aux
->func_info_aux
[subprogno
].linkage
!= BTF_FUNC_STATIC
) {
15145 verbose(env
, "callback function not static\n");
15149 dst_reg
->type
= PTR_TO_FUNC
;
15150 dst_reg
->subprogno
= subprogno
;
15154 map
= env
->used_maps
[aux
->map_index
];
15155 dst_reg
->map_ptr
= map
;
15157 if (insn
->src_reg
== BPF_PSEUDO_MAP_VALUE
||
15158 insn
->src_reg
== BPF_PSEUDO_MAP_IDX_VALUE
) {
15159 dst_reg
->type
= PTR_TO_MAP_VALUE
;
15160 dst_reg
->off
= aux
->map_off
;
15161 WARN_ON_ONCE(map
->max_entries
!= 1);
15162 /* We want reg->id to be same (0) as map_value is not distinct */
15163 } else if (insn
->src_reg
== BPF_PSEUDO_MAP_FD
||
15164 insn
->src_reg
== BPF_PSEUDO_MAP_IDX
) {
15165 dst_reg
->type
= CONST_PTR_TO_MAP
;
15167 verbose(env
, "bpf verifier is misconfigured\n");
15174 static bool may_access_skb(enum bpf_prog_type type
)
15177 case BPF_PROG_TYPE_SOCKET_FILTER
:
15178 case BPF_PROG_TYPE_SCHED_CLS
:
15179 case BPF_PROG_TYPE_SCHED_ACT
:
15186 /* verify safety of LD_ABS|LD_IND instructions:
15187 * - they can only appear in the programs where ctx == skb
15188 * - since they are wrappers of function calls, they scratch R1-R5 registers,
15189 * preserve R6-R9, and store return value into R0
15192 * ctx == skb == R6 == CTX
15195 * SRC == any register
15196 * IMM == 32-bit immediate
15199 * R0 - 8/16/32-bit skb data converted to cpu endianness
15201 static int check_ld_abs(struct bpf_verifier_env
*env
, struct bpf_insn
*insn
)
15203 struct bpf_reg_state
*regs
= cur_regs(env
);
15204 static const int ctx_reg
= BPF_REG_6
;
15205 u8 mode
= BPF_MODE(insn
->code
);
15208 if (!may_access_skb(resolve_prog_type(env
->prog
))) {
15209 verbose(env
, "BPF_LD_[ABS|IND] instructions not allowed for this program type\n");
15213 if (!env
->ops
->gen_ld_abs
) {
15214 verbose(env
, "bpf verifier is misconfigured\n");
15218 if (insn
->dst_reg
!= BPF_REG_0
|| insn
->off
!= 0 ||
15219 BPF_SIZE(insn
->code
) == BPF_DW
||
15220 (mode
== BPF_ABS
&& insn
->src_reg
!= BPF_REG_0
)) {
15221 verbose(env
, "BPF_LD_[ABS|IND] uses reserved fields\n");
15225 /* check whether implicit source operand (register R6) is readable */
15226 err
= check_reg_arg(env
, ctx_reg
, SRC_OP
);
15230 /* Disallow usage of BPF_LD_[ABS|IND] with reference tracking, as
15231 * gen_ld_abs() may terminate the program at runtime, leading to
15234 err
= check_reference_leak(env
, false);
15236 verbose(env
, "BPF_LD_[ABS|IND] cannot be mixed with socket references\n");
15240 if (env
->cur_state
->active_lock
.ptr
) {
15241 verbose(env
, "BPF_LD_[ABS|IND] cannot be used inside bpf_spin_lock-ed region\n");
15245 if (env
->cur_state
->active_rcu_lock
) {
15246 verbose(env
, "BPF_LD_[ABS|IND] cannot be used inside bpf_rcu_read_lock-ed region\n");
15250 if (regs
[ctx_reg
].type
!= PTR_TO_CTX
) {
15252 "at the time of BPF_LD_ABS|IND R6 != pointer to skb\n");
15256 if (mode
== BPF_IND
) {
15257 /* check explicit source operand */
15258 err
= check_reg_arg(env
, insn
->src_reg
, SRC_OP
);
15263 err
= check_ptr_off_reg(env
, ®s
[ctx_reg
], ctx_reg
);
15267 /* reset caller saved regs to unreadable */
15268 for (i
= 0; i
< CALLER_SAVED_REGS
; i
++) {
15269 mark_reg_not_init(env
, regs
, caller_saved
[i
]);
15270 check_reg_arg(env
, caller_saved
[i
], DST_OP_NO_MARK
);
15273 /* mark destination R0 register as readable, since it contains
15274 * the value fetched from the packet.
15275 * Already marked as written above.
15277 mark_reg_unknown(env
, regs
, BPF_REG_0
);
15278 /* ld_abs load up to 32-bit skb data. */
15279 regs
[BPF_REG_0
].subreg_def
= env
->insn_idx
+ 1;
15283 static int check_return_code(struct bpf_verifier_env
*env
, int regno
)
15285 struct tnum enforce_attach_type_range
= tnum_unknown
;
15286 const struct bpf_prog
*prog
= env
->prog
;
15287 struct bpf_reg_state
*reg
;
15288 struct tnum range
= tnum_range(0, 1), const_0
= tnum_const(0);
15289 enum bpf_prog_type prog_type
= resolve_prog_type(env
->prog
);
15291 struct bpf_func_state
*frame
= env
->cur_state
->frame
[0];
15292 const bool is_subprog
= frame
->subprogno
;
15294 /* LSM and struct_ops func-ptr's return type could be "void" */
15295 if (!is_subprog
|| frame
->in_exception_callback_fn
) {
15296 switch (prog_type
) {
15297 case BPF_PROG_TYPE_LSM
:
15298 if (prog
->expected_attach_type
== BPF_LSM_CGROUP
)
15299 /* See below, can be 0 or 0-1 depending on hook. */
15302 case BPF_PROG_TYPE_STRUCT_OPS
:
15303 if (!prog
->aux
->attach_func_proto
->type
)
15311 /* eBPF calling convention is such that R0 is used
15312 * to return the value from eBPF program.
15313 * Make sure that it's readable at this time
15314 * of bpf_exit, which means that program wrote
15315 * something into it earlier
15317 err
= check_reg_arg(env
, regno
, SRC_OP
);
15321 if (is_pointer_value(env
, regno
)) {
15322 verbose(env
, "R%d leaks addr as return value\n", regno
);
15326 reg
= cur_regs(env
) + regno
;
15328 if (frame
->in_async_callback_fn
) {
15329 /* enforce return zero from async callbacks like timer */
15330 if (reg
->type
!= SCALAR_VALUE
) {
15331 verbose(env
, "In async callback the register R%d is not a known value (%s)\n",
15332 regno
, reg_type_str(env
, reg
->type
));
15336 if (!tnum_in(const_0
, reg
->var_off
)) {
15337 verbose_invalid_scalar(env
, reg
, &const_0
, "async callback", "R0");
15343 if (is_subprog
&& !frame
->in_exception_callback_fn
) {
15344 if (reg
->type
!= SCALAR_VALUE
) {
15345 verbose(env
, "At subprogram exit the register R%d is not a scalar value (%s)\n",
15346 regno
, reg_type_str(env
, reg
->type
));
15352 switch (prog_type
) {
15353 case BPF_PROG_TYPE_CGROUP_SOCK_ADDR
:
15354 if (env
->prog
->expected_attach_type
== BPF_CGROUP_UDP4_RECVMSG
||
15355 env
->prog
->expected_attach_type
== BPF_CGROUP_UDP6_RECVMSG
||
15356 env
->prog
->expected_attach_type
== BPF_CGROUP_UNIX_RECVMSG
||
15357 env
->prog
->expected_attach_type
== BPF_CGROUP_INET4_GETPEERNAME
||
15358 env
->prog
->expected_attach_type
== BPF_CGROUP_INET6_GETPEERNAME
||
15359 env
->prog
->expected_attach_type
== BPF_CGROUP_UNIX_GETPEERNAME
||
15360 env
->prog
->expected_attach_type
== BPF_CGROUP_INET4_GETSOCKNAME
||
15361 env
->prog
->expected_attach_type
== BPF_CGROUP_INET6_GETSOCKNAME
||
15362 env
->prog
->expected_attach_type
== BPF_CGROUP_UNIX_GETSOCKNAME
)
15363 range
= tnum_range(1, 1);
15364 if (env
->prog
->expected_attach_type
== BPF_CGROUP_INET4_BIND
||
15365 env
->prog
->expected_attach_type
== BPF_CGROUP_INET6_BIND
)
15366 range
= tnum_range(0, 3);
15368 case BPF_PROG_TYPE_CGROUP_SKB
:
15369 if (env
->prog
->expected_attach_type
== BPF_CGROUP_INET_EGRESS
) {
15370 range
= tnum_range(0, 3);
15371 enforce_attach_type_range
= tnum_range(2, 3);
15374 case BPF_PROG_TYPE_CGROUP_SOCK
:
15375 case BPF_PROG_TYPE_SOCK_OPS
:
15376 case BPF_PROG_TYPE_CGROUP_DEVICE
:
15377 case BPF_PROG_TYPE_CGROUP_SYSCTL
:
15378 case BPF_PROG_TYPE_CGROUP_SOCKOPT
:
15380 case BPF_PROG_TYPE_RAW_TRACEPOINT
:
15381 if (!env
->prog
->aux
->attach_btf_id
)
15383 range
= tnum_const(0);
15385 case BPF_PROG_TYPE_TRACING
:
15386 switch (env
->prog
->expected_attach_type
) {
15387 case BPF_TRACE_FENTRY
:
15388 case BPF_TRACE_FEXIT
:
15389 range
= tnum_const(0);
15391 case BPF_TRACE_RAW_TP
:
15392 case BPF_MODIFY_RETURN
:
15394 case BPF_TRACE_ITER
:
15400 case BPF_PROG_TYPE_SK_LOOKUP
:
15401 range
= tnum_range(SK_DROP
, SK_PASS
);
15404 case BPF_PROG_TYPE_LSM
:
15405 if (env
->prog
->expected_attach_type
!= BPF_LSM_CGROUP
) {
15406 /* Regular BPF_PROG_TYPE_LSM programs can return
15411 if (!env
->prog
->aux
->attach_func_proto
->type
) {
15412 /* Make sure programs that attach to void
15413 * hooks don't try to modify return value.
15415 range
= tnum_range(1, 1);
15419 case BPF_PROG_TYPE_NETFILTER
:
15420 range
= tnum_range(NF_DROP
, NF_ACCEPT
);
15422 case BPF_PROG_TYPE_EXT
:
15423 /* freplace program can return anything as its return value
15424 * depends on the to-be-replaced kernel func or bpf program.
15430 if (reg
->type
!= SCALAR_VALUE
) {
15431 verbose(env
, "At program exit the register R%d is not a known value (%s)\n",
15432 regno
, reg_type_str(env
, reg
->type
));
15436 if (!tnum_in(range
, reg
->var_off
)) {
15437 verbose_invalid_scalar(env
, reg
, &range
, "program exit", "R0");
15438 if (prog
->expected_attach_type
== BPF_LSM_CGROUP
&&
15439 prog_type
== BPF_PROG_TYPE_LSM
&&
15440 !prog
->aux
->attach_func_proto
->type
)
15441 verbose(env
, "Note, BPF_LSM_CGROUP that attach to void LSM hooks can't modify return value!\n");
15445 if (!tnum_is_unknown(enforce_attach_type_range
) &&
15446 tnum_in(enforce_attach_type_range
, reg
->var_off
))
15447 env
->prog
->enforce_expected_attach_type
= 1;
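
/* Editor's illustrative sketch (not part of the verifier, never compiled):
 * the acceptable return values above are encoded as a tnum range and the
 * final check is a containment test, e.g. a cgroup/bind program returning a
 * constant 2 passes because 2 lies within the allowed range [0, 3].
 */
#if 0
static bool example_retval_in_range(void)
{
	struct tnum range = tnum_range(0, 3);	/* allowed return values      */
	struct tnum r0 = tnum_const(2);		/* R0 known to be exactly 2   */

	return tnum_in(range, r0);		/* true: 2 is within [0, 3]   */
}
#endif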
/* non-recursive DFS pseudo code
 * 1  procedure DFS-iterative(G,v):
 * 2      label v as discovered
 * 3      let S be a stack
 * 4      S.push(v)
 * 5      while S is not empty
 * 6            t <- S.peek()
 * 7            if t is what we're looking for:
 * 8                return t
 * 9            for all edges e in G.adjacentEdges(t) do
 * 10               if edge e is already labelled
 * 11                   continue with the next edge
 * 12               w <- G.adjacentVertex(t,e)
 * 13               if vertex w is not discovered and not explored
 * 14                   label e as tree-edge
 * 15                   label w as discovered
 * 16                   S.push(w)
 * 17                   continue
 * 18               else if vertex w is discovered
 * 19                   label e as back-edge
 * 20               else
 * 21                   // vertex w is explored
 * 22                   label e as forward- or cross-edge
 * 23           label t as explored
 * 24           S.pop()
 *
 * convention:
 * 0x10 - discovered
 * 0x11 - discovered and fall-through edge labelled
 * 0x12 - discovered and fall-through and branch edges labelled
 * 0x20 - explored
 */
static void mark_prune_point(struct bpf_verifier_env *env, int idx)
{
	env->insn_aux_data[idx].prune_point = true;
}

static bool is_prune_point(struct bpf_verifier_env *env, int insn_idx)
{
	return env->insn_aux_data[insn_idx].prune_point;
}

static void mark_force_checkpoint(struct bpf_verifier_env *env, int idx)
{
	env->insn_aux_data[idx].force_checkpoint = true;
}

static bool is_force_checkpoint(struct bpf_verifier_env *env, int insn_idx)
{
	return env->insn_aux_data[insn_idx].force_checkpoint;
}

static void mark_calls_callback(struct bpf_verifier_env *env, int idx)
{
	env->insn_aux_data[idx].calls_callback = true;
}

static bool calls_callback(struct bpf_verifier_env *env, int insn_idx)
{
	return env->insn_aux_data[insn_idx].calls_callback;
}
enum {
	DONE_EXPLORING = 0,
	KEEP_EXPLORING = 1,
};
/* t, w, e - match pseudo-code above:
 * t - index of current instruction
 * w - next instruction
 * e - edge
 */
static int push_insn(int t, int w, int e, struct bpf_verifier_env *env)
{
	int *insn_stack = env->cfg.insn_stack;
	int *insn_state = env->cfg.insn_state;

	if (e == FALLTHROUGH && insn_state[t] >= (DISCOVERED | FALLTHROUGH))
		return DONE_EXPLORING;

	if (e == BRANCH && insn_state[t] >= (DISCOVERED | BRANCH))
		return DONE_EXPLORING;

	if (w < 0 || w >= env->prog->len) {
		verbose_linfo(env, t, "%d: ", t);
		verbose(env, "jump out of range from insn %d to %d\n", t, w);
		return -EINVAL;
	}

	if (e == BRANCH) {
		/* mark branch target for state pruning */
		mark_prune_point(env, w);
		mark_jmp_point(env, w);
	}

	if (insn_state[w] == 0) {
		/* tree-edge */
		insn_state[t] = DISCOVERED | e;
		insn_state[w] = DISCOVERED;
		if (env->cfg.cur_stack >= env->prog->len)
			return -E2BIG;
		insn_stack[env->cfg.cur_stack++] = w;
		return KEEP_EXPLORING;
	} else if ((insn_state[w] & 0xF0) == DISCOVERED) {
		if (env->bpf_capable)
			return DONE_EXPLORING;
		verbose_linfo(env, t, "%d: ", t);
		verbose_linfo(env, w, "%d: ", w);
		verbose(env, "back-edge from insn %d to %d\n", t, w);
		return -EINVAL;
	} else if (insn_state[w] == EXPLORED) {
		/* forward- or cross-edge */
		insn_state[t] = DISCOVERED | e;
	} else {
		verbose(env, "insn state internal bug\n");
		return -EFAULT;
	}
	return DONE_EXPLORING;
}
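
/* Editor's illustrative sketch (not part of the verifier, never compiled):
 * insn_state[] packs the convention from the DFS comment above: the upper
 * nibble records discovered/explored and the low bits record which outgoing
 * edge (fall-through, then branch) was labelled last.
 */
#if 0
static void example_insn_state_encoding(int *insn_state, int t)
{
	insn_state[t] = DISCOVERED;			/* 0x10: just found		*/
	insn_state[t] = DISCOVERED | FALLTHROUGH;	/* 0x11: fall-through labelled	*/
	insn_state[t] = DISCOVERED | BRANCH;		/* 0x12: branch labelled too	*/
	insn_state[t] = EXPLORED;			/* 0x20: fully processed	*/
}
#endif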
static int visit_func_call_insn(int t, struct bpf_insn *insns,
				struct bpf_verifier_env *env,
				bool visit_callee)
{
	int ret, insn_sz;

	insn_sz = bpf_is_ldimm64(&insns[t]) ? 2 : 1;
	ret = push_insn(t, t + insn_sz, FALLTHROUGH, env);
	if (ret)
		return ret;

	mark_prune_point(env, t + insn_sz);
	/* when we exit from subprog, we need to record non-linear history */
	mark_jmp_point(env, t + insn_sz);

	if (visit_callee) {
		mark_prune_point(env, t);
		ret = push_insn(t, t + insns[t].imm + 1, BRANCH, env);
	}
	return ret;
}
/* Visits the instruction at index t and returns one of the following:
 *  < 0 - an error occurred
 *  DONE_EXPLORING - the instruction was fully explored
 *  KEEP_EXPLORING - there is still work to be done before it is fully explored
 */
static int visit_insn(int t, struct bpf_verifier_env *env)
{
	struct bpf_insn *insns = env->prog->insnsi, *insn = &insns[t];
	int ret, off, insn_sz;

	if (bpf_pseudo_func(insn))
		return visit_func_call_insn(t, insns, env, true);

	/* All non-branch instructions have a single fall-through edge. */
	if (BPF_CLASS(insn->code) != BPF_JMP &&
	    BPF_CLASS(insn->code) != BPF_JMP32) {
		insn_sz = bpf_is_ldimm64(insn) ? 2 : 1;
		return push_insn(t, t + insn_sz, FALLTHROUGH, env);
	}

	switch (BPF_OP(insn->code)) {
	case BPF_EXIT:
		return DONE_EXPLORING;

	case BPF_CALL:
		if (insn->src_reg == 0 && insn->imm == BPF_FUNC_timer_set_callback)
			/* Mark this call insn as a prune point to trigger
			 * is_state_visited() check before call itself is
			 * processed by __check_func_call(). Otherwise new
			 * async state will be pushed for further exploration.
			 */
			mark_prune_point(env, t);
		/* For functions that invoke callbacks it is not known how many times
		 * callback would be called. Verifier models callback calling functions
		 * by repeatedly visiting callback bodies and returning to origin call
		 * instruction.
		 * In order to stop such iteration verifier needs to identify when a
		 * state identical to some state from a previous iteration is reached.
		 * Check below forces creation of checkpoint before callback calling
		 * instruction to allow search for such identical states.
		 */
		if (is_sync_callback_calling_insn(insn)) {
			mark_calls_callback(env, t);
			mark_force_checkpoint(env, t);
			mark_prune_point(env, t);
			mark_jmp_point(env, t);
		}
		if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL) {
			struct bpf_kfunc_call_arg_meta meta;

			ret = fetch_kfunc_meta(env, insn, &meta, NULL);
			if (ret == 0 && is_iter_next_kfunc(&meta)) {
				mark_prune_point(env, t);
				/* Checking and saving state checkpoints at iter_next() call
				 * is crucial for fast convergence of open-coded iterator loop
				 * logic, so we need to force it. If we don't do that,
				 * is_state_visited() might skip saving a checkpoint, causing
				 * unnecessarily long sequence of not checkpointed
				 * instructions and jumps, leading to exhaustion of jump
				 * history buffer, and potentially other undesired outcomes.
				 * It is expected that with correct open-coded iterators
				 * convergence will happen quickly, so we don't run a risk of
				 * exhausting memory.
				 */
				mark_force_checkpoint(env, t);
			}
		}
		return visit_func_call_insn(t, insns, env, insn->src_reg == BPF_PSEUDO_CALL);

	case BPF_JA:
		if (BPF_SRC(insn->code) != BPF_K)
			return -EINVAL;

		if (BPF_CLASS(insn->code) == BPF_JMP)
			off = insn->off;
		else
			off = insn->imm;

		/* unconditional jump with single edge */
		ret = push_insn(t, t + off + 1, FALLTHROUGH, env);
		if (ret)
			return ret;

		mark_prune_point(env, t + off + 1);
		mark_jmp_point(env, t + off + 1);

		return ret;

	default:
		/* conditional jump with two edges */
		mark_prune_point(env, t);

		ret = push_insn(t, t + 1, FALLTHROUGH, env);
		if (ret)
			return ret;

		return push_insn(t, t + insn->off + 1, BRANCH, env);
	}
}
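/* Ex. (illustrative walk-through, not taken from a real program): for a
 * conditional jump "5: if r1 > 7 goto +3" visit_insn() marks insn 5 as a
 * prune point and pushes two edges, the fall-through successor 6 first and
 * the branch target 9 second, so check_cfg() explores both paths
 * depth-first while looking for back-edges.
 */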
/* non-recursive depth-first-search to detect loops in BPF program
 * loop == back-edge in directed graph
 */
static int check_cfg(struct bpf_verifier_env *env)
{
	int insn_cnt = env->prog->len;
	int *insn_stack, *insn_state;
	int ex_insn_beg, i, ret = 0;
	bool ex_done = false;

	insn_state = env->cfg.insn_state = kvcalloc(insn_cnt, sizeof(int), GFP_KERNEL);
	if (!insn_state)
		return -ENOMEM;

	insn_stack = env->cfg.insn_stack = kvcalloc(insn_cnt, sizeof(int), GFP_KERNEL);
	if (!insn_stack) {
		kvfree(insn_state);
		return -ENOMEM;
	}

	insn_state[0] = DISCOVERED; /* mark 1st insn as discovered */
	insn_stack[0] = 0; /* 0 is the first instruction */
	env->cfg.cur_stack = 1;

walk_cfg:
	while (env->cfg.cur_stack > 0) {
		int t = insn_stack[env->cfg.cur_stack - 1];

		ret = visit_insn(t, env);
		switch (ret) {
		case DONE_EXPLORING:
			insn_state[t] = EXPLORED;
			env->cfg.cur_stack--;
			break;
		case KEEP_EXPLORING:
			break;
		default:
			if (ret > 0) {
				verbose(env, "visit_insn internal bug\n");
				ret = -EFAULT;
			}
			goto err_free;
		}
	}

	if (env->cfg.cur_stack < 0) {
		verbose(env, "pop stack internal bug\n");
		ret = -EFAULT;
		goto err_free;
	}

	if (env->exception_callback_subprog && !ex_done) {
		ex_insn_beg = env->subprog_info[env->exception_callback_subprog].start;

		insn_state[ex_insn_beg] = DISCOVERED;
		insn_stack[0] = ex_insn_beg;
		env->cfg.cur_stack = 1;
		ex_done = true;
		goto walk_cfg;
	}

	for (i = 0; i < insn_cnt; i++) {
		struct bpf_insn *insn = &env->prog->insnsi[i];

		if (insn_state[i] != EXPLORED) {
			verbose(env, "unreachable insn %d\n", i);
			ret = -EINVAL;
			goto err_free;
		}
		if (bpf_is_ldimm64(insn)) {
			if (insn_state[i + 1] != 0) {
				verbose(env, "jump into the middle of ldimm64 insn %d\n", i);
				ret = -EINVAL;
				goto err_free;
			}
			i++; /* skip second half of ldimm64 */
		}
	}
	ret = 0; /* cfg looks good */

err_free:
	kvfree(insn_state);
	kvfree(insn_stack);
	env->cfg.insn_state = env->cfg.insn_stack = NULL;
	return ret;
}
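/* Ex. (hypothetical 3-insn program, for illustration only):
 *   0: r0 = 0              insn_state[0]: DISCOVERED -> EXPLORED
 *   1: if r1 == 0 goto +0   insn_state[1]: DISCOVERED -> EXPLORED
 *   2: exit                insn_state[2]: DISCOVERED -> EXPLORED
 * Every slot ends up EXPLORED and no back-edge is found, so check_cfg()
 * accepts the program; a jump from insn 2 back to insn 0 would instead be
 * reported as a back-edge (loop) while pushing edges.
 */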
static int check_abnormal_return(struct bpf_verifier_env *env)
{
	int i;

	for (i = 1; i < env->subprog_cnt; i++) {
		if (env->subprog_info[i].has_ld_abs) {
			verbose(env, "LD_ABS is not allowed in subprogs without BTF\n");
			return -EINVAL;
		}
		if (env->subprog_info[i].has_tail_call) {
			verbose(env, "tail_call is not allowed in subprogs without BTF\n");
			return -EINVAL;
		}
	}
	return 0;
}
/* The minimum supported BTF func info size */
#define MIN_BPF_FUNCINFO_SIZE	8
#define MAX_FUNCINFO_REC_SIZE	252

static int check_btf_func_early(struct bpf_verifier_env *env,
				const union bpf_attr *attr,
				bpfptr_t uattr)
{
	u32 krec_size = sizeof(struct bpf_func_info);
	const struct btf_type *type, *func_proto;
	u32 i, nfuncs, urec_size, min_size;
	struct bpf_func_info *krecord;
	struct bpf_prog *prog;
	const struct btf *btf;
	u32 prev_offset = 0;
	bpfptr_t urecord;
	int ret = -ENOMEM;

	nfuncs = attr->func_info_cnt;
	if (!nfuncs) {
		if (check_abnormal_return(env))
			return -EINVAL;
		return 0;
	}

	urec_size = attr->func_info_rec_size;
	if (urec_size < MIN_BPF_FUNCINFO_SIZE ||
	    urec_size > MAX_FUNCINFO_REC_SIZE ||
	    urec_size % sizeof(u32)) {
		verbose(env, "invalid func info rec size %u\n", urec_size);
		return -EINVAL;
	}

	prog = env->prog;
	btf = prog->aux->btf;

	urecord = make_bpfptr(attr->func_info, uattr.is_kernel);
	min_size = min_t(u32, krec_size, urec_size);

	krecord = kvcalloc(nfuncs, krec_size, GFP_KERNEL | __GFP_NOWARN);
	if (!krecord)
		return -ENOMEM;

	for (i = 0; i < nfuncs; i++) {
		ret = bpf_check_uarg_tail_zero(urecord, krec_size, urec_size);
		if (ret) {
			if (ret == -E2BIG) {
				verbose(env, "nonzero tailing record in func info");
				/* set the size kernel expects so loader can zero
				 * out the rest of the record.
				 */
				if (copy_to_bpfptr_offset(uattr,
							  offsetof(union bpf_attr, func_info_rec_size),
							  &min_size, sizeof(min_size)))
					ret = -EFAULT;
			}
			goto err_free;
		}

		if (copy_from_bpfptr(&krecord[i], urecord, min_size)) {
			ret = -EFAULT;
			goto err_free;
		}

		/* check insn_off */
		ret = -EINVAL;
		if (i == 0) {
			if (krecord[i].insn_off) {
				verbose(env,
					"nonzero insn_off %u for the first func info record",
					krecord[i].insn_off);
				goto err_free;
			}
		} else if (krecord[i].insn_off <= prev_offset) {
			verbose(env,
				"same or smaller insn offset (%u) than previous func info record (%u)",
				krecord[i].insn_off, prev_offset);
			goto err_free;
		}

		/* check type_id */
		type = btf_type_by_id(btf, krecord[i].type_id);
		if (!type || !btf_type_is_func(type)) {
			verbose(env, "invalid type id %d in func info",
				krecord[i].type_id);
			goto err_free;
		}

		func_proto = btf_type_by_id(btf, type->type);
		if (unlikely(!func_proto || !btf_type_is_func_proto(func_proto)))
			/* btf_func_check() already verified it during BTF load */
			goto err_free;

		prev_offset = krecord[i].insn_off;
		bpfptr_add(&urecord, urec_size);
	}

	prog->aux->func_info = krecord;
	prog->aux->func_info_cnt = nfuncs;
	return 0;

err_free:
	kvfree(krecord);
	return ret;
}
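/* Ex. (illustrative, values are made up): for func_info_cnt == 3 the
 * records must arrive sorted by insn_off, e.g. 0, 120, 300; insn_off of the
 * first record must be 0 and every later insn_off must be strictly greater
 * than the previous one, and each type_id must name a BTF FUNC whose
 * prototype was already validated at BTF load time.
 */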
static int check_btf_func(struct bpf_verifier_env *env,
			  const union bpf_attr *attr,
			  bpfptr_t uattr)
{
	const struct btf_type *type, *func_proto, *ret_type;
	u32 i, nfuncs, urec_size;
	struct bpf_func_info *krecord;
	struct bpf_func_info_aux *info_aux = NULL;
	struct bpf_prog *prog;
	const struct btf *btf;
	bpfptr_t urecord;
	bool scalar_return;
	int ret = -ENOMEM;

	nfuncs = attr->func_info_cnt;
	if (!nfuncs) {
		if (check_abnormal_return(env))
			return -EINVAL;
		return 0;
	}
	if (nfuncs != env->subprog_cnt) {
		verbose(env, "number of funcs in func_info doesn't match number of subprogs\n");
		return -EINVAL;
	}

	urec_size = attr->func_info_rec_size;

	prog = env->prog;
	btf = prog->aux->btf;

	urecord = make_bpfptr(attr->func_info, uattr.is_kernel);

	krecord = prog->aux->func_info;
	info_aux = kcalloc(nfuncs, sizeof(*info_aux), GFP_KERNEL | __GFP_NOWARN);
	if (!info_aux)
		return -ENOMEM;

	for (i = 0; i < nfuncs; i++) {
		/* check insn_off */
		ret = -EINVAL;

		if (env->subprog_info[i].start != krecord[i].insn_off) {
			verbose(env, "func_info BTF section doesn't match subprog layout in BPF program\n");
			goto err_free;
		}

		/* Already checked type_id */
		type = btf_type_by_id(btf, krecord[i].type_id);
		info_aux[i].linkage = BTF_INFO_VLEN(type->info);
		/* Already checked func_proto */
		func_proto = btf_type_by_id(btf, type->type);

		ret_type = btf_type_skip_modifiers(btf, func_proto->type, NULL);
		scalar_return =
			btf_type_is_small_int(ret_type) || btf_is_any_enum(ret_type);
		if (i && !scalar_return && env->subprog_info[i].has_ld_abs) {
			verbose(env, "LD_ABS is only allowed in functions that return 'int'.\n");
			goto err_free;
		}
		if (i && !scalar_return && env->subprog_info[i].has_tail_call) {
			verbose(env, "tail_call is only allowed in functions that return 'int'.\n");
			goto err_free;
		}

		bpfptr_add(&urecord, urec_size);
	}

	prog->aux->func_info_aux = info_aux;
	return 0;

err_free:
	kfree(info_aux);
	return ret;
}
static void adjust_btf_func(struct bpf_verifier_env *env)
{
	struct bpf_prog_aux *aux = env->prog->aux;
	int i;

	if (!aux->func_info)
		return;

	/* func_info is not available for hidden subprogs */
	for (i = 0; i < env->subprog_cnt - env->hidden_subprog_cnt; i++)
		aux->func_info[i].insn_off = env->subprog_info[i].start;
}
#define MIN_BPF_LINEINFO_SIZE	offsetofend(struct bpf_line_info, line_col)
#define MAX_LINEINFO_REC_SIZE	MAX_FUNCINFO_REC_SIZE

static int check_btf_line(struct bpf_verifier_env *env,
			  const union bpf_attr *attr,
			  bpfptr_t uattr)
{
	u32 i, s, nr_linfo, ncopy, expected_size, rec_size, prev_offset = 0;
	struct bpf_subprog_info *sub;
	struct bpf_line_info *linfo;
	struct bpf_prog *prog;
	const struct btf *btf;
	bpfptr_t ulinfo;
	int err;

	nr_linfo = attr->line_info_cnt;
	if (!nr_linfo)
		return 0;
	if (nr_linfo > INT_MAX / sizeof(struct bpf_line_info))
		return -EINVAL;

	rec_size = attr->line_info_rec_size;
	if (rec_size < MIN_BPF_LINEINFO_SIZE ||
	    rec_size > MAX_LINEINFO_REC_SIZE ||
	    rec_size & (sizeof(u32) - 1))
		return -EINVAL;

	/* Need to zero it in case the userspace may
	 * pass in a smaller bpf_line_info object.
	 */
	linfo = kvcalloc(nr_linfo, sizeof(struct bpf_line_info),
			 GFP_KERNEL | __GFP_NOWARN);
	if (!linfo)
		return -ENOMEM;

	prog = env->prog;
	btf = prog->aux->btf;

	s = 0;
	sub = env->subprog_info;
	ulinfo = make_bpfptr(attr->line_info, uattr.is_kernel);
	expected_size = sizeof(struct bpf_line_info);
	ncopy = min_t(u32, expected_size, rec_size);
	for (i = 0; i < nr_linfo; i++) {
		err = bpf_check_uarg_tail_zero(ulinfo, expected_size, rec_size);
		if (err) {
			if (err == -E2BIG) {
				verbose(env, "nonzero tailing record in line_info");
				if (copy_to_bpfptr_offset(uattr,
							  offsetof(union bpf_attr, line_info_rec_size),
							  &expected_size, sizeof(expected_size)))
					err = -EFAULT;
			}
			goto err_free;
		}

		if (copy_from_bpfptr(&linfo[i], ulinfo, ncopy)) {
			err = -EFAULT;
			goto err_free;
		}

		/*
		 * Check insn_off to ensure
		 * 1) strictly increasing AND
		 * 2) bounded by prog->len
		 *
		 * The linfo[0].insn_off == 0 check logically falls into
		 * the later "missing bpf_line_info for func..." case
		 * because the first linfo[0].insn_off must be the
		 * first sub also and the first sub must have
		 * subprog_info[0].start == 0.
		 */
		if ((i && linfo[i].insn_off <= prev_offset) ||
		    linfo[i].insn_off >= prog->len) {
			verbose(env, "Invalid line_info[%u].insn_off:%u (prev_offset:%u prog->len:%u)\n",
				i, linfo[i].insn_off, prev_offset,
				prog->len);
			err = -EINVAL;
			goto err_free;
		}

		if (!prog->insnsi[linfo[i].insn_off].code) {
			verbose(env,
				"Invalid insn code at line_info[%u].insn_off\n",
				i);
			err = -EINVAL;
			goto err_free;
		}

		if (!btf_name_by_offset(btf, linfo[i].line_off) ||
		    !btf_name_by_offset(btf, linfo[i].file_name_off)) {
			verbose(env, "Invalid line_info[%u].line_off or .file_name_off\n", i);
			err = -EINVAL;
			goto err_free;
		}

		if (s != env->subprog_cnt) {
			if (linfo[i].insn_off == sub[s].start) {
				sub[s].linfo_idx = i;
				s++;
			} else if (sub[s].start < linfo[i].insn_off) {
				verbose(env, "missing bpf_line_info for func#%u\n", s);
				err = -EINVAL;
				goto err_free;
			}
		}

		prev_offset = linfo[i].insn_off;
		bpfptr_add(&ulinfo, rec_size);
	}

	if (s != env->subprog_cnt) {
		verbose(env, "missing bpf_line_info for %u funcs starting from func#%u\n",
			env->subprog_cnt - s, s);
		err = -EINVAL;
		goto err_free;
	}

	prog->aux->linfo = linfo;
	prog->aux->nr_linfo = nr_linfo;

	return 0;

err_free:
	kvfree(linfo);
	return err;
}
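/* Ex. (illustrative layout, not taken from a real program): with two
 * subprogs starting at insns 0 and 50, a line_info array with
 *   insn_off: 0, 3, 17, 50, 61
 * is accepted: offsets strictly increase, stay below prog->len, and every
 * subprog start (0 and 50) has a matching record. Dropping the insn_off==50
 * entry would trigger "missing bpf_line_info for func#1".
 */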
#define MIN_CORE_RELO_SIZE	sizeof(struct bpf_core_relo)
#define MAX_CORE_RELO_SIZE	MAX_FUNCINFO_REC_SIZE

static int check_core_relo(struct bpf_verifier_env *env,
			   const union bpf_attr *attr,
			   bpfptr_t uattr)
{
	u32 i, nr_core_relo, ncopy, expected_size, rec_size;
	struct bpf_core_relo core_relo = {};
	struct bpf_prog *prog = env->prog;
	const struct btf *btf = prog->aux->btf;
	struct bpf_core_ctx ctx = {
		.log = &env->log,
		.btf = btf,
	};
	bpfptr_t u_core_relo;
	int err;

	nr_core_relo = attr->core_relo_cnt;
	if (!nr_core_relo)
		return 0;
	if (nr_core_relo > INT_MAX / sizeof(struct bpf_core_relo))
		return -EINVAL;

	rec_size = attr->core_relo_rec_size;
	if (rec_size < MIN_CORE_RELO_SIZE ||
	    rec_size > MAX_CORE_RELO_SIZE ||
	    rec_size % sizeof(u32))
		return -EINVAL;

	u_core_relo = make_bpfptr(attr->core_relos, uattr.is_kernel);
	expected_size = sizeof(struct bpf_core_relo);
	ncopy = min_t(u32, expected_size, rec_size);

	/* Unlike func_info and line_info, copy and apply each CO-RE
	 * relocation record one at a time.
	 */
	for (i = 0; i < nr_core_relo; i++) {
		/* future proofing when sizeof(bpf_core_relo) changes */
		err = bpf_check_uarg_tail_zero(u_core_relo, expected_size, rec_size);
		if (err) {
			if (err == -E2BIG) {
				verbose(env, "nonzero tailing record in core_relo");
				if (copy_to_bpfptr_offset(uattr,
							  offsetof(union bpf_attr, core_relo_rec_size),
							  &expected_size, sizeof(expected_size)))
					err = -EFAULT;
			}
			break;
		}

		if (copy_from_bpfptr(&core_relo, u_core_relo, ncopy)) {
			err = -EFAULT;
			break;
		}

		if (core_relo.insn_off % 8 || core_relo.insn_off / 8 >= prog->len) {
			verbose(env, "Invalid core_relo[%u].insn_off:%u prog->len:%u\n",
				i, core_relo.insn_off, prog->len);
			err = -EINVAL;
			break;
		}

		err = bpf_core_apply(&ctx, &core_relo, i,
				     &prog->insnsi[core_relo.insn_off / 8]);
		if (err)
			break;
		bpfptr_add(&u_core_relo, rec_size);
	}
	return err;
}
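/* Ex. (illustrative numbers): core_relo.insn_off is a byte offset into the
 * instruction array, so it must be a multiple of 8 (sizeof(struct bpf_insn)).
 * A record with insn_off == 24 patches instruction index 24 / 8 == 3; a
 * record with insn_off == 20 is rejected above even though it falls inside
 * the program image.
 */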
static int check_btf_info_early(struct bpf_verifier_env *env,
				const union bpf_attr *attr,
				bpfptr_t uattr)
{
	struct btf *btf;
	int err;

	if (!attr->func_info_cnt && !attr->line_info_cnt) {
		if (check_abnormal_return(env))
			return -EINVAL;
		return 0;
	}

	btf = btf_get_by_fd(attr->prog_btf_fd);
	if (IS_ERR(btf))
		return PTR_ERR(btf);
	if (btf_is_kernel(btf)) {
		btf_put(btf);
		return -EACCES;
	}
	env->prog->aux->btf = btf;

	err = check_btf_func_early(env, attr, uattr);
	if (err)
		return err;
	return 0;
}
static int check_btf_info(struct bpf_verifier_env *env,
			  const union bpf_attr *attr,
			  bpfptr_t uattr)
{
	int err;

	if (!attr->func_info_cnt && !attr->line_info_cnt) {
		if (check_abnormal_return(env))
			return -EINVAL;
		return 0;
	}

	err = check_btf_func(env, attr, uattr);
	if (err)
		return err;

	err = check_btf_line(env, attr, uattr);
	if (err)
		return err;

	err = check_core_relo(env, attr, uattr);
	if (err)
		return err;

	return 0;
}
/* check %cur's range satisfies %old's */
static bool range_within(struct bpf_reg_state *old,
			 struct bpf_reg_state *cur)
{
	return old->umin_value <= cur->umin_value &&
	       old->umax_value >= cur->umax_value &&
	       old->smin_value <= cur->smin_value &&
	       old->smax_value >= cur->smax_value &&
	       old->u32_min_value <= cur->u32_min_value &&
	       old->u32_max_value >= cur->u32_max_value &&
	       old->s32_min_value <= cur->s32_min_value &&
	       old->s32_max_value >= cur->s32_max_value;
}
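/* Ex. (made-up bounds): old = [umin=0, umax=100] vs cur = [umin=10, umax=90]
 * satisfies range_within(old, cur), because every value cur can hold was
 * already covered when the old state was verified; old = [10, 90] vs
 * cur = [0, 100] fails, since cur may hold values the old state never
 * proved safe. The same containment check is applied to the signed and the
 * 32-bit bounds.
 */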
/* If in the old state two registers had the same id, then they need to have
 * the same id in the new state as well. But that id could be different from
 * the old state, so we need to track the mapping from old to new ids.
 * Once we have seen that, say, a reg with old id 5 had new id 9, any subsequent
 * regs with old id 5 must also have new id 9 for the new state to be safe. But
 * regs with a different old id could still have new id 9, we don't care about
 * them.
 * So we look through our idmap to see if this old id has been seen before. If
 * so, we require the new id to match; otherwise, we add the id pair to the map.
 */
static bool check_ids(u32 old_id, u32 cur_id, struct bpf_idmap *idmap)
{
	struct bpf_id_pair *map = idmap->map;
	unsigned int i;

	/* either both IDs should be set or both should be zero */
	if (!!old_id != !!cur_id)
		return false;

	if (old_id == 0) /* cur_id == 0 as well */
		return true;

	for (i = 0; i < BPF_ID_MAP_SIZE; i++) {
		if (!map[i].old) {
			/* Reached an empty slot; haven't seen this id before */
			map[i].old = old_id;
			map[i].cur = cur_id;
			return true;
		}
		if (map[i].old == old_id)
			return map[i].cur == cur_id;
		if (map[i].cur == cur_id)
			return false;
	}
	/* We ran out of idmap slots, which should be impossible */
	WARN_ON_ONCE(1);
	return false;
}
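/* Ex. (hypothetical ids): comparing an old state where r1.id == 5 and
 * r2.id == 5 against a new state where r1.id == 9 records the pair (5, 9)
 * in the idmap on the first call; the second call then only succeeds if
 * r2.id is also 9. A new state with r1.id == 9 but r2.id == 11 is rejected,
 * because the old state relied on r1 and r2 sharing the same id.
 */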
/* Similar to check_ids(), but allocate a unique temporary ID
 * for 'old_id' or 'cur_id' of zero.
 * This makes pairs like '0 vs unique ID', 'unique ID vs 0' valid.
 */
static bool check_scalar_ids(u32 old_id, u32 cur_id, struct bpf_idmap *idmap)
{
	old_id = old_id ? old_id : ++idmap->tmp_id_gen;
	cur_id = cur_id ? cur_id : ++idmap->tmp_id_gen;

	return check_ids(old_id, cur_id, idmap);
}
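/* Ex. (hypothetical): an old scalar with id == 0 compared against a new
 * scalar with id == 7 gets a fresh temporary id, so the pair behaves like
 * 'unique vs 7' and is accepted as long as no other register contradicts
 * that mapping; two id == 0 scalars get two distinct temporaries and thus
 * never alias by accident.
 */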
static void clean_func_state(struct bpf_verifier_env *env,
			     struct bpf_func_state *st)
{
	enum bpf_reg_liveness live;
	int i, j;

	for (i = 0; i < BPF_REG_FP; i++) {
		live = st->regs[i].live;
		/* liveness must not touch this register anymore */
		st->regs[i].live |= REG_LIVE_DONE;
		if (!(live & REG_LIVE_READ))
			/* since the register is unused, clear its state
			 * to make further comparison simpler
			 */
			__mark_reg_not_init(env, &st->regs[i]);
	}

	for (i = 0; i < st->allocated_stack / BPF_REG_SIZE; i++) {
		live = st->stack[i].spilled_ptr.live;
		/* liveness must not touch this stack slot anymore */
		st->stack[i].spilled_ptr.live |= REG_LIVE_DONE;
		if (!(live & REG_LIVE_READ)) {
			__mark_reg_not_init(env, &st->stack[i].spilled_ptr);
			for (j = 0; j < BPF_REG_SIZE; j++)
				st->stack[i].slot_type[j] = STACK_INVALID;
		}
	}
}
static void clean_verifier_state(struct bpf_verifier_env *env,
				 struct bpf_verifier_state *st)
{
	int i;

	if (st->frame[0]->regs[0].live & REG_LIVE_DONE)
		/* all regs in this state in all frames were already marked */
		return;

	for (i = 0; i <= st->curframe; i++)
		clean_func_state(env, st->frame[i]);
}
/* the parentage chains form a tree.
 * the verifier states are added to state lists at given insn and
 * pushed into state stack for future exploration.
 * when the verifier reaches bpf_exit insn some of the verifier states
 * stored in the state lists have their final liveness state already,
 * but a lot of states will get revised from liveness point of view when
 * the verifier explores other branches.
 * Example:
 * 1: r0 = 1
 * 2: if r1 == 100 goto pc+1
 * 3: r0 = 2
 * 4: exit
 * when the verifier reaches exit insn the register r0 in the state list of
 * insn 2 will be seen as !REG_LIVE_READ. Then the verifier pops the other_branch
 * of insn 2 and goes exploring further. At the insn 4 it will walk the
 * parentage chain from insn 4 into insn 2 and will mark r0 as REG_LIVE_READ.
 *
 * Since the verifier pushes the branch states as it sees them while exploring
 * the program the condition of walking the branch instruction for the second
 * time means that all states below this branch were already explored and
 * their final liveness marks are already propagated.
 * Hence when the verifier completes the search of state list in is_state_visited()
 * we can call this clean_live_states() function to mark all liveness states
 * as REG_LIVE_DONE to indicate that 'parent' pointers of 'struct bpf_reg_state'
 * will not be used.
 * This function also clears the registers and stack for states that !READ
 * to simplify state merging.
 *
 * Important note here that walking the same branch instruction in the callee
 * doesn't mean that the states are DONE. The verifier has to compare
 * the callsites
 */
static void clean_live_states(struct bpf_verifier_env *env, int insn,
			      struct bpf_verifier_state *cur)
{
	struct bpf_verifier_state_list *sl;

	sl = *explored_state(env, insn);
	while (sl) {
		if (sl->state.branches)
			goto next;
		if (sl->state.insn_idx != insn ||
		    !same_callsites(&sl->state, cur))
			goto next;
		clean_verifier_state(env, &sl->state);
next:
		sl = sl->next;
	}
}
static bool regs_exact(const struct bpf_reg_state *rold,
		       const struct bpf_reg_state *rcur,
		       struct bpf_idmap *idmap)
{
	return memcmp(rold, rcur, offsetof(struct bpf_reg_state, id)) == 0 &&
	       check_ids(rold->id, rcur->id, idmap) &&
	       check_ids(rold->ref_obj_id, rcur->ref_obj_id, idmap);
}
/* Returns true if (rold safe implies rcur safe) */
static bool regsafe(struct bpf_verifier_env *env, struct bpf_reg_state *rold,
		    struct bpf_reg_state *rcur, struct bpf_idmap *idmap, bool exact)
{
	if (exact)
		return regs_exact(rold, rcur, idmap);

	if (!(rold->live & REG_LIVE_READ))
		/* explored state didn't use this */
		return true;
	if (rold->type == NOT_INIT)
		/* explored state can't have used this */
		return true;
	if (rcur->type == NOT_INIT)
		return false;

	/* Enforce that register types have to match exactly, including their
	 * modifiers (like PTR_MAYBE_NULL, MEM_RDONLY, etc), as a general
	 * rule.
	 *
	 * One can make a point that using a pointer register as unbounded
	 * SCALAR would be technically acceptable, but this could lead to
	 * pointer leaks because scalars are allowed to leak while pointers
	 * are not. We could make this safe in special cases if root is
	 * calling us, but it's probably not worth the hassle.
	 *
	 * Also, register types that are *not* MAYBE_NULL could technically be
	 * safe to use as their MAYBE_NULL variants (e.g., PTR_TO_MAP_VALUE
	 * is safe to be used as PTR_TO_MAP_VALUE_OR_NULL, provided both point
	 * to the same map).
	 * However, if the old MAYBE_NULL register then got NULL checked,
	 * doing so could have affected others with the same id, and we can't
	 * check for that because we lost the id when we converted to
	 * a non-MAYBE_NULL variant.
	 * So, as a general rule we don't allow mixing MAYBE_NULL and
	 * non-MAYBE_NULL registers as well.
	 */
	if (rold->type != rcur->type)
		return false;

	switch (base_type(rold->type)) {
	case SCALAR_VALUE:
		if (env->explore_alu_limits) {
			/* explore_alu_limits disables tnum_in() and range_within()
			 * logic and requires everything to be strict
			 */
			return memcmp(rold, rcur, offsetof(struct bpf_reg_state, id)) == 0 &&
			       check_scalar_ids(rold->id, rcur->id, idmap);
		}
		if (!rold->precise)
			return true;
		/* Why check_ids() for scalar registers?
		 *
		 * Consider the following BPF code:
		 *   1: r6 = ... unbound scalar, ID=a ...
		 *   2: r7 = ... unbound scalar, ID=b ...
		 *   3: if (r6 > r7) goto +1
		 *   4: r6 = r7
		 *   5: if (r6 > X) goto ...
		 *   6: ... memory operation using r7 ...
		 *
		 * First verification path is [1-6]:
		 * - at (4) same bpf_reg_state::id (b) would be assigned to r6 and r7;
		 * - at (5) r6 would be marked <= X, find_equal_scalars() would also mark
		 *   r7 <= X, because r6 and r7 share same id.
		 * Next verification path is [1-4, 6].
		 *
		 * Instruction (6) would be reached in two states:
		 *   I.  r6{.id=b}, r7{.id=b} via path 1-6;
		 *   II. r6{.id=a}, r7{.id=b} via path 1-4, 6.
		 *
		 * Use check_ids() to distinguish these states.
		 * ---
		 * Also verify that new value satisfies old value range knowledge.
		 */
		return range_within(rold, rcur) &&
		       tnum_in(rold->var_off, rcur->var_off) &&
		       check_scalar_ids(rold->id, rcur->id, idmap);
	case PTR_TO_MAP_KEY:
	case PTR_TO_MAP_VALUE:
	case PTR_TO_MEM:
	case PTR_TO_BUF:
	case PTR_TO_TP_BUFFER:
		/* If the new min/max/var_off satisfy the old ones and
		 * everything else matches, we are OK.
		 */
		return memcmp(rold, rcur, offsetof(struct bpf_reg_state, var_off)) == 0 &&
		       range_within(rold, rcur) &&
		       tnum_in(rold->var_off, rcur->var_off) &&
		       check_ids(rold->id, rcur->id, idmap) &&
		       check_ids(rold->ref_obj_id, rcur->ref_obj_id, idmap);
	case PTR_TO_PACKET_META:
	case PTR_TO_PACKET:
		/* We must have at least as much range as the old ptr
		 * did, so that any accesses which were safe before are
		 * still safe.  This is true even if old range < old off,
		 * since someone could have accessed through (ptr - k), or
		 * even done ptr -= k in a register, to get a safe access.
		 */
		if (rold->range > rcur->range)
			return false;
		/* If the offsets don't match, we can't trust our alignment;
		 * nor can we be sure that we won't fall out of range.
		 */
		if (rold->off != rcur->off)
			return false;
		/* id relations must be preserved */
		if (!check_ids(rold->id, rcur->id, idmap))
			return false;
		/* new val must satisfy old val knowledge */
		return range_within(rold, rcur) &&
		       tnum_in(rold->var_off, rcur->var_off);
	case PTR_TO_STACK:
		/* two stack pointers are equal only if they're pointing to
		 * the same stack frame, since fp-8 in foo != fp-8 in bar
		 */
		return regs_exact(rold, rcur, idmap) && rold->frameno == rcur->frameno;
	default:
		return regs_exact(rold, rcur, idmap);
	}
}
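/* Ex. (illustrative): a cached state proved safe with a precise scalar r3
 * in [0, 64] lets the verifier prune a new path where r3 is in [8, 16] and
 * its tnum fits, because every value the new path may produce was already
 * explored; a new path with r3 in [0, 128] must still be walked. For packet
 * pointers the containment goes the other way for ->range: old range 16 vs
 * new range 32 is fine, old range 32 vs new range 16 is not.
 */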
static bool stacksafe(struct bpf_verifier_env *env, struct bpf_func_state *old,
		      struct bpf_func_state *cur, struct bpf_idmap *idmap, bool exact)
{
	int i, spi;

	/* walk slots of the explored stack and ignore any additional
	 * slots in the current stack, since explored(safe) state
	 * didn't use them
	 */
	for (i = 0; i < old->allocated_stack; i++) {
		struct bpf_reg_state *old_reg, *cur_reg;

		spi = i / BPF_REG_SIZE;

		if (exact &&
		    old->stack[spi].slot_type[i % BPF_REG_SIZE] !=
		    cur->stack[spi].slot_type[i % BPF_REG_SIZE])
			return false;

		if (!(old->stack[spi].spilled_ptr.live & REG_LIVE_READ) && !exact) {
			i += BPF_REG_SIZE - 1;
			/* explored state didn't use this */
			continue;
		}

		if (old->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_INVALID)
			continue;

		if (env->allow_uninit_stack &&
		    old->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_MISC)
			continue;

		/* explored stack has more populated slots than current stack
		 * and these slots were used
		 */
		if (i >= cur->allocated_stack)
			return false;

		/* if old state was safe with misc data in the stack
		 * it will be safe with zero-initialized stack.
		 * The opposite is not true
		 */
		if (old->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_MISC &&
		    cur->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_ZERO)
			continue;
		if (old->stack[spi].slot_type[i % BPF_REG_SIZE] !=
		    cur->stack[spi].slot_type[i % BPF_REG_SIZE])
			/* Ex: old explored (safe) state has STACK_SPILL in
			 * this stack slot, but current has STACK_MISC ->
			 * this verifier states are not equivalent,
			 * return false to continue verification of this path
			 */
			return false;
		if (i % BPF_REG_SIZE != BPF_REG_SIZE - 1)
			continue;
		/* Both old and cur are having same slot_type */
		switch (old->stack[spi].slot_type[BPF_REG_SIZE - 1]) {
		case STACK_SPILL:
			/* when explored and current stack slot are both storing
			 * spilled registers, check that stored pointers types
			 * are the same as well.
			 * Ex: explored safe path could have stored
			 * (bpf_reg_state) {.type = PTR_TO_STACK, .off = -8}
			 * but current path has stored:
			 * (bpf_reg_state) {.type = PTR_TO_STACK, .off = -16}
			 * such verifier states are not equivalent.
			 * return false to continue verification of this path
			 */
			if (!regsafe(env, &old->stack[spi].spilled_ptr,
				     &cur->stack[spi].spilled_ptr, idmap, exact))
				return false;
			break;
		case STACK_DYNPTR:
			old_reg = &old->stack[spi].spilled_ptr;
			cur_reg = &cur->stack[spi].spilled_ptr;
			if (old_reg->dynptr.type != cur_reg->dynptr.type ||
			    old_reg->dynptr.first_slot != cur_reg->dynptr.first_slot ||
			    !check_ids(old_reg->ref_obj_id, cur_reg->ref_obj_id, idmap))
				return false;
			break;
		case STACK_ITER:
			old_reg = &old->stack[spi].spilled_ptr;
			cur_reg = &cur->stack[spi].spilled_ptr;
			/* iter.depth is not compared between states as it
			 * doesn't matter for correctness and would otherwise
			 * prevent convergence; we maintain it only to prevent
			 * infinite loop check triggering, see
			 * iter_active_depths_differ()
			 */
			if (old_reg->iter.btf != cur_reg->iter.btf ||
			    old_reg->iter.btf_id != cur_reg->iter.btf_id ||
			    old_reg->iter.state != cur_reg->iter.state ||
			    /* ignore {old_reg,cur_reg}->iter.depth, see above */
			    !check_ids(old_reg->ref_obj_id, cur_reg->ref_obj_id, idmap))
				return false;
			break;
		case STACK_MISC:
		case STACK_ZERO:
		case STACK_INVALID:
			continue;
		/* Ensure that new unhandled slot types return false by default */
		default:
			return false;
		}
	}
	return true;
}
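/* Ex. (illustrative slot states): if the explored state had fp-8 as
 * STACK_MISC and the current state has fp-8 as STACK_ZERO, the slot is
 * accepted, since anything that was safe with arbitrary bytes is safe with
 * zeroes. The reverse (old STACK_ZERO, cur STACK_MISC) falls through to the
 * slot_type comparison and fails, and a spilled register slot is only
 * accepted when regsafe() approves the two spilled_ptr states.
 */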
static bool refsafe(struct bpf_func_state *old, struct bpf_func_state *cur,
		    struct bpf_idmap *idmap)
{
	int i;

	if (old->acquired_refs != cur->acquired_refs)
		return false;

	for (i = 0; i < old->acquired_refs; i++) {
		if (!check_ids(old->refs[i].id, cur->refs[i].id, idmap))
			return false;
	}

	return true;
}
/* compare two verifier states
 *
 * all states stored in state_list are known to be valid, since
 * verifier reached 'bpf_exit' instruction through them
 *
 * this function is called when verifier exploring different branches of
 * execution popped from the state stack. If it sees an old state that has
 * more strict register state and more strict stack state then this execution
 * branch doesn't need to be explored further, since verifier already
 * concluded that more strict state leads to valid finish.
 *
 * Therefore two states are equivalent if register state is more conservative
 * and explored stack state is more conservative than the current one.
 * Example:
 *       explored                   current
 * (slot1=INV slot2=MISC) == (slot1=MISC slot2=MISC)
 * (slot1=MISC slot2=MISC) != (slot1=INV slot2=MISC)
 *
 * In other words if current stack state (one being explored) has more
 * valid slots than old one that already passed validation, it means
 * the verifier can stop exploring and conclude that current state is valid too
 *
 * Similarly with registers. If explored state has register type as invalid
 * whereas register type in current state is meaningful, it means that
 * the current state will reach 'bpf_exit' instruction safely
 */
static bool func_states_equal(struct bpf_verifier_env *env, struct bpf_func_state *old,
			      struct bpf_func_state *cur, bool exact)
{
	int i;

	for (i = 0; i < MAX_BPF_REG; i++)
		if (!regsafe(env, &old->regs[i], &cur->regs[i],
			     &env->idmap_scratch, exact))
			return false;

	if (!stacksafe(env, old, cur, &env->idmap_scratch, exact))
		return false;

	if (!refsafe(old, cur, &env->idmap_scratch))
		return false;

	return true;
}
static void reset_idmap_scratch(struct bpf_verifier_env *env)
{
	env->idmap_scratch.tmp_id_gen = env->id_gen;
	memset(&env->idmap_scratch.map, 0, sizeof(env->idmap_scratch.map));
}
static bool states_equal(struct bpf_verifier_env *env,
			 struct bpf_verifier_state *old,
			 struct bpf_verifier_state *cur,
			 bool exact)
{
	int i;

	if (old->curframe != cur->curframe)
		return false;

	reset_idmap_scratch(env);

	/* Verification state from speculative execution simulation
	 * must never prune a non-speculative execution one.
	 */
	if (old->speculative && !cur->speculative)
		return false;

	if (old->active_lock.ptr != cur->active_lock.ptr)
		return false;

	/* Old and cur active_lock's have to be either both present
	 * or both absent.
	 */
	if (!!old->active_lock.id != !!cur->active_lock.id)
		return false;

	if (old->active_lock.id &&
	    !check_ids(old->active_lock.id, cur->active_lock.id, &env->idmap_scratch))
		return false;

	if (old->active_rcu_lock != cur->active_rcu_lock)
		return false;

	/* for states to be equal callsites have to be the same
	 * and all frame states need to be equivalent
	 */
	for (i = 0; i <= old->curframe; i++) {
		if (old->frame[i]->callsite != cur->frame[i]->callsite)
			return false;
		if (!func_states_equal(env, old->frame[i], cur->frame[i], exact))
			return false;
	}
	return true;
}
/* Return 0 if no propagation happened. Return negative error code if error
 * happened. Otherwise, return the propagated bit.
 */
static int propagate_liveness_reg(struct bpf_verifier_env *env,
				  struct bpf_reg_state *reg,
				  struct bpf_reg_state *parent_reg)
{
	u8 parent_flag = parent_reg->live & REG_LIVE_READ;
	u8 flag = reg->live & REG_LIVE_READ;
	int err;

	/* When comes here, read flags of PARENT_REG or REG could be any of
	 * REG_LIVE_READ64, REG_LIVE_READ32, REG_LIVE_NONE. There is no need
	 * of propagation if PARENT_REG has strongest REG_LIVE_READ64.
	 */
	if (parent_flag == REG_LIVE_READ64 ||
	    /* Or if there is no read flag from REG. */
	    !flag ||
	    /* Or if the read flag from REG is the same as PARENT_REG. */
	    parent_flag == flag)
		return 0;

	err = mark_reg_read(env, reg, parent_reg, flag);
	if (err)
		return err;

	return flag;
}
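/* Ex. (flag combinations, for illustration): a parent already marked
 * REG_LIVE_READ64 learns nothing new, so nothing is propagated; a parent
 * with REG_LIVE_READ32 and a reg with REG_LIVE_READ64 is upgraded via
 * mark_reg_read() and REG_LIVE_READ64 is returned, which lets the caller
 * also record a zero-extension hint; equal flags, or a reg with no read
 * mark at all, propagate nothing.
 */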
/* A write screens off any subsequent reads; but write marks come from the
 * straight-line code between a state and its parent. When we arrive at an
 * equivalent state (jump target or such) we didn't arrive by the straight-line
 * code, so read marks in the state must propagate to the parent regardless
 * of the state's write marks. That's what 'parent == state->parent' comparison
 * in mark_reg_read() is for.
 */
static int propagate_liveness(struct bpf_verifier_env *env,
			      const struct bpf_verifier_state *vstate,
			      struct bpf_verifier_state *vparent)
{
	struct bpf_reg_state *state_reg, *parent_reg;
	struct bpf_func_state *state, *parent;
	int i, frame, err = 0;

	if (vparent->curframe != vstate->curframe) {
		WARN(1, "propagate_live: parent frame %d current frame %d\n",
		     vparent->curframe, vstate->curframe);
		return -EFAULT;
	}

	/* Propagate read liveness of registers... */
	BUILD_BUG_ON(BPF_REG_FP + 1 != MAX_BPF_REG);
	for (frame = 0; frame <= vstate->curframe; frame++) {
		parent = vparent->frame[frame];
		state = vstate->frame[frame];
		parent_reg = parent->regs;
		state_reg = state->regs;
		/* We don't need to worry about FP liveness, it's read-only */
		for (i = frame < vstate->curframe ? BPF_REG_6 : 0; i < BPF_REG_FP; i++) {
			err = propagate_liveness_reg(env, &state_reg[i],
						     &parent_reg[i]);
			if (err < 0)
				return err;
			if (err == REG_LIVE_READ64)
				mark_insn_zext(env, &parent_reg[i]);
		}

		/* Propagate stack slots. */
		for (i = 0; i < state->allocated_stack / BPF_REG_SIZE &&
			    i < parent->allocated_stack / BPF_REG_SIZE; i++) {
			parent_reg = &parent->stack[i].spilled_ptr;
			state_reg = &state->stack[i].spilled_ptr;
			err = propagate_liveness_reg(env, state_reg,
						     parent_reg);
			if (err < 0)
				return err;
		}
	}
	return 0;
}
/* find precise scalars in the previous equivalent state and
 * propagate them into the current state
 */
static int propagate_precision(struct bpf_verifier_env *env,
			       const struct bpf_verifier_state *old)
{
	struct bpf_reg_state *state_reg;
	struct bpf_func_state *state;
	int i, err = 0, fr;
	bool first;

	for (fr = old->curframe; fr >= 0; fr--) {
		state = old->frame[fr];
		state_reg = state->regs;
		first = true;
		for (i = 0; i < BPF_REG_FP; i++, state_reg++) {
			if (state_reg->type != SCALAR_VALUE ||
			    !state_reg->precise ||
			    !(state_reg->live & REG_LIVE_READ))
				continue;
			if (env->log.level & BPF_LOG_LEVEL2) {
				if (first)
					verbose(env, "frame %d: propagating r%d", fr, i);
				else
					verbose(env, ",r%d", i);
			}
			bt_set_frame_reg(&env->bt, fr, i);
			first = false;
		}

		for (i = 0; i < state->allocated_stack / BPF_REG_SIZE; i++) {
			if (!is_spilled_reg(&state->stack[i]))
				continue;
			state_reg = &state->stack[i].spilled_ptr;
			if (state_reg->type != SCALAR_VALUE ||
			    !state_reg->precise ||
			    !(state_reg->live & REG_LIVE_READ))
				continue;
			if (env->log.level & BPF_LOG_LEVEL2) {
				if (first)
					verbose(env, "frame %d: propagating fp%d",
						fr, (-i - 1) * BPF_REG_SIZE);
				else
					verbose(env, ",fp%d", (-i - 1) * BPF_REG_SIZE);
			}
			bt_set_frame_slot(&env->bt, fr, i);
			first = false;
		}
		if (!first)
			verbose(env, "\n");
	}

	err = mark_chain_precision_batch(env);
	if (err < 0)
		return err;

	return 0;
}
static bool states_maybe_looping(struct bpf_verifier_state *old,
				 struct bpf_verifier_state *cur)
{
	struct bpf_func_state *fold, *fcur;
	int i, fr = cur->curframe;

	if (old->curframe != fr)
		return false;

	fold = old->frame[fr];
	fcur = cur->frame[fr];
	for (i = 0; i < MAX_BPF_REG; i++)
		if (memcmp(&fold->regs[i], &fcur->regs[i],
			   offsetof(struct bpf_reg_state, parent)))
			return false;
	return true;
}
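/* Ex. (heuristic behaviour, illustrative): two states at the same insn in
 * the same frame whose r0-r10 compare equal byte-for-byte up to the
 * 'parent' field are treated as possibly looping; a loop that increments
 * r6 each iteration differs in r6 and is not flagged here, it eventually
 * either converges via state pruning or runs into the instruction
 * complexity limit instead.
 */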
static bool is_iter_next_insn(struct bpf_verifier_env *env, int insn_idx)
{
	return env->insn_aux_data[insn_idx].is_iter_next;
}
/* is_state_visited() handles iter_next() (see process_iter_next_call() for
 * terminology) calls specially: as opposed to bounded BPF loops, it *expects*
 * states to match, which otherwise would look like an infinite loop. So while
 * iter_next() calls are taken care of, we still need to be careful and
 * prevent erroneous and too eager declaration of "infinite loop", when
 * iterators are involved.
 *
 * Here's a situation in pseudo-BPF assembly form:
 *
 *   0: again:                          ; set up iter_next() call args
 *   1:   r1 = &it                      ; <CHECKPOINT HERE>
 *   2:   call bpf_iter_num_next        ; this is iter_next() call
 *   3:   if r0 == 0 goto done
 *   4:   ... something useful here ...
 *   5:   goto again                    ; another iteration
 *   6: done:
 *   7:   r1 = &it
 *   8:   call bpf_iter_num_destroy     ; clean up iter state
 *   9:   exit
 *
 * This is a typical loop. Let's assume that we have a prune point at 1:,
 * before we get to `call bpf_iter_num_next` (e.g., because of that `goto
 * again`, assuming other heuristics don't get in a way).
 *
 * When we first time come to 1:, let's say we have some state X. We proceed
 * to 2:, fork states, enqueue ACTIVE, validate NULL case successfully, exit.
 * Now we come back to validate that forked ACTIVE state. We proceed through
 * 3-5, come to goto, jump to 1:. Let's assume our state didn't change, so we
 * are converging. But the problem is that we don't know that yet, as this
 * convergence has to happen at iter_next() call site only. So if nothing is
 * done, at 1: verifier will use bounded loop logic and declare infinite
 * looping (and would be *technically* correct, if not for iterator's
 * "eventual sticky NULL" contract, see process_iter_next_call()). But we
 * don't want that. So what we do in process_iter_next_call() when we go on
 * another ACTIVE iteration, we bump slot->iter.depth, to mark that it's
 * a different iteration. So when we suspect an infinite loop, we additionally
 * check if any of the *ACTIVE* iterator states depths differ. If yes, we
 * pretend we are not looping and wait for next iter_next() call.
 *
 * This only applies to ACTIVE state. In DRAINED state we don't expect to
 * loop, because that would actually mean infinite loop, as DRAINED state is
 * "sticky", and so we'll keep returning into the same instruction with the
 * same state (at least in one of possible code paths).
 *
 * This approach allows to keep infinite loop heuristic even in the face of
 * active iterator. E.g., C snippet below is and will be detected as
 * infinitely looping:
 *
 *   struct bpf_iter_num it;
 *   int *p, x;
 *
 *   bpf_iter_num_new(&it, 0, 10);
 *   while ((p = bpf_iter_num_next(&it))) {
 *       x = *p;
 *       while (x--) {} // <<-- infinite loop here
 *   }
 *
 */
static bool iter_active_depths_differ(struct bpf_verifier_state *old, struct bpf_verifier_state *cur)
{
	struct bpf_reg_state *slot, *cur_slot;
	struct bpf_func_state *state;
	int i, fr;

	for (fr = old->curframe; fr >= 0; fr--) {
		state = old->frame[fr];
		for (i = 0; i < state->allocated_stack / BPF_REG_SIZE; i++) {
			if (state->stack[i].slot_type[0] != STACK_ITER)
				continue;

			slot = &state->stack[i].spilled_ptr;
			if (slot->iter.state != BPF_ITER_STATE_ACTIVE)
				continue;

			cur_slot = &cur->frame[fr]->stack[i].spilled_ptr;
			if (cur_slot->iter.depth != slot->iter.depth)
				return true;
		}
	}
	return false;
}
static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)
{
	struct bpf_verifier_state_list *new_sl;
	struct bpf_verifier_state_list *sl, **pprev;
	struct bpf_verifier_state *cur = env->cur_state, *new, *loop_entry;
	int i, j, n, err, states_cnt = 0;
	bool force_new_state = env->test_state_freq || is_force_checkpoint(env, insn_idx);
	bool add_new_state = force_new_state;
	bool force_exact;

	/* bpf progs typically have pruning point every 4 instructions
	 * http://vger.kernel.org/bpfconf2019.html#session-1
	 * Do not add new state for future pruning if the verifier hasn't seen
	 * at least 2 jumps and at least 8 instructions.
	 * This heuristic helps decrease 'total_states' and 'peak_states' metric.
	 * In tests that amounts to up to 50% reduction into total verifier
	 * memory consumption and 20% verifier time speedup.
	 */
	if (env->jmps_processed - env->prev_jmps_processed >= 2 &&
	    env->insn_processed - env->prev_insn_processed >= 8)
		add_new_state = true;

	pprev = explored_state(env, insn_idx);
	sl = *pprev;

	clean_live_states(env, insn_idx, cur);

	while (sl) {
		states_cnt++;
		if (sl->state.insn_idx != insn_idx)
			goto next;

		if (sl->state.branches) {
			struct bpf_func_state *frame = sl->state.frame[sl->state.curframe];

			if (frame->in_async_callback_fn &&
			    frame->async_entry_cnt != cur->frame[cur->curframe]->async_entry_cnt) {
				/* Different async_entry_cnt means that the verifier is
				 * processing another entry into async callback.
				 * Seeing the same state is not an indication of infinite
				 * loop or infinite recursion.
				 * But finding the same state doesn't mean that it's safe
				 * to stop processing the current state. The previous state
				 * hasn't yet reached bpf_exit, since state.branches > 0.
				 * Checking in_async_callback_fn alone is not enough either.
				 * Since the verifier still needs to catch infinite loops
				 * inside async callbacks.
				 */
				goto skip_inf_loop_check;
			}
			/* BPF open-coded iterators loop detection is special.
			 * states_maybe_looping() logic is too simplistic in detecting
			 * states that *might* be equivalent, because it doesn't know
			 * about ID remapping, so don't even perform it.
			 * See process_iter_next_call() and iter_active_depths_differ()
			 * for overview of the logic. When current and one of parent
			 * states are detected as equivalent, it's a good thing: we prove
			 * convergence and can stop simulating further iterations.
			 * It's safe to assume that iterator loop will finish, taking into
			 * account iter_next() contract of eventually returning
			 * sticky NULL result.
			 *
			 * Note, that states have to be compared exactly in this case because
			 * read and precision marks might not be finalized inside the loop.
			 * E.g. as in the program below:
			 *
			 *     1. r7 = -16
			 *     2. r6 = bpf_get_prandom_u32()
			 *     3. while (bpf_iter_num_next(&fp[-8])) {
			 *     4.   if (r6 != 42) {
			 *     5.     r7 = -32
			 *     6.     r6 = bpf_get_prandom_u32()
			 *     7.     continue
			 *     8.   }
			 *     9.   r0 = r10
			 *    10.   r0 += r7
			 *    11.   r8 = *(u64 *)(r0 + 0)
			 *    12.   r6 = bpf_get_prandom_u32()
			 *    13. }
			 *
			 * Here verifier would first visit path 1-3, create a checkpoint at 3
			 * with r7=-16, continue to 4-7,3. Existing checkpoint at 3 does
			 * not have read or precision mark for r7 yet, thus inexact states
			 * comparison would discard current state with r7=-32
			 * => unsafe memory access at 11 would not be caught.
			 */
			if (is_iter_next_insn(env, insn_idx)) {
				if (states_equal(env, &sl->state, cur, true)) {
					struct bpf_func_state *cur_frame;
					struct bpf_reg_state *iter_state, *iter_reg;
					int spi;

					cur_frame = cur->frame[cur->curframe];
					/* btf_check_iter_kfuncs() enforces that
					 * iter state pointer is always the first arg
					 */
					iter_reg = &cur_frame->regs[BPF_REG_1];
					/* current state is valid due to states_equal(),
					 * so we can assume valid iter and reg state,
					 * no need for extra (re-)validations
					 */
					spi = __get_spi(iter_reg->off + iter_reg->var_off.value);
					iter_state = &func(env, iter_reg)->stack[spi].spilled_ptr;
					if (iter_state->iter.state == BPF_ITER_STATE_ACTIVE) {
						update_loop_entry(cur, &sl->state);
						goto hit;
					}
				}
				goto skip_inf_loop_check;
			}
			if (calls_callback(env, insn_idx)) {
				if (states_equal(env, &sl->state, cur, true))
					goto hit;
				goto skip_inf_loop_check;
			}
			/* attempt to detect infinite loop to avoid unnecessary doomed work */
			if (states_maybe_looping(&sl->state, cur) &&
			    states_equal(env, &sl->state, cur, false) &&
			    !iter_active_depths_differ(&sl->state, cur) &&
			    sl->state.callback_unroll_depth == cur->callback_unroll_depth) {
				verbose_linfo(env, insn_idx, "; ");
				verbose(env, "infinite loop detected at insn %d\n", insn_idx);
				verbose(env, "cur state:");
				print_verifier_state(env, cur->frame[cur->curframe], true);
				verbose(env, "old state:");
				print_verifier_state(env, sl->state.frame[cur->curframe], true);
				return -EINVAL;
			}
			/* if the verifier is processing a loop, avoid adding new state
			 * too often, since different loop iterations have distinct
			 * states and may not help future pruning.
			 * This threshold shouldn't be too low to make sure that
			 * a loop with large bound will be rejected quickly.
			 * The most abusive loop will be:
			 * r1 += 1
			 * if r1 < 1000000 goto pc-2
			 * 1M insn_processed limit / 100 == 10k peak states.
			 * This threshold shouldn't be too high either, since states
			 * at the end of the loop are likely to be useful in pruning.
			 */
skip_inf_loop_check:
			if (!force_new_state &&
			    env->jmps_processed - env->prev_jmps_processed < 20 &&
			    env->insn_processed - env->prev_insn_processed < 100)
				add_new_state = false;
			goto miss;
		}
		/* If sl->state is a part of a loop and this loop's entry is a part of
		 * current verification path then states have to be compared exactly.
		 * 'force_exact' is needed to catch the following case:
		 *
		 *  initial     Here state 'succ' was processed first,
		 *    |         it was eventually tracked to produce a
		 *    V         state identical to 'hdr'.
		 *   .---------> hdr   All branches from 'succ' had been explored
		 *   |            |    and thus 'succ' has its .branches == 0.
		 *   |            V
		 *   |    .------...   Suppose states 'cur' and 'succ' correspond
		 *   |    |       |    to the same instruction + callsites.
		 *   |    V       V    In such case it is necessary to check
		 *   |   ...     ...   if 'succ' and 'cur' are states_equal().
		 *   |    |       |    If 'succ' and 'cur' are a part of the
		 *   |    V       V    same loop exact flag has to be set.
		 *   |   succ <- cur   To check if that is the case, verify
		 *   |    |            if loop entry of 'succ' is in current
		 *   |    V            DFS path.
		 *   |   ...
		 *   |    |
		 *   '----'
		 *
		 * Additional details are in the comment before get_loop_entry().
		 */
		loop_entry = get_loop_entry(&sl->state);
		force_exact = loop_entry && loop_entry->branches > 0;
		if (states_equal(env, &sl->state, cur, force_exact)) {
			if (force_exact)
				update_loop_entry(cur, loop_entry);
hit:
			sl->hit_cnt++;
			/* reached equivalent register/stack state,
			 * prune the search.
			 * Registers read by the continuation are read by us.
			 * If we have any write marks in env->cur_state, they
			 * will prevent corresponding reads in the continuation
			 * from reaching our parent (an explored_state). Our
			 * own state will get the read marks recorded, but
			 * they'll be immediately forgotten as we're pruning
			 * this state and will pop a new one.
			 */
			err = propagate_liveness(env, &sl->state, cur);

			/* if previous state reached the exit with precision and
			 * current state is equivalent to it (except precision marks)
			 * the precision needs to be propagated back in
			 * the current state.
			 */
			err = err ? : push_jmp_history(env, cur);
			err = err ? : propagate_precision(env, &sl->state);
			if (err)
				return err;
			return 1;
		}
miss:
		/* when new state is not going to be added do not increase miss count.
		 * Otherwise several loop iterations will remove the state
		 * recorded earlier. The goal of these heuristics is to have
		 * states from some iterations of the loop (some in the beginning
		 * and some at the end) to help pruning.
		 */
		if (add_new_state)
			sl->miss_cnt++;
		/* heuristic to determine whether this state is beneficial
		 * to keep checking from state equivalence point of view.
		 * Higher numbers increase max_states_per_insn and verification time,
		 * but do not meaningfully decrease insn_processed.
		 * 'n' controls how many times state could miss before eviction.
		 * Use bigger 'n' for checkpoints because evicting checkpoint states
		 * too early would hinder iterator convergence.
		 */
		n = is_force_checkpoint(env, insn_idx) && sl->state.branches > 0 ? 64 : 3;
		if (sl->miss_cnt > sl->hit_cnt * n + n) {
			/* the state is unlikely to be useful. Remove it to
			 * speed up verification
			 */
			*pprev = sl->next;
			if (sl->state.frame[0]->regs[0].live & REG_LIVE_DONE &&
			    !sl->state.used_as_loop_entry) {
				u32 br = sl->state.branches;

				WARN_ONCE(br,
					  "BUG live_done but branches_to_explore %d\n",
					  br);
				free_verifier_state(&sl->state, false);
				kfree(sl);
				env->peak_states--;
			} else {
				/* cannot free this state, since parentage chain may
				 * walk it later. Add it for free_list instead to
				 * be freed at the end of verification
				 */
				sl->next = env->free_list;
				env->free_list = sl;
			}
			sl = *pprev;
			continue;
		}
next:
		pprev = &sl->next;
		sl = *pprev;
	}

	if (env->max_states_per_insn < states_cnt)
		env->max_states_per_insn = states_cnt;

	if (!env->bpf_capable && states_cnt > BPF_COMPLEXITY_LIMIT_STATES)
		return 0;

	if (!add_new_state)
		return 0;

	/* There were no equivalent states, remember the current one.
	 * Technically the current state is not proven to be safe yet,
	 * but it will either reach outer most bpf_exit (which means it's safe)
	 * or it will be rejected. When there are no loops the verifier won't be
	 * seeing this tuple (frame[0].callsite, frame[1].callsite, .. insn_idx)
	 * again on the way to bpf_exit.
	 * When looping the sl->state.branches will be > 0 and this state
	 * will not be considered for equivalence until branches == 0.
	 */
	new_sl = kzalloc(sizeof(struct bpf_verifier_state_list), GFP_KERNEL);
	if (!new_sl)
		return -ENOMEM;
	env->total_states++;
	env->peak_states++;
	env->prev_jmps_processed = env->jmps_processed;
	env->prev_insn_processed = env->insn_processed;

	/* forget precise markings we inherited, see __mark_chain_precision */
	if (env->bpf_capable)
		mark_all_scalars_imprecise(env, cur);

	/* add new state to the head of linked list */
	new = &new_sl->state;
	err = copy_verifier_state(new, cur);
	if (err) {
		free_verifier_state(new, false);
		kfree(new_sl);
		return err;
	}
	new->insn_idx = insn_idx;
	WARN_ONCE(new->branches != 1,
		  "BUG is_state_visited:branches_to_explore=%d insn %d\n", new->branches, insn_idx);

	cur->parent = new;
	cur->first_insn_idx = insn_idx;
	cur->dfs_depth = new->dfs_depth + 1;
	clear_jmp_history(cur);
	new_sl->next = *explored_state(env, insn_idx);
	*explored_state(env, insn_idx) = new_sl;
	/* connect new state to parentage chain. Current frame needs all
	 * registers connected. Only r6 - r9 of the callers are alive (pushed
	 * to the stack implicitly by JITs) so in callers' frames connect just
	 * r6 - r9 as an optimization. Callers will have r1 - r5 connected to
	 * the state of the call instruction (with WRITTEN set), and r0 comes
	 * from callee with its full parentage chain, anyway.
	 */
	/* clear write marks in current state: the writes we did are not writes
	 * our child did, so they don't screen off its reads from us.
	 * (There are no read marks in current state, because reads always mark
	 * their parent and current state never has children yet. Only
	 * explored_states can get read marks.)
	 */
	for (j = 0; j <= cur->curframe; j++) {
		for (i = j < cur->curframe ? BPF_REG_6 : 0; i < BPF_REG_FP; i++)
			cur->frame[j]->regs[i].parent = &new->frame[j]->regs[i];
		for (i = 0; i < BPF_REG_FP; i++)
			cur->frame[j]->regs[i].live = REG_LIVE_NONE;
	}

	/* all stack frames are accessible from callee, clear them all */
	for (j = 0; j <= cur->curframe; j++) {
		struct bpf_func_state *frame = cur->frame[j];
		struct bpf_func_state *newframe = new->frame[j];

		for (i = 0; i < frame->allocated_stack / BPF_REG_SIZE; i++) {
			frame->stack[i].spilled_ptr.live = REG_LIVE_NONE;
			frame->stack[i].spilled_ptr.parent =
						&newframe->stack[i].spilled_ptr;
		}
	}
	return 0;
}
/* Return true if it's OK to have the same insn return a different type. */
static bool reg_type_mismatch_ok(enum bpf_reg_type type)
{
	switch (base_type(type)) {
	case PTR_TO_CTX:
	case PTR_TO_SOCKET:
	case PTR_TO_SOCK_COMMON:
	case PTR_TO_TCP_SOCK:
	case PTR_TO_XDP_SOCK:
	case PTR_TO_BTF_ID:
		return false;
	default:
		return true;
	}
}
/* If an instruction was previously used with particular pointer types, then we
 * need to be careful to avoid cases such as the below, where it may be ok
 * for one branch accessing the pointer, but not ok for the other branch:
 *
 * R1 = sock_ptr
 * goto X;
 * ...
 * R1 = some_other_valid_ptr;
 * goto X;
 * ...
 * R2 = *(u32 *)(R1 + 0);
 */
static bool reg_type_mismatch(enum bpf_reg_type src, enum bpf_reg_type prev)
{
	return src != prev && (!reg_type_mismatch_ok(src) ||
			       !reg_type_mismatch_ok(prev));
}
static int save_aux_ptr_type(struct bpf_verifier_env *env, enum bpf_reg_type type,
			     bool allow_trust_missmatch)
{
	enum bpf_reg_type *prev_type = &env->insn_aux_data[env->insn_idx].ptr_type;

	if (*prev_type == NOT_INIT) {
		/* Saw a valid insn
		 * dst_reg = *(u32 *)(src_reg + off)
		 * save type to validate intersecting paths
		 */
		*prev_type = type;
	} else if (reg_type_mismatch(type, *prev_type)) {
		/* Abuser program is trying to use the same insn
		 * dst_reg = *(u32*) (src_reg + off)
		 * with different pointer types:
		 * src_reg == ctx in one branch and
		 * src_reg == stack|map in some other branch.
		 * Reject it.
		 */
		if (allow_trust_missmatch &&
		    base_type(type) == PTR_TO_BTF_ID &&
		    base_type(*prev_type) == PTR_TO_BTF_ID) {
			/*
			 * Have to support a use case when one path through
			 * the program yields TRUSTED pointer while another
			 * is UNTRUSTED. Fallback to UNTRUSTED to generate
			 * BPF_PROBE_MEM/BPF_PROBE_MEMSX.
			 */
			*prev_type = PTR_TO_BTF_ID | PTR_UNTRUSTED;
		} else {
			verbose(env, "same insn cannot be used with different pointers\n");
			return -EINVAL;
		}
	}

	return 0;
}
static int do_check(struct bpf_verifier_env *env)
{
	bool pop_log = !(env->log.level & BPF_LOG_LEVEL2);
	struct bpf_verifier_state *state = env->cur_state;
	struct bpf_insn *insns = env->prog->insnsi;
	struct bpf_reg_state *regs;
	int insn_cnt = env->prog->len;
	bool do_print_state = false;
	int prev_insn_idx = -1;

	for (;;) {
		bool exception_exit = false;
		struct bpf_insn *insn;
		u8 class;
		int err;

		env->prev_insn_idx = prev_insn_idx;
		if (env->insn_idx >= insn_cnt) {
			verbose(env, "invalid insn idx %d insn_cnt %d\n",
				env->insn_idx, insn_cnt);
			return -EFAULT;
		}

		insn = &insns[env->insn_idx];
		class = BPF_CLASS(insn->code);

		if (++env->insn_processed > BPF_COMPLEXITY_LIMIT_INSNS) {
			verbose(env,
				"BPF program is too large. Processed %d insn\n",
				env->insn_processed);
			return -E2BIG;
		}

		state->last_insn_idx = env->prev_insn_idx;

		if (is_prune_point(env, env->insn_idx)) {
			err = is_state_visited(env, env->insn_idx);
			if (err < 0)
				return err;
			if (err == 1) {
				/* found equivalent state, can prune the search */
				if (env->log.level & BPF_LOG_LEVEL) {
					if (do_print_state)
						verbose(env, "\nfrom %d to %d%s: safe\n",
							env->prev_insn_idx, env->insn_idx,
							env->cur_state->speculative ?
							" (speculative execution)" : "");
					else
						verbose(env, "%d: safe\n", env->insn_idx);
				}
				goto process_bpf_exit;
			}
		}

		if (is_jmp_point(env, env->insn_idx)) {
			err = push_jmp_history(env, state);
			if (err)
				return err;
		}

		if (signal_pending(current))
			return -EAGAIN;

		if (need_resched())
			cond_resched();

		if (env->log.level & BPF_LOG_LEVEL2 && do_print_state) {
			verbose(env, "\nfrom %d to %d%s:",
				env->prev_insn_idx, env->insn_idx,
				env->cur_state->speculative ?
				" (speculative execution)" : "");
			print_verifier_state(env, state->frame[state->curframe], true);
			do_print_state = false;
		}

		if (env->log.level & BPF_LOG_LEVEL) {
			const struct bpf_insn_cbs cbs = {
				.cb_call	= disasm_kfunc_name,
				.cb_print	= verbose,
				.private_data	= env,
			};

			if (verifier_state_scratched(env))
				print_insn_state(env, state->frame[state->curframe]);

			verbose_linfo(env, env->insn_idx, "; ");
			env->prev_log_pos = env->log.end_pos;
			verbose(env, "%d: ", env->insn_idx);
			print_bpf_insn(&cbs, insn, env->allow_ptr_leaks);
			env->prev_insn_print_pos = env->log.end_pos - env->prev_log_pos;
			env->prev_log_pos = env->log.end_pos;
		}

		if (bpf_prog_is_offloaded(env->prog->aux)) {
			err = bpf_prog_offload_verify_insn(env, env->insn_idx,
							   env->prev_insn_idx);
			if (err)
				return err;
		}

		regs = cur_regs(env);
		sanitize_mark_insn_seen(env);
		prev_insn_idx = env->insn_idx;

		if (class == BPF_ALU || class == BPF_ALU64) {
			err = check_alu_op(env, insn);
			if (err)
				return err;

		} else if (class == BPF_LDX) {
			enum bpf_reg_type src_reg_type;

			/* check for reserved fields is already done */

			/* check src operand */
			err = check_reg_arg(env, insn->src_reg, SRC_OP);
			if (err)
				return err;

			err = check_reg_arg(env, insn->dst_reg, DST_OP_NO_MARK);
			if (err)
				return err;

			src_reg_type = regs[insn->src_reg].type;

			/* check that memory (src_reg + off) is readable,
			 * the state of dst_reg will be updated by this func
			 */
			err = check_mem_access(env, env->insn_idx, insn->src_reg,
					       insn->off, BPF_SIZE(insn->code),
					       BPF_READ, insn->dst_reg, false,
					       BPF_MODE(insn->code) == BPF_MEMSX);
			if (err)
				return err;

			err = save_aux_ptr_type(env, src_reg_type, true);
			if (err)
				return err;
		} else if (class == BPF_STX) {
			enum bpf_reg_type dst_reg_type;

			if (BPF_MODE(insn->code) == BPF_ATOMIC) {
				err = check_atomic(env, env->insn_idx, insn);
				if (err)
					return err;
				env->insn_idx++;
				continue;
			}

			if (BPF_MODE(insn->code) != BPF_MEM || insn->imm != 0) {
				verbose(env, "BPF_STX uses reserved fields\n");
				return -EINVAL;
			}

			/* check src1 operand */
			err = check_reg_arg(env, insn->src_reg, SRC_OP);
			if (err)
				return err;
			/* check src2 operand */
			err = check_reg_arg(env, insn->dst_reg, SRC_OP);
			if (err)
				return err;

			dst_reg_type = regs[insn->dst_reg].type;

			/* check that memory (dst_reg + off) is writeable */
			err = check_mem_access(env, env->insn_idx, insn->dst_reg,
					       insn->off, BPF_SIZE(insn->code),
					       BPF_WRITE, insn->src_reg, false, false);
			if (err)
				return err;

			err = save_aux_ptr_type(env, dst_reg_type, false);
			if (err)
				return err;
		} else if (class == BPF_ST) {
			enum bpf_reg_type dst_reg_type;

			if (BPF_MODE(insn->code) != BPF_MEM ||
			    insn->src_reg != BPF_REG_0) {
				verbose(env, "BPF_ST uses reserved fields\n");
				return -EINVAL;
			}
			/* check src operand */
			err = check_reg_arg(env, insn->dst_reg, SRC_OP);
			if (err)
				return err;

			dst_reg_type = regs[insn->dst_reg].type;

			/* check that memory (dst_reg + off) is writeable */
			err = check_mem_access(env, env->insn_idx, insn->dst_reg,
					       insn->off, BPF_SIZE(insn->code),
					       BPF_WRITE, -1, false, false);
			if (err)
				return err;

			err = save_aux_ptr_type(env, dst_reg_type, false);
			if (err)
				return err;
		} else if (class == BPF_JMP || class == BPF_JMP32) {
			u8 opcode = BPF_OP(insn->code);

			env->jmps_processed++;
			if (opcode == BPF_CALL) {
				if (BPF_SRC(insn->code) != BPF_K ||
				    (insn->src_reg != BPF_PSEUDO_KFUNC_CALL
				     && insn->off != 0) ||
				    (insn->src_reg != BPF_REG_0 &&
				     insn->src_reg != BPF_PSEUDO_CALL &&
				     insn->src_reg != BPF_PSEUDO_KFUNC_CALL) ||
				    insn->dst_reg != BPF_REG_0 ||
				    class == BPF_JMP32) {
					verbose(env, "BPF_CALL uses reserved fields\n");
					return -EINVAL;
				}

				if (env->cur_state->active_lock.ptr) {
					if ((insn->src_reg == BPF_REG_0 && insn->imm != BPF_FUNC_spin_unlock) ||
					    (insn->src_reg == BPF_PSEUDO_CALL) ||
					    (insn->src_reg == BPF_PSEUDO_KFUNC_CALL &&
					     (insn->off != 0 || !is_bpf_graph_api_kfunc(insn->imm)))) {
						verbose(env, "function calls are not allowed while holding a lock\n");
						return -EINVAL;
					}
				}
				if (insn->src_reg == BPF_PSEUDO_CALL) {
					err = check_func_call(env, insn, &env->insn_idx);
				} else if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL) {
					err = check_kfunc_call(env, insn, &env->insn_idx);
					if (!err && is_bpf_throw_kfunc(insn)) {
						exception_exit = true;
						goto process_bpf_exit_full;
					}
				} else {
					err = check_helper_call(env, insn, &env->insn_idx);
				}
				if (err)
					return err;

				mark_reg_scratched(env, BPF_REG_0);
			} else if (opcode == BPF_JA) {
				if (BPF_SRC(insn->code) != BPF_K ||
				    insn->src_reg != BPF_REG_0 ||
				    insn->dst_reg != BPF_REG_0 ||
				    (class == BPF_JMP && insn->imm != 0) ||
				    (class == BPF_JMP32 && insn->off != 0)) {
					verbose(env, "BPF_JA uses reserved fields\n");
					return -EINVAL;
				}

				if (class == BPF_JMP)
					env->insn_idx += insn->off + 1;
				else
					env->insn_idx += insn->imm + 1;
				continue;

			} else if (opcode == BPF_EXIT) {
				if (BPF_SRC(insn->code) != BPF_K ||
				    insn->imm != 0 ||
				    insn->src_reg != BPF_REG_0 ||
				    insn->dst_reg != BPF_REG_0 ||
				    class == BPF_JMP32) {
					verbose(env, "BPF_EXIT uses reserved fields\n");
					return -EINVAL;
				}
process_bpf_exit_full:
				if (env->cur_state->active_lock.ptr &&
				    !in_rbtree_lock_required_cb(env)) {
					verbose(env, "bpf_spin_unlock is missing\n");
					return -EINVAL;
				}

				if (env->cur_state->active_rcu_lock &&
				    !in_rbtree_lock_required_cb(env)) {
					verbose(env, "bpf_rcu_read_unlock is missing\n");
					return -EINVAL;
				}

				/* We must do check_reference_leak here before
				 * prepare_func_exit to handle the case when
				 * state->curframe > 0, it may be a callback
				 * function, for which reference_state must
				 * match caller reference state when it exits.
				 */
				err = check_reference_leak(env, exception_exit);
				if (err)
					return err;

				/* The side effect of the prepare_func_exit
				 * which is being skipped is that it frees
				 * bpf_func_state. Typically, process_bpf_exit
				 * will only be hit with outermost exit.
				 * copy_verifier_state in pop_stack will handle
				 * freeing of any extra bpf_func_state left over
17707 * from not processing all nested function
17708 * exits. We also skip return code checks as
17709 * they are not needed for exceptional exits.
17711 if (exception_exit
)
17712 goto process_bpf_exit
;
17714 if (state
->curframe
) {
17715 /* exit from nested function */
17716 err
= prepare_func_exit(env
, &env
->insn_idx
);
17719 do_print_state
= true;
17723 err
= check_return_code(env
, BPF_REG_0
);
17727 mark_verifier_state_scratched(env
);
17728 update_branch_counts(env
, env
->cur_state
);
17729 err
= pop_stack(env
, &prev_insn_idx
,
17730 &env
->insn_idx
, pop_log
);
17732 if (err
!= -ENOENT
)
17736 do_print_state
= true;
17740 err
= check_cond_jmp_op(env
, insn
, &env
->insn_idx
);
17744 } else if (class == BPF_LD
) {
17745 u8 mode
= BPF_MODE(insn
->code
);
17747 if (mode
== BPF_ABS
|| mode
== BPF_IND
) {
17748 err
= check_ld_abs(env
, insn
);
17752 } else if (mode
== BPF_IMM
) {
17753 err
= check_ld_imm(env
, insn
);
17758 sanitize_mark_insn_seen(env
);
17760 verbose(env
, "invalid BPF_LD mode\n");
17764 verbose(env
, "unknown insn class %d\n", class);
static int find_btf_percpu_datasec(struct btf *btf)
{
	const struct btf_type *t;
	const char *tname;
	int i = 0, n;

	/*
	 * Both vmlinux and module each have their own ".data..percpu"
	 * DATASECs in BTF. So for module's case, we need to skip vmlinux BTF
	 * types to look at only module's own BTF types.
	 */
	n = btf_nr_types(btf);
	if (btf_is_module(btf))
		i = btf_nr_types(btf_vmlinux);

	for (; i < n; i++) {
		t = btf_type_by_id(btf, i);
		if (BTF_INFO_KIND(t->info) != BTF_KIND_DATASEC)
			continue;

		tname = btf_name_by_offset(btf, t->name_off);
		if (!strcmp(tname, ".data..percpu"))
			return i;
	}

	return -ENOENT;
}
17804 /* replace pseudo btf_id with kernel symbol address */
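/* Illustrative sketch (not part of the verifier): a typical source of such an
 * instruction is a program declaring a kernel symbol, e.g.
 *
 *	extern const int bpf_prog_active __ksym;	// hypothetical usage
 *
 * for which the loader emits a two-insn ld_imm64 with
 * src_reg == BPF_PSEUDO_BTF_ID, the symbol's BTF type id in insn[0].imm and
 * the BTF object FD (0 for vmlinux BTF) in insn[1].imm. check_pseudo_btf_id()
 * below resolves the symbol via kallsyms_lookup_name() and stores the kernel
 * address across the imm pair.
 */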
17805 static int check_pseudo_btf_id(struct bpf_verifier_env
*env
,
17806 struct bpf_insn
*insn
,
17807 struct bpf_insn_aux_data
*aux
)
17809 const struct btf_var_secinfo
*vsi
;
17810 const struct btf_type
*datasec
;
17811 struct btf_mod_pair
*btf_mod
;
17812 const struct btf_type
*t
;
17813 const char *sym_name
;
17814 bool percpu
= false;
17815 u32 type
, id
= insn
->imm
;
17819 int i
, btf_fd
, err
;
17821 btf_fd
= insn
[1].imm
;
17823 btf
= btf_get_by_fd(btf_fd
);
17825 verbose(env
, "invalid module BTF object FD specified.\n");
17829 if (!btf_vmlinux
) {
17830 verbose(env
, "kernel is missing BTF, make sure CONFIG_DEBUG_INFO_BTF=y is specified in Kconfig.\n");
17837 t
= btf_type_by_id(btf
, id
);
17839 verbose(env
, "ldimm64 insn specifies invalid btf_id %d.\n", id
);
17844 if (!btf_type_is_var(t
) && !btf_type_is_func(t
)) {
17845 verbose(env
, "pseudo btf_id %d in ldimm64 isn't KIND_VAR or KIND_FUNC\n", id
);
17850 sym_name
= btf_name_by_offset(btf
, t
->name_off
);
17851 addr
= kallsyms_lookup_name(sym_name
);
17853 verbose(env
, "ldimm64 failed to find the address for kernel symbol '%s'.\n",
17858 insn
[0].imm
= (u32
)addr
;
17859 insn
[1].imm
= addr
>> 32;
17861 if (btf_type_is_func(t
)) {
17862 aux
->btf_var
.reg_type
= PTR_TO_MEM
| MEM_RDONLY
;
17863 aux
->btf_var
.mem_size
= 0;
17867 datasec_id
= find_btf_percpu_datasec(btf
);
17868 if (datasec_id
> 0) {
17869 datasec
= btf_type_by_id(btf
, datasec_id
);
17870 for_each_vsi(i
, datasec
, vsi
) {
17871 if (vsi
->type
== id
) {
17879 t
= btf_type_skip_modifiers(btf
, type
, NULL
);
17881 aux
->btf_var
.reg_type
= PTR_TO_BTF_ID
| MEM_PERCPU
;
17882 aux
->btf_var
.btf
= btf
;
17883 aux
->btf_var
.btf_id
= type
;
17884 } else if (!btf_type_is_struct(t
)) {
17885 const struct btf_type
*ret
;
17889 /* resolve the type size of ksym. */
17890 ret
= btf_resolve_size(btf
, t
, &tsize
);
17892 tname
= btf_name_by_offset(btf
, t
->name_off
);
17893 verbose(env
, "ldimm64 unable to resolve the size of type '%s': %ld\n",
17894 tname
, PTR_ERR(ret
));
17898 aux
->btf_var
.reg_type
= PTR_TO_MEM
| MEM_RDONLY
;
17899 aux
->btf_var
.mem_size
= tsize
;
17901 aux
->btf_var
.reg_type
= PTR_TO_BTF_ID
;
17902 aux
->btf_var
.btf
= btf
;
17903 aux
->btf_var
.btf_id
= type
;
17906 /* check whether we recorded this BTF (and maybe module) already */
17907 for (i
= 0; i
< env
->used_btf_cnt
; i
++) {
17908 if (env
->used_btfs
[i
].btf
== btf
) {
17914 if (env
->used_btf_cnt
>= MAX_USED_BTFS
) {
17919 btf_mod
= &env
->used_btfs
[env
->used_btf_cnt
];
17920 btf_mod
->btf
= btf
;
17921 btf_mod
->module
= NULL
;
17923 /* if we reference variables from kernel module, bump its refcount */
17924 if (btf_is_module(btf
)) {
17925 btf_mod
->module
= btf_try_get_module(btf
);
17926 if (!btf_mod
->module
) {
17932 env
->used_btf_cnt
++;
static bool is_tracing_prog_type(enum bpf_prog_type type)
{
	switch (type) {
	case BPF_PROG_TYPE_KPROBE:
	case BPF_PROG_TYPE_TRACEPOINT:
	case BPF_PROG_TYPE_PERF_EVENT:
	case BPF_PROG_TYPE_RAW_TRACEPOINT:
	case BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE:
		return true;
	default:
		return false;
	}
}
17954 static int check_map_prog_compatibility(struct bpf_verifier_env
*env
,
17955 struct bpf_map
*map
,
17956 struct bpf_prog
*prog
)
17959 enum bpf_prog_type prog_type
= resolve_prog_type(prog
);
17961 if (btf_record_has_field(map
->record
, BPF_LIST_HEAD
) ||
17962 btf_record_has_field(map
->record
, BPF_RB_ROOT
)) {
17963 if (is_tracing_prog_type(prog_type
)) {
17964 verbose(env
, "tracing progs cannot use bpf_{list_head,rb_root} yet\n");
17969 if (btf_record_has_field(map
->record
, BPF_SPIN_LOCK
)) {
17970 if (prog_type
== BPF_PROG_TYPE_SOCKET_FILTER
) {
17971 verbose(env
, "socket filter progs cannot use bpf_spin_lock yet\n");
17975 if (is_tracing_prog_type(prog_type
)) {
17976 verbose(env
, "tracing progs cannot use bpf_spin_lock yet\n");
17981 if (btf_record_has_field(map
->record
, BPF_TIMER
)) {
17982 if (is_tracing_prog_type(prog_type
)) {
17983 verbose(env
, "tracing progs cannot use bpf_timer yet\n");
17988 if ((bpf_prog_is_offloaded(prog
->aux
) || bpf_map_is_offloaded(map
)) &&
17989 !bpf_offload_prog_map_match(prog
, map
)) {
17990 verbose(env
, "offload device mismatch between prog and map\n");
17994 if (map
->map_type
== BPF_MAP_TYPE_STRUCT_OPS
) {
17995 verbose(env
, "bpf_struct_ops map cannot be used in prog\n");
17999 if (prog
->aux
->sleepable
)
18000 switch (map
->map_type
) {
18001 case BPF_MAP_TYPE_HASH
:
18002 case BPF_MAP_TYPE_LRU_HASH
:
18003 case BPF_MAP_TYPE_ARRAY
:
18004 case BPF_MAP_TYPE_PERCPU_HASH
:
18005 case BPF_MAP_TYPE_PERCPU_ARRAY
:
18006 case BPF_MAP_TYPE_LRU_PERCPU_HASH
:
18007 case BPF_MAP_TYPE_ARRAY_OF_MAPS
:
18008 case BPF_MAP_TYPE_HASH_OF_MAPS
:
18009 case BPF_MAP_TYPE_RINGBUF
:
18010 case BPF_MAP_TYPE_USER_RINGBUF
:
18011 case BPF_MAP_TYPE_INODE_STORAGE
:
18012 case BPF_MAP_TYPE_SK_STORAGE
:
18013 case BPF_MAP_TYPE_TASK_STORAGE
:
18014 case BPF_MAP_TYPE_CGRP_STORAGE
:
18018 "Sleepable programs can only use array, hash, ringbuf and local storage maps\n");
static bool bpf_map_is_cgroup_storage(struct bpf_map *map)
{
	return (map->map_type == BPF_MAP_TYPE_CGROUP_STORAGE ||
		map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE);
}
/* find and rewrite pseudo imm in ld_imm64 instructions:
 *
 * 1. if it accesses map FD, replace it with actual map pointer.
 * 2. if it accesses btf_id of a VAR, replace it with pointer to the var.
 *
 * NOTE: btf_vmlinux is required for converting pseudo btf_id.
 */
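/* Illustrative sketch (not part of the verifier): a map reference typically
 * enters the program as something like
 *
 *	BPF_LD_MAP_FD(BPF_REG_1, map_fd);	// hypothetical map_fd
 *
 * i.e. a two-insn ld_imm64 with src_reg == BPF_PSEUDO_MAP_FD and the
 * user-space map FD in insn[0].imm. resolve_pseudo_ldimm64() looks the FD up,
 * stores the resulting 'struct bpf_map *' across insn[0].imm / insn[1].imm
 * and records the map in env->used_maps[].
 */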
18038 static int resolve_pseudo_ldimm64(struct bpf_verifier_env
*env
)
18040 struct bpf_insn
*insn
= env
->prog
->insnsi
;
18041 int insn_cnt
= env
->prog
->len
;
18044 err
= bpf_prog_calc_tag(env
->prog
);
18048 for (i
= 0; i
< insn_cnt
; i
++, insn
++) {
18049 if (BPF_CLASS(insn
->code
) == BPF_LDX
&&
18050 ((BPF_MODE(insn
->code
) != BPF_MEM
&& BPF_MODE(insn
->code
) != BPF_MEMSX
) ||
18052 verbose(env
, "BPF_LDX uses reserved fields\n");
18056 if (insn
[0].code
== (BPF_LD
| BPF_IMM
| BPF_DW
)) {
18057 struct bpf_insn_aux_data
*aux
;
18058 struct bpf_map
*map
;
18063 if (i
== insn_cnt
- 1 || insn
[1].code
!= 0 ||
18064 insn
[1].dst_reg
!= 0 || insn
[1].src_reg
!= 0 ||
18065 insn
[1].off
!= 0) {
18066 verbose(env
, "invalid bpf_ld_imm64 insn\n");
18070 if (insn
[0].src_reg
== 0)
18071 /* valid generic load 64-bit imm */
18074 if (insn
[0].src_reg
== BPF_PSEUDO_BTF_ID
) {
18075 aux
= &env
->insn_aux_data
[i
];
18076 err
= check_pseudo_btf_id(env
, insn
, aux
);
18082 if (insn
[0].src_reg
== BPF_PSEUDO_FUNC
) {
18083 aux
= &env
->insn_aux_data
[i
];
18084 aux
->ptr_type
= PTR_TO_FUNC
;
18088 /* In final convert_pseudo_ld_imm64() step, this is
18089 * converted into regular 64-bit imm load insn.
18091 switch (insn
[0].src_reg
) {
18092 case BPF_PSEUDO_MAP_VALUE
:
18093 case BPF_PSEUDO_MAP_IDX_VALUE
:
18095 case BPF_PSEUDO_MAP_FD
:
18096 case BPF_PSEUDO_MAP_IDX
:
18097 if (insn
[1].imm
== 0)
18101 verbose(env
, "unrecognized bpf_ld_imm64 insn\n");
18105 switch (insn
[0].src_reg
) {
18106 case BPF_PSEUDO_MAP_IDX_VALUE
:
18107 case BPF_PSEUDO_MAP_IDX
:
18108 if (bpfptr_is_null(env
->fd_array
)) {
18109 verbose(env
, "fd_idx without fd_array is invalid\n");
18112 if (copy_from_bpfptr_offset(&fd
, env
->fd_array
,
18113 insn
[0].imm
* sizeof(fd
),
18123 map
= __bpf_map_get(f
);
18125 verbose(env
, "fd %d is not pointing to valid bpf_map\n",
18127 return PTR_ERR(map
);
18130 err
= check_map_prog_compatibility(env
, map
, env
->prog
);
18136 aux
= &env
->insn_aux_data
[i
];
18137 if (insn
[0].src_reg
== BPF_PSEUDO_MAP_FD
||
18138 insn
[0].src_reg
== BPF_PSEUDO_MAP_IDX
) {
18139 addr
= (unsigned long)map
;
18141 u32 off
= insn
[1].imm
;
18143 if (off
>= BPF_MAX_VAR_OFF
) {
18144 verbose(env
, "direct value offset of %u is not allowed\n", off
);
18149 if (!map
->ops
->map_direct_value_addr
) {
18150 verbose(env
, "no direct value access support for this map type\n");
18155 err
= map
->ops
->map_direct_value_addr(map
, &addr
, off
);
18157 verbose(env
, "invalid access to map value pointer, value_size=%u off=%u\n",
18158 map
->value_size
, off
);
18163 aux
->map_off
= off
;
18167 insn
[0].imm
= (u32
)addr
;
18168 insn
[1].imm
= addr
>> 32;
18170 /* check whether we recorded this map already */
18171 for (j
= 0; j
< env
->used_map_cnt
; j
++) {
18172 if (env
->used_maps
[j
] == map
) {
18173 aux
->map_index
= j
;
18179 if (env
->used_map_cnt
>= MAX_USED_MAPS
) {
18184 /* hold the map. If the program is rejected by verifier,
18185 * the map will be released by release_maps() or it
18186 * will be used by the valid program until it's unloaded
18187 * and all maps are released in free_used_maps()
18191 aux
->map_index
= env
->used_map_cnt
;
18192 env
->used_maps
[env
->used_map_cnt
++] = map
;
18194 if (bpf_map_is_cgroup_storage(map
) &&
18195 bpf_cgroup_storage_assign(env
->prog
->aux
, map
)) {
18196 verbose(env
, "only one cgroup storage of each type is allowed\n");
18208 /* Basic sanity check before we invest more work here. */
18209 if (!bpf_opcode_in_insntable(insn
->code
)) {
18210 verbose(env
, "unknown opcode %02x\n", insn
->code
);
18215 /* now all pseudo BPF_LD_IMM64 instructions load valid
18216 * 'struct bpf_map *' into a register instead of user map_fd.
18217 * These pointers will be used later by verifier to validate map access.
18222 /* drop refcnt of maps used by the rejected program */
18223 static void release_maps(struct bpf_verifier_env
*env
)
18225 __bpf_free_used_maps(env
->prog
->aux
, env
->used_maps
,
18226 env
->used_map_cnt
);
18229 /* drop refcnt of maps used by the rejected program */
18230 static void release_btfs(struct bpf_verifier_env
*env
)
18232 __bpf_free_used_btfs(env
->prog
->aux
, env
->used_btfs
,
18233 env
->used_btf_cnt
);
/* convert pseudo BPF_LD_IMM64 into generic BPF_LD_IMM64 */
static void convert_pseudo_ld_imm64(struct bpf_verifier_env *env)
{
	struct bpf_insn *insn = env->prog->insnsi;
	int insn_cnt = env->prog->len;
	int i;

	for (i = 0; i < insn_cnt; i++, insn++) {
		if (insn->code != (BPF_LD | BPF_IMM | BPF_DW))
			continue;
		if (insn->src_reg == BPF_PSEUDO_FUNC)
			continue;
		insn->src_reg = 0;
	}
}
/* single env->prog->insnsi[off] instruction was replaced with the range
 * insnsi[off, off + cnt). Adjust corresponding insn_aux_data by copying
 * [0, off) and [off, end) to new locations, so the patched range stays zero.
 */
static void adjust_insn_aux_data(struct bpf_verifier_env *env,
				 struct bpf_insn_aux_data *new_data,
				 struct bpf_prog *new_prog, u32 off, u32 cnt)
{
	struct bpf_insn_aux_data *old_data = env->insn_aux_data;
	struct bpf_insn *insn = new_prog->insnsi;
	u32 old_seen = old_data[off].seen;
	u32 prog_len;
	int i;

	/* aux info at OFF always needs adjustment, no matter fast path
	 * (cnt == 1) is taken or not. There is no guarantee INSN at OFF is the
	 * original insn at old prog.
	 */
	old_data[off].zext_dst = insn_has_def32(env, insn + off + cnt - 1);

	if (cnt == 1)
		return;
	prog_len = new_prog->len;

	memcpy(new_data, old_data, sizeof(struct bpf_insn_aux_data) * off);
	memcpy(new_data + off + cnt - 1, old_data + off,
	       sizeof(struct bpf_insn_aux_data) * (prog_len - off - cnt + 1));
	for (i = off; i < off + cnt - 1; i++) {
		/* Expand insnsi[off]'s seen count to the patched range. */
		new_data[i].seen = old_seen;
		new_data[i].zext_dst = insn_has_def32(env, insn + i);
	}
	env->insn_aux_data = new_data;
	vfree(old_data);
}
static void adjust_subprog_starts(struct bpf_verifier_env *env, u32 off, u32 len)
{
	int i;

	if (len == 1)
		return;
	/* NOTE: fake 'exit' subprog should be updated as well. */
	for (i = 0; i <= env->subprog_cnt; i++) {
		if (env->subprog_info[i].start <= off)
			continue;
		env->subprog_info[i].start += len - 1;
	}
}

static void adjust_poke_descs(struct bpf_prog *prog, u32 off, u32 len)
{
	struct bpf_jit_poke_descriptor *tab = prog->aux->poke_tab;
	int i, sz = prog->aux->size_poke_tab;
	struct bpf_jit_poke_descriptor *desc;

	for (i = 0; i < sz; i++) {
		desc = &tab[i];
		if (desc->insn_idx <= off)
			continue;
		desc->insn_idx += len - 1;
	}
}
static struct bpf_prog *bpf_patch_insn_data(struct bpf_verifier_env *env, u32 off,
					    const struct bpf_insn *patch, u32 len)
{
	struct bpf_prog *new_prog;
	struct bpf_insn_aux_data *new_data = NULL;

	if (len > 1) {
		new_data = vzalloc(array_size(env->prog->len + len - 1,
					      sizeof(struct bpf_insn_aux_data)));
		if (!new_data)
			return NULL;
	}

	new_prog = bpf_patch_insn_single(env->prog, off, patch, len);
	if (IS_ERR(new_prog)) {
		if (PTR_ERR(new_prog) == -ERANGE)
			verbose(env,
				"insn %d cannot be patched due to 16-bit range\n",
				env->insn_aux_data[off].orig_idx);
		vfree(new_data);
		return NULL;
	}
	adjust_insn_aux_data(env, new_data, new_prog, off, len);
	adjust_subprog_starts(env, off, len);
	adjust_poke_descs(new_prog, off, len);
	return new_prog;
}
18344 static int adjust_subprog_starts_after_remove(struct bpf_verifier_env
*env
,
18349 /* find first prog starting at or after off (first to remove) */
18350 for (i
= 0; i
< env
->subprog_cnt
; i
++)
18351 if (env
->subprog_info
[i
].start
>= off
)
18353 /* find first prog starting at or after off + cnt (first to stay) */
18354 for (j
= i
; j
< env
->subprog_cnt
; j
++)
18355 if (env
->subprog_info
[j
].start
>= off
+ cnt
)
18357 /* if j doesn't start exactly at off + cnt, we are just removing
18358 * the front of previous prog
18360 if (env
->subprog_info
[j
].start
!= off
+ cnt
)
18364 struct bpf_prog_aux
*aux
= env
->prog
->aux
;
18367 /* move fake 'exit' subprog as well */
18368 move
= env
->subprog_cnt
+ 1 - j
;
18370 memmove(env
->subprog_info
+ i
,
18371 env
->subprog_info
+ j
,
18372 sizeof(*env
->subprog_info
) * move
);
18373 env
->subprog_cnt
-= j
- i
;
18375 /* remove func_info */
18376 if (aux
->func_info
) {
18377 move
= aux
->func_info_cnt
- j
;
18379 memmove(aux
->func_info
+ i
,
18380 aux
->func_info
+ j
,
18381 sizeof(*aux
->func_info
) * move
);
18382 aux
->func_info_cnt
-= j
- i
;
18383 /* func_info->insn_off is set after all code rewrites,
18384 * in adjust_btf_func() - no need to adjust
18388 /* convert i from "first prog to remove" to "first to adjust" */
18389 if (env
->subprog_info
[i
].start
== off
)
18393 /* update fake 'exit' subprog as well */
18394 for (; i
<= env
->subprog_cnt
; i
++)
18395 env
->subprog_info
[i
].start
-= cnt
;
18400 static int bpf_adj_linfo_after_remove(struct bpf_verifier_env
*env
, u32 off
,
18403 struct bpf_prog
*prog
= env
->prog
;
18404 u32 i
, l_off
, l_cnt
, nr_linfo
;
18405 struct bpf_line_info
*linfo
;
18407 nr_linfo
= prog
->aux
->nr_linfo
;
18411 linfo
= prog
->aux
->linfo
;
18413 /* find first line info to remove, count lines to be removed */
18414 for (i
= 0; i
< nr_linfo
; i
++)
18415 if (linfo
[i
].insn_off
>= off
)
18420 for (; i
< nr_linfo
; i
++)
18421 if (linfo
[i
].insn_off
< off
+ cnt
)
18426 /* First live insn doesn't match first live linfo, it needs to "inherit"
18427 * last removed linfo. prog is already modified, so prog->len == off
18428 * means no live instructions after (tail of the program was removed).
18430 if (prog
->len
!= off
&& l_cnt
&&
18431 (i
== nr_linfo
|| linfo
[i
].insn_off
!= off
+ cnt
)) {
18433 linfo
[--i
].insn_off
= off
+ cnt
;
18436 /* remove the line info which refer to the removed instructions */
18438 memmove(linfo
+ l_off
, linfo
+ i
,
18439 sizeof(*linfo
) * (nr_linfo
- i
));
18441 prog
->aux
->nr_linfo
-= l_cnt
;
18442 nr_linfo
= prog
->aux
->nr_linfo
;
18445 /* pull all linfo[i].insn_off >= off + cnt in by cnt */
18446 for (i
= l_off
; i
< nr_linfo
; i
++)
18447 linfo
[i
].insn_off
-= cnt
;
18449 /* fix up all subprogs (incl. 'exit') which start >= off */
18450 for (i
= 0; i
<= env
->subprog_cnt
; i
++)
18451 if (env
->subprog_info
[i
].linfo_idx
> l_off
) {
18452 /* program may have started in the removed region but
18453 * may not be fully removed
18455 if (env
->subprog_info
[i
].linfo_idx
>= l_off
+ l_cnt
)
18456 env
->subprog_info
[i
].linfo_idx
-= l_cnt
;
18458 env
->subprog_info
[i
].linfo_idx
= l_off
;
static int verifier_remove_insns(struct bpf_verifier_env *env, u32 off, u32 cnt)
{
	struct bpf_insn_aux_data *aux_data = env->insn_aux_data;
	unsigned int orig_prog_len = env->prog->len;
	int err;

	if (bpf_prog_is_offloaded(env->prog->aux))
		bpf_prog_offload_remove_insns(env, off, cnt);

	err = bpf_remove_insns(env->prog, off, cnt);
	if (err)
		return err;

	err = adjust_subprog_starts_after_remove(env, off, cnt);
	if (err)
		return err;

	err = bpf_adj_linfo_after_remove(env, off, cnt);
	if (err)
		return err;

	memmove(aux_data + off, aux_data + off + cnt,
		sizeof(*aux_data) * (orig_prog_len - off - cnt));

	return 0;
}
/* The verifier does more data flow analysis than llvm and will not
 * explore branches that are dead at run time. Malicious programs can
 * have dead code too. Therefore replace all dead at-run-time code
 * with 'ja -1'.
 *
 * Just nops are not optimal, e.g. if they would sit at the end of the
 * program and through another bug we would manage to jump there, then
 * we'd execute beyond program memory otherwise. Returning exception
 * code also wouldn't work since we can have subprogs where the dead
 * code could be located.
 */
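/* Illustrative sketch (not part of the verifier): an instruction that was
 * never marked 'seen' during verification, e.g.
 *
 *	12: r0 = 42		// unreachable
 *
 * is overwritten by sanitize_dead_code() with
 *
 *	12: goto -1		// BPF_JMP_IMM(BPF_JA, 0, 0, -1), a self-jump
 *
 * so that even a stray jump into the dead region cannot run off the end of
 * the program image.
 */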
static void sanitize_dead_code(struct bpf_verifier_env *env)
{
	struct bpf_insn_aux_data *aux_data = env->insn_aux_data;
	struct bpf_insn trap = BPF_JMP_IMM(BPF_JA, 0, 0, -1);
	struct bpf_insn *insn = env->prog->insnsi;
	const int insn_cnt = env->prog->len;
	int i;

	for (i = 0; i < insn_cnt; i++) {
		if (aux_data[i].seen)
			continue;
		memcpy(insn + i, &trap, sizeof(trap));
		aux_data[i].zext_dst = false;
	}
}
static bool insn_is_cond_jump(u8 code)
{
	u8 op;

	op = BPF_OP(code);
	if (BPF_CLASS(code) == BPF_JMP32)
		return op != BPF_JA;

	if (BPF_CLASS(code) != BPF_JMP)
		return false;

	return op != BPF_JA && op != BPF_EXIT && op != BPF_CALL;
}
static void opt_hard_wire_dead_code_branches(struct bpf_verifier_env *env)
{
	struct bpf_insn_aux_data *aux_data = env->insn_aux_data;
	struct bpf_insn ja = BPF_JMP_IMM(BPF_JA, 0, 0, 0);
	struct bpf_insn *insn = env->prog->insnsi;
	const int insn_cnt = env->prog->len;
	int i;

	for (i = 0; i < insn_cnt; i++, insn++) {
		if (!insn_is_cond_jump(insn->code))
			continue;

		if (!aux_data[i + 1].seen)
			ja.off = insn->off;
		else if (!aux_data[i + 1 + insn->off].seen)
			ja.off = 0;
		else
			continue;

		if (bpf_prog_is_offloaded(env->prog->aux))
			bpf_prog_offload_replace_insn(env, i, &ja);

		memcpy(insn, &ja, sizeof(ja));
	}
}
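/* Illustrative sketch (not part of the verifier): using the dead-code marks
 * collected during verification, a conditional jump whose fall-through was
 * never reached, e.g.
 *
 *	5: if r1 == 0 goto +3	// insn 6 never 'seen'
 *
 * is hard-wired by the function above into
 *
 *	5: goto +3
 *
 * while a conditional jump whose target is dead becomes 'goto +0', which
 * opt_remove_nops() below can then strip entirely.
 */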
static int opt_remove_dead_code(struct bpf_verifier_env *env)
{
	struct bpf_insn_aux_data *aux_data = env->insn_aux_data;
	int insn_cnt = env->prog->len;
	int i, err;

	for (i = 0; i < insn_cnt; i++) {
		int j;

		j = 0;
		while (i + j < insn_cnt && !aux_data[i + j].seen)
			j++;
		if (!j)
			continue;

		err = verifier_remove_insns(env, i, j);
		if (err)
			return err;

		insn_cnt = env->prog->len;
	}

	return 0;
}
static int opt_remove_nops(struct bpf_verifier_env *env)
{
	const struct bpf_insn ja = BPF_JMP_IMM(BPF_JA, 0, 0, 0);
	struct bpf_insn *insn = env->prog->insnsi;
	int insn_cnt = env->prog->len;
	int i, err;

	for (i = 0; i < insn_cnt; i++) {
		if (memcmp(&insn[i], &ja, sizeof(ja)))
			continue;

		err = verifier_remove_insns(env, i, 1);
		if (err)
			return err;
		insn_cnt--;
		i--;
	}

	return 0;
}
18603 static int opt_subreg_zext_lo32_rnd_hi32(struct bpf_verifier_env
*env
,
18604 const union bpf_attr
*attr
)
18606 struct bpf_insn
*patch
, zext_patch
[2], rnd_hi32_patch
[4];
18607 struct bpf_insn_aux_data
*aux
= env
->insn_aux_data
;
18608 int i
, patch_len
, delta
= 0, len
= env
->prog
->len
;
18609 struct bpf_insn
*insns
= env
->prog
->insnsi
;
18610 struct bpf_prog
*new_prog
;
18613 rnd_hi32
= attr
->prog_flags
& BPF_F_TEST_RND_HI32
;
18614 zext_patch
[1] = BPF_ZEXT_REG(0);
18615 rnd_hi32_patch
[1] = BPF_ALU64_IMM(BPF_MOV
, BPF_REG_AX
, 0);
18616 rnd_hi32_patch
[2] = BPF_ALU64_IMM(BPF_LSH
, BPF_REG_AX
, 32);
18617 rnd_hi32_patch
[3] = BPF_ALU64_REG(BPF_OR
, 0, BPF_REG_AX
);
18618 for (i
= 0; i
< len
; i
++) {
18619 int adj_idx
= i
+ delta
;
18620 struct bpf_insn insn
;
18623 insn
= insns
[adj_idx
];
18624 load_reg
= insn_def_regno(&insn
);
18625 if (!aux
[adj_idx
].zext_dst
) {
18633 class = BPF_CLASS(code
);
18634 if (load_reg
== -1)
18637 /* NOTE: arg "reg" (the fourth one) is only used for
18638 * BPF_STX + SRC_OP, so it is safe to pass NULL
18641 if (is_reg64(env
, &insn
, load_reg
, NULL
, DST_OP
)) {
18642 if (class == BPF_LD
&&
18643 BPF_MODE(code
) == BPF_IMM
)
18648 /* ctx load could be transformed into wider load. */
18649 if (class == BPF_LDX
&&
18650 aux
[adj_idx
].ptr_type
== PTR_TO_CTX
)
18653 imm_rnd
= get_random_u32();
18654 rnd_hi32_patch
[0] = insn
;
18655 rnd_hi32_patch
[1].imm
= imm_rnd
;
18656 rnd_hi32_patch
[3].dst_reg
= load_reg
;
18657 patch
= rnd_hi32_patch
;
18659 goto apply_patch_buffer
;
18662 /* Add in an zero-extend instruction if a) the JIT has requested
18663 * it or b) it's a CMPXCHG.
18665 * The latter is because: BPF_CMPXCHG always loads a value into
18666 * R0, therefore always zero-extends. However some archs'
18667 * equivalent instruction only does this load when the
18668 * comparison is successful. This detail of CMPXCHG is
18669 * orthogonal to the general zero-extension behaviour of the
18670 * CPU, so it's treated independently of bpf_jit_needs_zext.
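 *
 * Illustrative sketch (not part of the verifier): for a 32-bit definition
 * such as 'w2 = w3' on a JIT that requests explicit zero-extension, the
 * patch built below appends BPF_ZEXT_REG(BPF_REG_2), a specially marked
 * 32-bit move that the JIT lowers to an explicit clearing of the upper
 * 32 bits of r2.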
18672 if (!bpf_jit_needs_zext() && !is_cmpxchg_insn(&insn
))
18675 /* Zero-extension is done by the caller. */
18676 if (bpf_pseudo_kfunc_call(&insn
))
18679 if (WARN_ON(load_reg
== -1)) {
18680 verbose(env
, "verifier bug. zext_dst is set, but no reg is defined\n");
18684 zext_patch
[0] = insn
;
18685 zext_patch
[1].dst_reg
= load_reg
;
18686 zext_patch
[1].src_reg
= load_reg
;
18687 patch
= zext_patch
;
18689 apply_patch_buffer
:
18690 new_prog
= bpf_patch_insn_data(env
, adj_idx
, patch
, patch_len
);
18693 env
->prog
= new_prog
;
18694 insns
= new_prog
->insnsi
;
18695 aux
= env
->insn_aux_data
;
18696 delta
+= patch_len
- 1;
/* convert load instructions that access fields of a context type into a
 * sequence of instructions that access fields of the underlying structure:
 *	struct __sk_buff    -> struct sk_buff
 *	struct bpf_sock_ops -> struct sock
 */
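/* Illustrative sketch (not part of the verifier): a program-visible access
 * such as
 *
 *	r0 = *(u32 *)(r1 + offsetof(struct __sk_buff, len));
 *
 * is rewritten by the program type's convert_ctx_access() callback into a
 * load from the real underlying object, conceptually
 *
 *	r0 = *(u32 *)(r1 + offsetof(struct sk_buff, len));
 *
 * possibly expanding to several instructions when the field has to be
 * derived rather than read directly.
 */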
18707 static int convert_ctx_accesses(struct bpf_verifier_env
*env
)
18709 const struct bpf_verifier_ops
*ops
= env
->ops
;
18710 int i
, cnt
, size
, ctx_field_size
, delta
= 0;
18711 const int insn_cnt
= env
->prog
->len
;
18712 struct bpf_insn insn_buf
[16], *insn
;
18713 u32 target_size
, size_default
, off
;
18714 struct bpf_prog
*new_prog
;
18715 enum bpf_access_type type
;
18716 bool is_narrower_load
;
18718 if (ops
->gen_prologue
|| env
->seen_direct_write
) {
18719 if (!ops
->gen_prologue
) {
18720 verbose(env
, "bpf verifier is misconfigured\n");
18723 cnt
= ops
->gen_prologue(insn_buf
, env
->seen_direct_write
,
18725 if (cnt
>= ARRAY_SIZE(insn_buf
)) {
18726 verbose(env
, "bpf verifier is misconfigured\n");
18729 new_prog
= bpf_patch_insn_data(env
, 0, insn_buf
, cnt
);
18733 env
->prog
= new_prog
;
18738 if (bpf_prog_is_offloaded(env
->prog
->aux
))
18741 insn
= env
->prog
->insnsi
+ delta
;
18743 for (i
= 0; i
< insn_cnt
; i
++, insn
++) {
18744 bpf_convert_ctx_access_t convert_ctx_access
;
18747 if (insn
->code
== (BPF_LDX
| BPF_MEM
| BPF_B
) ||
18748 insn
->code
== (BPF_LDX
| BPF_MEM
| BPF_H
) ||
18749 insn
->code
== (BPF_LDX
| BPF_MEM
| BPF_W
) ||
18750 insn
->code
== (BPF_LDX
| BPF_MEM
| BPF_DW
) ||
18751 insn
->code
== (BPF_LDX
| BPF_MEMSX
| BPF_B
) ||
18752 insn
->code
== (BPF_LDX
| BPF_MEMSX
| BPF_H
) ||
18753 insn
->code
== (BPF_LDX
| BPF_MEMSX
| BPF_W
)) {
18755 } else if (insn
->code
== (BPF_STX
| BPF_MEM
| BPF_B
) ||
18756 insn
->code
== (BPF_STX
| BPF_MEM
| BPF_H
) ||
18757 insn
->code
== (BPF_STX
| BPF_MEM
| BPF_W
) ||
18758 insn
->code
== (BPF_STX
| BPF_MEM
| BPF_DW
) ||
18759 insn
->code
== (BPF_ST
| BPF_MEM
| BPF_B
) ||
18760 insn
->code
== (BPF_ST
| BPF_MEM
| BPF_H
) ||
18761 insn
->code
== (BPF_ST
| BPF_MEM
| BPF_W
) ||
18762 insn
->code
== (BPF_ST
| BPF_MEM
| BPF_DW
)) {
18768 if (type
== BPF_WRITE
&&
18769 env
->insn_aux_data
[i
+ delta
].sanitize_stack_spill
) {
18770 struct bpf_insn patch
[] = {
18775 cnt
= ARRAY_SIZE(patch
);
18776 new_prog
= bpf_patch_insn_data(env
, i
+ delta
, patch
, cnt
);
18781 env
->prog
= new_prog
;
18782 insn
= new_prog
->insnsi
+ i
+ delta
;
18786 switch ((int)env
->insn_aux_data
[i
+ delta
].ptr_type
) {
18788 if (!ops
->convert_ctx_access
)
18790 convert_ctx_access
= ops
->convert_ctx_access
;
18792 case PTR_TO_SOCKET
:
18793 case PTR_TO_SOCK_COMMON
:
18794 convert_ctx_access
= bpf_sock_convert_ctx_access
;
18796 case PTR_TO_TCP_SOCK
:
18797 convert_ctx_access
= bpf_tcp_sock_convert_ctx_access
;
18799 case PTR_TO_XDP_SOCK
:
18800 convert_ctx_access
= bpf_xdp_sock_convert_ctx_access
;
18802 case PTR_TO_BTF_ID
:
18803 case PTR_TO_BTF_ID
| PTR_UNTRUSTED
:
18804 /* PTR_TO_BTF_ID | MEM_ALLOC always has a valid lifetime, unlike
18805 * PTR_TO_BTF_ID, and an active ref_obj_id, but the same cannot
18806 * be said once it is marked PTR_UNTRUSTED, hence we must handle
18807 * any faults for loads into such types. BPF_WRITE is disallowed
18810 case PTR_TO_BTF_ID
| MEM_ALLOC
| PTR_UNTRUSTED
:
18811 if (type
== BPF_READ
) {
18812 if (BPF_MODE(insn
->code
) == BPF_MEM
)
18813 insn
->code
= BPF_LDX
| BPF_PROBE_MEM
|
18814 BPF_SIZE((insn
)->code
);
18816 insn
->code
= BPF_LDX
| BPF_PROBE_MEMSX
|
18817 BPF_SIZE((insn
)->code
);
18818 env
->prog
->aux
->num_exentries
++;
18825 ctx_field_size
= env
->insn_aux_data
[i
+ delta
].ctx_field_size
;
18826 size
= BPF_LDST_BYTES(insn
);
18827 mode
= BPF_MODE(insn
->code
);
18829 /* If the read access is a narrower load of the field,
18830 * convert to a 4/8-byte load, to minimum program type specific
18831 * convert_ctx_access changes. If conversion is successful,
18832 * we will apply proper mask to the result.
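 *
 * Illustrative sketch (not part of the verifier): a 1-byte read at offset 2
 * of a 4-byte context field is widened to a 4-byte load of the aligned
 * field and then narrowed back with
 *
 *	r0 >>= bpf_ctx_narrow_access_offset(off, 1, 4) * 8;
 *	r0 &= 0xff;
 *
 * which matches the shift/mask instructions emitted further down.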
18834 is_narrower_load
= size
< ctx_field_size
;
18835 size_default
= bpf_ctx_off_adjust_machine(ctx_field_size
);
18837 if (is_narrower_load
) {
18840 if (type
== BPF_WRITE
) {
18841 verbose(env
, "bpf verifier narrow ctx access misconfigured\n");
18846 if (ctx_field_size
== 4)
18848 else if (ctx_field_size
== 8)
18849 size_code
= BPF_DW
;
18851 insn
->off
= off
& ~(size_default
- 1);
18852 insn
->code
= BPF_LDX
| BPF_MEM
| size_code
;
18856 cnt
= convert_ctx_access(type
, insn
, insn_buf
, env
->prog
,
18858 if (cnt
== 0 || cnt
>= ARRAY_SIZE(insn_buf
) ||
18859 (ctx_field_size
&& !target_size
)) {
18860 verbose(env
, "bpf verifier is misconfigured\n");
18864 if (is_narrower_load
&& size
< target_size
) {
18865 u8 shift
= bpf_ctx_narrow_access_offset(
18866 off
, size
, size_default
) * 8;
18867 if (shift
&& cnt
+ 1 >= ARRAY_SIZE(insn_buf
)) {
18868 verbose(env
, "bpf verifier narrow ctx load misconfigured\n");
18871 if (ctx_field_size
<= 4) {
18873 insn_buf
[cnt
++] = BPF_ALU32_IMM(BPF_RSH
,
18876 insn_buf
[cnt
++] = BPF_ALU32_IMM(BPF_AND
, insn
->dst_reg
,
18877 (1 << size
* 8) - 1);
18880 insn_buf
[cnt
++] = BPF_ALU64_IMM(BPF_RSH
,
18883 insn_buf
[cnt
++] = BPF_ALU32_IMM(BPF_AND
, insn
->dst_reg
,
18884 (1ULL << size
* 8) - 1);
18887 if (mode
== BPF_MEMSX
)
18888 insn_buf
[cnt
++] = BPF_RAW_INSN(BPF_ALU64
| BPF_MOV
| BPF_X
,
18889 insn
->dst_reg
, insn
->dst_reg
,
18892 new_prog
= bpf_patch_insn_data(env
, i
+ delta
, insn_buf
, cnt
);
18898 /* keep walking new program and skip insns we just inserted */
18899 env
->prog
= new_prog
;
18900 insn
= new_prog
->insnsi
+ i
+ delta
;
18906 static int jit_subprogs(struct bpf_verifier_env
*env
)
18908 struct bpf_prog
*prog
= env
->prog
, **func
, *tmp
;
18909 int i
, j
, subprog_start
, subprog_end
= 0, len
, subprog
;
18910 struct bpf_map
*map_ptr
;
18911 struct bpf_insn
*insn
;
18912 void *old_bpf_func
;
18913 int err
, num_exentries
;
18915 if (env
->subprog_cnt
<= 1)
18918 for (i
= 0, insn
= prog
->insnsi
; i
< prog
->len
; i
++, insn
++) {
18919 if (!bpf_pseudo_func(insn
) && !bpf_pseudo_call(insn
))
18922 /* Upon error here we cannot fall back to interpreter but
18923 * need a hard reject of the program. Thus -EFAULT is
18924 * propagated in any case.
18926 subprog
= find_subprog(env
, i
+ insn
->imm
+ 1);
18928 WARN_ONCE(1, "verifier bug. No program starts at insn %d\n",
18929 i
+ insn
->imm
+ 1);
18932 /* temporarily remember subprog id inside insn instead of
18933 * aux_data, since next loop will split up all insns into funcs
18935 insn
->off
= subprog
;
18936 /* remember original imm in case JIT fails and fallback
18937 * to interpreter will be needed
18939 env
->insn_aux_data
[i
].call_imm
= insn
->imm
;
18940 /* point imm to __bpf_call_base+1 from JITs point of view */
18942 if (bpf_pseudo_func(insn
))
18943 /* jit (e.g. x86_64) may emit fewer instructions
18944 * if it learns a u32 imm is the same as a u64 imm.
18945 * Force a non zero here.
18950 err
= bpf_prog_alloc_jited_linfo(prog
);
18952 goto out_undo_insn
;
18955 func
= kcalloc(env
->subprog_cnt
, sizeof(prog
), GFP_KERNEL
);
18957 goto out_undo_insn
;
18959 for (i
= 0; i
< env
->subprog_cnt
; i
++) {
18960 subprog_start
= subprog_end
;
18961 subprog_end
= env
->subprog_info
[i
+ 1].start
;
18963 len
= subprog_end
- subprog_start
;
18964 /* bpf_prog_run() doesn't call subprogs directly,
18965 * hence main prog stats include the runtime of subprogs.
18966 * subprogs don't have IDs and not reachable via prog_get_next_id
18967 * func[i]->stats will never be accessed and stays NULL
18969 func
[i
] = bpf_prog_alloc_no_stats(bpf_prog_size(len
), GFP_USER
);
18972 memcpy(func
[i
]->insnsi
, &prog
->insnsi
[subprog_start
],
18973 len
* sizeof(struct bpf_insn
));
18974 func
[i
]->type
= prog
->type
;
18975 func
[i
]->len
= len
;
18976 if (bpf_prog_calc_tag(func
[i
]))
18978 func
[i
]->is_func
= 1;
18979 func
[i
]->aux
->func_idx
= i
;
18980 /* Below members will be freed only at prog->aux */
18981 func
[i
]->aux
->btf
= prog
->aux
->btf
;
18982 func
[i
]->aux
->func_info
= prog
->aux
->func_info
;
18983 func
[i
]->aux
->func_info_cnt
= prog
->aux
->func_info_cnt
;
18984 func
[i
]->aux
->poke_tab
= prog
->aux
->poke_tab
;
18985 func
[i
]->aux
->size_poke_tab
= prog
->aux
->size_poke_tab
;
18987 for (j
= 0; j
< prog
->aux
->size_poke_tab
; j
++) {
18988 struct bpf_jit_poke_descriptor
*poke
;
18990 poke
= &prog
->aux
->poke_tab
[j
];
18991 if (poke
->insn_idx
< subprog_end
&&
18992 poke
->insn_idx
>= subprog_start
)
18993 poke
->aux
= func
[i
]->aux
;
18996 func
[i
]->aux
->name
[0] = 'F';
18997 func
[i
]->aux
->stack_depth
= env
->subprog_info
[i
].stack_depth
;
18998 func
[i
]->jit_requested
= 1;
18999 func
[i
]->blinding_requested
= prog
->blinding_requested
;
19000 func
[i
]->aux
->kfunc_tab
= prog
->aux
->kfunc_tab
;
19001 func
[i
]->aux
->kfunc_btf_tab
= prog
->aux
->kfunc_btf_tab
;
19002 func
[i
]->aux
->linfo
= prog
->aux
->linfo
;
19003 func
[i
]->aux
->nr_linfo
= prog
->aux
->nr_linfo
;
19004 func
[i
]->aux
->jited_linfo
= prog
->aux
->jited_linfo
;
19005 func
[i
]->aux
->linfo_idx
= env
->subprog_info
[i
].linfo_idx
;
19007 insn
= func
[i
]->insnsi
;
19008 for (j
= 0; j
< func
[i
]->len
; j
++, insn
++) {
19009 if (BPF_CLASS(insn
->code
) == BPF_LDX
&&
19010 (BPF_MODE(insn
->code
) == BPF_PROBE_MEM
||
19011 BPF_MODE(insn
->code
) == BPF_PROBE_MEMSX
))
19014 func
[i
]->aux
->num_exentries
= num_exentries
;
19015 func
[i
]->aux
->tail_call_reachable
= env
->subprog_info
[i
].tail_call_reachable
;
19016 func
[i
]->aux
->exception_cb
= env
->subprog_info
[i
].is_exception_cb
;
19018 func
[i
]->aux
->exception_boundary
= env
->seen_exception
;
19019 func
[i
] = bpf_int_jit_compile(func
[i
]);
19020 if (!func
[i
]->jited
) {
19027 /* at this point all bpf functions were successfully JITed
19028 * now populate all bpf_calls with correct addresses and
19029 * run last pass of JIT
19031 for (i
= 0; i
< env
->subprog_cnt
; i
++) {
19032 insn
= func
[i
]->insnsi
;
19033 for (j
= 0; j
< func
[i
]->len
; j
++, insn
++) {
19034 if (bpf_pseudo_func(insn
)) {
19035 subprog
= insn
->off
;
19036 insn
[0].imm
= (u32
)(long)func
[subprog
]->bpf_func
;
19037 insn
[1].imm
= ((u64
)(long)func
[subprog
]->bpf_func
) >> 32;
19040 if (!bpf_pseudo_call(insn
))
19042 subprog
= insn
->off
;
19043 insn
->imm
= BPF_CALL_IMM(func
[subprog
]->bpf_func
);
19046 /* we use the aux data to keep a list of the start addresses
19047 * of the JITed images for each function in the program
19049 * for some architectures, such as powerpc64, the imm field
19050 * might not be large enough to hold the offset of the start
19051 * address of the callee's JITed image from __bpf_call_base
19053 * in such cases, we can lookup the start address of a callee
19054 * by using its subprog id, available from the off field of
19055 * the call instruction, as an index for this list
19057 func
[i
]->aux
->func
= func
;
19058 func
[i
]->aux
->func_cnt
= env
->subprog_cnt
- env
->hidden_subprog_cnt
;
19059 func
[i
]->aux
->real_func_cnt
= env
->subprog_cnt
;
19061 for (i
= 0; i
< env
->subprog_cnt
; i
++) {
19062 old_bpf_func
= func
[i
]->bpf_func
;
19063 tmp
= bpf_int_jit_compile(func
[i
]);
19064 if (tmp
!= func
[i
] || func
[i
]->bpf_func
!= old_bpf_func
) {
19065 verbose(env
, "JIT doesn't support bpf-to-bpf calls\n");
19072 /* finally lock prog and jit images for all functions and
19073 * populate kallsysm. Begin at the first subprogram, since
19074 * bpf_prog_load will add the kallsyms for the main program.
19076 for (i
= 1; i
< env
->subprog_cnt
; i
++) {
19077 bpf_prog_lock_ro(func
[i
]);
19078 bpf_prog_kallsyms_add(func
[i
]);
19081 /* Last step: make now unused interpreter insns from main
19082 * prog consistent for later dump requests, so they can
19083 * later look the same as if they were interpreted only.
19085 for (i
= 0, insn
= prog
->insnsi
; i
< prog
->len
; i
++, insn
++) {
19086 if (bpf_pseudo_func(insn
)) {
19087 insn
[0].imm
= env
->insn_aux_data
[i
].call_imm
;
19088 insn
[1].imm
= insn
->off
;
19092 if (!bpf_pseudo_call(insn
))
19094 insn
->off
= env
->insn_aux_data
[i
].call_imm
;
19095 subprog
= find_subprog(env
, i
+ insn
->off
+ 1);
19096 insn
->imm
= subprog
;
19100 prog
->bpf_func
= func
[0]->bpf_func
;
19101 prog
->jited_len
= func
[0]->jited_len
;
19102 prog
->aux
->extable
= func
[0]->aux
->extable
;
19103 prog
->aux
->num_exentries
= func
[0]->aux
->num_exentries
;
19104 prog
->aux
->func
= func
;
19105 prog
->aux
->func_cnt
= env
->subprog_cnt
- env
->hidden_subprog_cnt
;
19106 prog
->aux
->real_func_cnt
= env
->subprog_cnt
;
19107 prog
->aux
->bpf_exception_cb
= (void *)func
[env
->exception_callback_subprog
]->bpf_func
;
19108 prog
->aux
->exception_boundary
= func
[0]->aux
->exception_boundary
;
19109 bpf_prog_jit_attempt_done(prog
);
19112 /* We failed JIT'ing, so at this point we need to unregister poke
19113 * descriptors from subprogs, so that kernel is not attempting to
19114 * patch it anymore as we're freeing the subprog JIT memory.
19116 for (i
= 0; i
< prog
->aux
->size_poke_tab
; i
++) {
19117 map_ptr
= prog
->aux
->poke_tab
[i
].tail_call
.map
;
19118 map_ptr
->ops
->map_poke_untrack(map_ptr
, prog
->aux
);
19120 /* At this point we're guaranteed that poke descriptors are not
19121 * live anymore. We can just unlink its descriptor table as it's
19122 * released with the main prog.
19124 for (i
= 0; i
< env
->subprog_cnt
; i
++) {
19127 func
[i
]->aux
->poke_tab
= NULL
;
19128 bpf_jit_free(func
[i
]);
19132 /* cleanup main prog to be interpreted */
19133 prog
->jit_requested
= 0;
19134 prog
->blinding_requested
= 0;
19135 for (i
= 0, insn
= prog
->insnsi
; i
< prog
->len
; i
++, insn
++) {
19136 if (!bpf_pseudo_call(insn
))
19139 insn
->imm
= env
->insn_aux_data
[i
].call_imm
;
19141 bpf_prog_jit_attempt_done(prog
);
19145 static int fixup_call_args(struct bpf_verifier_env
*env
)
19147 #ifndef CONFIG_BPF_JIT_ALWAYS_ON
19148 struct bpf_prog
*prog
= env
->prog
;
19149 struct bpf_insn
*insn
= prog
->insnsi
;
19150 bool has_kfunc_call
= bpf_prog_has_kfunc_call(prog
);
19155 if (env
->prog
->jit_requested
&&
19156 !bpf_prog_is_offloaded(env
->prog
->aux
)) {
19157 err
= jit_subprogs(env
);
19160 if (err
== -EFAULT
)
19163 #ifndef CONFIG_BPF_JIT_ALWAYS_ON
19164 if (has_kfunc_call
) {
19165 verbose(env
, "calling kernel functions are not allowed in non-JITed programs\n");
19168 if (env
->subprog_cnt
> 1 && env
->prog
->aux
->tail_call_reachable
) {
19169 /* When JIT fails the progs with bpf2bpf calls and tail_calls
19170 * have to be rejected, since interpreter doesn't support them yet.
19172 verbose(env
, "tail_calls are not allowed in non-JITed programs with bpf-to-bpf calls\n");
19175 for (i
= 0; i
< prog
->len
; i
++, insn
++) {
19176 if (bpf_pseudo_func(insn
)) {
19177 /* When JIT fails the progs with callback calls
19178 * have to be rejected, since interpreter doesn't support them yet.
19180 verbose(env
, "callbacks are not allowed in non-JITed programs\n");
19184 if (!bpf_pseudo_call(insn
))
19186 depth
= get_callee_stack_depth(env
, insn
, i
);
19189 bpf_patch_call_args(insn
, depth
);
19196 /* replace a generic kfunc with a specialized version if necessary */
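/* Illustrative sketch (not part of the verifier): one such specialization,
 * visible in the body below, is bpf_dynptr_from_skb(): when the program has
 * no write access to packet data, the call target is rewritten to
 * bpf_dynptr_from_skb_rdonly() so the returned dynptr is read-only.
 * Device-bound kfuncs are similarly redirected to the netdev-provided
 * implementation when one exists.
 */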
19197 static void specialize_kfunc(struct bpf_verifier_env
*env
,
19198 u32 func_id
, u16 offset
, unsigned long *addr
)
19200 struct bpf_prog
*prog
= env
->prog
;
19201 bool seen_direct_write
;
19205 if (bpf_dev_bound_kfunc_id(func_id
)) {
19206 xdp_kfunc
= bpf_dev_bound_resolve_kfunc(prog
, func_id
);
19208 *addr
= (unsigned long)xdp_kfunc
;
19211 /* fallback to default kfunc when not supported by netdev */
19217 if (func_id
== special_kfunc_list
[KF_bpf_dynptr_from_skb
]) {
19218 seen_direct_write
= env
->seen_direct_write
;
19219 is_rdonly
= !may_access_direct_pkt_data(env
, NULL
, BPF_WRITE
);
19222 *addr
= (unsigned long)bpf_dynptr_from_skb_rdonly
;
19224 /* restore env->seen_direct_write to its original value, since
19225 * may_access_direct_pkt_data mutates it
19227 env
->seen_direct_write
= seen_direct_write
;
19231 static void __fixup_collection_insert_kfunc(struct bpf_insn_aux_data
*insn_aux
,
19232 u16 struct_meta_reg
,
19233 u16 node_offset_reg
,
19234 struct bpf_insn
*insn
,
19235 struct bpf_insn
*insn_buf
,
19238 struct btf_struct_meta
*kptr_struct_meta
= insn_aux
->kptr_struct_meta
;
19239 struct bpf_insn addr
[2] = { BPF_LD_IMM64(struct_meta_reg
, (long)kptr_struct_meta
) };
19241 insn_buf
[0] = addr
[0];
19242 insn_buf
[1] = addr
[1];
19243 insn_buf
[2] = BPF_MOV64_IMM(node_offset_reg
, insn_aux
->insert_off
);
19244 insn_buf
[3] = *insn
;
19248 static int fixup_kfunc_call(struct bpf_verifier_env
*env
, struct bpf_insn
*insn
,
19249 struct bpf_insn
*insn_buf
, int insn_idx
, int *cnt
)
19251 const struct bpf_kfunc_desc
*desc
;
19254 verbose(env
, "invalid kernel function call not eliminated in verifier pass\n");
19260 /* insn->imm has the btf func_id. Replace it with an offset relative to
19261 * __bpf_call_base, unless the JIT needs to call functions that are
19262 * further than 32 bits away (bpf_jit_supports_far_kfunc_call()).
19264 desc
= find_kfunc_desc(env
->prog
, insn
->imm
, insn
->off
);
19266 verbose(env
, "verifier internal error: kernel function descriptor not found for func_id %u\n",
19271 if (!bpf_jit_supports_far_kfunc_call())
19272 insn
->imm
= BPF_CALL_IMM(desc
->addr
);
19275 if (desc
->func_id
== special_kfunc_list
[KF_bpf_obj_new_impl
] ||
19276 desc
->func_id
== special_kfunc_list
[KF_bpf_percpu_obj_new_impl
]) {
19277 struct btf_struct_meta
*kptr_struct_meta
= env
->insn_aux_data
[insn_idx
].kptr_struct_meta
;
19278 struct bpf_insn addr
[2] = { BPF_LD_IMM64(BPF_REG_2
, (long)kptr_struct_meta
) };
19279 u64 obj_new_size
= env
->insn_aux_data
[insn_idx
].obj_new_size
;
19281 if (desc
->func_id
== special_kfunc_list
[KF_bpf_percpu_obj_new_impl
] && kptr_struct_meta
) {
19282 verbose(env
, "verifier internal error: NULL kptr_struct_meta expected at insn_idx %d\n",
19287 insn_buf
[0] = BPF_MOV64_IMM(BPF_REG_1
, obj_new_size
);
19288 insn_buf
[1] = addr
[0];
19289 insn_buf
[2] = addr
[1];
19290 insn_buf
[3] = *insn
;
19292 } else if (desc
->func_id
== special_kfunc_list
[KF_bpf_obj_drop_impl
] ||
19293 desc
->func_id
== special_kfunc_list
[KF_bpf_percpu_obj_drop_impl
] ||
19294 desc
->func_id
== special_kfunc_list
[KF_bpf_refcount_acquire_impl
]) {
19295 struct btf_struct_meta
*kptr_struct_meta
= env
->insn_aux_data
[insn_idx
].kptr_struct_meta
;
19296 struct bpf_insn addr
[2] = { BPF_LD_IMM64(BPF_REG_2
, (long)kptr_struct_meta
) };
19298 if (desc
->func_id
== special_kfunc_list
[KF_bpf_percpu_obj_drop_impl
] && kptr_struct_meta
) {
19299 verbose(env
, "verifier internal error: NULL kptr_struct_meta expected at insn_idx %d\n",
19304 if (desc
->func_id
== special_kfunc_list
[KF_bpf_refcount_acquire_impl
] &&
19305 !kptr_struct_meta
) {
19306 verbose(env
, "verifier internal error: kptr_struct_meta expected at insn_idx %d\n",
19311 insn_buf
[0] = addr
[0];
19312 insn_buf
[1] = addr
[1];
19313 insn_buf
[2] = *insn
;
19315 } else if (desc
->func_id
== special_kfunc_list
[KF_bpf_list_push_back_impl
] ||
19316 desc
->func_id
== special_kfunc_list
[KF_bpf_list_push_front_impl
] ||
19317 desc
->func_id
== special_kfunc_list
[KF_bpf_rbtree_add_impl
]) {
19318 struct btf_struct_meta
*kptr_struct_meta
= env
->insn_aux_data
[insn_idx
].kptr_struct_meta
;
19319 int struct_meta_reg
= BPF_REG_3
;
19320 int node_offset_reg
= BPF_REG_4
;
19322 /* rbtree_add has extra 'less' arg, so args-to-fixup are in diff regs */
19323 if (desc
->func_id
== special_kfunc_list
[KF_bpf_rbtree_add_impl
]) {
19324 struct_meta_reg
= BPF_REG_4
;
19325 node_offset_reg
= BPF_REG_5
;
19328 if (!kptr_struct_meta
) {
19329 verbose(env
, "verifier internal error: kptr_struct_meta expected at insn_idx %d\n",
19334 __fixup_collection_insert_kfunc(&env
->insn_aux_data
[insn_idx
], struct_meta_reg
,
19335 node_offset_reg
, insn
, insn_buf
, cnt
);
19336 } else if (desc
->func_id
== special_kfunc_list
[KF_bpf_cast_to_kern_ctx
] ||
19337 desc
->func_id
== special_kfunc_list
[KF_bpf_rdonly_cast
]) {
19338 insn_buf
[0] = BPF_MOV64_REG(BPF_REG_0
, BPF_REG_1
);
19344 /* The function requires that first instruction in 'patch' is insnsi[prog->len - 1] */
19345 static int add_hidden_subprog(struct bpf_verifier_env
*env
, struct bpf_insn
*patch
, int len
)
19347 struct bpf_subprog_info
*info
= env
->subprog_info
;
19348 int cnt
= env
->subprog_cnt
;
19349 struct bpf_prog
*prog
;
19351 /* We only reserve one slot for hidden subprogs in subprog_info. */
19352 if (env
->hidden_subprog_cnt
) {
19353 verbose(env
, "verifier internal error: only one hidden subprog supported\n");
19356 /* We're not patching any existing instruction, just appending the new
19357 * ones for the hidden subprog. Hence all of the adjustment operations
19358 * in bpf_patch_insn_data are no-ops.
19360 prog
= bpf_patch_insn_data(env
, env
->prog
->len
- 1, patch
, len
);
19364 info
[cnt
+ 1].start
= info
[cnt
].start
;
19365 info
[cnt
].start
= prog
->len
- len
+ 1;
19366 env
->subprog_cnt
++;
19367 env
->hidden_subprog_cnt
++;
/* Do various post-verification rewrites in a single program pass.
 * These rewrites simplify JIT and interpreter implementations.
 */
19374 static int do_misc_fixups(struct bpf_verifier_env
*env
)
19376 struct bpf_prog
*prog
= env
->prog
;
19377 enum bpf_attach_type eatype
= prog
->expected_attach_type
;
19378 enum bpf_prog_type prog_type
= resolve_prog_type(prog
);
19379 struct bpf_insn
*insn
= prog
->insnsi
;
19380 const struct bpf_func_proto
*fn
;
19381 const int insn_cnt
= prog
->len
;
19382 const struct bpf_map_ops
*ops
;
19383 struct bpf_insn_aux_data
*aux
;
19384 struct bpf_insn insn_buf
[16];
19385 struct bpf_prog
*new_prog
;
19386 struct bpf_map
*map_ptr
;
19387 int i
, ret
, cnt
, delta
= 0;
19389 if (env
->seen_exception
&& !env
->exception_callback_subprog
) {
19390 struct bpf_insn patch
[] = {
19391 env
->prog
->insnsi
[insn_cnt
- 1],
19392 BPF_MOV64_REG(BPF_REG_0
, BPF_REG_1
),
19396 ret
= add_hidden_subprog(env
, patch
, ARRAY_SIZE(patch
));
19400 insn
= prog
->insnsi
;
19402 env
->exception_callback_subprog
= env
->subprog_cnt
- 1;
19403 /* Don't update insn_cnt, as add_hidden_subprog always appends insns */
19404 env
->subprog_info
[env
->exception_callback_subprog
].is_cb
= true;
19405 env
->subprog_info
[env
->exception_callback_subprog
].is_async_cb
= true;
19406 env
->subprog_info
[env
->exception_callback_subprog
].is_exception_cb
= true;
19409 for (i
= 0; i
< insn_cnt
; i
++, insn
++) {
19410 /* Make divide-by-zero exceptions impossible. */
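/* Illustrative sketch (not part of the verifier): 'r2 /= r3' is expanded by
 * the patchlet below into roughly
 *
 *	if r3 != 0 goto +2
 *	w2 ^= w2		// divide by zero yields 0
 *	goto +1
 *	r2 /= r3
 *
 * while the mod variant instead leaves the dividend untouched when the
 * divisor is zero.
 */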
19411 if (insn
->code
== (BPF_ALU64
| BPF_MOD
| BPF_X
) ||
19412 insn
->code
== (BPF_ALU64
| BPF_DIV
| BPF_X
) ||
19413 insn
->code
== (BPF_ALU
| BPF_MOD
| BPF_X
) ||
19414 insn
->code
== (BPF_ALU
| BPF_DIV
| BPF_X
)) {
19415 bool is64
= BPF_CLASS(insn
->code
) == BPF_ALU64
;
19416 bool isdiv
= BPF_OP(insn
->code
) == BPF_DIV
;
19417 struct bpf_insn
*patchlet
;
19418 struct bpf_insn chk_and_div
[] = {
19419 /* [R,W]x div 0 -> 0 */
19420 BPF_RAW_INSN((is64
? BPF_JMP
: BPF_JMP32
) |
19421 BPF_JNE
| BPF_K
, insn
->src_reg
,
19423 BPF_ALU32_REG(BPF_XOR
, insn
->dst_reg
, insn
->dst_reg
),
19424 BPF_JMP_IMM(BPF_JA
, 0, 0, 1),
19427 struct bpf_insn chk_and_mod
[] = {
19428 /* [R,W]x mod 0 -> [R,W]x */
19429 BPF_RAW_INSN((is64
? BPF_JMP
: BPF_JMP32
) |
19430 BPF_JEQ
| BPF_K
, insn
->src_reg
,
19431 0, 1 + (is64
? 0 : 1), 0),
19433 BPF_JMP_IMM(BPF_JA
, 0, 0, 1),
19434 BPF_MOV32_REG(insn
->dst_reg
, insn
->dst_reg
),
19437 patchlet
= isdiv
? chk_and_div
: chk_and_mod
;
19438 cnt
= isdiv
? ARRAY_SIZE(chk_and_div
) :
19439 ARRAY_SIZE(chk_and_mod
) - (is64
? 2 : 0);
19441 new_prog
= bpf_patch_insn_data(env
, i
+ delta
, patchlet
, cnt
);
19446 env
->prog
= prog
= new_prog
;
19447 insn
= new_prog
->insnsi
+ i
+ delta
;
19451 /* Implement LD_ABS and LD_IND with a rewrite, if supported by the program type. */
19452 if (BPF_CLASS(insn
->code
) == BPF_LD
&&
19453 (BPF_MODE(insn
->code
) == BPF_ABS
||
19454 BPF_MODE(insn
->code
) == BPF_IND
)) {
19455 cnt
= env
->ops
->gen_ld_abs(insn
, insn_buf
);
19456 if (cnt
== 0 || cnt
>= ARRAY_SIZE(insn_buf
)) {
19457 verbose(env
, "bpf verifier is misconfigured\n");
19461 new_prog
= bpf_patch_insn_data(env
, i
+ delta
, insn_buf
, cnt
);
19466 env
->prog
= prog
= new_prog
;
19467 insn
= new_prog
->insnsi
+ i
+ delta
;
19471 /* Rewrite pointer arithmetic to mitigate speculation attacks. */
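/* Illustrative sketch (not part of the verifier): for a sanitized
 * 'r1 += r2' (pointer += scalar), the patch built below masks the scalar
 * through BPF_REG_AX with a sequence roughly equivalent to
 *
 *	AX = alu_limit; AX -= r2; AX |= r2; AX = -AX; AX s>>= 63; AX &= r2;
 *	r1 += AX;
 *
 * so AX equals r2 when the offset is within the verifier-computed limit and
 * 0 otherwise, keeping even speculatively executed pointer arithmetic in
 * bounds.
 */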
19472 if (insn
->code
== (BPF_ALU64
| BPF_ADD
| BPF_X
) ||
19473 insn
->code
== (BPF_ALU64
| BPF_SUB
| BPF_X
)) {
19474 const u8 code_add
= BPF_ALU64
| BPF_ADD
| BPF_X
;
19475 const u8 code_sub
= BPF_ALU64
| BPF_SUB
| BPF_X
;
19476 struct bpf_insn
*patch
= &insn_buf
[0];
19477 bool issrc
, isneg
, isimm
;
19480 aux
= &env
->insn_aux_data
[i
+ delta
];
19481 if (!aux
->alu_state
||
19482 aux
->alu_state
== BPF_ALU_NON_POINTER
)
19485 isneg
= aux
->alu_state
& BPF_ALU_NEG_VALUE
;
19486 issrc
= (aux
->alu_state
& BPF_ALU_SANITIZE
) ==
19487 BPF_ALU_SANITIZE_SRC
;
19488 isimm
= aux
->alu_state
& BPF_ALU_IMMEDIATE
;
19490 off_reg
= issrc
? insn
->src_reg
: insn
->dst_reg
;
19492 *patch
++ = BPF_MOV32_IMM(BPF_REG_AX
, aux
->alu_limit
);
19495 *patch
++ = BPF_ALU64_IMM(BPF_MUL
, off_reg
, -1);
19496 *patch
++ = BPF_MOV32_IMM(BPF_REG_AX
, aux
->alu_limit
);
19497 *patch
++ = BPF_ALU64_REG(BPF_SUB
, BPF_REG_AX
, off_reg
);
19498 *patch
++ = BPF_ALU64_REG(BPF_OR
, BPF_REG_AX
, off_reg
);
19499 *patch
++ = BPF_ALU64_IMM(BPF_NEG
, BPF_REG_AX
, 0);
19500 *patch
++ = BPF_ALU64_IMM(BPF_ARSH
, BPF_REG_AX
, 63);
19501 *patch
++ = BPF_ALU64_REG(BPF_AND
, BPF_REG_AX
, off_reg
);
19504 *patch
++ = BPF_MOV64_REG(insn
->dst_reg
, insn
->src_reg
);
19505 insn
->src_reg
= BPF_REG_AX
;
19507 insn
->code
= insn
->code
== code_add
?
19508 code_sub
: code_add
;
19510 if (issrc
&& isneg
&& !isimm
)
19511 *patch
++ = BPF_ALU64_IMM(BPF_MUL
, off_reg
, -1);
19512 cnt
= patch
- insn_buf
;
19514 new_prog
= bpf_patch_insn_data(env
, i
+ delta
, insn_buf
, cnt
);
19519 env
->prog
= prog
= new_prog
;
19520 insn
= new_prog
->insnsi
+ i
+ delta
;
19524 if (insn
->code
!= (BPF_JMP
| BPF_CALL
))
19526 if (insn
->src_reg
== BPF_PSEUDO_CALL
)
19528 if (insn
->src_reg
== BPF_PSEUDO_KFUNC_CALL
) {
19529 ret
= fixup_kfunc_call(env
, insn
, insn_buf
, i
+ delta
, &cnt
);
19535 new_prog
= bpf_patch_insn_data(env
, i
+ delta
, insn_buf
, cnt
);
19540 env
->prog
= prog
= new_prog
;
19541 insn
= new_prog
->insnsi
+ i
+ delta
;
19545 if (insn
->imm
== BPF_FUNC_get_route_realm
)
19546 prog
->dst_needed
= 1;
19547 if (insn
->imm
== BPF_FUNC_get_prandom_u32
)
19548 bpf_user_rnd_init_once();
19549 if (insn
->imm
== BPF_FUNC_override_return
)
19550 prog
->kprobe_override
= 1;
19551 if (insn
->imm
== BPF_FUNC_tail_call
) {
19552 /* If we tail call into other programs, we
19553 * cannot make any assumptions since they can
19554 * be replaced dynamically during runtime in
19555 * the program array.
19557 prog
->cb_access
= 1;
19558 if (!allow_tail_call_in_subprogs(env
))
19559 prog
->aux
->stack_depth
= MAX_BPF_STACK
;
19560 prog
->aux
->max_pkt_offset
= MAX_PACKET_OFF
;
19562 /* mark bpf_tail_call as different opcode to avoid
19563 * conditional branch in the interpreter for every normal
19564 * call and to prevent accidental JITing by JIT compiler
19565 * that doesn't support bpf_tail_call yet
19568 insn
->code
= BPF_JMP
| BPF_TAIL_CALL
;
19570 aux
= &env
->insn_aux_data
[i
+ delta
];
19571 if (env
->bpf_capable
&& !prog
->blinding_requested
&&
19572 prog
->jit_requested
&&
19573 !bpf_map_key_poisoned(aux
) &&
19574 !bpf_map_ptr_poisoned(aux
) &&
19575 !bpf_map_ptr_unpriv(aux
)) {
19576 struct bpf_jit_poke_descriptor desc
= {
19577 .reason
= BPF_POKE_REASON_TAIL_CALL
,
19578 .tail_call
.map
= BPF_MAP_PTR(aux
->map_ptr_state
),
19579 .tail_call
.key
= bpf_map_key_immediate(aux
),
19580 .insn_idx
= i
+ delta
,
19583 ret
= bpf_jit_add_poke_descriptor(prog
, &desc
);
19585 verbose(env
, "adding tail call poke descriptor failed\n");
19589 insn
->imm
= ret
+ 1;
19593 if (!bpf_map_ptr_unpriv(aux
))
19596 /* instead of changing every JIT dealing with tail_call
19597 * emit two extra insns:
19598 * if (index >= max_entries) goto out;
19599 * index &= array->index_mask;
19600 * to avoid out-of-bounds cpu speculation
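 *
 * Illustrative sketch (not part of the verifier): with insn_buf[] filled in
 * below, the call site effectively becomes
 *
 *	if (r3 >= map->max_entries) goto out;
 *	w3 &= array->index_mask;
 *	bpf_tail_call(...);
 *
 * so a mispredicted bounds check cannot index past the program array.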
19602 if (bpf_map_ptr_poisoned(aux
)) {
19603 verbose(env
, "tail_call abusing map_ptr\n");
19607 map_ptr
= BPF_MAP_PTR(aux
->map_ptr_state
);
19608 insn_buf
[0] = BPF_JMP_IMM(BPF_JGE
, BPF_REG_3
,
19609 map_ptr
->max_entries
, 2);
19610 insn_buf
[1] = BPF_ALU32_IMM(BPF_AND
, BPF_REG_3
,
19611 container_of(map_ptr
,
19614 insn_buf
[2] = *insn
;
19616 new_prog
= bpf_patch_insn_data(env
, i
+ delta
, insn_buf
, cnt
);
19621 env
->prog
= prog
= new_prog
;
19622 insn
= new_prog
->insnsi
+ i
+ delta
;
		if (insn->imm == BPF_FUNC_timer_set_callback) {
			/* The verifier will process callback_fn as many times as necessary
			 * with different maps and the register states prepared by
			 * set_timer_callback_state will be accurate.
			 *
			 * The following use case is valid:
			 *   map1 is shared by prog1, prog2, prog3.
			 *   prog1 calls bpf_timer_init for some map1 elements
			 *   prog2 calls bpf_timer_set_callback for some map1 elements.
			 *     Those that were not bpf_timer_init-ed will return -EINVAL.
			 *   prog3 calls bpf_timer_start for some map1 elements.
			 *     Those that were not both bpf_timer_init-ed and
			 *     bpf_timer_set_callback-ed will return -EINVAL.
			 */
			struct bpf_insn ld_addrs[2] = {
				BPF_LD_IMM64(BPF_REG_3, (long)prog->aux),
			};

			insn_buf[0] = ld_addrs[0];
			insn_buf[1] = ld_addrs[1];
			insn_buf[2] = *insn;
			cnt = 3;

			new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
			if (!new_prog)
				return -ENOMEM;

			delta	 += cnt - 1;
			env->prog = prog = new_prog;
			insn	  = new_prog->insnsi + i + delta;
			goto patch_call_imm;
		}
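		/* The *_storage_get() helpers take allocation flags as a hidden
		 * fifth argument: sleepable programs may allocate with
		 * GFP_KERNEL, while non-sleepable programs (or call sites
		 * marked storage_get_func_atomic) must use GFP_ATOMIC.
		 */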
		if (is_storage_get_function(insn->imm)) {
			if (!env->prog->aux->sleepable ||
			    env->insn_aux_data[i + delta].storage_get_func_atomic)
				insn_buf[0] = BPF_MOV64_IMM(BPF_REG_5, (__force __s32)GFP_ATOMIC);
			else
				insn_buf[0] = BPF_MOV64_IMM(BPF_REG_5, (__force __s32)GFP_KERNEL);
			insn_buf[1] = *insn;
			cnt = 2;

			new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
			if (!new_prog)
				return -ENOMEM;

			delta += cnt - 1;
			env->prog = prog = new_prog;
			insn = new_prog->insnsi + i + delta;
			goto patch_call_imm;
		}
		/* bpf_per_cpu_ptr() and bpf_this_cpu_ptr() */
		if (env->insn_aux_data[i + delta].call_with_percpu_alloc_ptr) {
			/* patch with 'r1 = *(u64 *)(r1 + 0)' since for percpu data,
			 * bpf_mem_alloc() returns a ptr to the percpu data ptr.
			 */
			insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, 0);
			insn_buf[1] = *insn;
			cnt = 2;

			new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
			if (!new_prog)
				return -ENOMEM;

			delta += cnt - 1;
			env->prog = prog = new_prog;
			insn = new_prog->insnsi + i + delta;
			goto patch_call_imm;
		}
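		/* For the common map helpers the verifier knows the exact map at
		 * each call site, so the generic helper can be replaced with a
		 * direct call into the map's ops (and map_lookup_elem can be
		 * inlined entirely when the map provides map_gen_lookup).
		 */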
		/* BPF_EMIT_CALL() assumptions in some of the map_gen_lookup
		 * and other inlining handlers are currently limited to 64 bit
		 * only.
		 */
		if (prog->jit_requested && BITS_PER_LONG == 64 &&
		    (insn->imm == BPF_FUNC_map_lookup_elem ||
		     insn->imm == BPF_FUNC_map_update_elem ||
		     insn->imm == BPF_FUNC_map_delete_elem ||
		     insn->imm == BPF_FUNC_map_push_elem   ||
		     insn->imm == BPF_FUNC_map_pop_elem    ||
		     insn->imm == BPF_FUNC_map_peek_elem   ||
		     insn->imm == BPF_FUNC_redirect_map    ||
		     insn->imm == BPF_FUNC_for_each_map_elem ||
		     insn->imm == BPF_FUNC_map_lookup_percpu_elem)) {
			aux = &env->insn_aux_data[i + delta];
			if (bpf_map_ptr_poisoned(aux))
				goto patch_call_imm;

			map_ptr = BPF_MAP_PTR(aux->map_ptr_state);
			ops = map_ptr->ops;
			if (insn->imm == BPF_FUNC_map_lookup_elem &&
			    ops->map_gen_lookup) {
				cnt = ops->map_gen_lookup(map_ptr, insn_buf);
				if (cnt == -EOPNOTSUPP)
					goto patch_map_ops_generic;
				if (cnt <= 0 || cnt >= ARRAY_SIZE(insn_buf)) {
					verbose(env, "bpf verifier is misconfigured\n");
					return -EINVAL;
				}

				new_prog = bpf_patch_insn_data(env, i + delta,
							       insn_buf, cnt);
				if (!new_prog)
					return -ENOMEM;

				delta	 += cnt - 1;
				env->prog = prog = new_prog;
				insn	  = new_prog->insnsi + i + delta;
				continue;
			}

			BUILD_BUG_ON(!__same_type(ops->map_lookup_elem,
				     (void *(*)(struct bpf_map *map, void *key))NULL));
			BUILD_BUG_ON(!__same_type(ops->map_delete_elem,
				     (long (*)(struct bpf_map *map, void *key))NULL));
			BUILD_BUG_ON(!__same_type(ops->map_update_elem,
				     (long (*)(struct bpf_map *map, void *key, void *value,
					       u64 flags))NULL));
			BUILD_BUG_ON(!__same_type(ops->map_push_elem,
				     (long (*)(struct bpf_map *map, void *value,
					       u64 flags))NULL));
			BUILD_BUG_ON(!__same_type(ops->map_pop_elem,
				     (long (*)(struct bpf_map *map, void *value))NULL));
			BUILD_BUG_ON(!__same_type(ops->map_peek_elem,
				     (long (*)(struct bpf_map *map, void *value))NULL));
			BUILD_BUG_ON(!__same_type(ops->map_redirect,
				     (long (*)(struct bpf_map *map, u64 index, u64 flags))NULL));
			BUILD_BUG_ON(!__same_type(ops->map_for_each_callback,
				     (long (*)(struct bpf_map *map,
					       bpf_callback_t callback_fn,
					       void *callback_ctx,
					       u64 flags))NULL));
			BUILD_BUG_ON(!__same_type(ops->map_lookup_percpu_elem,
				     (void *(*)(struct bpf_map *map, void *key, u32 cpu))NULL));

patch_map_ops_generic:
			switch (insn->imm) {
			case BPF_FUNC_map_lookup_elem:
				insn->imm = BPF_CALL_IMM(ops->map_lookup_elem);
				continue;
			case BPF_FUNC_map_update_elem:
				insn->imm = BPF_CALL_IMM(ops->map_update_elem);
				continue;
			case BPF_FUNC_map_delete_elem:
				insn->imm = BPF_CALL_IMM(ops->map_delete_elem);
				continue;
			case BPF_FUNC_map_push_elem:
				insn->imm = BPF_CALL_IMM(ops->map_push_elem);
				continue;
			case BPF_FUNC_map_pop_elem:
				insn->imm = BPF_CALL_IMM(ops->map_pop_elem);
				continue;
			case BPF_FUNC_map_peek_elem:
				insn->imm = BPF_CALL_IMM(ops->map_peek_elem);
				continue;
			case BPF_FUNC_redirect_map:
				insn->imm = BPF_CALL_IMM(ops->map_redirect);
				continue;
			case BPF_FUNC_for_each_map_elem:
				insn->imm = BPF_CALL_IMM(ops->map_for_each_callback);
				continue;
			case BPF_FUNC_map_lookup_percpu_elem:
				insn->imm = BPF_CALL_IMM(ops->map_lookup_percpu_elem);
				continue;
			}

			goto patch_call_imm;
		}
		/* Implement bpf_jiffies64 inline. */
		if (prog->jit_requested && BITS_PER_LONG == 64 &&
		    insn->imm == BPF_FUNC_jiffies64) {
			struct bpf_insn ld_jiffies_addr[2] = {
				BPF_LD_IMM64(BPF_REG_0,
					     (unsigned long)&jiffies),
			};

			insn_buf[0] = ld_jiffies_addr[0];
			insn_buf[1] = ld_jiffies_addr[1];
			insn_buf[2] = BPF_LDX_MEM(BPF_DW, BPF_REG_0,
						  BPF_REG_0, 0);
			cnt = 3;

			new_prog = bpf_patch_insn_data(env, i + delta, insn_buf,
						       cnt);
			if (!new_prog)
				return -ENOMEM;

			delta	 += cnt - 1;
			env->prog = prog = new_prog;
			insn	  = new_prog->insnsi + i + delta;
			continue;
		}
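		/* The bpf_get_func_*() helpers below are inlined for tracing
		 * programs: the BPF trampoline stores the argument count at
		 * ctx - 8 and the traced function's IP at ctx - 16, so these
		 * helpers reduce to a few loads relative to the program's ctx.
		 */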
		/* Implement bpf_get_func_arg inline. */
		if (prog_type == BPF_PROG_TYPE_TRACING &&
		    insn->imm == BPF_FUNC_get_func_arg) {
			/* Load nr_args from ctx - 8 */
			insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8);
			insn_buf[1] = BPF_JMP32_REG(BPF_JGE, BPF_REG_2, BPF_REG_0, 6);
			insn_buf[2] = BPF_ALU64_IMM(BPF_LSH, BPF_REG_2, 3);
			insn_buf[3] = BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_1);
			insn_buf[4] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_2, 0);
			insn_buf[5] = BPF_STX_MEM(BPF_DW, BPF_REG_3, BPF_REG_0, 0);
			insn_buf[6] = BPF_MOV64_IMM(BPF_REG_0, 0);
			insn_buf[7] = BPF_JMP_A(1);
			insn_buf[8] = BPF_MOV64_IMM(BPF_REG_0, -EINVAL);
			cnt = 9;

			new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
			if (!new_prog)
				return -ENOMEM;

			delta	 += cnt - 1;
			env->prog = prog = new_prog;
			insn	  = new_prog->insnsi + i + delta;
			continue;
		}

		/* Implement bpf_get_func_ret inline. */
		if (prog_type == BPF_PROG_TYPE_TRACING &&
		    insn->imm == BPF_FUNC_get_func_ret) {
			if (eatype == BPF_TRACE_FEXIT ||
			    eatype == BPF_MODIFY_RETURN) {
				/* Load nr_args from ctx - 8 */
				insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8);
				insn_buf[1] = BPF_ALU64_IMM(BPF_LSH, BPF_REG_0, 3);
				insn_buf[2] = BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1);
				insn_buf[3] = BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_0, 0);
				insn_buf[4] = BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_3, 0);
				insn_buf[5] = BPF_MOV64_IMM(BPF_REG_0, 0);
				cnt = 6;
			} else {
				insn_buf[0] = BPF_MOV64_IMM(BPF_REG_0, -EOPNOTSUPP);
				cnt = 1;
			}

			new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
			if (!new_prog)
				return -ENOMEM;

			delta	 += cnt - 1;
			env->prog = prog = new_prog;
			insn	  = new_prog->insnsi + i + delta;
			continue;
		}
		/* Implement get_func_arg_cnt inline. */
		if (prog_type == BPF_PROG_TYPE_TRACING &&
		    insn->imm == BPF_FUNC_get_func_arg_cnt) {
			/* Load nr_args from ctx - 8 */
			insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8);

			new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, 1);
			if (!new_prog)
				return -ENOMEM;

			env->prog = prog = new_prog;
			insn	  = new_prog->insnsi + i + delta;
			continue;
		}

		/* Implement bpf_get_func_ip inline. */
		if (prog_type == BPF_PROG_TYPE_TRACING &&
		    insn->imm == BPF_FUNC_get_func_ip) {
			/* Load IP address from ctx - 16 */
			insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -16);

			new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, 1);
			if (!new_prog)
				return -ENOMEM;

			env->prog = prog = new_prog;
			insn	  = new_prog->insnsi + i + delta;
			continue;
		}
patch_call_imm:
		fn = env->ops->get_func_proto(insn->imm, env->prog);
		/* all functions that have prototype and verifier allowed
		 * programs to call them, must be real in-kernel functions
		 */
		if (!fn->func) {
			verbose(env,
				"kernel subsystem misconfigured func %s#%d\n",
				func_id_name(insn->imm), insn->imm);
			return -EFAULT;
		}
		insn->imm = fn->func - __bpf_call_base;
	}

	/* Since poke tab is now finalized, publish aux to tracker. */
	for (i = 0; i < prog->aux->size_poke_tab; i++) {
		map_ptr = prog->aux->poke_tab[i].tail_call.map;
		if (!map_ptr->ops->map_poke_track ||
		    !map_ptr->ops->map_poke_untrack ||
		    !map_ptr->ops->map_poke_run) {
			verbose(env, "bpf verifier is misconfigured\n");
			return -EINVAL;
		}

		ret = map_ptr->ops->map_poke_track(map_ptr, prog->aux);
		if (ret < 0) {
			verbose(env, "tracking tail call prog failed\n");
			return ret;
		}
	}

	sort_kfunc_descs_by_imm_off(env->prog);

	return 0;
}
static struct bpf_prog *inline_bpf_loop(struct bpf_verifier_env *env,
					int position,
					s32 stack_base,
					u32 callback_subprogno,
					u32 *cnt)
{
	s32 r6_offset = stack_base + 0 * BPF_REG_SIZE;
	s32 r7_offset = stack_base + 1 * BPF_REG_SIZE;
	s32 r8_offset = stack_base + 2 * BPF_REG_SIZE;
	int reg_loop_max = BPF_REG_6;
	int reg_loop_cnt = BPF_REG_7;
	int reg_loop_ctx = BPF_REG_8;

	struct bpf_prog *new_prog;
	u32 callback_start;
	u32 call_insn_offset;
	s32 callback_offset;

	/* This represents an inlined version of bpf_iter.c:bpf_loop,
	 * be careful to modify this code in sync.
	 */
	struct bpf_insn insn_buf[] = {
		/* Return error and jump to the end of the patch if
		 * expected number of iterations is too big.
		 */
		BPF_JMP_IMM(BPF_JLE, BPF_REG_1, BPF_MAX_LOOPS, 2),
		BPF_MOV32_IMM(BPF_REG_0, -E2BIG),
		BPF_JMP_IMM(BPF_JA, 0, 0, 16),
		/* spill R6, R7, R8 to use these as loop vars */
		BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_6, r6_offset),
		BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_7, r7_offset),
		BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_8, r8_offset),
		/* initialize loop vars */
		BPF_MOV64_REG(reg_loop_max, BPF_REG_1),
		BPF_MOV32_IMM(reg_loop_cnt, 0),
		BPF_MOV64_REG(reg_loop_ctx, BPF_REG_3),
		/* loop header,
		 * if reg_loop_cnt >= reg_loop_max skip the loop body
		 */
		BPF_JMP_REG(BPF_JGE, reg_loop_cnt, reg_loop_max, 5),
		/* callback call,
		 * correct callback offset would be set after patching
		 */
		BPF_MOV64_REG(BPF_REG_1, reg_loop_cnt),
		BPF_MOV64_REG(BPF_REG_2, reg_loop_ctx),
		BPF_CALL_REL(0),
		/* increment loop counter */
		BPF_ALU64_IMM(BPF_ADD, reg_loop_cnt, 1),
		/* jump to loop header if callback returned 0 */
		BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, -6),
		/* return value of bpf_loop,
		 * set R0 to the number of iterations
		 */
		BPF_MOV64_REG(BPF_REG_0, reg_loop_cnt),
		/* restore original values of R6, R7, R8 */
		BPF_LDX_MEM(BPF_DW, BPF_REG_6, BPF_REG_10, r6_offset),
		BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_10, r7_offset),
		BPF_LDX_MEM(BPF_DW, BPF_REG_8, BPF_REG_10, r8_offset),
	};

	*cnt = ARRAY_SIZE(insn_buf);
	new_prog = bpf_patch_insn_data(env, position, insn_buf, *cnt);
	if (!new_prog)
		return new_prog;

	/* callback start is known only after patching */
	callback_start = env->subprog_info[callback_subprogno].start;
	/* Note: insn_buf[12] is an offset of BPF_CALL_REL instruction */
	call_insn_offset = position + 12;
	callback_offset = callback_start - call_insn_offset - 1;
	new_prog->insnsi[call_insn_offset].imm = callback_offset;

	return new_prog;
}
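/* R6-R8 are used as the loop variables above because they are callee-saved in
 * the BPF calling convention and therefore survive the call into the callback;
 * their original values are spilled to and restored from the extra stack space
 * reserved by optimize_bpf_loop().
 */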
static bool is_bpf_loop_call(struct bpf_insn *insn)
{
	return insn->code == (BPF_JMP | BPF_CALL) &&
		insn->src_reg == 0 &&
		insn->imm == BPF_FUNC_loop;
}
/* For all sub-programs in the program (including main) check
 * insn_aux_data to see if there are bpf_loop calls that require
 * inlining. If such calls are found the calls are replaced with a
 * sequence of instructions produced by `inline_bpf_loop` function and
 * subprog stack_depth is increased by the size of 3 registers.
 * This stack space is used to spill values of the R6, R7, R8.  These
 * registers are used to store the loop bound, counter and context
 * variables.
 */
static int optimize_bpf_loop(struct bpf_verifier_env *env)
{
	struct bpf_subprog_info *subprogs = env->subprog_info;
	int i, cur_subprog = 0, cnt, delta = 0;
	struct bpf_insn *insn = env->prog->insnsi;
	int insn_cnt = env->prog->len;
	u16 stack_depth = subprogs[cur_subprog].stack_depth;
	u16 stack_depth_roundup = round_up(stack_depth, 8) - stack_depth;
	u16 stack_depth_extra = 0;

	for (i = 0; i < insn_cnt; i++, insn++) {
		struct bpf_loop_inline_state *inline_state =
			&env->insn_aux_data[i + delta].loop_inline_state;

		if (is_bpf_loop_call(insn) && inline_state->fit_for_inline) {
			struct bpf_prog *new_prog;

			stack_depth_extra = BPF_REG_SIZE * 3 + stack_depth_roundup;
			new_prog = inline_bpf_loop(env,
						   i + delta,
						   -(stack_depth + stack_depth_extra),
						   inline_state->callback_subprogno,
						   &cnt);
			if (!new_prog)
				return -ENOMEM;

			delta     += cnt - 1;
			env->prog  = new_prog;
			insn       = new_prog->insnsi + i + delta;
		}

		if (subprogs[cur_subprog + 1].start == i + delta + 1) {
			subprogs[cur_subprog].stack_depth += stack_depth_extra;
			cur_subprog++;
			stack_depth = subprogs[cur_subprog].stack_depth;
			stack_depth_roundup = round_up(stack_depth, 8) - stack_depth;
			stack_depth_extra = 0;
		}
	}

	env->prog->aux->stack_depth = env->subprog_info[0].stack_depth;

	return 0;
}
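/* The extra stack reserved for the spilled R6-R8 is accounted once per
 * subprogram and includes the rounding needed to keep the subprogram's total
 * stack depth 8-byte aligned.
 */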
static void free_states(struct bpf_verifier_env *env)
{
	struct bpf_verifier_state_list *sl, *sln;
	int i;

	sl = env->free_list;
	while (sl) {
		sln = sl->next;
		free_verifier_state(&sl->state, false);
		kfree(sl);
		sl = sln;
	}
	env->free_list = NULL;

	if (!env->explored_states)
		return;

	for (i = 0; i < state_htab_size(env); i++) {
		sl = env->explored_states[i];

		while (sl) {
			sln = sl->next;
			free_verifier_state(&sl->state, false);
			kfree(sl);
			sl = sln;
		}
		env->explored_states[i] = NULL;
	}
}
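/* Verify one subprogram from a clean verifier state: subprog 0 is the main
 * program, any other index is a global function (or the exception callback
 * when is_ex_cb is set) verified independently of its callers.
 */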
static int do_check_common(struct bpf_verifier_env *env, int subprog, bool is_ex_cb)
{
	bool pop_log = !(env->log.level & BPF_LOG_LEVEL2);
	struct bpf_verifier_state *state;
	struct bpf_reg_state *regs;
	int ret, i;

	env->prev_linfo = NULL;

	state = kzalloc(sizeof(struct bpf_verifier_state), GFP_KERNEL);
	if (!state)
		return -ENOMEM;
	state->curframe = 0;
	state->speculative = false;
	state->branches = 1;
	state->frame[0] = kzalloc(sizeof(struct bpf_func_state), GFP_KERNEL);
	if (!state->frame[0]) {
		kfree(state);
		return -ENOMEM;
	}
	env->cur_state = state;
	init_func_state(env, state->frame[0],
			BPF_MAIN_FUNC /* callsite */,
			0 /* frameno */,
			subprog);
	state->first_insn_idx = env->subprog_info[subprog].start;
	state->last_insn_idx = -1;

	regs = state->frame[state->curframe]->regs;
	if (subprog || env->prog->type == BPF_PROG_TYPE_EXT) {
		ret = btf_prepare_func_args(env, subprog, regs, is_ex_cb);
		if (ret)
			goto out;
		for (i = BPF_REG_1; i <= BPF_REG_5; i++) {
			if (regs[i].type == PTR_TO_CTX)
				mark_reg_known_zero(env, regs, i);
			else if (regs[i].type == SCALAR_VALUE)
				mark_reg_unknown(env, regs, i);
			else if (base_type(regs[i].type) == PTR_TO_MEM) {
				const u32 mem_size = regs[i].mem_size;

				mark_reg_known_zero(env, regs, i);
				regs[i].mem_size = mem_size;
				regs[i].id = ++env->id_gen;
			}
		}
		if (is_ex_cb) {
			state->frame[0]->in_exception_callback_fn = true;
			env->subprog_info[subprog].is_cb = true;
			env->subprog_info[subprog].is_async_cb = true;
			env->subprog_info[subprog].is_exception_cb = true;
		}
	} else {
		/* 1st arg to a function */
		regs[BPF_REG_1].type = PTR_TO_CTX;
		mark_reg_known_zero(env, regs, BPF_REG_1);
		ret = btf_check_subprog_arg_match(env, subprog, regs);
		if (ret == -EFAULT)
			/* unlikely verifier bug. abort.
			 * ret == 0 and ret < 0 are sadly acceptable for
			 * main() function due to backward compatibility.
			 * Like socket filter program may be written as:
			 * int bpf_prog(struct pt_regs *ctx)
			 * and never dereference that ctx in the program.
			 * 'struct pt_regs' is a type mismatch for socket
			 * filter that should be using 'struct __sk_buff'.
			 */
			goto out;
	}

	ret = do_check(env);
out:
	/* check for NULL is necessary, since cur_state can be freed inside
	 * do_check() under memory pressure.
	 */
	if (env->cur_state) {
		free_verifier_state(env->cur_state, true);
		env->cur_state = NULL;
	}
	while (!pop_stack(env, NULL, NULL, false));
	if (!ret && pop_log)
		bpf_vlog_reset(&env->log, 0);
	free_states(env);
	return ret;
}
/* Verify all global functions in a BPF program one by one based on their BTF.
 * All global functions must pass verification. Otherwise the whole program is rejected.
 * Consider:
 * int bar(int);
 * int foo(int f)
 * {
 *	return bar(f);
 * }
 * int bar(int b)
 * {
 *	...
 * }
 * foo() will be verified first for R1=any_scalar_value. During verification it
 * will be assumed that bar() already verified successfully and call to bar()
 * from foo() will be checked for type match only. Later bar() will be verified
 * independently to check that it's safe for R1=any_scalar_value.
 */
static int do_check_subprogs(struct bpf_verifier_env *env)
{
	struct bpf_prog_aux *aux = env->prog->aux;
	int i, ret;

	if (!aux->func_info)
		return 0;

	for (i = 1; i < env->subprog_cnt; i++) {
		if (aux->func_info_aux[i].linkage != BTF_FUNC_GLOBAL)
			continue;
		env->insn_idx = env->subprog_info[i].start;
		WARN_ON_ONCE(env->insn_idx == 0);
		ret = do_check_common(env, i, env->exception_callback_subprog == i);
		if (ret) {
			return ret;
		} else if (env->log.level & BPF_LOG_LEVEL) {
			verbose(env,
				"Func#%d is safe for any args that match its prototype\n",
				i);
		}
	}
	return 0;
}
static int do_check_main(struct bpf_verifier_env *env)
{
	int ret;

	env->insn_idx = 0;
	ret = do_check_common(env, 0, false);
	if (!ret)
		env->prog->aux->stack_depth = env->subprog_info[0].stack_depth;
	return ret;
}
static void print_verification_stats(struct bpf_verifier_env *env)
{
	int i;

	if (env->log.level & BPF_LOG_STATS) {
		verbose(env, "verification time %lld usec\n",
			div_u64(env->verification_time, 1000));
		verbose(env, "stack depth ");
		for (i = 0; i < env->subprog_cnt; i++) {
			u32 depth = env->subprog_info[i].stack_depth;

			verbose(env, "%d", depth);
			if (i + 1 < env->subprog_cnt)
				verbose(env, "+");
		}
		verbose(env, "\n");
	}
	verbose(env, "processed %d insns (limit %d) max_states_per_insn %d "
		"total_states %d peak_states %d mark_read %d\n",
		env->insn_processed, BPF_COMPLEXITY_LIMIT_INSNS,
		env->max_states_per_insn, env->total_states,
		env->peak_states, env->longest_mark_read_walk);
}
static int check_struct_ops_btf_id(struct bpf_verifier_env *env)
{
	const struct btf_type *t, *func_proto;
	const struct bpf_struct_ops *st_ops;
	const struct btf_member *member;
	struct bpf_prog *prog = env->prog;
	u32 btf_id, member_idx;
	const char *mname;

	if (!prog->gpl_compatible) {
		verbose(env, "struct ops programs must have a GPL compatible license\n");
		return -EINVAL;
	}

	btf_id = prog->aux->attach_btf_id;
	st_ops = bpf_struct_ops_find(btf_id);
	if (!st_ops) {
		verbose(env, "attach_btf_id %u is not a supported struct\n",
			btf_id);
		return -ENOTSUPP;
	}

	t = st_ops->type;
	member_idx = prog->expected_attach_type;
	if (member_idx >= btf_type_vlen(t)) {
		verbose(env, "attach to invalid member idx %u of struct %s\n",
			member_idx, st_ops->name);
		return -EINVAL;
	}

	member = &btf_type_member(t)[member_idx];
	mname = btf_name_by_offset(btf_vmlinux, member->name_off);
	func_proto = btf_type_resolve_func_ptr(btf_vmlinux, member->type,
					       NULL);
	if (!func_proto) {
		verbose(env, "attach to invalid member %s(@idx %u) of struct %s\n",
			mname, member_idx, st_ops->name);
		return -EINVAL;
	}

	if (st_ops->check_member) {
		int err = st_ops->check_member(t, member, prog);

		if (err) {
			verbose(env, "attach to unsupported member %s of struct %s\n",
				mname, st_ops->name);
			return err;
		}
	}

	prog->aux->attach_func_proto = func_proto;
	prog->aux->attach_func_name = mname;
	env->ops = st_ops->verifier_ops;

	return 0;
}
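/* fmod_ret attachment is only allowed on functions that are either on the
 * error-injection list or are LSM hooks (names starting with "security_").
 */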
#define SECURITY_PREFIX "security_"

static int check_attach_modify_return(unsigned long addr, const char *func_name)
{
	if (within_error_injection_list(addr) ||
	    !strncmp(SECURITY_PREFIX, func_name, sizeof(SECURITY_PREFIX) - 1))
		return 0;

	return -EINVAL;
}

/* list of non-sleepable functions that are otherwise on
 * ALLOW_ERROR_INJECTION list
 */
BTF_SET_START(btf_non_sleepable_error_inject)
/* Three functions below can be called from sleepable and non-sleepable context.
 * Assume non-sleepable from bpf safety point of view.
 */
BTF_ID(func, __filemap_add_folio)
BTF_ID(func, should_fail_alloc_page)
BTF_ID(func, should_failslab)
BTF_SET_END(btf_non_sleepable_error_inject)

static int check_non_sleepable_error_inject(u32 btf_id)
{
	return btf_id_set_contains(&btf_non_sleepable_error_inject, btf_id);
}
int bpf_check_attach_target(struct bpf_verifier_log *log,
			    const struct bpf_prog *prog,
			    const struct bpf_prog *tgt_prog,
			    u32 btf_id,
			    struct bpf_attach_target_info *tgt_info)
{
	bool prog_extension = prog->type == BPF_PROG_TYPE_EXT;
	const char prefix[] = "btf_trace_";
	int ret = 0, subprog = -1, i;
	const struct btf_type *t;
	bool conservative = true;
	const char *tname;
	struct btf *btf;
	long addr = 0;
	struct module *mod = NULL;

	if (!btf_id) {
		bpf_log(log, "Tracing programs must provide btf_id\n");
		return -EINVAL;
	}
	btf = tgt_prog ? tgt_prog->aux->btf : prog->aux->attach_btf;
	if (!btf) {
		bpf_log(log,
			"FENTRY/FEXIT program can only be attached to another program annotated with BTF\n");
		return -EINVAL;
	}
	t = btf_type_by_id(btf, btf_id);
	if (!t) {
		bpf_log(log, "attach_btf_id %u is invalid\n", btf_id);
		return -EINVAL;
	}
	tname = btf_name_by_offset(btf, t->name_off);
	if (!tname) {
		bpf_log(log, "attach_btf_id %u doesn't have a name\n", btf_id);
		return -EINVAL;
	}
	if (tgt_prog) {
		struct bpf_prog_aux *aux = tgt_prog->aux;

		if (bpf_prog_is_dev_bound(prog->aux) &&
		    !bpf_prog_dev_bound_match(prog, tgt_prog)) {
			bpf_log(log, "Target program bound device mismatch");
			return -EINVAL;
		}

		for (i = 0; i < aux->func_info_cnt; i++)
			if (aux->func_info[i].type_id == btf_id) {
				subprog = i;
				break;
			}
		if (subprog == -1) {
			bpf_log(log, "Subprog %s doesn't exist\n", tname);
			return -EINVAL;
		}
		if (aux->func && aux->func[subprog]->aux->exception_cb) {
			bpf_log(log,
				"%s programs cannot attach to exception callback\n",
				prog_extension ? "Extension" : "FENTRY/FEXIT");
			return -EINVAL;
		}
		conservative = aux->func_info_aux[subprog].unreliable;
		if (prog_extension) {
			if (conservative) {
				bpf_log(log,
					"Cannot replace static functions\n");
				return -EINVAL;
			}
			if (!prog->jit_requested) {
				bpf_log(log,
					"Extension programs should be JITed\n");
				return -EINVAL;
			}
		}
		if (!tgt_prog->jited) {
			bpf_log(log, "Can attach to only JITed progs\n");
			return -EINVAL;
		}
		if (tgt_prog->type == prog->type) {
			/* Cannot fentry/fexit another fentry/fexit program.
			 * Cannot attach program extension to another extension.
			 * It's ok to attach fentry/fexit to extension program.
			 */
			bpf_log(log, "Cannot recursively attach\n");
			return -EINVAL;
		}
		if (tgt_prog->type == BPF_PROG_TYPE_TRACING &&
		    prog_extension &&
		    (tgt_prog->expected_attach_type == BPF_TRACE_FENTRY ||
		     tgt_prog->expected_attach_type == BPF_TRACE_FEXIT)) {
			/* Program extensions can extend all program types
			 * except fentry/fexit. The reason is the following.
			 * The fentry/fexit programs are used for performance
			 * analysis, stats and can be attached to any program
			 * type except themselves. When extension program is
			 * replacing XDP function it is necessary to allow
			 * performance analysis of all functions. Both original
			 * XDP program and its program extension. Hence
			 * attaching fentry/fexit to BPF_PROG_TYPE_EXT is
			 * allowed. If extending of fentry/fexit was allowed it
			 * would be possible to create long call chain
			 * fentry->extension->fentry->extension beyond
			 * reasonable stack size. Hence extending fentry is not
			 * allowed.
			 */
			bpf_log(log, "Cannot extend fentry/fexit\n");
			return -EINVAL;
		}
	} else {
		if (prog_extension) {
			bpf_log(log, "Cannot replace kernel functions\n");
			return -EINVAL;
		}
	}

	switch (prog->expected_attach_type) {
	case BPF_TRACE_RAW_TP:
		if (tgt_prog) {
			bpf_log(log,
				"Only FENTRY/FEXIT progs are attachable to another BPF prog\n");
			return -EINVAL;
		}
		if (!btf_type_is_typedef(t)) {
			bpf_log(log, "attach_btf_id %u is not a typedef\n",
				btf_id);
			return -EINVAL;
		}
		if (strncmp(prefix, tname, sizeof(prefix) - 1)) {
			bpf_log(log, "attach_btf_id %u points to wrong type name %s\n",
				btf_id, tname);
			return -EINVAL;
		}
		tname += sizeof(prefix) - 1;
		t = btf_type_by_id(btf, t->type);
		if (!btf_type_is_ptr(t))
			/* should never happen in valid vmlinux build */
			return -EINVAL;
		t = btf_type_by_id(btf, t->type);
		if (!btf_type_is_func_proto(t))
			/* should never happen in valid vmlinux build */
			return -EINVAL;

		break;
	case BPF_TRACE_ITER:
		if (!btf_type_is_func(t)) {
			bpf_log(log, "attach_btf_id %u is not a function\n",
				btf_id);
			return -EINVAL;
		}
		t = btf_type_by_id(btf, t->type);
		if (!btf_type_is_func_proto(t))
			return -EINVAL;
		ret = btf_distill_func_proto(log, btf, t, tname, &tgt_info->fmodel);
		if (ret)
			return ret;
		break;
	default:
		if (!prog_extension)
			return -EINVAL;
		fallthrough;
	case BPF_MODIFY_RETURN:
	case BPF_LSM_MAC:
	case BPF_LSM_CGROUP:
	case BPF_TRACE_FENTRY:
	case BPF_TRACE_FEXIT:
		if (!btf_type_is_func(t)) {
			bpf_log(log, "attach_btf_id %u is not a function\n",
				btf_id);
			return -EINVAL;
		}
		if (prog_extension &&
		    btf_check_type_match(log, prog, btf, t))
			return -EINVAL;
		t = btf_type_by_id(btf, t->type);
		if (!btf_type_is_func_proto(t))
			return -EINVAL;

		if ((prog->aux->saved_dst_prog_type || prog->aux->saved_dst_attach_type) &&
		    (!tgt_prog || prog->aux->saved_dst_prog_type != tgt_prog->type ||
		     prog->aux->saved_dst_attach_type != tgt_prog->expected_attach_type))
			return -EINVAL;

		if (tgt_prog && conservative)
			t = NULL;

		ret = btf_distill_func_proto(log, btf, t, tname, &tgt_info->fmodel);
		if (ret < 0)
			return ret;

		if (tgt_prog) {
			if (subprog == 0)
				addr = (long) tgt_prog->bpf_func;
			else
				addr = (long) tgt_prog->aux->func[subprog]->bpf_func;
		} else {
			if (btf_is_module(btf)) {
				mod = btf_try_get_module(btf);
				if (mod)
					addr = find_kallsyms_symbol_value(mod, tname);
				else
					addr = 0;
			} else {
				addr = kallsyms_lookup_name(tname);
			}
			if (!addr) {
				module_put(mod);
				bpf_log(log,
					"The address of function %s cannot be found\n",
					tname);
				return -ENOENT;
			}
		}

		if (prog->aux->sleepable) {
			ret = -EINVAL;
			switch (prog->type) {
			case BPF_PROG_TYPE_TRACING:

				/* fentry/fexit/fmod_ret progs can be sleepable if they are
				 * attached to ALLOW_ERROR_INJECTION and are not in denylist.
				 */
				if (!check_non_sleepable_error_inject(btf_id) &&
				    within_error_injection_list(addr))
					ret = 0;
				/* fentry/fexit/fmod_ret progs can also be sleepable if they are
				 * in the fmodret id set with the KF_SLEEPABLE flag.
				 */
				else {
					u32 *flags = btf_kfunc_is_modify_return(btf, btf_id,
										prog);

					if (flags && (*flags & KF_SLEEPABLE))
						ret = 0;
				}
				break;
			case BPF_PROG_TYPE_LSM:
				/* LSM progs check that they are attached to bpf_lsm_*() funcs.
				 * Only some of them are sleepable.
				 */
				if (bpf_lsm_is_sleepable_hook(btf_id))
					ret = 0;
				break;
			default:
				break;
			}
			if (ret) {
				module_put(mod);
				bpf_log(log, "%s is not sleepable\n", tname);
				return ret;
			}
		} else if (prog->expected_attach_type == BPF_MODIFY_RETURN) {
			if (tgt_prog) {
				module_put(mod);
				bpf_log(log, "can't modify return codes of BPF programs\n");
				return -EINVAL;
			}
			ret = -EINVAL;
			if (btf_kfunc_is_modify_return(btf, btf_id, prog) ||
			    !check_attach_modify_return(addr, tname))
				ret = 0;
			if (ret) {
				module_put(mod);
				bpf_log(log, "%s() is not modifiable\n", tname);
				return ret;
			}
		}

		break;
	}
	tgt_info->tgt_addr = addr;
	tgt_info->tgt_name = tname;
	tgt_info->tgt_type = t;
	tgt_info->tgt_mod = mod;
	return 0;
}
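/* Functions in this set may not be traced via fentry/fexit: they are used on
 * the BPF program/trampoline entry and exit paths themselves, so attaching to
 * them could recurse.
 */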
BTF_SET_START(btf_id_deny)
#ifdef CONFIG_SMP
BTF_ID(func, migrate_disable)
BTF_ID(func, migrate_enable)
#endif
#if !defined CONFIG_PREEMPT_RCU && !defined CONFIG_TINY_RCU
BTF_ID(func, rcu_read_unlock_strict)
#endif
#if defined(CONFIG_DEBUG_PREEMPT) || defined(CONFIG_TRACE_PREEMPT_TOGGLE)
BTF_ID(func, preempt_count_add)
BTF_ID(func, preempt_count_sub)
#endif
#ifdef CONFIG_PREEMPT_RCU
BTF_ID(func, __rcu_read_lock)
BTF_ID(func, __rcu_read_unlock)
#endif
BTF_SET_END(btf_id_deny)

static bool can_be_sleepable(struct bpf_prog *prog)
{
	if (prog->type == BPF_PROG_TYPE_TRACING) {
		switch (prog->expected_attach_type) {
		case BPF_TRACE_FENTRY:
		case BPF_TRACE_FEXIT:
		case BPF_MODIFY_RETURN:
		case BPF_TRACE_ITER:
			return true;
		default:
			return false;
		}
	}
	return prog->type == BPF_PROG_TYPE_LSM ||
	       prog->type == BPF_PROG_TYPE_KPROBE /* only for uprobes */ ||
	       prog->type == BPF_PROG_TYPE_STRUCT_OPS;
}
static int check_attach_btf_id(struct bpf_verifier_env *env)
{
	struct bpf_prog *prog = env->prog;
	struct bpf_prog *tgt_prog = prog->aux->dst_prog;
	struct bpf_attach_target_info tgt_info = {};
	u32 btf_id = prog->aux->attach_btf_id;
	struct bpf_trampoline *tr;
	int ret;
	u64 key;

	if (prog->type == BPF_PROG_TYPE_SYSCALL) {
		if (prog->aux->sleepable)
			/* attach_btf_id checked to be zero already */
			return 0;
		verbose(env, "Syscall programs can only be sleepable\n");
		return -EINVAL;
	}

	if (prog->aux->sleepable && !can_be_sleepable(prog)) {
		verbose(env, "Only fentry/fexit/fmod_ret, lsm, iter, uprobe, and struct_ops programs can be sleepable\n");
		return -EINVAL;
	}

	if (prog->type == BPF_PROG_TYPE_STRUCT_OPS)
		return check_struct_ops_btf_id(env);

	if (prog->type != BPF_PROG_TYPE_TRACING &&
	    prog->type != BPF_PROG_TYPE_LSM &&
	    prog->type != BPF_PROG_TYPE_EXT)
		return 0;

	ret = bpf_check_attach_target(&env->log, prog, tgt_prog, btf_id, &tgt_info);
	if (ret)
		return ret;

	if (tgt_prog && prog->type == BPF_PROG_TYPE_EXT) {
		/* to make freplace equivalent to their targets, they need to
		 * inherit env->ops and expected_attach_type for the rest of the
		 * verification
		 */
		env->ops = bpf_verifier_ops[tgt_prog->type];
		prog->expected_attach_type = tgt_prog->expected_attach_type;
	}

	/* store info about the attachment target that will be used later */
	prog->aux->attach_func_proto = tgt_info.tgt_type;
	prog->aux->attach_func_name = tgt_info.tgt_name;
	prog->aux->mod = tgt_info.tgt_mod;

	if (tgt_prog) {
		prog->aux->saved_dst_prog_type = tgt_prog->type;
		prog->aux->saved_dst_attach_type = tgt_prog->expected_attach_type;
	}

	if (prog->expected_attach_type == BPF_TRACE_RAW_TP) {
		prog->aux->attach_btf_trace = true;
		return 0;
	} else if (prog->expected_attach_type == BPF_TRACE_ITER) {
		if (!bpf_iter_prog_supported(prog))
			return -EINVAL;
		return 0;
	}

	if (prog->type == BPF_PROG_TYPE_LSM) {
		ret = bpf_lsm_verify_prog(&env->log, prog);
		if (ret < 0)
			return ret;
	} else if (prog->type == BPF_PROG_TYPE_TRACING &&
		   btf_id_set_contains(&btf_id_deny, btf_id)) {
		return -EINVAL;
	}

	key = bpf_trampoline_compute_key(tgt_prog, prog->aux->attach_btf, btf_id);
	tr = bpf_trampoline_get(key, &tgt_info);
	if (!tr)
		return -ENOMEM;

	if (tgt_prog && tgt_prog->aux->tail_call_reachable)
		tr->flags = BPF_TRAMP_F_TAIL_CALL_CTX;

	prog->aux->dst_trampoline = tr;
	return 0;
}
struct btf *bpf_get_btf_vmlinux(void)
{
	if (!btf_vmlinux && IS_ENABLED(CONFIG_DEBUG_INFO_BTF)) {
		mutex_lock(&bpf_verifier_lock);
		if (!btf_vmlinux)
			btf_vmlinux = btf_parse_vmlinux();
		mutex_unlock(&bpf_verifier_lock);
	}
	return btf_vmlinux;
}
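/* bpf_check() below is the main entry point of the verifier: it sets up the
 * verifier environment, checks BTF and the CFG, verifies all subprograms and
 * the main program, and on success rewrites the program in place (ctx access
 * conversion, helper inlining, dead code handling) before it is handed to the
 * JIT.
 */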
int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr, __u32 uattr_size)
{
	u64 start_time = ktime_get_ns();
	struct bpf_verifier_env *env;
	int i, len, ret = -EINVAL, err;
	u32 log_true_size;
	bool is_priv;

	/* no program is valid */
	if (ARRAY_SIZE(bpf_verifier_ops) == 0)
		return -EINVAL;

	/* 'struct bpf_verifier_env' can be global, but since it's not small,
	 * allocate/free it every time bpf_check() is called
	 */
	env = kzalloc(sizeof(struct bpf_verifier_env), GFP_KERNEL);
	if (!env)
		return -ENOMEM;

	len = (*prog)->len;
	env->insn_aux_data =
		vzalloc(array_size(sizeof(struct bpf_insn_aux_data), len));
	ret = -ENOMEM;
	if (!env->insn_aux_data)
		goto err_free_env;
	for (i = 0; i < len; i++)
		env->insn_aux_data[i].orig_idx = i;
	env->prog = *prog;
	env->ops = bpf_verifier_ops[env->prog->type];
	env->fd_array = make_bpfptr(attr->fd_array, uattr.is_kernel);
	is_priv = bpf_capable();

	bpf_get_btf_vmlinux();

	/* grab the mutex to protect few globals used by verifier */
	if (!is_priv)
		mutex_lock(&bpf_verifier_lock);

	/* user could have requested verbose verifier output
	 * and supplied buffer to store the verification trace
	 */
	ret = bpf_vlog_init(&env->log, attr->log_level,
			    (char __user *) (unsigned long) attr->log_buf,
			    attr->log_size);
	if (ret)
		goto err_unlock;

	mark_verifier_state_clean(env);

	if (IS_ERR(btf_vmlinux)) {
		/* Either gcc or pahole or kernel are broken. */
		verbose(env, "in-kernel BTF is malformed\n");
		ret = PTR_ERR(btf_vmlinux);
		goto skip_full_check;
	}

	env->strict_alignment = !!(attr->prog_flags & BPF_F_STRICT_ALIGNMENT);
	if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS))
		env->strict_alignment = true;
	if (attr->prog_flags & BPF_F_ANY_ALIGNMENT)
		env->strict_alignment = false;

	env->allow_ptr_leaks = bpf_allow_ptr_leaks();
	env->allow_uninit_stack = bpf_allow_uninit_stack();
	env->bypass_spec_v1 = bpf_bypass_spec_v1();
	env->bypass_spec_v4 = bpf_bypass_spec_v4();
	env->bpf_capable = bpf_capable();

	if (is_priv)
		env->test_state_freq = attr->prog_flags & BPF_F_TEST_STATE_FREQ;

	env->explored_states = kvcalloc(state_htab_size(env),
					sizeof(struct bpf_verifier_state_list *),
					GFP_USER);
	ret = -ENOMEM;
	if (!env->explored_states)
		goto skip_full_check;

	ret = check_btf_info_early(env, attr, uattr);
	if (ret < 0)
		goto skip_full_check;

	ret = add_subprog_and_kfunc(env);
	if (ret < 0)
		goto skip_full_check;

	ret = check_subprogs(env);
	if (ret < 0)
		goto skip_full_check;

	ret = check_btf_info(env, attr, uattr);
	if (ret < 0)
		goto skip_full_check;

	ret = check_attach_btf_id(env);
	if (ret)
		goto skip_full_check;

	ret = resolve_pseudo_ldimm64(env);
	if (ret < 0)
		goto skip_full_check;

	if (bpf_prog_is_offloaded(env->prog->aux)) {
		ret = bpf_prog_offload_verifier_prep(env->prog);
		if (ret)
			goto skip_full_check;
	}

	ret = check_cfg(env);
	if (ret < 0)
		goto skip_full_check;

	ret = do_check_subprogs(env);
	ret = ret ?: do_check_main(env);

	if (ret == 0 && bpf_prog_is_offloaded(env->prog->aux))
		ret = bpf_prog_offload_finalize(env);

skip_full_check:
	kvfree(env->explored_states);

	if (ret == 0)
		ret = check_max_stack_depth(env);

	/* instruction rewrites happen after this point */
	if (ret == 0)
		ret = optimize_bpf_loop(env);

	if (is_priv) {
		if (ret == 0)
			opt_hard_wire_dead_code_branches(env);
		if (ret == 0)
			ret = opt_remove_dead_code(env);
		if (ret == 0)
			ret = opt_remove_nops(env);
	} else {
		if (ret == 0)
			sanitize_dead_code(env);
	}

	if (ret == 0)
		/* program is valid, convert *(u32*)(ctx + off) accesses */
		ret = convert_ctx_accesses(env);

	if (ret == 0)
		ret = do_misc_fixups(env);

	/* do 32-bit optimization after insn patching has done so those patched
	 * insns could be handled correctly.
	 */
	if (ret == 0 && !bpf_prog_is_offloaded(env->prog->aux)) {
		ret = opt_subreg_zext_lo32_rnd_hi32(env, attr);
		env->prog->aux->verifier_zext = bpf_jit_needs_zext() ? !ret
								     : false;
	}

	if (ret == 0)
		ret = fixup_call_args(env);

	env->verification_time = ktime_get_ns() - start_time;
	print_verification_stats(env);
	env->prog->aux->verified_insns = env->insn_processed;

	/* preserve original error even if log finalization is successful */
	err = bpf_vlog_finalize(&env->log, &log_true_size);
	if (err)
		ret = err;

	if (uattr_size >= offsetofend(union bpf_attr, log_true_size) &&
	    copy_to_bpfptr_offset(uattr, offsetof(union bpf_attr, log_true_size),
				  &log_true_size, sizeof(log_true_size))) {
		ret = -EFAULT;
		goto err_release_maps;
	}

	if (ret)
		goto err_release_maps;

	if (env->used_map_cnt) {
		/* if program passed verifier, update used_maps in bpf_prog_info */
		env->prog->aux->used_maps = kmalloc_array(env->used_map_cnt,
							  sizeof(env->used_maps[0]),
							  GFP_KERNEL);

		if (!env->prog->aux->used_maps) {
			ret = -ENOMEM;
			goto err_release_maps;
		}

		memcpy(env->prog->aux->used_maps, env->used_maps,
		       sizeof(env->used_maps[0]) * env->used_map_cnt);
		env->prog->aux->used_map_cnt = env->used_map_cnt;
	}
	if (env->used_btf_cnt) {
		/* if program passed verifier, update used_btfs in bpf_prog_aux */
		env->prog->aux->used_btfs = kmalloc_array(env->used_btf_cnt,
							  sizeof(env->used_btfs[0]),
							  GFP_KERNEL);
		if (!env->prog->aux->used_btfs) {
			ret = -ENOMEM;
			goto err_release_maps;
		}

		memcpy(env->prog->aux->used_btfs, env->used_btfs,
		       sizeof(env->used_btfs[0]) * env->used_btf_cnt);
		env->prog->aux->used_btf_cnt = env->used_btf_cnt;
	}
	if (env->used_map_cnt || env->used_btf_cnt) {
		/* program is valid. Convert pseudo bpf_ld_imm64 into generic
		 * bpf_ld_imm64 instructions
		 */
		convert_pseudo_ld_imm64(env);
	}

	adjust_btf_func(env);

err_release_maps:
	if (!env->prog->aux->used_maps)
		/* if we didn't copy map pointers into bpf_prog_info, release
		 * them now. Otherwise free_used_maps() will release them.
		 */
		release_maps(env);
	if (!env->prog->aux->used_btfs)
		release_btfs(env);

	/* extension progs temporarily inherit the attach_type of their targets
	   for verification purposes, so set it back to zero before returning
	 */
	if (env->prog->type == BPF_PROG_TYPE_EXT)
		env->prog->expected_attach_type = 0;

	*prog = env->prog;
err_unlock:
	if (!is_priv)
		mutex_unlock(&bpf_verifier_lock);
	vfree(env->insn_aux_data