From e940228ab2f54fcbf38ed364312277ac21ca5fbf Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Thu, 21 Dec 2023 11:03:29 +0100 Subject: [PATCH] 6.1-stable patches added patches: bpf-fix-prog_array_map_poke_run-map-poke-update.patch --- ...g_array_map_poke_run-map-poke-update.patch | 227 ++++++++++++++++++ queue-6.1/series | 1 + 2 files changed, 228 insertions(+) create mode 100644 queue-6.1/bpf-fix-prog_array_map_poke_run-map-poke-update.patch diff --git a/queue-6.1/bpf-fix-prog_array_map_poke_run-map-poke-update.patch b/queue-6.1/bpf-fix-prog_array_map_poke_run-map-poke-update.patch new file mode 100644 index 00000000000..bff6ff8b857 --- /dev/null +++ b/queue-6.1/bpf-fix-prog_array_map_poke_run-map-poke-update.patch @@ -0,0 +1,227 @@ +From 4b7de801606e504e69689df71475d27e35336fb3 Mon Sep 17 00:00:00 2001 +From: Jiri Olsa +Date: Wed, 6 Dec 2023 09:30:40 +0100 +Subject: bpf: Fix prog_array_map_poke_run map poke update + +From: Jiri Olsa + +commit 4b7de801606e504e69689df71475d27e35336fb3 upstream. + +Lee pointed out an issue found by syzkaller [0] hitting BUG in prog array +map poke update in prog_array_map_poke_run function due to error value +returned from bpf_arch_text_poke function. + +There's a race window where bpf_arch_text_poke can fail due to missing +bpf program kallsym symbols, which is accounted for with check for +-EINVAL in that BUG_ON call. + +The problem is that in such case we won't update the tail call jump +and cause imbalance for the next tail call update check which will +fail with -EBUSY in bpf_arch_text_poke. 
+ +I'm hitting following race during the program load: + + CPU 0 CPU 1 + + bpf_prog_load + bpf_check + do_misc_fixups + prog_array_map_poke_track + + map_update_elem + bpf_fd_array_map_update_elem + prog_array_map_poke_run + + bpf_arch_text_poke returns -EINVAL + + bpf_prog_kallsyms_add + +After bpf_arch_text_poke (CPU 1) fails to update the tail call jump, the next +poke update fails on expected jump instruction check in bpf_arch_text_poke +with -EBUSY and triggers the BUG_ON in prog_array_map_poke_run. + +Similar race exists on the program unload. + +Fixing this by moving the update to bpf_arch_poke_desc_update function which +makes sure we call __bpf_arch_text_poke that skips the bpf address check. + +Each architecture has slightly different approach wrt looking up bpf address +in bpf_arch_text_poke, so instead of splitting the function or adding new +'checkip' argument in previous version, it seems best to move the whole +map_poke_run update as arch specific code. + + [0] https://syzkaller.appspot.com/bug?extid=97a4fe20470e9bc30810 + +Fixes: ebf7d1f508a7 ("bpf, x64: rework pro/epilogue and tailcall handling in JIT") +Reported-by: syzbot+97a4fe20470e9bc30810@syzkaller.appspotmail.com +Signed-off-by: Jiri Olsa +Signed-off-by: Daniel Borkmann +Acked-by: Yonghong Song +Cc: Lee Jones +Cc: Maciej Fijalkowski +Link: https://lore.kernel.org/bpf/20231206083041.1306660-2-jolsa@kernel.org +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/net/bpf_jit_comp.c | 46 ++++++++++++++++++++++++++++++++++ + include/linux/bpf.h | 3 ++ + kernel/bpf/arraymap.c | 58 +++++++------------------------------------- + 3 files changed, 59 insertions(+), 48 deletions(-) + +--- a/arch/x86/net/bpf_jit_comp.c ++++ b/arch/x86/net/bpf_jit_comp.c +@@ -2553,3 +2553,49 @@ void bpf_jit_free(struct bpf_prog *prog) + + bpf_prog_unlock_free(prog); + } ++ ++void bpf_arch_poke_desc_update(struct bpf_jit_poke_descriptor *poke, ++ struct bpf_prog *new, struct bpf_prog *old) ++{ ++ u8 *old_addr, *new_addr, 
*old_bypass_addr; ++ int ret; ++ ++ old_bypass_addr = old ? NULL : poke->bypass_addr; ++ old_addr = old ? (u8 *)old->bpf_func + poke->adj_off : NULL; ++ new_addr = new ? (u8 *)new->bpf_func + poke->adj_off : NULL; ++ ++ /* ++ * On program loading or teardown, the program's kallsym entry ++ * might not be in place, so we use __bpf_arch_text_poke to skip ++ * the kallsyms check. ++ */ ++ if (new) { ++ ret = __bpf_arch_text_poke(poke->tailcall_target, ++ BPF_MOD_JUMP, ++ old_addr, new_addr); ++ BUG_ON(ret < 0); ++ if (!old) { ++ ret = __bpf_arch_text_poke(poke->tailcall_bypass, ++ BPF_MOD_JUMP, ++ poke->bypass_addr, ++ NULL); ++ BUG_ON(ret < 0); ++ } ++ } else { ++ ret = __bpf_arch_text_poke(poke->tailcall_bypass, ++ BPF_MOD_JUMP, ++ old_bypass_addr, ++ poke->bypass_addr); ++ BUG_ON(ret < 0); ++ /* let other CPUs finish the execution of program ++ * so that it will not possible to expose them ++ * to invalid nop, stack unwind, nop state ++ */ ++ if (!ret) ++ synchronize_rcu(); ++ ret = __bpf_arch_text_poke(poke->tailcall_target, ++ BPF_MOD_JUMP, ++ old_addr, NULL); ++ BUG_ON(ret < 0); ++ } ++} +--- a/include/linux/bpf.h ++++ b/include/linux/bpf.h +@@ -2681,6 +2681,9 @@ enum bpf_text_poke_type { + int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type t, + void *addr1, void *addr2); + ++void bpf_arch_poke_desc_update(struct bpf_jit_poke_descriptor *poke, ++ struct bpf_prog *new, struct bpf_prog *old); ++ + void *bpf_arch_text_copy(void *dst, void *src, size_t len); + int bpf_arch_text_invalidate(void *dst, size_t len); + +--- a/kernel/bpf/arraymap.c ++++ b/kernel/bpf/arraymap.c +@@ -997,11 +997,16 @@ static void prog_array_map_poke_untrack( + mutex_unlock(&aux->poke_mutex); + } + ++void __weak bpf_arch_poke_desc_update(struct bpf_jit_poke_descriptor *poke, ++ struct bpf_prog *new, struct bpf_prog *old) ++{ ++ WARN_ON_ONCE(1); ++} ++ + static void prog_array_map_poke_run(struct bpf_map *map, u32 key, + struct bpf_prog *old, + struct bpf_prog *new) + { +- u8 *old_addr, 
*new_addr, *old_bypass_addr; + struct prog_poke_elem *elem; + struct bpf_array_aux *aux; + +@@ -1010,7 +1015,7 @@ static void prog_array_map_poke_run(stru + + list_for_each_entry(elem, &aux->poke_progs, list) { + struct bpf_jit_poke_descriptor *poke; +- int i, ret; ++ int i; + + for (i = 0; i < elem->aux->size_poke_tab; i++) { + poke = &elem->aux->poke_tab[i]; +@@ -1029,21 +1034,10 @@ static void prog_array_map_poke_run(stru + * activated, so tail call updates can arrive from here + * while JIT is still finishing its final fixup for + * non-activated poke entries. +- * 3) On program teardown, the program's kallsym entry gets +- * removed out of RCU callback, but we can only untrack +- * from sleepable context, therefore bpf_arch_text_poke() +- * might not see that this is in BPF text section and +- * bails out with -EINVAL. As these are unreachable since +- * RCU grace period already passed, we simply skip them. +- * 4) Also programs reaching refcount of zero while patching ++ * 3) Also programs reaching refcount of zero while patching + * is in progress is okay since we're protected under + * poke_mutex and untrack the programs before the JIT +- * buffer is freed. When we're still in the middle of +- * patching and suddenly kallsyms entry of the program +- * gets evicted, we just skip the rest which is fine due +- * to point 3). +- * 5) Any other error happening below from bpf_arch_text_poke() +- * is a unexpected bug. ++ * buffer is freed. + */ + if (!READ_ONCE(poke->tailcall_target_stable)) + continue; +@@ -1053,39 +1047,7 @@ static void prog_array_map_poke_run(stru + poke->tail_call.key != key) + continue; + +- old_bypass_addr = old ? NULL : poke->bypass_addr; +- old_addr = old ? (u8 *)old->bpf_func + poke->adj_off : NULL; +- new_addr = new ? 
(u8 *)new->bpf_func + poke->adj_off : NULL; +- +- if (new) { +- ret = bpf_arch_text_poke(poke->tailcall_target, +- BPF_MOD_JUMP, +- old_addr, new_addr); +- BUG_ON(ret < 0 && ret != -EINVAL); +- if (!old) { +- ret = bpf_arch_text_poke(poke->tailcall_bypass, +- BPF_MOD_JUMP, +- poke->bypass_addr, +- NULL); +- BUG_ON(ret < 0 && ret != -EINVAL); +- } +- } else { +- ret = bpf_arch_text_poke(poke->tailcall_bypass, +- BPF_MOD_JUMP, +- old_bypass_addr, +- poke->bypass_addr); +- BUG_ON(ret < 0 && ret != -EINVAL); +- /* let other CPUs finish the execution of program +- * so that it will not possible to expose them +- * to invalid nop, stack unwind, nop state +- */ +- if (!ret) +- synchronize_rcu(); +- ret = bpf_arch_text_poke(poke->tailcall_target, +- BPF_MOD_JUMP, +- old_addr, NULL); +- BUG_ON(ret < 0 && ret != -EINVAL); +- } ++ bpf_arch_poke_desc_update(poke, new, old); + } + } + } diff --git a/queue-6.1/series b/queue-6.1/series index 6556ee9129b..cd5d2a1814f 100644 --- a/queue-6.1/series +++ b/queue-6.1/series @@ -1 +1,2 @@ kasan-disable-kasan_non_canonical_hook-for-hw-tags.patch +bpf-fix-prog_array_map_poke_run-map-poke-update.patch -- 2.47.3