From: Greg Kroah-Hartman Date: Fri, 27 Sep 2024 11:47:45 +0000 (+0200) Subject: 5.10-stable patches X-Git-Tag: v6.1.112~17 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=6db2d850a1e424b1241409b219b2d86db5b3397a;p=thirdparty%2Fkernel%2Fstable-queue.git 5.10-stable patches added patches: ftrace-fix-possible-use-after-free-issue-in-ftrace_location.patch gpiolib-cdev-ignore-reconfiguration-without-direction.patch inet-inet_defrag-prevent-sk-release-while-still-in-use.patch x86-ibt-ftrace-search-for-__fentry__-location.patch --- diff --git a/queue-5.10/ftrace-fix-possible-use-after-free-issue-in-ftrace_location.patch b/queue-5.10/ftrace-fix-possible-use-after-free-issue-in-ftrace_location.patch new file mode 100644 index 00000000000..a9a3dcb98a1 --- /dev/null +++ b/queue-5.10/ftrace-fix-possible-use-after-free-issue-in-ftrace_location.patch @@ -0,0 +1,172 @@ +From e60b613df8b6253def41215402f72986fee3fc8d Mon Sep 17 00:00:00 2001 +From: Zheng Yejian +Date: Fri, 10 May 2024 03:28:59 +0800 +Subject: ftrace: Fix possible use-after-free issue in ftrace_location() + +From: Zheng Yejian + +commit e60b613df8b6253def41215402f72986fee3fc8d upstream. + +KASAN reports a bug: + + BUG: KASAN: use-after-free in ftrace_location+0x90/0x120 + Read of size 8 at addr ffff888141d40010 by task insmod/424 + CPU: 8 PID: 424 Comm: insmod Tainted: G W 6.9.0-rc2+ + [...] + Call Trace: + + dump_stack_lvl+0x68/0xa0 + print_report+0xcf/0x610 + kasan_report+0xb5/0xe0 + ftrace_location+0x90/0x120 + register_kprobe+0x14b/0xa40 + kprobe_init+0x2d/0xff0 [kprobe_example] + do_one_initcall+0x8f/0x2d0 + do_init_module+0x13a/0x3c0 + load_module+0x3082/0x33d0 + init_module_from_file+0xd2/0x130 + __x64_sys_finit_module+0x306/0x440 + do_syscall_64+0x68/0x140 + entry_SYSCALL_64_after_hwframe+0x71/0x79 + +The root cause is that, in lookup_rec(), the ftrace record of some address is being searched in the ftrace pages of some module while, at the same time, those ftrace pages are being freed in ftrace_release_mod() as the corresponding module is being deleted: + + CPU1 | CPU2 + register_kprobes() { | delete_module() { + check_kprobe_address_safe() { | + arch_check_ftrace_location() { | + ftrace_location() { | + lookup_rec() // USE! | ftrace_release_mod() // Free! + +To fix this issue: + 1. Hold the rcu lock while accessing ftrace pages in ftrace_location_range(); + 2. Use ftrace_location_range() instead of lookup_rec() in + ftrace_location(); + 3. Call synchronize_rcu() before freeing any ftrace pages, in + ftrace_process_locs()/ftrace_release_mod()/ftrace_free_mem().
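To make the pattern of the fix concrete, here is a minimal userspace sketch of it, assuming liburcu (build with -lurcu); struct page and every name below are illustrative stand-ins for ftrace_page, ftrace_location_range() and ftrace_release_mod(), not the kernel code itself:

  #include <stdlib.h>
  #include <urcu.h>

  struct page {
          struct page *next;
          unsigned long start, end;     /* address range this page covers */
  };

  static struct page *pages;            /* shared, RCU-protected list */

  /* Reader: mirrors ftrace_location_range() after the fix. Each reader
   * thread must have called rcu_register_thread() beforehand. */
  static unsigned long lookup(unsigned long ip)
  {
          unsigned long found = 0;
          struct page *pg;

          rcu_read_lock();
          for (pg = rcu_dereference(pages); pg; pg = rcu_dereference(pg->next)) {
                  if (ip >= pg->start && ip <= pg->end) {
                          found = pg->start;
                          break;
                  }
          }
          rcu_read_unlock();            /* pg must not be touched past here */
          return found;
  }

  /* Updater: mirrors ftrace_release_mod() after the fix. */
  static void release_all(void)
  {
          struct page *pg = pages, *next;

          rcu_assign_pointer(pages, NULL);      /* 1. unlink the pages */
          synchronize_rcu();                    /* 2. wait out all readers */
          for (; pg; pg = next) {               /* 3. now safe to free */
                  next = pg->next;
                  free(pg);
          }
  }

The synchronize_rcu() between unlink and free is the crux: any reader that could still hold a pointer into the list entered its read-side critical section before the unlink, and synchronize_rcu() does not return until all such readers have left it.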
+ +Link: https://lore.kernel.org/linux-trace-kernel/20240509192859.1273558-1-zhengyejian1@huawei.com + +Cc: stable@vger.kernel.org +Cc: +Cc: +Cc: +Fixes: ae6aa16fdc16 ("kprobes: introduce ftrace based optimization") +Suggested-by: Steven Rostedt +Signed-off-by: Zheng Yejian +Signed-off-by: Steven Rostedt (Google) +[Shivani: Modified to apply on v5.10.y] +Signed-off-by: Shivani Agarwal +Signed-off-by: Greg Kroah-Hartman +--- + kernel/trace/ftrace.c | 39 +++++++++++++++++++++++---------------- + 1 file changed, 23 insertions(+), 16 deletions(-) + +--- a/kernel/trace/ftrace.c ++++ b/kernel/trace/ftrace.c +@@ -1566,12 +1566,15 @@ static struct dyn_ftrace *lookup_rec(uns + unsigned long ftrace_location_range(unsigned long start, unsigned long end) + { + struct dyn_ftrace *rec; ++ unsigned long ip = 0; + ++ rcu_read_lock(); + rec = lookup_rec(start, end); + if (rec) +- return rec->ip; ++ ip = rec->ip; ++ rcu_read_unlock(); + +- return 0; ++ return ip; + } + + /** +@@ -1584,25 +1587,22 @@ unsigned long ftrace_location_range(unsi + */ + unsigned long ftrace_location(unsigned long ip) + { +- struct dyn_ftrace *rec; ++ unsigned long loc; + unsigned long offset; + unsigned long size; + +- rec = lookup_rec(ip, ip); +- if (!rec) { ++ loc = ftrace_location_range(ip, ip); ++ if (!loc) { + if (!kallsyms_lookup_size_offset(ip, &size, &offset)) + goto out; + + /* map sym+0 to __fentry__ */ + if (!offset) +- rec = lookup_rec(ip, ip + size - 1); ++ loc = ftrace_location_range(ip, ip + size - 1); + } + +- if (rec) +- return rec->ip; +- + out: +- return 0; ++ return loc; + } + + /** +@@ -6331,6 +6331,8 @@ static int ftrace_process_locs(struct mo + /* We should have used all pages unless we skipped some */ + if (pg_unuse) { + WARN_ON(!skipped); ++ /* Need to synchronize with ftrace_location_range() */ ++ synchronize_rcu(); + ftrace_free_pages(pg_unuse); + } + return ret; +@@ -6513,6 +6515,9 @@ void ftrace_release_mod(struct module *m + out_unlock: + mutex_unlock(&ftrace_lock); + ++ /* Need to synchronize with ftrace_location_range() */ ++ if (tmp_page) ++ synchronize_rcu(); + for (pg = tmp_page; pg; pg = tmp_page) { + + /* Needs to be called outside of ftrace_lock */ +@@ -6835,6 +6840,7 @@ void ftrace_free_mem(struct module *mod, + unsigned long start = (unsigned long)(start_ptr); + unsigned long end = (unsigned long)(end_ptr); + struct ftrace_page **last_pg = &ftrace_pages_start; ++ struct ftrace_page *tmp_page = NULL; + struct ftrace_page *pg; + struct dyn_ftrace *rec; + struct dyn_ftrace key; +@@ -6878,12 +6884,8 @@ void ftrace_free_mem(struct module *mod, + ftrace_update_tot_cnt--; + if (!pg->index) { + *last_pg = pg->next; +- if (pg->records) { +- free_pages((unsigned long)pg->records, pg->order); +- ftrace_number_of_pages -= 1 << pg->order; +- } +- ftrace_number_of_groups--; +- kfree(pg); ++ pg->next = tmp_page; ++ tmp_page = pg; + pg = container_of(last_pg, struct ftrace_page, next); + if (!(*last_pg)) + ftrace_pages = pg; +@@ -6900,6 +6902,11 @@ void ftrace_free_mem(struct module *mod, + clear_func_from_hashes(func); + kfree(func); + } ++ /* Need to synchronize with ftrace_location_range() */ ++ if (tmp_page) { ++ synchronize_rcu(); ++ ftrace_free_pages(tmp_page); ++ } + } + + void __init ftrace_free_init_mem(void) diff --git a/queue-5.10/gpiolib-cdev-ignore-reconfiguration-without-direction.patch b/queue-5.10/gpiolib-cdev-ignore-reconfiguration-without-direction.patch new file mode 100644 index 00000000000..44880b4ce57 --- /dev/null +++ 
b/queue-5.10/gpiolib-cdev-ignore-reconfiguration-without-direction.patch @@ -0,0 +1,72 @@ +From b440396387418fe2feaacd41ca16080e7a8bc9ad Mon Sep 17 00:00:00 2001 +From: Kent Gibson +Date: Wed, 26 Jun 2024 13:29:23 +0800 +Subject: gpiolib: cdev: Ignore reconfiguration without direction + +From: Kent Gibson + +commit b440396387418fe2feaacd41ca16080e7a8bc9ad upstream. + +linereq_set_config() behaves badly when direction is not set. +The configuration validation is borrowed from linereq_create(), where, +to verify the intent of the user, the direction must be set in order to +effect a change to the electrical configuration of a line. But, when +applied to reconfiguration, that validation does not allow for the unset +direction case, making it possible to clear flags set previously without +specifying the line direction. + +Adding to the inconsistency, those changes are not immediately applied by +linereq_set_config(), but take effect when the line value is next read +or set. + +For example, by requesting a configuration with no flags set, an output +line with GPIO_V2_LINE_FLAG_ACTIVE_LOW and GPIO_V2_LINE_FLAG_OPEN_DRAIN +set could have those flags cleared, inverting the sense of the line and +changing the line drive to push-pull on the next line value set. + +Skip the reconfiguration of lines for which the direction is not set, so +that only lines with the direction set are reconfigured. + +Fixes: a54756cb24ea ("gpiolib: cdev: support GPIO_V2_LINE_SET_CONFIG_IOCTL") +Signed-off-by: Kent Gibson +Link: https://lore.kernel.org/r/20240626052925.174272-3-warthog618@gmail.com +Signed-off-by: Bartosz Golaszewski +Signed-off-by: Greg Kroah-Hartman +--- + drivers/gpio/gpiolib-cdev.c | 13 ++++++++----- + 1 file changed, 8 insertions(+), 5 deletions(-) + +--- a/drivers/gpio/gpiolib-cdev.c ++++ b/drivers/gpio/gpiolib-cdev.c +@@ -1110,15 +1110,18 @@ static long linereq_set_config_unlocked( + for (i = 0; i < lr->num_lines; i++) { + desc = lr->lines[i].desc; + flags = gpio_v2_line_config_flags(lc, i); ++ /* ++ * Lines not explicitly reconfigured as input or output ++ * are left unchanged. ++ */ ++ if (!(flags & GPIO_V2_LINE_DIRECTION_FLAGS)) ++ continue; ++ + polarity_change = + (!!test_bit(FLAG_ACTIVE_LOW, &desc->flags) != + ((flags & GPIO_V2_LINE_FLAG_ACTIVE_LOW) != 0)); + + gpio_v2_line_config_flags_to_desc_flags(flags, &desc->flags); +- /* +- * Lines have to be requested explicitly for input +- * or output, else the line will be treated "as is". +- */ + if (flags & GPIO_V2_LINE_FLAG_OUTPUT) { + int val = gpio_v2_line_config_output_value(lc, i); + +@@ -1126,7 +1129,7 @@ static long linereq_set_config_unlocked( + ret = gpiod_direction_output(desc, val); + if (ret) + return ret; +- } else if (flags & GPIO_V2_LINE_FLAG_INPUT) { ++ } else { + ret = gpiod_direction_input(desc); + if (ret) + return ret; diff --git a/queue-5.10/inet-inet_defrag-prevent-sk-release-while-still-in-use.patch b/queue-5.10/inet-inet_defrag-prevent-sk-release-while-still-in-use.patch new file mode 100644 index 00000000000..54fb8a7dd3f --- /dev/null +++ b/queue-5.10/inet-inet_defrag-prevent-sk-release-while-still-in-use.patch @@ -0,0 +1,313 @@ +From 18685451fc4e546fc0e718580d32df3c0e5c8272 Mon Sep 17 00:00:00 2001 +From: Florian Westphal +Date: Tue, 26 Mar 2024 11:18:41 +0100 +Subject: inet: inet_defrag: prevent sk release while still in use + +From: Florian Westphal + +commit 18685451fc4e546fc0e718580d32df3c0e5c8272 upstream. + +ip_local_out() and other functions can pass skb->sk as a function argument.
+ +If the skb is a fragment and reassembly happens before such a function call +returns, the sk must not be released. + +This affects skb fragments reassembled via netfilter or similar +modules, e.g. openvswitch or act_ct.c, when run as part of the tx pipeline. + +Eric Dumazet made an initial analysis of this bug. Quoting Eric: + Calling ip_defrag() in output path is also implying skb_orphan(), + which is buggy because output path relies on sk not disappearing. + + A relevant old patch about the issue was: + 8282f27449bf ("inet: frag: Always orphan skbs inside ip_defrag()") + + [..] + + net/ipv4/ip_output.c depends on skb->sk being set, and probably to an + inet socket, not an arbitrary one. + + If we orphan the packet in ipvlan, then downstream things like FQ + packet scheduler will not work properly. + + We need to change ip_defrag() to only use skb_orphan() when really + needed, i.e. whenever frag_list is going to be used. + +Eric suggested stashing sk in the fragment queue and made an initial patch. +However, there is a problem with this: + +If the skb is refragmented again right after, ip_do_fragment() will copy +head->sk to the new fragments and set their destructor to sock_wfree. +IOW, we have no choice but to fix up sk_wmem accounting to reflect the +fully reassembled skb, else wmem will underflow. + +This change moves the orphan down into the core, to the last possible moment. +As ip_defrag_offset is aliased with the sk_buff->sk member, we must move the +offset into the FRAG_CB, else skb->sk gets clobbered. + +This allows us to delay the orphaning long enough to learn if the skb has +to be queued or if the skb is completing the reasm queue. + +In the former case, things work as before: the skb is orphaned. This is +safe because the skb gets queued/stolen and won't continue past the reasm engine. + +In the latter case, we will steal the skb->sk reference, reattach it to +the head skb, and fix up wmem accounting when inet_frag inflates truesize.
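The wmem fix-up is easy to model in stand-alone C. In this hypothetical sketch, struct sock, struct buf and all helpers are invented analogues of the kernel structures; it only demonstrates why the truesize delta must be charged to the socket before the reassembled skb is freed:

  #include <assert.h>
  #include <stdatomic.h>

  struct sock { atomic_uint wmem_alloc; };      /* bytes charged to the sk */
  struct buf { struct sock *sk; unsigned int truesize; };

  /* skb_set_owner_w() analogue: charge the buffer to the socket */
  static void charge(struct sock *sk, struct buf *b, unsigned int truesize)
  {
          b->sk = sk;
          b->truesize = truesize;
          atomic_fetch_add(&sk->wmem_alloc, truesize);
  }

  /* sock_wfree() analogue: uncharge on free */
  static void wfree(struct buf *b)
  {
          atomic_fetch_sub(&b->sk->wmem_alloc, b->truesize);
  }

  /* Reassembly: head absorbs the (orphaned) fragments, inflating its
   * truesize. The delta must be charged, as refcount_add() does in the
   * patch, or wfree() will subtract more than was ever added. */
  static void reasm(struct buf *head, unsigned int sum_truesize)
  {
          atomic_fetch_add(&head->sk->wmem_alloc,
                           sum_truesize - head->truesize);
          head->truesize = sum_truesize;
  }

  int main(void)
  {
          struct sock sk = { 0 };
          struct buf head;

          charge(&sk, &head, 256);      /* head fragment charged on entry */
          reasm(&head, 768);            /* absorbs two orphaned fragments */
          wfree(&head);                 /* frees the fully reassembled skb */
          assert(atomic_load(&sk.wmem_alloc) == 0);
          return 0;
  }

Without the charge of the delta in reasm(), wfree() would subtract 768 from a counter holding only 256; that is the wmem underflow described above.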
+ +Fixes: 7026b1ddb6b8 ("netfilter: Pass socket pointer down through okfn().") +Diagnosed-by: Eric Dumazet +Reported-by: xingwei lee +Reported-by: yue sun +Reported-by: syzbot+e5167d7144a62715044c@syzkaller.appspotmail.com +Signed-off-by: Florian Westphal +Reviewed-by: Eric Dumazet +Link: https://lore.kernel.org/r/20240326101845.30836-1-fw@strlen.de +Signed-off-by: Paolo Abeni +Signed-off-by: Saeed Mirzamohammadi +Signed-off-by: Greg Kroah-Hartman +--- + include/linux/skbuff.h | 5 -- + net/core/sock_destructor.h | 12 +++++ + net/ipv4/inet_fragment.c | 70 ++++++++++++++++++++++++++------ + net/ipv4/ip_fragment.c | 2 + net/ipv6/netfilter/nf_conntrack_reasm.c | 2 + 5 files changed, 72 insertions(+), 19 deletions(-) + create mode 100644 net/core/sock_destructor.h + +--- a/include/linux/skbuff.h ++++ b/include/linux/skbuff.h +@@ -733,10 +733,7 @@ struct sk_buff { + struct list_head list; + }; + +- union { +- struct sock *sk; +- int ip_defrag_offset; +- }; ++ struct sock *sk; + + union { + ktime_t tstamp; +--- /dev/null ++++ b/net/core/sock_destructor.h +@@ -0,0 +1,12 @@ ++/* SPDX-License-Identifier: GPL-2.0-or-later */ ++#ifndef _NET_CORE_SOCK_DESTRUCTOR_H ++#define _NET_CORE_SOCK_DESTRUCTOR_H ++#include ++ ++static inline bool is_skb_wmem(const struct sk_buff *skb) ++{ ++ return skb->destructor == sock_wfree || ++ skb->destructor == __sock_wfree || ++ (IS_ENABLED(CONFIG_INET) && skb->destructor == tcp_wfree); ++} ++#endif +--- a/net/ipv4/inet_fragment.c ++++ b/net/ipv4/inet_fragment.c +@@ -24,6 +24,8 @@ + #include + #include + ++#include "../core/sock_destructor.h" ++ + /* Use skb->cb to track consecutive/adjacent fragments coming at + * the end of the queue. Nodes in the rb-tree queue will + * contain "runs" of one or more adjacent fragments. +@@ -39,6 +41,7 @@ struct ipfrag_skb_cb { + }; + struct sk_buff *next_frag; + int frag_run_len; ++ int ip_defrag_offset; + }; + + #define FRAG_CB(skb) ((struct ipfrag_skb_cb *)((skb)->cb)) +@@ -359,12 +362,12 @@ int inet_frag_queue_insert(struct inet_f + */ + if (!last) + fragrun_create(q, skb); /* First fragment. */ +- else if (last->ip_defrag_offset + last->len < end) { ++ else if (FRAG_CB(last)->ip_defrag_offset + last->len < end) { + /* This is the common case: skb goes to the end. */ + /* Detect and discard overlaps. 
*/ +- if (offset < last->ip_defrag_offset + last->len) ++ if (offset < FRAG_CB(last)->ip_defrag_offset + last->len) + return IPFRAG_OVERLAP; +- if (offset == last->ip_defrag_offset + last->len) ++ if (offset == FRAG_CB(last)->ip_defrag_offset + last->len) + fragrun_append_to_last(q, skb); + else + fragrun_create(q, skb); +@@ -381,13 +384,13 @@ int inet_frag_queue_insert(struct inet_f + + parent = *rbn; + curr = rb_to_skb(parent); +- curr_run_end = curr->ip_defrag_offset + ++ curr_run_end = FRAG_CB(curr)->ip_defrag_offset + + FRAG_CB(curr)->frag_run_len; +- if (end <= curr->ip_defrag_offset) ++ if (end <= FRAG_CB(curr)->ip_defrag_offset) + rbn = &parent->rb_left; + else if (offset >= curr_run_end) + rbn = &parent->rb_right; +- else if (offset >= curr->ip_defrag_offset && ++ else if (offset >= FRAG_CB(curr)->ip_defrag_offset && + end <= curr_run_end) + return IPFRAG_DUP; + else +@@ -401,7 +404,7 @@ int inet_frag_queue_insert(struct inet_f + rb_insert_color(&skb->rbnode, &q->rb_fragments); + } + +- skb->ip_defrag_offset = offset; ++ FRAG_CB(skb)->ip_defrag_offset = offset; + + return IPFRAG_OK; + } +@@ -411,13 +414,28 @@ void *inet_frag_reasm_prepare(struct ine + struct sk_buff *parent) + { + struct sk_buff *fp, *head = skb_rb_first(&q->rb_fragments); +- struct sk_buff **nextp; ++ void (*destructor)(struct sk_buff *); ++ unsigned int orig_truesize = 0; ++ struct sk_buff **nextp = NULL; ++ struct sock *sk = skb->sk; + int delta; + ++ if (sk && is_skb_wmem(skb)) { ++ /* TX: skb->sk might have been passed as argument to ++ * dst->output and must remain valid until tx completes. ++ * ++ * Move sk to reassembled skb and fix up wmem accounting. ++ */ ++ orig_truesize = skb->truesize; ++ destructor = skb->destructor; ++ } ++ + if (head != skb) { + fp = skb_clone(skb, GFP_ATOMIC); +- if (!fp) +- return NULL; ++ if (!fp) { ++ head = skb; ++ goto out_restore_sk; ++ } + FRAG_CB(fp)->next_frag = FRAG_CB(skb)->next_frag; + if (RB_EMPTY_NODE(&skb->rbnode)) + FRAG_CB(parent)->next_frag = fp; +@@ -426,6 +444,12 @@ void *inet_frag_reasm_prepare(struct ine + &q->rb_fragments); + if (q->fragments_tail == skb) + q->fragments_tail = fp; ++ ++ if (orig_truesize) { ++ /* prevent skb_morph from releasing sk */ ++ skb->sk = NULL; ++ skb->destructor = NULL; ++ } + skb_morph(skb, head); + FRAG_CB(skb)->next_frag = FRAG_CB(head)->next_frag; + rb_replace_node(&head->rbnode, &skb->rbnode, +@@ -433,13 +457,13 @@ void *inet_frag_reasm_prepare(struct ine + consume_skb(head); + head = skb; + } +- WARN_ON(head->ip_defrag_offset != 0); ++ WARN_ON(FRAG_CB(head)->ip_defrag_offset != 0); + + delta = -head->truesize; + + /* Head of list must not be cloned. */ + if (skb_unclone(head, GFP_ATOMIC)) +- return NULL; ++ goto out_restore_sk; + + delta += head->truesize; + if (delta) +@@ -455,7 +479,7 @@ void *inet_frag_reasm_prepare(struct ine + + clone = alloc_skb(0, GFP_ATOMIC); + if (!clone) +- return NULL; ++ goto out_restore_sk; + skb_shinfo(clone)->frag_list = skb_shinfo(head)->frag_list; + skb_frag_list_init(head); + for (i = 0; i < skb_shinfo(head)->nr_frags; i++) +@@ -472,6 +496,21 @@ void *inet_frag_reasm_prepare(struct ine + nextp = &skb_shinfo(head)->frag_list; + } + ++out_restore_sk: ++ if (orig_truesize) { ++ int ts_delta = head->truesize - orig_truesize; ++ ++ /* if this reassembled skb is fragmented later, ++ * fraglist skbs will get skb->sk assigned from head->sk, ++ * and each frag skb will be released via sock_wfree. ++ * ++ * Update sk_wmem_alloc. 
++ */ ++ head->sk = sk; ++ head->destructor = destructor; ++ refcount_add(ts_delta, &sk->sk_wmem_alloc); ++ } ++ + return nextp; + } + EXPORT_SYMBOL(inet_frag_reasm_prepare); +@@ -479,6 +518,8 @@ EXPORT_SYMBOL(inet_frag_reasm_prepare); + void inet_frag_reasm_finish(struct inet_frag_queue *q, struct sk_buff *head, + void *reasm_data, bool try_coalesce) + { ++ struct sock *sk = is_skb_wmem(head) ? head->sk : NULL; ++ const unsigned int head_truesize = head->truesize; + struct sk_buff **nextp = (struct sk_buff **)reasm_data; + struct rb_node *rbn; + struct sk_buff *fp; +@@ -541,6 +582,9 @@ void inet_frag_reasm_finish(struct inet_ + skb_mark_not_on_list(head); + head->prev = NULL; + head->tstamp = q->stamp; ++ ++ if (sk) ++ refcount_add(sum_truesize - head_truesize, &sk->sk_wmem_alloc); + } + EXPORT_SYMBOL(inet_frag_reasm_finish); + +--- a/net/ipv4/ip_fragment.c ++++ b/net/ipv4/ip_fragment.c +@@ -377,6 +377,7 @@ static int ip_frag_queue(struct ipq *qp, + } + + skb_dst_drop(skb); ++ skb_orphan(skb); + return -EINPROGRESS; + + insert_error: +@@ -479,7 +480,6 @@ int ip_defrag(struct net *net, struct sk + struct ipq *qp; + + __IP_INC_STATS(net, IPSTATS_MIB_REASMREQDS); +- skb_orphan(skb); + + /* Lookup (or create) queue header */ + qp = ip_find(net, ip_hdr(skb), user, vif); +--- a/net/ipv6/netfilter/nf_conntrack_reasm.c ++++ b/net/ipv6/netfilter/nf_conntrack_reasm.c +@@ -296,6 +296,7 @@ static int nf_ct_frag6_queue(struct frag + } + + skb_dst_drop(skb); ++ skb_orphan(skb); + return -EINPROGRESS; + + insert_error: +@@ -471,7 +472,6 @@ int nf_ct_frag6_gather(struct net *net, + hdr = ipv6_hdr(skb); + fhdr = (struct frag_hdr *)skb_transport_header(skb); + +- skb_orphan(skb); + fq = fq_find(net, fhdr->identification, user, hdr, + skb->dev ? skb->dev->ifindex : 0); + if (fq == NULL) { diff --git a/queue-5.10/series b/queue-5.10/series index b88075c6406..c7f3ff8f752 100644 --- a/queue-5.10/series +++ b/queue-5.10/series @@ -48,3 +48,7 @@ gpio-prevent-potential-speculation-leaks-in-gpio_device_get_desc.patch mptcp-export-lookup_anno_list_by_saddr.patch mptcp-validate-id-when-stopping-the-add_addr-retransmit-timer.patch mptcp-pm-fix-uaf-in-__timer_delete_sync.patch +inet-inet_defrag-prevent-sk-release-while-still-in-use.patch +x86-ibt-ftrace-search-for-__fentry__-location.patch +ftrace-fix-possible-use-after-free-issue-in-ftrace_location.patch +gpiolib-cdev-ignore-reconfiguration-without-direction.patch diff --git a/queue-5.10/x86-ibt-ftrace-search-for-__fentry__-location.patch b/queue-5.10/x86-ibt-ftrace-search-for-__fentry__-location.patch new file mode 100644 index 00000000000..c7d77fb2177 --- /dev/null +++ b/queue-5.10/x86-ibt-ftrace-search-for-__fentry__-location.patch @@ -0,0 +1,209 @@ +From aebfd12521d9c7d0b502cf6d06314cfbcdccfe3b Mon Sep 17 00:00:00 2001 +From: Peter Zijlstra +Date: Tue, 8 Mar 2022 16:30:29 +0100 +Subject: x86/ibt,ftrace: Search for __fentry__ location + +From: Peter Zijlstra + +commit aebfd12521d9c7d0b502cf6d06314cfbcdccfe3b upstream. + +Currently a lot of ftrace code assumes __fentry__ is at sym+0. However +with Intel IBT enabled the first instruction of a function will most +likely be ENDBR. + +Change ftrace_location() to not only return the __fentry__ location +when called for the __fentry__ location, but also when called for the +sym+0 location. + +Then audit/update all callsites of this function to consistently use +these new semantics. 
+ +Suggested-by: Steven Rostedt +Signed-off-by: Peter Zijlstra (Intel) +Acked-by: Masami Hiramatsu +Acked-by: Josh Poimboeuf +Link: https://lore.kernel.org/r/20220308154318.227581603@infradead.org +Stable-dep-of: e60b613df8b6 ("ftrace: Fix possible use-after-free issue in ftrace_location()") +[Shivani: Modified to apply on v5.10.y] +Signed-off-by: Shivani Agarwal +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kernel/kprobes/core.c | 11 +-------- + kernel/bpf/trampoline.c | 20 +++-------------- + kernel/kprobes.c | 8 +----- + kernel/trace/ftrace.c | 48 ++++++++++++++++++++++++++++++++++------- + 4 files changed, 48 insertions(+), 39 deletions(-) + +--- a/arch/x86/kernel/kprobes/core.c ++++ b/arch/x86/kernel/kprobes/core.c +@@ -194,17 +194,10 @@ static unsigned long + __recover_probed_insn(kprobe_opcode_t *buf, unsigned long addr) + { + struct kprobe *kp; +- unsigned long faddr; ++ bool faddr; + + kp = get_kprobe((void *)addr); +- faddr = ftrace_location(addr); +- /* +- * Addresses inside the ftrace location are refused by +- * arch_check_ftrace_location(). Something went terribly wrong +- * if such an address is checked here. +- */ +- if (WARN_ON(faddr && faddr != addr)) +- return 0UL; ++ faddr = ftrace_location(addr) == addr; + /* + * Use the current code if it is not modified by Kprobe + * and it cannot be modified by ftrace. +--- a/kernel/bpf/trampoline.c ++++ b/kernel/bpf/trampoline.c +@@ -87,18 +87,6 @@ out: + return tr; + } + +-static int is_ftrace_location(void *ip) +-{ +- long addr; +- +- addr = ftrace_location((long)ip); +- if (!addr) +- return 0; +- if (WARN_ON_ONCE(addr != (long)ip)) +- return -EFAULT; +- return 1; +-} +- + static int unregister_fentry(struct bpf_trampoline *tr, void *old_addr) + { + void *ip = tr->func.addr; +@@ -127,12 +115,12 @@ static int modify_fentry(struct bpf_tram + static int register_fentry(struct bpf_trampoline *tr, void *new_addr) + { + void *ip = tr->func.addr; ++ unsigned long faddr; + int ret; + +- ret = is_ftrace_location(ip); +- if (ret < 0) +- return ret; +- tr->func.ftrace_managed = ret; ++ faddr = ftrace_location((unsigned long)ip); ++ if (faddr) ++ tr->func.ftrace_managed = true; + + if (tr->func.ftrace_managed) + ret = register_ftrace_direct((long)ip, (long)new_addr); +--- a/kernel/kprobes.c ++++ b/kernel/kprobes.c +@@ -1609,14 +1609,10 @@ static inline int check_kprobe_rereg(str + + int __weak arch_check_ftrace_location(struct kprobe *p) + { +- unsigned long ftrace_addr; ++ unsigned long addr = (unsigned long)p->addr; + +- ftrace_addr = ftrace_location((unsigned long)p->addr); +- if (ftrace_addr) { ++ if (ftrace_location(addr) == addr) { + #ifdef CONFIG_KPROBES_ON_FTRACE +- /* Given address is not on the instruction boundary */ +- if ((unsigned long)p->addr != ftrace_addr) +- return -EILSEQ; + p->flags |= KPROBE_FLAG_FTRACE; + #else /* !CONFIG_KPROBES_ON_FTRACE */ + return -EINVAL; +--- a/kernel/trace/ftrace.c ++++ b/kernel/trace/ftrace.c +@@ -1575,17 +1575,34 @@ unsigned long ftrace_location_range(unsi + } + + /** +- * ftrace_location - return true if the ip giving is a traced location ++ * ftrace_location - return the ftrace location + * @ip: the instruction pointer to check + * +- * Returns rec->ip if @ip given is a pointer to a ftrace location. +- * That is, the instruction that is either a NOP or call to +- * the function tracer. It checks the ftrace internal tables to +- * determine if the address belongs or not. ++ * If @ip matches the ftrace location, return @ip. ++ * If @ip matches sym+0, return sym's ftrace location. 
++ * Otherwise, return 0. + */ + unsigned long ftrace_location(unsigned long ip) + { +- return ftrace_location_range(ip, ip); ++ struct dyn_ftrace *rec; ++ unsigned long offset; ++ unsigned long size; ++ ++ rec = lookup_rec(ip, ip); ++ if (!rec) { ++ if (!kallsyms_lookup_size_offset(ip, &size, &offset)) ++ goto out; ++ ++ /* map sym+0 to __fentry__ */ ++ if (!offset) ++ rec = lookup_rec(ip, ip + size - 1); ++ } ++ ++ if (rec) ++ return rec->ip; ++ ++out: ++ return 0; + } + + /** +@@ -4948,7 +4965,8 @@ ftrace_match_addr(struct ftrace_hash *ha + { + struct ftrace_func_entry *entry; + +- if (!ftrace_location(ip)) ++ ip = ftrace_location(ip); ++ if (!ip) + return -EINVAL; + + if (remove) { +@@ -5096,11 +5114,16 @@ int register_ftrace_direct(unsigned long + struct ftrace_func_entry *entry; + struct ftrace_hash *free_hash = NULL; + struct dyn_ftrace *rec; +- int ret = -EBUSY; ++ int ret = -ENODEV; + + mutex_lock(&direct_mutex); + ++ ip = ftrace_location(ip); ++ if (!ip) ++ goto out_unlock; ++ + /* See if there's a direct function at @ip already */ ++ ret = -EBUSY; + if (ftrace_find_rec_direct(ip)) + goto out_unlock; + +@@ -5229,6 +5252,10 @@ int unregister_ftrace_direct(unsigned lo + + mutex_lock(&direct_mutex); + ++ ip = ftrace_location(ip); ++ if (!ip) ++ goto out_unlock; ++ + entry = find_direct_entry(&ip, NULL); + if (!entry) + goto out_unlock; +@@ -5360,6 +5387,11 @@ int modify_ftrace_direct(unsigned long i + mutex_lock(&direct_mutex); + + mutex_lock(&ftrace_lock); ++ ++ ip = ftrace_location(ip); ++ if (!ip) ++ goto out_unlock; ++ + entry = find_direct_entry(&ip, &rec); + if (!entry) + goto out_unlock;