From 1a4725078c91c8c53b133d884e228ff42973e559 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman
Date: Fri, 27 Sep 2024 13:47:38 +0200
Subject: [PATCH] 5.4-stable patches

added patches:
      bpf-fix-devmap_hash-overflow-check-on-32-bit-arches.patch
      gpio-prevent-potential-speculation-leaks-in-gpio_device_get_desc.patch
      inet-inet_defrag-prevent-sk-release-while-still-in-use.patch

---
 ...hash-overflow-check-on-32-bit-arches.patch |  59 ++++
 ...lation-leaks-in-gpio_device_get_desc.patch |  50 +++
 ...revent-sk-release-while-still-in-use.patch | 313 ++++++++++++++++++
 ...k-wiphy-mutex-is-held-for-wdev-mutex.patch |  71 ----
 queue-5.4/series                              |   4 +-
 5 files changed, 425 insertions(+), 72 deletions(-)
 create mode 100644 queue-5.4/bpf-fix-devmap_hash-overflow-check-on-32-bit-arches.patch
 create mode 100644 queue-5.4/gpio-prevent-potential-speculation-leaks-in-gpio_device_get_desc.patch
 create mode 100644 queue-5.4/inet-inet_defrag-prevent-sk-release-while-still-in-use.patch
 delete mode 100644 queue-5.4/revert-wifi-cfg80211-check-wiphy-mutex-is-held-for-wdev-mutex.patch

diff --git a/queue-5.4/bpf-fix-devmap_hash-overflow-check-on-32-bit-arches.patch b/queue-5.4/bpf-fix-devmap_hash-overflow-check-on-32-bit-arches.patch
new file mode 100644
index 00000000000..5c979ae3435
--- /dev/null
+++ b/queue-5.4/bpf-fix-devmap_hash-overflow-check-on-32-bit-arches.patch
@@ -0,0 +1,59 @@
+From 281d464a34f540de166cee74b723e97ac2515ec3 Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Toke=20H=C3=B8iland-J=C3=B8rgensen?=
+Date: Thu, 7 Mar 2024 13:03:35 +0100
+Subject: bpf: Fix DEVMAP_HASH overflow check on 32-bit arches
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Toke Høiland-Jørgensen
+
+commit 281d464a34f540de166cee74b723e97ac2515ec3 upstream.
+
+The devmap code allocates a number of hash buckets equal to the next
+power of two of the max_entries value provided when creating the map.
+When rounding up to the next power of two, the 32-bit variable storing
+the number of buckets can overflow, and the code checks for overflow by
+checking if the truncated 32-bit value is equal to 0. However, on 32-bit
+arches the rounding up itself can overflow mid-way through, because it
+ends up doing a left-shift of 32 bits on an unsigned long value. If the
+size of an unsigned long is four bytes, this is undefined behaviour, so
+there is no guarantee that we'll end up with a nice and tidy 0-value at
+the end.
+
+Syzbot managed to turn this into a crash on arm32 by creating a
+DEVMAP_HASH with max_entries > 0x80000000 and then trying to update it.
+Fix this by moving the overflow check to before the rounding up
+operation.
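+
+As a minimal userspace sketch of the failure mode (illustrative only,
+not kernel code; it mirrors the "1UL << fls_long(n - 1)" shift that the
+kernel's roundup_pow_of_two() expands to for non-constant arguments):
+
+	#include <stdio.h>
+
+	int main(void)
+	{
+		/* max_entries == 0x80000001 makes fls_long(n - 1) == 32 */
+		unsigned int shift = 32;
+
+		/* With a 4-byte unsigned long this shifts a 32-bit type
+		 * by its full width: undefined behaviour, so the result
+		 * is not guaranteed to truncate to the 0 value that the
+		 * old overflow check relied on.
+		 */
+		printf("%lu\n", 1UL << shift);
+		return 0;
+	}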
+
+Fixes: 6f9d451ab1a3 ("xdp: Add devmap_hash map type for looking up devices by hashed index")
+Link: https://lore.kernel.org/r/000000000000ed666a0611af6818@google.com
+Reported-and-tested-by: syzbot+8cd36f6b65f3cafd400a@syzkaller.appspotmail.com
+Signed-off-by: Toke Høiland-Jørgensen
+Message-ID: <20240307120340.99577-2-toke@redhat.com>
+Signed-off-by: Alexei Starovoitov
+Signed-off-by: Pu Lehui
+Signed-off-by: Greg Kroah-Hartman
+---
+ kernel/bpf/devmap.c |    9 ++++++---
+ 1 file changed, 6 insertions(+), 3 deletions(-)
+
+--- a/kernel/bpf/devmap.c
++++ b/kernel/bpf/devmap.c
+@@ -130,10 +130,13 @@ static int dev_map_init_map(struct bpf_d
+ 	cost = (u64) sizeof(struct list_head) * num_possible_cpus();
+ 
+ 	if (attr->map_type == BPF_MAP_TYPE_DEVMAP_HASH) {
+-		dtab->n_buckets = roundup_pow_of_two(dtab->map.max_entries);
+-
+-		if (!dtab->n_buckets) /* Overflow check */
++		/* hash table size must be power of 2; roundup_pow_of_two() can
++		 * overflow into UB on 32-bit arches, so check that first
++		 */
++		if (dtab->map.max_entries > 1UL << 31)
+ 			return -EINVAL;
++
++		dtab->n_buckets = roundup_pow_of_two(dtab->map.max_entries);
+ 		cost += (u64) sizeof(struct hlist_head) * dtab->n_buckets;
+ 	} else {
+ 		cost += (u64) dtab->map.max_entries * sizeof(struct bpf_dtab_netdev *);
diff --git a/queue-5.4/gpio-prevent-potential-speculation-leaks-in-gpio_device_get_desc.patch b/queue-5.4/gpio-prevent-potential-speculation-leaks-in-gpio_device_get_desc.patch
new file mode 100644
index 00000000000..0888531b853
--- /dev/null
+++ b/queue-5.4/gpio-prevent-potential-speculation-leaks-in-gpio_device_get_desc.patch
@@ -0,0 +1,50 @@
+From d795848ecce24a75dfd46481aee066ae6fe39775 Mon Sep 17 00:00:00 2001
+From: Hagar Hemdan
+Date: Thu, 23 May 2024 08:53:32 +0000
+Subject: gpio: prevent potential speculation leaks in gpio_device_get_desc()
+
+From: Hagar Hemdan
+
+commit d795848ecce24a75dfd46481aee066ae6fe39775 upstream.
+
+Userspace may trigger a speculative read of an address outside the gpio
+descriptor array.
+Users can do that by calling gpio_ioctl() with an offset out of range.
+The offset is copied from userspace and then used as an array index to
+get the gpio descriptor without sanitization in gpio_device_get_desc().
+
+This change ensures that the offset is sanitized by using
+array_index_nospec() to mitigate any possibility of speculative
+information leaks.
+
+This bug was discovered and resolved using Coverity Static Analysis
+Security Testing (SAST) by Synopsys, Inc.
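+
+The nospec pattern the fix follows, as a condensed sketch (the helper
+and its arguments here are made up for illustration; only the pairing
+of a bounds check with array_index_nospec() comes from the change):
+
+	#include <linux/nospec.h>
+
+	/* idx comes from userspace; the branch below may be mispredicted */
+	static struct gpio_desc *descs_lookup(struct gpio_desc *descs,
+					      unsigned int ngpio,
+					      unsigned int idx)
+	{
+		if (idx >= ngpio)	/* architectural bounds check */
+			return NULL;
+		/* clamp idx under speculation so a mispredicted branch
+		 * cannot read past descs[ngpio - 1]
+		 */
+		return &descs[array_index_nospec(idx, ngpio)];
+	}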
+
+Signed-off-by: Hagar Hemdan
+Link: https://lore.kernel.org/r/20240523085332.1801-1-hagarhem@amazon.com
+Signed-off-by: Bartosz Golaszewski
+Signed-off-by: Hugo SIMELIERE
+Signed-off-by: Greg Kroah-Hartman
+---
+ drivers/gpio/gpiolib.c |    3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/drivers/gpio/gpiolib.c
++++ b/drivers/gpio/gpiolib.c
+@@ -4,6 +4,7 @@
+ #include
+ #include
+ #include
++#include <linux/nospec.h>
+ #include
+ #include
+ #include
+@@ -147,7 +148,7 @@ struct gpio_desc *gpiochip_get_desc(stru
+ 	if (hwnum >= gdev->ngpio)
+ 		return ERR_PTR(-EINVAL);
+ 
+-	return &gdev->descs[hwnum];
++	return &gdev->descs[array_index_nospec(hwnum, gdev->ngpio)];
+ }
+ 
+ /**
diff --git a/queue-5.4/inet-inet_defrag-prevent-sk-release-while-still-in-use.patch b/queue-5.4/inet-inet_defrag-prevent-sk-release-while-still-in-use.patch
new file mode 100644
index 00000000000..3f0ab277928
--- /dev/null
+++ b/queue-5.4/inet-inet_defrag-prevent-sk-release-while-still-in-use.patch
@@ -0,0 +1,313 @@
+From 18685451fc4e546fc0e718580d32df3c0e5c8272 Mon Sep 17 00:00:00 2001
+From: Florian Westphal
+Date: Tue, 26 Mar 2024 11:18:41 +0100
+Subject: inet: inet_defrag: prevent sk release while still in use
+
+From: Florian Westphal
+
+commit 18685451fc4e546fc0e718580d32df3c0e5c8272 upstream.
+
+ip_local_out() and other functions can pass skb->sk as function argument.
+
+If the skb is a fragment and reassembly happens before such function call
+returns, the sk must not be released.
+
+This affects skb fragments reassembled via netfilter or similar
+modules, e.g. openvswitch or ct_act.c, when run as part of the tx
+pipeline.
+
+Eric Dumazet made an initial analysis of this bug. Quoting Eric:
+ Calling ip_defrag() in output path is also implying skb_orphan(),
+ which is buggy because output path relies on sk not disappearing.
+
+ A relevant old patch about the issue was :
+  8282f27449bf ("inet: frag: Always orphan skbs inside ip_defrag()")
+
+ [..]
+
+ net/ipv4/ip_output.c depends on skb->sk being set, and probably to an
+ inet socket, not an arbitrary one.
+
+ If we orphan the packet in ipvlan, then downstream things like FQ
+ packet scheduler will not work properly.
+
+ We need to change ip_defrag() to only use skb_orphan() when really
+ needed, ie whenever frag_list is going to be used.
+
+Eric suggested stashing sk in the fragment queue and made an initial
+patch. However, there is a problem with this:
+
+If the skb is refragmented again right after, ip_do_fragment() will copy
+head->sk to the new fragments and set up the destructor to sock_wfree.
+IOW, we have no choice but to fix up the sk_wmem accounting to reflect
+the fully reassembled skb, else wmem will underflow.
+
+This change moves the orphan down into the core, to the last possible
+moment. As ip_defrag_offset is aliased with the sk_buff->sk member, we
+must move the offset into the FRAG_CB, else skb->sk gets clobbered.
+
+This allows us to delay the orphaning long enough to learn if the skb
+has to be queued or if the skb is completing the reasm queue.
+
+In the former case, things work as before: the skb is orphaned. This is
+safe because the skb gets queued/stolen and won't continue past the
+reasm engine.
+
+In the latter case, we will steal the skb->sk reference, reattach it to
+the head skb, and fix up the wmem accounting when inet_frag inflates
+truesize.
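+
+Condensed, the accounting fix-up in inet_frag_reasm_prepare() amounts
+to the following (a sketch of the hunks below, not additional code):
+
+	struct sock *sk = skb->sk;
+
+	if (sk && is_skb_wmem(skb)) {
+		/* remember the charge the socket already paid */
+		orig_truesize = skb->truesize;
+		destructor = skb->destructor;
+	}
+	...
+	if (orig_truesize) {
+		int ts_delta = head->truesize - orig_truesize;
+
+		/* reattach sk to the reassembled head skb and grow the
+		 * socket's write-memory charge by what reassembly added,
+		 * so a later sock_wfree() does not underflow wmem
+		 */
+		head->sk = sk;
+		head->destructor = destructor;
+		refcount_add(ts_delta, &sk->sk_wmem_alloc);
+	}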
+
+Fixes: 7026b1ddb6b8 ("netfilter: Pass socket pointer down through okfn().")
+Diagnosed-by: Eric Dumazet
+Reported-by: xingwei lee
+Reported-by: yue sun
+Reported-by: syzbot+e5167d7144a62715044c@syzkaller.appspotmail.com
+Signed-off-by: Florian Westphal
+Reviewed-by: Eric Dumazet
+Link: https://lore.kernel.org/r/20240326101845.30836-1-fw@strlen.de
+Signed-off-by: Paolo Abeni
+Signed-off-by: Saeed Mirzamohammadi
+Signed-off-by: Greg Kroah-Hartman
+---
+ include/linux/skbuff.h                  |    5 --
+ net/core/sock_destructor.h              |   12 +++++
+ net/ipv4/inet_fragment.c                |   70 ++++++++++++++++++++++++++------
+ net/ipv4/ip_fragment.c                  |    2 
+ net/ipv6/netfilter/nf_conntrack_reasm.c |    2 
+ 5 files changed, 72 insertions(+), 19 deletions(-)
+ create mode 100644 net/core/sock_destructor.h
+
+--- a/include/linux/skbuff.h
++++ b/include/linux/skbuff.h
+@@ -704,10 +704,7 @@ struct sk_buff {
+ 		struct list_head	list;
+ 	};
+ 
+-	union {
+-		struct sock		*sk;
+-		int			ip_defrag_offset;
+-	};
++	struct sock		*sk;
+ 
+ 	union {
+ 		ktime_t		tstamp;
+--- /dev/null
++++ b/net/core/sock_destructor.h
+@@ -0,0 +1,12 @@
++/* SPDX-License-Identifier: GPL-2.0-or-later */
++#ifndef _NET_CORE_SOCK_DESTRUCTOR_H
++#define _NET_CORE_SOCK_DESTRUCTOR_H
++#include <net/tcp.h>
++
++static inline bool is_skb_wmem(const struct sk_buff *skb)
++{
++	return skb->destructor == sock_wfree ||
++	       skb->destructor == __sock_wfree ||
++	       (IS_ENABLED(CONFIG_INET) && skb->destructor == tcp_wfree);
++}
++#endif
+--- a/net/ipv4/inet_fragment.c
++++ b/net/ipv4/inet_fragment.c
+@@ -24,6 +24,8 @@
+ #include
+ #include
+ 
++#include "../core/sock_destructor.h"
++
+ /* Use skb->cb to track consecutive/adjacent fragments coming at
+  * the end of the queue. Nodes in the rb-tree queue will
+  * contain "runs" of one or more adjacent fragments.
+@@ -39,6 +41,7 @@ struct ipfrag_skb_cb {
+ 	};
+ 	struct sk_buff		*next_frag;
+ 	int			frag_run_len;
++	int			ip_defrag_offset;
+ };
+ 
+ #define FRAG_CB(skb)		((struct ipfrag_skb_cb *)((skb)->cb))
+@@ -359,12 +362,12 @@ int inet_frag_queue_insert(struct inet_f
+ 	 */
+ 	if (!last)
+ 		fragrun_create(q, skb);  /* First fragment. */
+-	else if (last->ip_defrag_offset + last->len < end) {
++	else if (FRAG_CB(last)->ip_defrag_offset + last->len < end) {
+ 		/* This is the common case: skb goes to the end. */
+ 		/* Detect and discard overlaps. */
+-		if (offset < last->ip_defrag_offset + last->len)
++		if (offset < FRAG_CB(last)->ip_defrag_offset + last->len)
+ 			return IPFRAG_OVERLAP;
+-		if (offset == last->ip_defrag_offset + last->len)
++		if (offset == FRAG_CB(last)->ip_defrag_offset + last->len)
+ 			fragrun_append_to_last(q, skb);
+ 		else
+ 			fragrun_create(q, skb);
+@@ -381,13 +384,13 @@ int inet_frag_queue_insert(struct inet_f
+ 
+ 		parent = *rbn;
+ 		curr = rb_to_skb(parent);
+-		curr_run_end = curr->ip_defrag_offset +
++		curr_run_end = FRAG_CB(curr)->ip_defrag_offset +
+ 				FRAG_CB(curr)->frag_run_len;
+-		if (end <= curr->ip_defrag_offset)
++		if (end <= FRAG_CB(curr)->ip_defrag_offset)
+ 			rbn = &parent->rb_left;
+ 		else if (offset >= curr_run_end)
+ 			rbn = &parent->rb_right;
+-		else if (offset >= curr->ip_defrag_offset &&
++		else if (offset >= FRAG_CB(curr)->ip_defrag_offset &&
+ 			 end <= curr_run_end)
+ 			return IPFRAG_DUP;
+ 		else
+@@ -401,7 +404,7 @@ int inet_frag_queue_insert(struct inet_f
+ 		rb_insert_color(&skb->rbnode, &q->rb_fragments);
+ 	}
+ 
+-	skb->ip_defrag_offset = offset;
++	FRAG_CB(skb)->ip_defrag_offset = offset;
+ 
+ 	return IPFRAG_OK;
+ }
+@@ -411,13 +414,28 @@ void *inet_frag_reasm_prepare(struct ine
+ 			      struct sk_buff *parent)
+ {
+ 	struct sk_buff *fp, *head = skb_rb_first(&q->rb_fragments);
+-	struct sk_buff **nextp;
++	void (*destructor)(struct sk_buff *);
++	unsigned int orig_truesize = 0;
++	struct sk_buff **nextp = NULL;
++	struct sock *sk = skb->sk;
+ 	int delta;
+ 
++	if (sk && is_skb_wmem(skb)) {
++		/* TX: skb->sk might have been passed as argument to
++		 * dst->output and must remain valid until tx completes.
++		 *
++		 * Move sk to reassembled skb and fix up wmem accounting.
++		 */
++		orig_truesize = skb->truesize;
++		destructor = skb->destructor;
++	}
++
+ 	if (head != skb) {
+ 		fp = skb_clone(skb, GFP_ATOMIC);
+-		if (!fp)
+-			return NULL;
++		if (!fp) {
++			head = skb;
++			goto out_restore_sk;
++		}
+ 		FRAG_CB(fp)->next_frag = FRAG_CB(skb)->next_frag;
+ 		if (RB_EMPTY_NODE(&skb->rbnode))
+ 			FRAG_CB(parent)->next_frag = fp;
+@@ -426,6 +444,12 @@ void *inet_frag_reasm_prepare(struct ine
+ 					  &q->rb_fragments);
+ 		if (q->fragments_tail == skb)
+ 			q->fragments_tail = fp;
++
++		if (orig_truesize) {
++			/* prevent skb_morph from releasing sk */
++			skb->sk = NULL;
++			skb->destructor = NULL;
++		}
+ 		skb_morph(skb, head);
+ 		FRAG_CB(skb)->next_frag = FRAG_CB(head)->next_frag;
+ 		rb_replace_node(&head->rbnode, &skb->rbnode,
+@@ -433,13 +457,13 @@ void *inet_frag_reasm_prepare(struct ine
+ 		consume_skb(head);
+ 		head = skb;
+ 	}
+-	WARN_ON(head->ip_defrag_offset != 0);
++	WARN_ON(FRAG_CB(head)->ip_defrag_offset != 0);
+ 
+ 	delta = -head->truesize;
+ 
+ 	/* Head of list must not be cloned. */
+ 	if (skb_unclone(head, GFP_ATOMIC))
+-		return NULL;
++		goto out_restore_sk;
+ 
+ 	delta += head->truesize;
+ 	if (delta)
+@@ -455,7 +479,7 @@ void *inet_frag_reasm_prepare(struct ine
+ 
+ 		clone = alloc_skb(0, GFP_ATOMIC);
+ 		if (!clone)
+-			return NULL;
++			goto out_restore_sk;
+ 		skb_shinfo(clone)->frag_list = skb_shinfo(head)->frag_list;
+ 		skb_frag_list_init(head);
+ 		for (i = 0; i < skb_shinfo(head)->nr_frags; i++)
+@@ -472,6 +496,21 @@ void *inet_frag_reasm_prepare(struct ine
+ 		nextp = &skb_shinfo(head)->frag_list;
+ 	}
+ 
++out_restore_sk:
++	if (orig_truesize) {
++		int ts_delta = head->truesize - orig_truesize;
++
++		/* if this reassembled skb is fragmented later,
++		 * fraglist skbs will get skb->sk assigned from head->sk,
++		 * and each frag skb will be released via sock_wfree.
++		 *
++		 * Update sk_wmem_alloc.
++		 */
++		head->sk = sk;
++		head->destructor = destructor;
++		refcount_add(ts_delta, &sk->sk_wmem_alloc);
++	}
++
+ 	return nextp;
+ }
+ EXPORT_SYMBOL(inet_frag_reasm_prepare);
+@@ -479,6 +518,8 @@ EXPORT_SYMBOL(inet_frag_reasm_prepare);
+ void inet_frag_reasm_finish(struct inet_frag_queue *q, struct sk_buff *head,
+ 			    void *reasm_data, bool try_coalesce)
+ {
++	struct sock *sk = is_skb_wmem(head) ? head->sk : NULL;
++	const unsigned int head_truesize = head->truesize;
+ 	struct sk_buff **nextp = (struct sk_buff **)reasm_data;
+ 	struct rb_node *rbn;
+ 	struct sk_buff *fp;
+@@ -541,6 +582,9 @@ void inet_frag_reasm_finish(struct inet_
+ 	skb_mark_not_on_list(head);
+ 	head->prev = NULL;
+ 	head->tstamp = q->stamp;
++
++	if (sk)
++		refcount_add(sum_truesize - head_truesize, &sk->sk_wmem_alloc);
+ }
+ EXPORT_SYMBOL(inet_frag_reasm_finish);
+ 
+--- a/net/ipv4/ip_fragment.c
++++ b/net/ipv4/ip_fragment.c
+@@ -377,6 +377,7 @@ static int ip_frag_queue(struct ipq *qp,
+ 	}
+ 
+ 	skb_dst_drop(skb);
++	skb_orphan(skb);
+ 	return -EINPROGRESS;
+ 
+ insert_error:
+@@ -479,7 +480,6 @@ int ip_defrag(struct net *net, struct sk
+ 	struct ipq *qp;
+ 
+ 	__IP_INC_STATS(net, IPSTATS_MIB_REASMREQDS);
+-	skb_orphan(skb);
+ 
+ 	/* Lookup (or create) queue header */
+ 	qp = ip_find(net, ip_hdr(skb), user, vif);
+--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
++++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
+@@ -296,6 +296,7 @@ static int nf_ct_frag6_queue(struct frag
+ 	}
+ 
+ 	skb_dst_drop(skb);
++	skb_orphan(skb);
+ 	return -EINPROGRESS;
+ 
+ insert_error:
+@@ -461,7 +462,6 @@ int nf_ct_frag6_gather(struct net *net,
+ 	hdr = ipv6_hdr(skb);
+ 	fhdr = (struct frag_hdr *)skb_transport_header(skb);
+ 
+-	skb_orphan(skb);
+ 	fq = fq_find(net, fhdr->identification, user, hdr,
+ 		     skb->dev ? skb->dev->ifindex : 0);
+ 	if (fq == NULL) {
diff --git a/queue-5.4/revert-wifi-cfg80211-check-wiphy-mutex-is-held-for-wdev-mutex.patch b/queue-5.4/revert-wifi-cfg80211-check-wiphy-mutex-is-held-for-wdev-mutex.patch
deleted file mode 100644
index dbbf5d28f98..00000000000
--- a/queue-5.4/revert-wifi-cfg80211-check-wiphy-mutex-is-held-for-wdev-mutex.patch
+++ /dev/null
@@ -1,71 +0,0 @@
-From pkshih@realtek.com Fri Sep 27 09:45:55 2024
-From: Ping-Ke Shih
-Date: Thu, 26 Sep 2024 08:30:17 +0800
-Subject: [PATCH stable 6.6] Revert "wifi: cfg80211: check wiphy mutex is held for wdev mutex"
-To:
-Cc: ,
-Message-ID: <20240926003017.5427-1-pkshih@realtek.com>
-
-From: Ping-Ke Shih
-
-This reverts commit 268f84a827534c4e4c2540a4e29daa73359fc0a5.
-
-The reverted commit is based on implementation of wiphy locking that isn't
-planned to redo on a stable kernel, so revert it to avoid warning:
-
- WARNING: CPU: 0 PID: 9 at net/wireless/core.h:231 disconnect_work+0xb8/0x144 [cfg80211]
- CPU: 0 PID: 9 Comm: kworker/0:1 Not tainted 6.6.51-00141-ga1649b6f8ed6 #7
- Hardware name: Freescale i.MX6 SoloX (Device Tree)
- Workqueue: events disconnect_work [cfg80211]
- unwind_backtrace from show_stack+0x10/0x14
- show_stack from dump_stack_lvl+0x58/0x70
- dump_stack_lvl from __warn+0x70/0x1c0
- __warn from warn_slowpath_fmt+0x16c/0x294
- warn_slowpath_fmt from disconnect_work+0xb8/0x144 [cfg80211]
- disconnect_work [cfg80211] from process_one_work+0x204/0x620
- process_one_work from worker_thread+0x1b0/0x474
- worker_thread from kthread+0x10c/0x12c
- kthread from ret_from_fork+0x14/0x24
-
-Reported-by: petter@technux.se
-Closes: https://lore.kernel.org/linux-wireless/9e98937d781c990615ef27ee0c858ff9@technux.se/T/#t
-Cc: Johannes Berg
-Signed-off-by: Ping-Ke Shih
-Signed-off-by: Greg Kroah-Hartman
----
- net/wireless/core.h |    8 +-------
- 1 file changed, 1 insertion(+), 7 deletions(-)
-
-diff --git a/net/wireless/core.h b/net/wireless/core.h
-index c955be6c6daa..f0a3a2317638 100644
---- a/net/wireless/core.h
-+++ b/net/wireless/core.h
-@@ -228,7 +228,6 @@ void cfg80211_register_wdev(struct cfg80211_registered_device *rdev,
- static inline void wdev_lock(struct wireless_dev *wdev)
- 	__acquires(wdev)
- {
--	lockdep_assert_held(&wdev->wiphy->mtx);
- 	mutex_lock(&wdev->mtx);
- 	__acquire(wdev->mtx);
- }
-@@ -236,16 +235,11 @@ static inline void wdev_lock(struct wireless_dev *wdev)
- static inline void wdev_unlock(struct wireless_dev *wdev)
- 	__releases(wdev)
- {
--	lockdep_assert_held(&wdev->wiphy->mtx);
- 	__release(wdev->mtx);
- 	mutex_unlock(&wdev->mtx);
- }
- 
--static inline void ASSERT_WDEV_LOCK(struct wireless_dev *wdev)
--{
--	lockdep_assert_held(&wdev->wiphy->mtx);
--	lockdep_assert_held(&wdev->mtx);
--}
-+#define ASSERT_WDEV_LOCK(wdev) lockdep_assert_held(&(wdev)->mtx)
- 
- static inline bool cfg80211_has_monitors_only(struct cfg80211_registered_device *rdev)
- {
---
-2.25.1
-
diff --git a/queue-5.4/series b/queue-5.4/series
index 18102ba3cbe..47cf4cb794a 100644
--- a/queue-5.4/series
+++ b/queue-5.4/series
@@ -24,4 +24,6 @@ spi-bcm63xx-enable-module-autoloading.patch
 x86-hyperv-set-x86_feature_tsc_known_freq-when-hyper.patch
 ocfs2-add-bounds-checking-to-ocfs2_xattr_find_entry.patch
 ocfs2-strict-bound-check-before-memcmp-in-ocfs2_xatt.patch
-revert-wifi-cfg80211-check-wiphy-mutex-is-held-for-wdev-mutex.patch
+gpio-prevent-potential-speculation-leaks-in-gpio_device_get_desc.patch
+inet-inet_defrag-prevent-sk-release-while-still-in-use.patch
+bpf-fix-devmap_hash-overflow-check-on-32-bit-arches.patch
--
2.47.3