From: Sasha Levin Date: Sat, 5 Aug 2023 20:50:04 +0000 (-0400) Subject: Fixes for 5.4 X-Git-Tag: v4.14.321~50 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=52f2a6f91a1a31c12fa7761274369141749bdcd1;p=thirdparty%2Fkernel%2Fstable-queue.git Fixes for 5.4 Signed-off-by: Sasha Levin --- diff --git a/queue-5.4/bpf-sockmap-remove-preempt_disable-in-sock_map_sk_ac.patch b/queue-5.4/bpf-sockmap-remove-preempt_disable-in-sock_map_sk_ac.patch new file mode 100644 index 00000000000..445536fe050 --- /dev/null +++ b/queue-5.4/bpf-sockmap-remove-preempt_disable-in-sock_map_sk_ac.patch @@ -0,0 +1,59 @@ +From a645d88fa9de1bfe63729527e48559436ee01df2 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 28 Jul 2023 08:44:11 +0200 +Subject: bpf: sockmap: Remove preempt_disable in sock_map_sk_acquire + +From: Tomas Glozar + +[ Upstream commit 13d2618b48f15966d1adfe1ff6a1985f5eef40ba ] + +Disabling preemption in sock_map_sk_acquire conflicts with GFP_ATOMIC +allocation later in sk_psock_init_link on PREEMPT_RT kernels, since +GFP_ATOMIC might sleep on RT (see bpf: Make BPF and PREEMPT_RT co-exist +patchset notes for details). + +This causes calling bpf_map_update_elem on BPF_MAP_TYPE_SOCKMAP maps to +BUG (sleeping function called from invalid context) on RT kernels. + +preempt_disable was introduced together with lock_sk and rcu_read_lock +in commit 99ba2b5aba24e ("bpf: sockhash, disallow bpf_tcp_close and update +in parallel"), probably to match disabled migration of BPF programs, and +is no longer necessary. + +Remove preempt_disable to fix BUG in sock_map_update_common on RT. 
+ +Signed-off-by: Tomas Glozar +Reviewed-by: Jakub Sitnicki +Link: https://lore.kernel.org/all/20200224140131.461979697@linutronix.de/ +Fixes: 99ba2b5aba24 ("bpf: sockhash, disallow bpf_tcp_close and update in parallel") +Reviewed-by: John Fastabend +Link: https://lore.kernel.org/r/20230728064411.305576-1-tglozar@redhat.com +Signed-off-by: Paolo Abeni +Signed-off-by: Sasha Levin +--- + net/core/sock_map.c | 2 -- + 1 file changed, 2 deletions(-) + +diff --git a/net/core/sock_map.c b/net/core/sock_map.c +index 5bce6d4d20573..5b82ff0e2680f 100644 +--- a/net/core/sock_map.c ++++ b/net/core/sock_map.c +@@ -115,7 +115,6 @@ static void sock_map_sk_acquire(struct sock *sk) + __acquires(&sk->sk_lock.slock) + { + lock_sock(sk); +- preempt_disable(); + rcu_read_lock(); + } + +@@ -123,7 +122,6 @@ static void sock_map_sk_release(struct sock *sk) + __releases(&sk->sk_lock.slock) + { + rcu_read_unlock(); +- preempt_enable(); + release_sock(sk); + } + +-- +2.40.1 + diff --git a/queue-5.4/driver-core-add-device-probe-log-helper.patch b/queue-5.4/driver-core-add-device-probe-log-helper.patch new file mode 100644 index 00000000000..3b299fec71b --- /dev/null +++ b/queue-5.4/driver-core-add-device-probe-log-helper.patch @@ -0,0 +1,105 @@ +From 1294b643c934eb581fa0901fd70eaa12a6586afc Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 13 Jul 2020 16:43:21 +0200 +Subject: driver core: add device probe log helper + +From: Andrzej Hajda + +[ Upstream commit a787e5400a1ceeb0ef92d71ec43aeb35b1fa1334 ] + +During probe every time driver gets resource it should usually check for +error printk some message if it is not -EPROBE_DEFER and return the error. +This pattern is simple but requires adding few lines after any resource +acquisition code, as a result it is often omitted or implemented only +partially. 
+dev_err_probe helps to replace such code sequences with simple call, +so code: + if (err != -EPROBE_DEFER) + dev_err(dev, ...); + return err; +becomes: + return dev_err_probe(dev, err, ...); + +Signed-off-by: Andrzej Hajda +Reviewed-by: Rafael J. Wysocki +Reviewed-by: Mark Brown +Reviewed-by: Andy Shevchenko +Link: https://lore.kernel.org/r/20200713144324.23654-2-a.hajda@samsung.com +Signed-off-by: Greg Kroah-Hartman +Stable-dep-of: ef45e8400f5b ("net: ll_temac: fix error checking of irq_of_parse_and_map()") +Signed-off-by: Sasha Levin +--- + drivers/base/core.c | 42 ++++++++++++++++++++++++++++++++++++++++++ + include/linux/device.h | 3 +++ + 2 files changed, 45 insertions(+) + +diff --git a/drivers/base/core.c b/drivers/base/core.c +index f8e157ede44f8..4949aba7e1880 100644 +--- a/drivers/base/core.c ++++ b/drivers/base/core.c +@@ -3399,6 +3399,48 @@ define_dev_printk_level(_dev_info, KERN_INFO); + + #endif + ++/** ++ * dev_err_probe - probe error check and log helper ++ * @dev: the pointer to the struct device ++ * @err: error value to test ++ * @fmt: printf-style format string ++ * @...: arguments as specified in the format string ++ * ++ * This helper implements common pattern present in probe functions for error ++ * checking: print debug or error message depending if the error value is ++ * -EPROBE_DEFER and propagate error upwards. ++ * It replaces code sequence: ++ * if (err != -EPROBE_DEFER) ++ * dev_err(dev, ...); ++ * else ++ * dev_dbg(dev, ...); ++ * return err; ++ * with ++ * return dev_err_probe(dev, err, ...); ++ * ++ * Returns @err. ++ * ++ */ ++int dev_err_probe(const struct device *dev, int err, const char *fmt, ...) 
++{ ++ struct va_format vaf; ++ va_list args; ++ ++ va_start(args, fmt); ++ vaf.fmt = fmt; ++ vaf.va = &args; ++ ++ if (err != -EPROBE_DEFER) ++ dev_err(dev, "error %d: %pV", err, &vaf); ++ else ++ dev_dbg(dev, "error %d: %pV", err, &vaf); ++ ++ va_end(args); ++ ++ return err; ++} ++EXPORT_SYMBOL_GPL(dev_err_probe); ++ + static inline bool fwnode_is_primary(struct fwnode_handle *fwnode) + { + return fwnode && !IS_ERR(fwnode->secondary); +diff --git a/include/linux/device.h b/include/linux/device.h +index d74275e2047a4..c7be3a8073ec3 100644 +--- a/include/linux/device.h ++++ b/include/linux/device.h +@@ -1871,6 +1871,9 @@ do { \ + WARN_ONCE(condition, "%s %s: " format, \ + dev_driver_string(dev), dev_name(dev), ## arg) + ++extern __printf(3, 4) ++int dev_err_probe(const struct device *dev, int err, const char *fmt, ...); ++ + /* Create alias, so I can be autoloaded. */ + #define MODULE_ALIAS_CHARDEV(major,minor) \ + MODULE_ALIAS("char-major-" __stringify(major) "-" __stringify(minor)) +-- +2.40.1 + diff --git a/queue-5.4/ip6mr-fix-skb_under_panic-in-ip6mr_cache_report.patch b/queue-5.4/ip6mr-fix-skb_under_panic-in-ip6mr_cache_report.patch new file mode 100644 index 00000000000..82814b4f96b --- /dev/null +++ b/queue-5.4/ip6mr-fix-skb_under_panic-in-ip6mr_cache_report.patch @@ -0,0 +1,77 @@ +From 6208e0ab4491793fa444a0e2d46381a6f450b4b9 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 1 Aug 2023 14:43:18 +0800 +Subject: ip6mr: Fix skb_under_panic in ip6mr_cache_report() + +From: Yue Haibing + +[ Upstream commit 30e0191b16e8a58e4620fa3e2839ddc7b9d4281c ] + +skbuff: skb_under_panic: text:ffffffff88771f69 len:56 put:-4 + head:ffff88805f86a800 data:ffff887f5f86a850 tail:0x88 end:0x2c0 dev:pim6reg + ------------[ cut here ]------------ + kernel BUG at net/core/skbuff.c:192! 
+ invalid opcode: 0000 [#1] PREEMPT SMP KASAN + CPU: 2 PID: 22968 Comm: kworker/2:11 Not tainted 6.5.0-rc3-00044-g0a8db05b571a #236 + Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.15.0-1 04/01/2014 + Workqueue: ipv6_addrconf addrconf_dad_work + RIP: 0010:skb_panic+0x152/0x1d0 + Call Trace: + + skb_push+0xc4/0xe0 + ip6mr_cache_report+0xd69/0x19b0 + reg_vif_xmit+0x406/0x690 + dev_hard_start_xmit+0x17e/0x6e0 + __dev_queue_xmit+0x2d6a/0x3d20 + vlan_dev_hard_start_xmit+0x3ab/0x5c0 + dev_hard_start_xmit+0x17e/0x6e0 + __dev_queue_xmit+0x2d6a/0x3d20 + neigh_connected_output+0x3ed/0x570 + ip6_finish_output2+0x5b5/0x1950 + ip6_finish_output+0x693/0x11c0 + ip6_output+0x24b/0x880 + NF_HOOK.constprop.0+0xfd/0x530 + ndisc_send_skb+0x9db/0x1400 + ndisc_send_rs+0x12a/0x6c0 + addrconf_dad_completed+0x3c9/0xea0 + addrconf_dad_work+0x849/0x1420 + process_one_work+0xa22/0x16e0 + worker_thread+0x679/0x10c0 + ret_from_fork+0x28/0x60 + ret_from_fork_asm+0x11/0x20 + +When setup a vlan device on dev pim6reg, DAD ns packet may sent on reg_vif_xmit(). +reg_vif_xmit() + ip6mr_cache_report() + skb_push(skb, -skb_network_offset(pkt));//skb_network_offset(pkt) is 4 +And skb_push declared as: + void *skb_push(struct sk_buff *skb, unsigned int len); + skb->data -= len; + //0xffff88805f86a84c - 0xfffffffc = 0xffff887f5f86a850 +skb->data is set to 0xffff887f5f86a850, which is invalid mem addr, lead to skb_push() fails. + +Fixes: 14fb64e1f449 ("[IPV6] MROUTE: Support PIM-SM (SSM).") +Signed-off-by: Yue Haibing +Reviewed-by: Eric Dumazet +Signed-off-by: David S. 
Miller +Signed-off-by: Sasha Levin +--- + net/ipv6/ip6mr.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c +index 6248e00c2bf72..6642bc7b9870f 100644 +--- a/net/ipv6/ip6mr.c ++++ b/net/ipv6/ip6mr.c +@@ -1065,7 +1065,7 @@ static int ip6mr_cache_report(struct mr_table *mrt, struct sk_buff *pkt, + And all this only to mangle msg->im6_msgtype and + to set msg->im6_mbz to "mbz" :-) + */ +- skb_push(skb, -skb_network_offset(pkt)); ++ __skb_pull(skb, skb_network_offset(pkt)); + + skb_push(skb, sizeof(*msg)); + skb_reset_transport_header(skb); +-- +2.40.1 + diff --git a/queue-5.4/kvm-s390-fix-sthyi-error-handling.patch b/queue-5.4/kvm-s390-fix-sthyi-error-handling.patch new file mode 100644 index 00000000000..a1404cbaa8d --- /dev/null +++ b/queue-5.4/kvm-s390-fix-sthyi-error-handling.patch @@ -0,0 +1,78 @@ +From 9cb08a9a1f2ca26200cc1759c9d1572b5b49c0b8 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 27 Jul 2023 20:29:39 +0200 +Subject: KVM: s390: fix sthyi error handling + +From: Heiko Carstens + +[ Upstream commit 0c02cc576eac161601927b41634f80bfd55bfa9e ] + +Commit 9fb6c9b3fea1 ("s390/sthyi: add cache to store hypervisor info") +added cache handling for store hypervisor info. This also changed the +possible return code for sthyi_fill(). + +Instead of only returning a condition code like the sthyi instruction would +do, it can now also return a negative error value (-ENOMEM). handle_sthyi() +was not changed accordingly. In case of an error, the negative error value +would be incorrectly injected into the guest PSW. + +Add proper error handling to prevent this, and update the comment which +describes the possible return values of sthyi_fill(). 
+ +Fixes: 9fb6c9b3fea1 ("s390/sthyi: add cache to store hypervisor info") +Reviewed-by: Christian Borntraeger +Link: https://lore.kernel.org/r/20230727182939.2050744-1-hca@linux.ibm.com +Signed-off-by: Heiko Carstens +Signed-off-by: Sasha Levin +--- + arch/s390/kernel/sthyi.c | 6 +++--- + arch/s390/kvm/intercept.c | 9 ++++++--- + 2 files changed, 9 insertions(+), 6 deletions(-) + +diff --git a/arch/s390/kernel/sthyi.c b/arch/s390/kernel/sthyi.c +index 888cc2f166db7..ce6084e28d904 100644 +--- a/arch/s390/kernel/sthyi.c ++++ b/arch/s390/kernel/sthyi.c +@@ -460,9 +460,9 @@ static int sthyi_update_cache(u64 *rc) + * + * Fills the destination with system information returned by the STHYI + * instruction. The data is generated by emulation or execution of STHYI, +- * if available. The return value is the condition code that would be +- * returned, the rc parameter is the return code which is passed in +- * register R2 + 1. ++ * if available. The return value is either a negative error value or ++ * the condition code that would be returned, the rc parameter is the ++ * return code which is passed in register R2 + 1. 
+ */ +int sthyi_fill(void *dst, u64 *rc) +{ +diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c +index a389fa85cca2d..5450d43d26ea5 100644 +--- a/arch/s390/kvm/intercept.c ++++ b/arch/s390/kvm/intercept.c +@@ -360,8 +360,8 @@ static int handle_partial_execution(struct kvm_vcpu *vcpu) + */ + int handle_sthyi(struct kvm_vcpu *vcpu) + { +- int reg1, reg2, r = 0; +- u64 code, addr, cc = 0, rc = 0; ++ int reg1, reg2, cc = 0, r = 0; ++ u64 code, addr, rc = 0; + struct sthyi_sctns *sctns = NULL; + + if (!test_kvm_facility(vcpu->kvm, 74)) +@@ -392,7 +392,10 @@ int handle_sthyi(struct kvm_vcpu *vcpu) + return -ENOMEM; + + cc = sthyi_fill(sctns, &rc); +- ++ if (cc < 0) { ++ free_page((unsigned long)sctns); ++ return cc; ++ } + out: + if (!cc) { + r = write_guest(vcpu, addr, reg2, sctns, PAGE_SIZE); +-- +2.40.1 + diff --git a/queue-5.4/misdn-hfcpci-fix-potential-deadlock-on-hc-lock.patch b/queue-5.4/misdn-hfcpci-fix-potential-deadlock-on-hc-lock.patch new file mode 100644 index 00000000000..a95901208ac --- /dev/null +++ b/queue-5.4/misdn-hfcpci-fix-potential-deadlock-on-hc-lock.patch @@ -0,0 +1,88 @@ +From 53d16071fc47e48ef818e06a35382907195ba225 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 27 Jul 2023 08:56:19 +0000 +Subject: mISDN: hfcpci: Fix potential deadlock on &hc->lock + +From: Chengfeng Ye + +[ Upstream commit 56c6be35fcbed54279df0a2c9e60480a61841d6f ] + +As &hc->lock is acquired by both timer _hfcpci_softirq() and hardirq +hfcpci_int(), the timer should disable irq before lock acquisition +otherwise deadlock could happen if the timer is preempted by the hard irq. + +Possible deadlock scenario: +hfcpci_softirq() (timer) + -> _hfcpci_softirq() + -> spin_lock(&hc->lock); + + -> hfcpci_int() + -> spin_lock(&hc->lock); (deadlock here) + +This flaw was found by an experimental static analysis tool I am developing +for irq-related deadlock. + +The tentative patch fixes the potential deadlock by spin_lock_irq() +in timer. 
+ +Fixes: b36b654a7e82 ("mISDN: Create /sys/class/mISDN") +Signed-off-by: Chengfeng Ye +Link: https://lore.kernel.org/r/20230727085619.7419-1-dg573847474@gmail.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + drivers/isdn/hardware/mISDN/hfcpci.c | 10 +++++----- + 1 file changed, 5 insertions(+), 5 deletions(-) + +diff --git a/drivers/isdn/hardware/mISDN/hfcpci.c b/drivers/isdn/hardware/mISDN/hfcpci.c +index 41ff2e3dc8430..0a683a66fc612 100644 +--- a/drivers/isdn/hardware/mISDN/hfcpci.c ++++ b/drivers/isdn/hardware/mISDN/hfcpci.c +@@ -839,7 +839,7 @@ hfcpci_fill_fifo(struct bchannel *bch) + *z1t = cpu_to_le16(new_z1); /* now send data */ + if (bch->tx_idx < bch->tx_skb->len) + return; +- dev_kfree_skb(bch->tx_skb); ++ dev_kfree_skb_any(bch->tx_skb); + if (get_next_bframe(bch)) + goto next_t_frame; + return; +@@ -895,7 +895,7 @@ hfcpci_fill_fifo(struct bchannel *bch) + } + bz->za[new_f1].z1 = cpu_to_le16(new_z1); /* for next buffer */ + bz->f1 = new_f1; /* next frame */ +- dev_kfree_skb(bch->tx_skb); ++ dev_kfree_skb_any(bch->tx_skb); + get_next_bframe(bch); + } + +@@ -1119,7 +1119,7 @@ tx_birq(struct bchannel *bch) + if (bch->tx_skb && bch->tx_idx < bch->tx_skb->len) + hfcpci_fill_fifo(bch); + else { +- dev_kfree_skb(bch->tx_skb); ++ dev_kfree_skb_any(bch->tx_skb); + if (get_next_bframe(bch)) + hfcpci_fill_fifo(bch); + } +@@ -2272,7 +2272,7 @@ _hfcpci_softirq(struct device *dev, void *unused) + return 0; + + if (hc->hw.int_m2 & HFCPCI_IRQ_ENABLE) { +- spin_lock(&hc->lock); ++ spin_lock_irq(&hc->lock); + bch = Sel_BCS(hc, hc->hw.bswapped ? 
2 : 1); + if (bch && bch->state == ISDN_P_B_RAW) { /* B1 rx&tx */ + main_rec_hfcpci(bch); +@@ -2283,7 +2283,7 @@ _hfcpci_softirq(struct device *dev, void *unused) + main_rec_hfcpci(bch); + tx_birq(bch); + } +- spin_unlock(&hc->lock); ++ spin_unlock_irq(&hc->lock); + } + return 0; + } +-- +2.40.1 + diff --git a/queue-5.4/net-add-missing-data-race-annotation-for-sk_ll_usec.patch b/queue-5.4/net-add-missing-data-race-annotation-for-sk_ll_usec.patch new file mode 100644 index 00000000000..8afd3327da1 --- /dev/null +++ b/queue-5.4/net-add-missing-data-race-annotation-for-sk_ll_usec.patch @@ -0,0 +1,36 @@ +From 8feab57693cfbbe011d879f465bb1ef42603e8ca Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 28 Jul 2023 15:03:17 +0000 +Subject: net: add missing data-race annotation for sk_ll_usec + +From: Eric Dumazet + +[ Upstream commit e5f0d2dd3c2faa671711dac6d3ff3cef307bcfe3 ] + +In a prior commit I forgot that sk_getsockopt() reads +sk->sk_ll_usec without holding a lock. + +Fixes: 0dbffbb5335a ("net: annotate data race around sk_ll_usec") +Signed-off-by: Eric Dumazet +Signed-off-by: David S. 
Miller +Signed-off-by: Sasha Levin +--- + net/core/sock.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/net/core/sock.c b/net/core/sock.c +index e1204da609a1b..636427d400d7f 100644 +--- a/net/core/sock.c ++++ b/net/core/sock.c +@@ -1474,7 +1474,7 @@ int sock_getsockopt(struct socket *sock, int level, int optname, + + #ifdef CONFIG_NET_RX_BUSY_POLL + case SO_BUSY_POLL: +- v.val = sk->sk_ll_usec; ++ v.val = READ_ONCE(sk->sk_ll_usec); + break; + #endif + +-- +2.40.1 + diff --git a/queue-5.4/net-add-missing-data-race-annotations-around-sk-sk_p.patch b/queue-5.4/net-add-missing-data-race-annotations-around-sk-sk_p.patch new file mode 100644 index 00000000000..7062adaf3e0 --- /dev/null +++ b/queue-5.4/net-add-missing-data-race-annotations-around-sk-sk_p.patch @@ -0,0 +1,63 @@ +From 3e0684dc4a13ae50a1c000ff7f924fb842ea6259 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 28 Jul 2023 15:03:16 +0000 +Subject: net: add missing data-race annotations around sk->sk_peek_off + +From: Eric Dumazet + +[ Upstream commit 11695c6e966b0ec7ed1d16777d294cef865a5c91 ] + +sk_getsockopt() runs locklessly, thus we need to annotate the read +of sk->sk_peek_off. + +While we are at it, add corresponding annotations to sk_set_peek_off() +and unix_set_peek_off(). + +Fixes: b9bb53f3836f ("sock: convert sk_peek_offset functions to WRITE_ONCE") +Signed-off-by: Eric Dumazet +Cc: Willem de Bruijn +Signed-off-by: David S. 
Miller +Signed-off-by: Sasha Levin +--- + net/core/sock.c | 4 ++-- + net/unix/af_unix.c | 2 +- + 2 files changed, 3 insertions(+), 3 deletions(-) + +diff --git a/net/core/sock.c b/net/core/sock.c +index 6d695da921094..e1204da609a1b 100644 +--- a/net/core/sock.c ++++ b/net/core/sock.c +@@ -1444,7 +1444,7 @@ int sock_getsockopt(struct socket *sock, int level, int optname, + if (!sock->ops->set_peek_off) + return -EOPNOTSUPP; + +- v.val = sk->sk_peek_off; ++ v.val = READ_ONCE(sk->sk_peek_off); + break; + case SO_NOFCS: + v.val = sock_flag(sk, SOCK_NOFCS); +@@ -2652,7 +2652,7 @@ EXPORT_SYMBOL(__sk_mem_reclaim); + + int sk_set_peek_off(struct sock *sk, int val) + { +- sk->sk_peek_off = val; ++ WRITE_ONCE(sk->sk_peek_off, val); + return 0; + } + EXPORT_SYMBOL_GPL(sk_set_peek_off); +diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c +index 01fd049da104a..f966b64d2939a 100644 +--- a/net/unix/af_unix.c ++++ b/net/unix/af_unix.c +@@ -701,7 +701,7 @@ static int unix_set_peek_off(struct sock *sk, int val) + if (mutex_lock_interruptible(&u->iolock)) + return -EINTR; + +- sk->sk_peek_off = val; ++ WRITE_ONCE(sk->sk_peek_off, val); + mutex_unlock(&u->iolock); + + return 0; +-- +2.40.1 + diff --git a/queue-5.4/net-add-missing-read_once-sk-sk_rcvbuf-annotation.patch b/queue-5.4/net-add-missing-read_once-sk-sk_rcvbuf-annotation.patch new file mode 100644 index 00000000000..b140684044d --- /dev/null +++ b/queue-5.4/net-add-missing-read_once-sk-sk_rcvbuf-annotation.patch @@ -0,0 +1,36 @@ +From a66bb1ab22fc670307c90897bfb25a83f8d38c14 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 28 Jul 2023 15:03:14 +0000 +Subject: net: add missing READ_ONCE(sk->sk_rcvbuf) annotation + +From: Eric Dumazet + +[ Upstream commit b4b553253091cafe9ec38994acf42795e073bef5 ] + +In a prior commit, I forgot to change sk_getsockopt() +when reading sk->sk_rcvbuf locklessly. + +Fixes: ebb3b78db7bf ("tcp: annotate sk->sk_rcvbuf lockless reads") +Signed-off-by: Eric Dumazet +Signed-off-by: David S. 
Miller +Signed-off-by: Sasha Levin +--- + net/core/sock.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/net/core/sock.c b/net/core/sock.c +index e6d26cfba32d5..6d695da921094 100644 +--- a/net/core/sock.c ++++ b/net/core/sock.c +@@ -1262,7 +1262,7 @@ int sock_getsockopt(struct socket *sock, int level, int optname, + break; + + case SO_RCVBUF: +- v.val = sk->sk_rcvbuf; ++ v.val = READ_ONCE(sk->sk_rcvbuf); + break; + + case SO_REUSEADDR: +-- +2.40.1 + diff --git a/queue-5.4/net-add-missing-read_once-sk-sk_rcvlowat-annotation.patch b/queue-5.4/net-add-missing-read_once-sk-sk_rcvlowat-annotation.patch new file mode 100644 index 00000000000..25e944fb1da --- /dev/null +++ b/queue-5.4/net-add-missing-read_once-sk-sk_rcvlowat-annotation.patch @@ -0,0 +1,36 @@ +From 510bfde141d282599b53223172b4fad554e9e0ea Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 28 Jul 2023 15:03:11 +0000 +Subject: net: add missing READ_ONCE(sk->sk_rcvlowat) annotation + +From: Eric Dumazet + +[ Upstream commit e6d12bdb435d23ff6c1890c852d85408a2f496ee ] + +In a prior commit, I forgot to change sk_getsockopt() +when reading sk->sk_rcvlowat locklessly. + +Fixes: eac66402d1c3 ("net: annotate sk->sk_rcvlowat lockless reads") +Signed-off-by: Eric Dumazet +Signed-off-by: David S. 
Miller +Signed-off-by: Sasha Levin +--- + net/core/sock.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/net/core/sock.c b/net/core/sock.c +index 539c39ad1e488..a73111be68581 100644 +--- a/net/core/sock.c ++++ b/net/core/sock.c +@@ -1350,7 +1350,7 @@ int sock_getsockopt(struct socket *sock, int level, int optname, + break; + + case SO_RCVLOWAT: +- v.val = sk->sk_rcvlowat; ++ v.val = READ_ONCE(sk->sk_rcvlowat); + break; + + case SO_SNDLOWAT: +-- +2.40.1 + diff --git a/queue-5.4/net-add-missing-read_once-sk-sk_sndbuf-annotation.patch b/queue-5.4/net-add-missing-read_once-sk-sk_sndbuf-annotation.patch new file mode 100644 index 00000000000..77e3a0e101c --- /dev/null +++ b/queue-5.4/net-add-missing-read_once-sk-sk_sndbuf-annotation.patch @@ -0,0 +1,36 @@ +From 01ab33478ac6646afcb236b3c1ecf6084c4d30c4 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 28 Jul 2023 15:03:13 +0000 +Subject: net: add missing READ_ONCE(sk->sk_sndbuf) annotation + +From: Eric Dumazet + +[ Upstream commit 74bc084327c643499474ba75df485607da37dd6e ] + +In a prior commit, I forgot to change sk_getsockopt() +when reading sk->sk_sndbuf locklessly. + +Fixes: e292f05e0df7 ("tcp: annotate sk->sk_sndbuf lockless reads") +Signed-off-by: Eric Dumazet +Signed-off-by: David S. 
Miller +Signed-off-by: Sasha Levin +--- + net/core/sock.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/net/core/sock.c b/net/core/sock.c +index a73111be68581..e6d26cfba32d5 100644 +--- a/net/core/sock.c ++++ b/net/core/sock.c +@@ -1258,7 +1258,7 @@ int sock_getsockopt(struct socket *sock, int level, int optname, + break; + + case SO_SNDBUF: +- v.val = sk->sk_sndbuf; ++ v.val = READ_ONCE(sk->sk_sndbuf); + break; + + case SO_RCVBUF: +-- +2.40.1 + diff --git a/queue-5.4/net-annotate-data-races-around-sk-sk_max_pacing_rate.patch b/queue-5.4/net-annotate-data-races-around-sk-sk_max_pacing_rate.patch new file mode 100644 index 00000000000..b9ba531c886 --- /dev/null +++ b/queue-5.4/net-annotate-data-races-around-sk-sk_max_pacing_rate.patch @@ -0,0 +1,54 @@ +From 453396818d23d8b006379c482cb71b839407d1b8 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 28 Jul 2023 15:03:10 +0000 +Subject: net: annotate data-races around sk->sk_max_pacing_rate + +From: Eric Dumazet + +[ Upstream commit ea7f45ef77b39e72244d282e47f6cb1ef4135cd2 ] + +sk_getsockopt() runs locklessly. This means sk->sk_max_pacing_rate +can be read while other threads are changing its value. + +Fixes: 62748f32d501 ("net: introduce SO_MAX_PACING_RATE") +Signed-off-by: Eric Dumazet +Signed-off-by: David S. 
Miller +Signed-off-by: Sasha Levin +--- + net/core/sock.c | 9 ++++++--- + 1 file changed, 6 insertions(+), 3 deletions(-) + +diff --git a/net/core/sock.c b/net/core/sock.c +index d55eea5538bce..539c39ad1e488 100644 +--- a/net/core/sock.c ++++ b/net/core/sock.c +@@ -1117,7 +1117,8 @@ int sock_setsockopt(struct socket *sock, int level, int optname, + cmpxchg(&sk->sk_pacing_status, + SK_PACING_NONE, + SK_PACING_NEEDED); +- sk->sk_max_pacing_rate = ulval; ++ /* Pairs with READ_ONCE() from sk_getsockopt() */ ++ WRITE_ONCE(sk->sk_max_pacing_rate, ulval); + sk->sk_pacing_rate = min(sk->sk_pacing_rate, ulval); + break; + } +@@ -1478,12 +1479,14 @@ int sock_getsockopt(struct socket *sock, int level, int optname, + #endif + + case SO_MAX_PACING_RATE: ++ /* The READ_ONCE() pair with the WRITE_ONCE() in sk_setsockopt() */ + if (sizeof(v.ulval) != sizeof(v.val) && len >= sizeof(v.ulval)) { + lv = sizeof(v.ulval); +- v.ulval = sk->sk_max_pacing_rate; ++ v.ulval = READ_ONCE(sk->sk_max_pacing_rate); + } else { + /* 32bit version */ +- v.val = min_t(unsigned long, sk->sk_max_pacing_rate, ~0U); ++ v.val = min_t(unsigned long, ~0U, ++ READ_ONCE(sk->sk_max_pacing_rate)); + } + break; + +-- +2.40.1 + diff --git a/queue-5.4/net-dcb-choose-correct-policy-to-parse-dcb_attr_bcn.patch b/queue-5.4/net-dcb-choose-correct-policy-to-parse-dcb_attr_bcn.patch new file mode 100644 index 00000000000..eab6b4c0e2f --- /dev/null +++ b/queue-5.4/net-dcb-choose-correct-policy-to-parse-dcb_attr_bcn.patch @@ -0,0 +1,103 @@ +From a467630d0bf7ccb82a56cd9824e27eb0d26d0534 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 1 Aug 2023 09:32:48 +0800 +Subject: net: dcb: choose correct policy to parse DCB_ATTR_BCN + +From: Lin Ma + +[ Upstream commit 31d49ba033095f6e8158c60f69714a500922e0c3 ] + +The dcbnl_bcn_setcfg uses erroneous policy to parse tb[DCB_ATTR_BCN], +which is introduced in commit 859ee3c43812 ("DCB: Add support for DCB +BCN"). 
Please see the comment in below code + +static int dcbnl_bcn_setcfg(...) +{ + ... + ret = nla_parse_nested_deprecated(..., dcbnl_pfc_up_nest, .. ) + // !!! dcbnl_pfc_up_nest for attributes + // DCB_PFC_UP_ATTR_0 to DCB_PFC_UP_ATTR_ALL in enum dcbnl_pfc_up_attrs + ... + for (i = DCB_BCN_ATTR_RP_0; i <= DCB_BCN_ATTR_RP_7; i++) { + // !!! DCB_BCN_ATTR_RP_0 to DCB_BCN_ATTR_RP_7 in enum dcbnl_bcn_attrs + ... + value_byte = nla_get_u8(data[i]); + ... + } + ... + for (i = DCB_BCN_ATTR_BCNA_0; i <= DCB_BCN_ATTR_RI; i++) { + // !!! DCB_BCN_ATTR_BCNA_0 to DCB_BCN_ATTR_RI in enum dcbnl_bcn_attrs + ... + value_int = nla_get_u32(data[i]); + ... + } + ... +} + +That is, the nla_parse_nested_deprecated uses dcbnl_pfc_up_nest +attributes to parse nlattr defined in dcbnl_pfc_up_attrs. But the +following access code fetch each nlattr as dcbnl_bcn_attrs attributes. +By looking up the associated nla_policy for dcbnl_bcn_attrs. We can find +the beginning part of these two policies are "same". + +static const struct nla_policy dcbnl_pfc_up_nest[...] = { + [DCB_PFC_UP_ATTR_0] = {.type = NLA_U8}, + [DCB_PFC_UP_ATTR_1] = {.type = NLA_U8}, + [DCB_PFC_UP_ATTR_2] = {.type = NLA_U8}, + [DCB_PFC_UP_ATTR_3] = {.type = NLA_U8}, + [DCB_PFC_UP_ATTR_4] = {.type = NLA_U8}, + [DCB_PFC_UP_ATTR_5] = {.type = NLA_U8}, + [DCB_PFC_UP_ATTR_6] = {.type = NLA_U8}, + [DCB_PFC_UP_ATTR_7] = {.type = NLA_U8}, + [DCB_PFC_UP_ATTR_ALL] = {.type = NLA_FLAG}, +}; + +static const struct nla_policy dcbnl_bcn_nest[...] = { + [DCB_BCN_ATTR_RP_0] = {.type = NLA_U8}, + [DCB_BCN_ATTR_RP_1] = {.type = NLA_U8}, + [DCB_BCN_ATTR_RP_2] = {.type = NLA_U8}, + [DCB_BCN_ATTR_RP_3] = {.type = NLA_U8}, + [DCB_BCN_ATTR_RP_4] = {.type = NLA_U8}, + [DCB_BCN_ATTR_RP_5] = {.type = NLA_U8}, + [DCB_BCN_ATTR_RP_6] = {.type = NLA_U8}, + [DCB_BCN_ATTR_RP_7] = {.type = NLA_U8}, + [DCB_BCN_ATTR_RP_ALL] = {.type = NLA_FLAG}, + // from here is somewhat different + [DCB_BCN_ATTR_BCNA_0] = {.type = NLA_U32}, + ... 
+ [DCB_BCN_ATTR_ALL] = {.type = NLA_FLAG}, +}; + +Therefore, the current code is buggy and this +nla_parse_nested_deprecated could overflow the dcbnl_pfc_up_nest and use +the adjacent nla_policy to parse attributes from DCB_BCN_ATTR_BCNA_0. + +Hence use the correct policy dcbnl_bcn_nest to parse the nested +tb[DCB_ATTR_BCN] TLV. + +Fixes: 859ee3c43812 ("DCB: Add support for DCB BCN") +Signed-off-by: Lin Ma +Reviewed-by: Simon Horman +Link: https://lore.kernel.org/r/20230801013248.87240-1-linma@zju.edu.cn +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + net/dcb/dcbnl.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/net/dcb/dcbnl.c b/net/dcb/dcbnl.c +index b53d5e1d026fe..71e97e2a36845 100644 +--- a/net/dcb/dcbnl.c ++++ b/net/dcb/dcbnl.c +@@ -946,7 +946,7 @@ static int dcbnl_bcn_setcfg(struct net_device *netdev, struct nlmsghdr *nlh, + return -EOPNOTSUPP; + + ret = nla_parse_nested_deprecated(data, DCB_BCN_ATTR_MAX, +- tb[DCB_ATTR_BCN], dcbnl_pfc_up_nest, ++ tb[DCB_ATTR_BCN], dcbnl_bcn_nest, + NULL); + if (ret) + return ret; +-- +2.40.1 + diff --git a/queue-5.4/net-ll_temac-fix-error-checking-of-irq_of_parse_and_.patch b/queue-5.4/net-ll_temac-fix-error-checking-of-irq_of_parse_and_.patch new file mode 100644 index 00000000000..4b76642b758 --- /dev/null +++ b/queue-5.4/net-ll_temac-fix-error-checking-of-irq_of_parse_and_.patch @@ -0,0 +1,54 @@ +From 0146920e4977df2a528283ba1c346d18702f23b6 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 31 Jul 2023 10:42:32 +0300 +Subject: net: ll_temac: fix error checking of irq_of_parse_and_map() + +From: Dan Carpenter + +[ Upstream commit ef45e8400f5bb66b03cc949f76c80e2a118447de ] + +Most kernel functions return negative error codes but some irq functions +return zero on error. In this code irq_of_parse_and_map(), returns zero +and platform_get_irq() returns negative error codes. We need to handle +both cases appropriately. 
+ +Fixes: 8425c41d1ef7 ("net: ll_temac: Extend support to non-device-tree platforms") +Signed-off-by: Dan Carpenter +Acked-by: Esben Haabendal +Reviewed-by: Yang Yingliang +Reviewed-by: Harini Katakam +Link: https://lore.kernel.org/r/3d0aef75-06e0-45a5-a2a6-2cc4738d4143@moroto.mountain +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/xilinx/ll_temac_main.c | 12 ++++++++---- + 1 file changed, 8 insertions(+), 4 deletions(-) + +diff --git a/drivers/net/ethernet/xilinx/ll_temac_main.c b/drivers/net/ethernet/xilinx/ll_temac_main.c +index 9756d83994fca..86edc95919146 100644 +--- a/drivers/net/ethernet/xilinx/ll_temac_main.c ++++ b/drivers/net/ethernet/xilinx/ll_temac_main.c +@@ -1481,12 +1481,16 @@ static int temac_probe(struct platform_device *pdev) + } + + /* Error handle returned DMA RX and TX interrupts */ +- if (lp->rx_irq < 0) +- return dev_err_probe(&pdev->dev, lp->rx_irq, ++ if (lp->rx_irq <= 0) { ++ rc = lp->rx_irq ?: -EINVAL; ++ return dev_err_probe(&pdev->dev, rc, + "could not get DMA RX irq\n"); +- if (lp->tx_irq < 0) +- return dev_err_probe(&pdev->dev, lp->tx_irq, ++ } ++ if (lp->tx_irq <= 0) { ++ rc = lp->tx_irq ?: -EINVAL; ++ return dev_err_probe(&pdev->dev, rc, + "could not get DMA TX irq\n"); ++ } + + if (temac_np) { + /* Retrieve the MAC address */ +-- +2.40.1 + diff --git a/queue-5.4/net-ll_temac-switch-to-use-dev_err_probe-helper.patch b/queue-5.4/net-ll_temac-switch-to-use-dev_err_probe-helper.patch new file mode 100644 index 00000000000..01c61d7e0f3 --- /dev/null +++ b/queue-5.4/net-ll_temac-switch-to-use-dev_err_probe-helper.patch @@ -0,0 +1,50 @@ +From 3b5f7c539ee1ec97d5fe660d8c4c960263578049 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 15 Sep 2022 19:42:14 +0800 +Subject: net: ll_temac: Switch to use dev_err_probe() helper + +From: Yang Yingliang + +[ Upstream commit 75ae8c284c00dc3584b7c173f6fcf96ee15bd02c ] + +dev_err() can be replaced with dev_err_probe() which will check if error +code is 
-EPROBE_DEFER. + +Signed-off-by: Yang Yingliang +Signed-off-by: David S. Miller +Stable-dep-of: ef45e8400f5b ("net: ll_temac: fix error checking of irq_of_parse_and_map()") +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/xilinx/ll_temac_main.c | 16 ++++++---------- + 1 file changed, 6 insertions(+), 10 deletions(-) + +diff --git a/drivers/net/ethernet/xilinx/ll_temac_main.c b/drivers/net/ethernet/xilinx/ll_temac_main.c +index a109438f4a78e..9756d83994fca 100644 +--- a/drivers/net/ethernet/xilinx/ll_temac_main.c ++++ b/drivers/net/ethernet/xilinx/ll_temac_main.c +@@ -1481,16 +1481,12 @@ static int temac_probe(struct platform_device *pdev) + } + + /* Error handle returned DMA RX and TX interrupts */ +- if (lp->rx_irq < 0) { +- if (lp->rx_irq != -EPROBE_DEFER) +- dev_err(&pdev->dev, "could not get DMA RX irq\n"); +- return lp->rx_irq; +- } +- if (lp->tx_irq < 0) { +- if (lp->tx_irq != -EPROBE_DEFER) +- dev_err(&pdev->dev, "could not get DMA TX irq\n"); +- return lp->tx_irq; +- } ++ if (lp->rx_irq < 0) ++ return dev_err_probe(&pdev->dev, lp->rx_irq, ++ "could not get DMA RX irq\n"); ++ if (lp->tx_irq < 0) ++ return dev_err_probe(&pdev->dev, lp->tx_irq, ++ "could not get DMA TX irq\n"); + + if (temac_np) { + /* Retrieve the MAC address */ +-- +2.40.1 + diff --git a/queue-5.4/net-mlx5-dr-fix-memory-leak-in-mlx5dr_cmd_create_ref.patch b/queue-5.4/net-mlx5-dr-fix-memory-leak-in-mlx5dr_cmd_create_ref.patch new file mode 100644 index 00000000000..6245e79e73a --- /dev/null +++ b/queue-5.4/net-mlx5-dr-fix-memory-leak-in-mlx5dr_cmd_create_ref.patch @@ -0,0 +1,44 @@ +From 8dcc22a25e8380ddb4f3de2c569743474f92d001 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 5 Jul 2023 20:15:27 +0800 +Subject: net/mlx5: DR, fix memory leak in mlx5dr_cmd_create_reformat_ctx + +From: Zhengchao Shao + +[ Upstream commit 5dd77585dd9d0e03dd1bceb95f0269a7eaf6b936 ] + +when mlx5_cmd_exec failed in mlx5dr_cmd_create_reformat_ctx, the memory +pointed by 'in' is not released, which will 
cause memory leak. Move memory +release after mlx5_cmd_exec. + +Fixes: 1d9186476e12 ("net/mlx5: DR, Add direct rule command utilities") +Signed-off-by: Zhengchao Shao +Reviewed-by: Leon Romanovsky +Signed-off-by: Saeed Mahameed +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/mellanox/mlx5/core/steering/dr_cmd.c | 5 +++-- + 1 file changed, 3 insertions(+), 2 deletions(-) + +diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_cmd.c +index 64f6f529f6eb1..45b90c7698787 100644 +--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_cmd.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_cmd.c +@@ -423,11 +423,12 @@ int mlx5dr_cmd_create_reformat_ctx(struct mlx5_core_dev *mdev, + + err = mlx5_cmd_exec(mdev, in, inlen, out, sizeof(out)); + if (err) +- return err; ++ goto err_free_in; + + *reformat_id = MLX5_GET(alloc_packet_reformat_context_out, out, packet_reformat_id); +- kvfree(in); + ++err_free_in: ++ kvfree(in); + return err; + } + +-- +2.40.1 + diff --git a/queue-5.4/net-mlx5e-fix-return-value-check-in-mlx5e_ipsec_remo.patch b/queue-5.4/net-mlx5e-fix-return-value-check-in-mlx5e_ipsec_remo.patch new file mode 100644 index 00000000000..05e2818ce92 --- /dev/null +++ b/queue-5.4/net-mlx5e-fix-return-value-check-in-mlx5e_ipsec_remo.patch @@ -0,0 +1,39 @@ +From 28773084d652361ddb715321b9a8bde21797f5b6 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 25 Jul 2023 14:56:55 +0800 +Subject: net/mlx5e: fix return value check in mlx5e_ipsec_remove_trailer() + +From: Yuanjun Gong + +[ Upstream commit e5bcb7564d3bd0c88613c76963c5349be9c511c5 ] + +mlx5e_ipsec_remove_trailer() should return an error code if function +pskb_trim() returns an unexpected value. 
+ +Fixes: 2ac9cfe78223 ("net/mlx5e: IPSec, Add Innova IPSec offload TX data path") +Signed-off-by: Yuanjun Gong +Reviewed-by: Leon Romanovsky +Signed-off-by: Saeed Mahameed +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c +index 0dd17514caae8..d212706f1bdea 100644 +--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c +@@ -121,7 +121,9 @@ static int mlx5e_ipsec_remove_trailer(struct sk_buff *skb, struct xfrm_state *x) + + trailer_len = alen + plen + 2; + +- pskb_trim(skb, skb->len - trailer_len); ++ ret = pskb_trim(skb, skb->len - trailer_len); ++ if (unlikely(ret)) ++ return ret; + if (skb->protocol == htons(ETH_P_IP)) { + ipv4hdr->tot_len = htons(ntohs(ipv4hdr->tot_len) - trailer_len); + ip_send_check(ipv4hdr); +-- +2.40.1 + diff --git a/queue-5.4/net-sched-cls_fw-no-longer-copy-tcf_result-on-update.patch b/queue-5.4/net-sched-cls_fw-no-longer-copy-tcf_result-on-update.patch new file mode 100644 index 00000000000..02d2f6cf0cd --- /dev/null +++ b/queue-5.4/net-sched-cls_fw-no-longer-copy-tcf_result-on-update.patch @@ -0,0 +1,50 @@ +From 0f7864e133323a51b83c24ca54fa3a70eaefdfe9 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sat, 29 Jul 2023 08:32:01 -0400 +Subject: net/sched: cls_fw: No longer copy tcf_result on update to avoid + use-after-free + +From: valis + +[ Upstream commit 76e42ae831991c828cffa8c37736ebfb831ad5ec ] + +When fw_change() is called on an existing filter, the whole +tcf_result struct is always copied into the new instance of the filter. 
+ +This causes a problem when updating a filter bound to a class, +as tcf_unbind_filter() is always called on the old instance in the +success path, decreasing filter_cnt of the still referenced class +and allowing it to be deleted, leading to a use-after-free. + +Fix this by no longer copying the tcf_result struct from the old filter. + +Fixes: e35a8ee5993b ("net: sched: fw use RCU") +Reported-by: valis +Reported-by: Bing-Jhong Billy Jheng +Signed-off-by: valis +Signed-off-by: Jamal Hadi Salim +Reviewed-by: Victor Nogueira +Reviewed-by: Pedro Tammela +Reviewed-by: M A Ramdhan +Link: https://lore.kernel.org/r/20230729123202.72406-3-jhs@mojatatu.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + net/sched/cls_fw.c | 1 - + 1 file changed, 1 deletion(-) + +diff --git a/net/sched/cls_fw.c b/net/sched/cls_fw.c +index 41f0898a5a565..08c41f1976c47 100644 +--- a/net/sched/cls_fw.c ++++ b/net/sched/cls_fw.c +@@ -266,7 +266,6 @@ static int fw_change(struct net *net, struct sk_buff *in_skb, + return -ENOBUFS; + + fnew->id = f->id; +- fnew->res = f->res; + fnew->ifindex = f->ifindex; + fnew->tp = f->tp; + +-- +2.40.1 + diff --git a/queue-5.4/net-sched-cls_route-no-longer-copy-tcf_result-on-upd.patch b/queue-5.4/net-sched-cls_route-no-longer-copy-tcf_result-on-upd.patch new file mode 100644 index 00000000000..06617150c9d --- /dev/null +++ b/queue-5.4/net-sched-cls_route-no-longer-copy-tcf_result-on-upd.patch @@ -0,0 +1,50 @@ +From 11fbf7e4c4837579b471ecb7275030d4334b4329 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sat, 29 Jul 2023 08:32:02 -0400 +Subject: net/sched: cls_route: No longer copy tcf_result on update to avoid + use-after-free + +From: valis + +[ Upstream commit b80b829e9e2c1b3f7aae34855e04d8f6ecaf13c8 ] + +When route4_change() is called on an existing filter, the whole +tcf_result struct is always copied into the new instance of the filter. 
+ +This causes a problem when updating a filter bound to a class, +as tcf_unbind_filter() is always called on the old instance in the +success path, decreasing filter_cnt of the still referenced class +and allowing it to be deleted, leading to a use-after-free. + +Fix this by no longer copying the tcf_result struct from the old filter. + +Fixes: 1109c00547fc ("net: sched: RCU cls_route") +Reported-by: valis +Reported-by: Bing-Jhong Billy Jheng +Signed-off-by: valis +Signed-off-by: Jamal Hadi Salim +Reviewed-by: Victor Nogueira +Reviewed-by: Pedro Tammela +Reviewed-by: M A Ramdhan +Link: https://lore.kernel.org/r/20230729123202.72406-4-jhs@mojatatu.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + net/sched/cls_route.c | 1 - + 1 file changed, 1 deletion(-) + +diff --git a/net/sched/cls_route.c b/net/sched/cls_route.c +index b775e681cb56e..1ad4b3e60eb3b 100644 +--- a/net/sched/cls_route.c ++++ b/net/sched/cls_route.c +@@ -511,7 +511,6 @@ static int route4_change(struct net *net, struct sk_buff *in_skb, + if (fold) { + f->id = fold->id; + f->iif = fold->iif; +- f->res = fold->res; + f->handle = fold->handle; + + f->tp = fold->tp; +-- +2.40.1 + diff --git a/queue-5.4/net-sched-cls_u32-fix-match-key-mis-addressing.patch b/queue-5.4/net-sched-cls_u32-fix-match-key-mis-addressing.patch new file mode 100644 index 00000000000..440b6e911ee --- /dev/null +++ b/queue-5.4/net-sched-cls_u32-fix-match-key-mis-addressing.patch @@ -0,0 +1,145 @@ +From f1928d23ee09cc2a4251c02d68267e3c579c4077 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 26 Jul 2023 09:51:51 -0400 +Subject: net: sched: cls_u32: Fix match key mis-addressing + +From: Jamal Hadi Salim + +[ Upstream commit e68409db995380d1badacba41ff24996bd396171 ] + +A match entry is uniquely identified with an "address" or "path" in the +form of: hashtable ID(12b):bucketid(8b):nodeid(12b). 
+ +When creating table match entries all of hash table id, bucket id and +node (match entry id) are needed to be either specified by the user or +reasonable in-kernel defaults are used. The in-kernel default for a table id is +0x800(omnipresent root table); for bucketid it is 0x0. Prior to this fix there +was none for a nodeid i.e. the code assumed that the user passed the correct +nodeid and if the user passes a nodeid of 0 (as Mingi Cho did) then that is what +was used. But nodeid of 0 is reserved for identifying the table. This is not +a problem until we dump. The dump code notices that the nodeid is zero and +assumes it is referencing a table and therefore references table struct +tc_u_hnode instead of what was created i.e. match entry struct tc_u_knode. + +Ming does an equivalent of: +tc filter add dev dummy0 parent 10: prio 1 handle 0x1000 \ +protocol ip u32 match ip src 10.0.0.1/32 classid 10:1 action ok + +Essentially specifying a table id 0, bucketid 1 and nodeid of zero +Tableid 0 is remapped to the default of 0x800. +Bucketid 1 is ignored and defaults to 0x00. +Nodeid was assumed to be what Ming passed - 0x000 + +dumping before fix shows: +~$ tc filter ls dev dummy0 parent 10: +filter protocol ip pref 1 u32 chain 0 +filter protocol ip pref 1 u32 chain 0 fh 800: ht divisor 1 +filter protocol ip pref 1 u32 chain 0 fh 800: ht divisor -30591 + +Note that the last line reports a table instead of a match entry +(you can tell this because it says "ht divisor..."). +As a result of reporting the wrong data type (misinterpreting struct +tc_u_knode as being struct tc_u_hnode) the divisor is reported with value +of -30591. Ming identified this as part of the heap address +(physmap_base is 0xffff8880 (-30591 - 1)). + +The fix is to ensure that when table entry matches are added and no +nodeid is specified (i.e. nodeid == 0) then we get the next available +nodeid from the table's pool. 
+ +After the fix, this is what the dump shows: +$ tc filter ls dev dummy0 parent 10: +filter protocol ip pref 1 u32 chain 0 +filter protocol ip pref 1 u32 chain 0 fh 800: ht divisor 1 +filter protocol ip pref 1 u32 chain 0 fh 800::800 order 2048 key ht 800 bkt 0 flowid 10:1 not_in_hw + match 0a000001/ffffffff at 12 + action order 1: gact action pass + random type none pass val 0 + index 1 ref 1 bind 1 + +Reported-by: Mingi Cho +Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") +Signed-off-by: Jamal Hadi Salim +Link: https://lore.kernel.org/r/20230726135151.416917-1-jhs@mojatatu.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + net/sched/cls_u32.c | 56 ++++++++++++++++++++++++++++++++++++++++----- + 1 file changed, 50 insertions(+), 6 deletions(-) + +diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c +index e5cc2b4d38d5a..316ccbb01b55f 100644 +--- a/net/sched/cls_u32.c ++++ b/net/sched/cls_u32.c +@@ -1004,18 +1004,62 @@ static int u32_change(struct net *net, struct sk_buff *in_skb, + return -EINVAL; + } + ++ /* At this point, we need to derive the new handle that will be used to ++ * uniquely map the identity of this table match entry. The ++ * identity of the entry that we need to construct is 32 bits made of: ++ * htid(12b):bucketid(8b):node/entryid(12b) ++ * ++ * At this point _we have the table(ht)_ in which we will insert this ++ * entry. We carry the table's id in variable "htid". ++ * Note that earlier code picked the ht selection either by a) the user ++ * providing the htid specified via TCA_U32_HASH attribute or b) when ++ * no such attribute is passed then the root ht, is default to at ID ++ * 0x[800][00][000]. Rule: the root table has a single bucket with ID 0. ++ * If OTOH the user passed us the htid, they may also pass a bucketid of ++ * choice. 0 is fine. For example a user htid is 0x[600][01][000] it is ++ * indicating hash bucketid of 1. 
Rule: the entry/node ID _cannot_ be ++ * passed via the htid, so even if it was non-zero it will be ignored. ++ * ++ * We may also have a handle, if the user passed one. The handle also ++ * carries the same addressing of htid(12b):bucketid(8b):node/entryid(12b). ++ * Rule: the bucketid on the handle is ignored even if one was passed; ++ * rather the value on "htid" is always assumed to be the bucketid. ++ */ + if (handle) { ++ /* Rule: The htid from handle and tableid from htid must match */ + if (TC_U32_HTID(handle) && TC_U32_HTID(handle ^ htid)) { + NL_SET_ERR_MSG_MOD(extack, "Handle specified hash table address mismatch"); + return -EINVAL; + } +- handle = htid | TC_U32_NODE(handle); +- err = idr_alloc_u32(&ht->handle_idr, NULL, &handle, handle, +- GFP_KERNEL); +- if (err) +- return err; +- } else ++ /* Ok, so far we have a valid htid(12b):bucketid(8b) but we ++ * need to finalize the table entry identification with the last ++ * part - the node/entryid(12b)). Rule: Nodeid _cannot be 0_ for ++ * entries. Rule: nodeid of 0 is reserved only for tables(see ++ * earlier code which processes TC_U32_DIVISOR attribute). ++ * Rule: The nodeid can only be derived from the handle (and not ++ * htid). ++ * Rule: if the handle specified zero for the node id example ++ * 0x60000000, then pick a new nodeid from the pool of IDs ++ * this hash table has been allocating from. ++ * If OTOH it is specified (i.e for example the user passed a ++ * handle such as 0x60000123), then we use it generate our final ++ * handle which is used to uniquely identify the match entry. ++ */ ++ if (!TC_U32_NODE(handle)) { ++ handle = gen_new_kid(ht, htid); ++ } else { ++ handle = htid | TC_U32_NODE(handle); ++ err = idr_alloc_u32(&ht->handle_idr, NULL, &handle, ++ handle, GFP_KERNEL); ++ if (err) ++ return err; ++ } ++ } else { ++ /* The user did not give us a handle; lets just generate one ++ * from the table's pool of nodeids. 
++ */ + handle = gen_new_kid(ht, htid); ++ } + + if (tb[TCA_U32_SEL] == NULL) { + NL_SET_ERR_MSG_MOD(extack, "Selector not specified"); +-- +2.40.1 + diff --git a/queue-5.4/net-sched-cls_u32-no-longer-copy-tcf_result-on-updat.patch b/queue-5.4/net-sched-cls_u32-no-longer-copy-tcf_result-on-updat.patch new file mode 100644 index 00000000000..b7bbc2e9467 --- /dev/null +++ b/queue-5.4/net-sched-cls_u32-no-longer-copy-tcf_result-on-updat.patch @@ -0,0 +1,50 @@ +From 0007f909cecca5fc8d0578376641fe2716721ef3 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sat, 29 Jul 2023 08:32:00 -0400 +Subject: net/sched: cls_u32: No longer copy tcf_result on update to avoid + use-after-free + +From: valis + +[ Upstream commit 3044b16e7c6fe5d24b1cdbcf1bd0a9d92d1ebd81 ] + +When u32_change() is called on an existing filter, the whole +tcf_result struct is always copied into the new instance of the filter. + +This causes a problem when updating a filter bound to a class, +as tcf_unbind_filter() is always called on the old instance in the +success path, decreasing filter_cnt of the still referenced class +and allowing it to be deleted, leading to a use-after-free. + +Fix this by no longer copying the tcf_result struct from the old filter. 
+ +Fixes: de5df63228fc ("net: sched: cls_u32 changes to knode must appear atomic to readers") +Reported-by: valis +Reported-by: M A Ramdhan +Signed-off-by: valis +Signed-off-by: Jamal Hadi Salim +Reviewed-by: Victor Nogueira +Reviewed-by: Pedro Tammela +Reviewed-by: M A Ramdhan +Link: https://lore.kernel.org/r/20230729123202.72406-2-jhs@mojatatu.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + net/sched/cls_u32.c | 1 - + 1 file changed, 1 deletion(-) + +diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c +index 316ccbb01b55f..65598207a2fcb 100644 +--- a/net/sched/cls_u32.c ++++ b/net/sched/cls_u32.c +@@ -814,7 +814,6 @@ static struct tc_u_knode *u32_init_knode(struct net *net, struct tcf_proto *tp, + + new->ifindex = n->ifindex; + new->fshift = n->fshift; +- new->res = n->res; + new->flags = n->flags; + RCU_INIT_POINTER(new->ht_down, ht); + +-- +2.40.1 + diff --git a/queue-5.4/perf-test-uprobe_from_different_cu-skip-if-there-is-.patch b/queue-5.4/perf-test-uprobe_from_different_cu-skip-if-there-is-.patch new file mode 100644 index 00000000000..db2911bab48 --- /dev/null +++ b/queue-5.4/perf-test-uprobe_from_different_cu-skip-if-there-is-.patch @@ -0,0 +1,66 @@ +From e81737ba90b46165e4cebc56f4732b414145510d Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 28 Jul 2023 17:18:12 +0200 +Subject: perf test uprobe_from_different_cu: Skip if there is no gcc +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Georg Müller + +[ Upstream commit 98ce8e4a9dcfb448b30a2d7a16190f4a00382377 ] + +Without gcc, the test will fail. + +On cleanup, ignore probe removal errors. Otherwise, in case of an error +adding the probe, the temporary directory is not removed. 
+ +Fixes: 56cbeacf14353057 ("perf probe: Add test for regression introduced by switch to die_get_decl_file()") +Signed-off-by: Georg Müller +Acked-by: Ian Rogers +Cc: Adrian Hunter +Cc: Alexander Shishkin +Cc: Georg Müller +Cc: Ingo Molnar +Cc: Jiri Olsa +Cc: Mark Rutland +Cc: Masami Hiramatsu +Cc: Namhyung Kim +Cc: Peter Zijlstra +Link: https://lore.kernel.org/r/20230728151812.454806-2-georgmueller@gmx.net +Link: https://lore.kernel.org/r/CAP-5=fUP6UuLgRty3t2=fQsQi3k4hDMz415vWdp1x88QMvZ8ug@mail.gmail.com/ +Signed-off-by: Arnaldo Carvalho de Melo +Signed-off-by: Sasha Levin +--- + tools/perf/tests/shell/test_uprobe_from_different_cu.sh | 8 +++++++- + 1 file changed, 7 insertions(+), 1 deletion(-) + +diff --git a/tools/perf/tests/shell/test_uprobe_from_different_cu.sh b/tools/perf/tests/shell/test_uprobe_from_different_cu.sh +index 00d2e0e2e0c28..319f36ebb9a40 100644 +--- a/tools/perf/tests/shell/test_uprobe_from_different_cu.sh ++++ b/tools/perf/tests/shell/test_uprobe_from_different_cu.sh +@@ -4,6 +4,12 @@ + + set -e + ++# skip if there's no gcc ++if ! 
[ -x "$(command -v gcc)" ]; then ++ echo "failed: no gcc compiler" ++ exit 2 ++fi ++ + temp_dir=$(mktemp -d /tmp/perf-uprobe-different-cu-sh.XXXXXXXXXX) + + cleanup() +@@ -11,7 +17,7 @@ cleanup() + trap - EXIT TERM INT + if [[ "${temp_dir}" =~ ^/tmp/perf-uprobe-different-cu-sh.*$ ]]; then + echo "--- Cleaning up ---" +- perf probe -x ${temp_dir}/testfile -d foo ++ perf probe -x ${temp_dir}/testfile -d foo || true + rm -f "${temp_dir}/"* + rmdir "${temp_dir}" + fi +-- +2.40.1 + diff --git a/queue-5.4/rtnetlink-let-rtnl_bridge_setlink-checks-ifla_bridge.patch b/queue-5.4/rtnetlink-let-rtnl_bridge_setlink-checks-ifla_bridge.patch new file mode 100644 index 00000000000..289a1fe5488 --- /dev/null +++ b/queue-5.4/rtnetlink-let-rtnl_bridge_setlink-checks-ifla_bridge.patch @@ -0,0 +1,66 @@ +From 39ea9761c47658832a933b9f8f24fea7d671ee2c Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 26 Jul 2023 15:53:14 +0800 +Subject: rtnetlink: let rtnl_bridge_setlink checks IFLA_BRIDGE_MODE length + +From: Lin Ma + +[ Upstream commit d73ef2d69c0dba5f5a1cb9600045c873bab1fb7f ] + +There are a total of 9 ndo_bridge_setlink handlers in the current kernel, +which are 1) bnxt_bridge_setlink, 2) be_ndo_bridge_setlink 3) +i40e_ndo_bridge_setlink 4) ice_bridge_setlink 5) +ixgbe_ndo_bridge_setlink 6) mlx5e_bridge_setlink 7) +nfp_net_bridge_setlink 8) qeth_l2_bridge_setlink 9) br_setlink. + +By investigating the code, we find that 1-7 parse and use nlattr +IFLA_BRIDGE_MODE but 3 and 4 forget to do the nla_len check. This can +lead to an out-of-attribute read and allow a malformed nlattr (e.g., +length 0) to be viewed as a 2 byte integer. + +To avoid such issues, also for other ndo_bridge_setlink handlers in the +future, this patch adds the nla_len check in rtnl_bridge_setlink and +does an early error return if the length mismatches. To make it work, the +break is removed from the parsing for IFLA_BRIDGE_FLAGS to make sure +this nla_for_each_nested iterates every attribute. 
+ +Fixes: b1edc14a3fbf ("ice: Implement ice_bridge_getlink and ice_bridge_setlink") +Fixes: 51616018dd1b ("i40e: Add support for getlink, setlink ndo ops") +Suggested-by: Jakub Kicinski +Signed-off-by: Lin Ma +Acked-by: Nikolay Aleksandrov +Reviewed-by: Hangbin Liu +Link: https://lore.kernel.org/r/20230726075314.1059224-1-linma@zju.edu.cn +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + net/core/rtnetlink.c | 8 ++++++-- + 1 file changed, 6 insertions(+), 2 deletions(-) + +diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c +index 1db92a44548f0..3eaf7c706b0ec 100644 +--- a/net/core/rtnetlink.c ++++ b/net/core/rtnetlink.c +@@ -4590,13 +4590,17 @@ static int rtnl_bridge_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, + br_spec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_AF_SPEC); + if (br_spec) { + nla_for_each_nested(attr, br_spec, rem) { +- if (nla_type(attr) == IFLA_BRIDGE_FLAGS) { ++ if (nla_type(attr) == IFLA_BRIDGE_FLAGS && !have_flags) { + if (nla_len(attr) < sizeof(flags)) + return -EINVAL; + + have_flags = true; + flags = nla_get_u16(attr); +- break; ++ } ++ ++ if (nla_type(attr) == IFLA_BRIDGE_MODE) { ++ if (nla_len(attr) < sizeof(u16)) ++ return -EINVAL; + } + } + } +-- +2.40.1 + diff --git a/queue-5.4/series b/queue-5.4/series index f37e1b3e60d..4fef9534dc7 100644 --- a/queue-5.4/series +++ b/queue-5.4/series @@ -90,3 +90,32 @@ arm64-add-ampere1-to-the-spectre-bhb-affected-list.patch arm64-fix-bit-shifting-ub-in-the-midr_cpu_model-macro.patch perf-fix-function-pointer-case.patch loop-select-i-o-scheduler-none-from-inside-add_disk.patch +word-at-a-time-use-the-same-return-type-for-has_zero.patch +kvm-s390-fix-sthyi-error-handling.patch +net-mlx5-dr-fix-memory-leak-in-mlx5dr_cmd_create_ref.patch +net-mlx5e-fix-return-value-check-in-mlx5e_ipsec_remo.patch +rtnetlink-let-rtnl_bridge_setlink-checks-ifla_bridge.patch +perf-test-uprobe_from_different_cu-skip-if-there-is-.patch 
+net-sched-cls_u32-fix-match-key-mis-addressing.patch +misdn-hfcpci-fix-potential-deadlock-on-hc-lock.patch +net-annotate-data-races-around-sk-sk_max_pacing_rate.patch +net-add-missing-read_once-sk-sk_rcvlowat-annotation.patch +net-add-missing-read_once-sk-sk_sndbuf-annotation.patch +net-add-missing-read_once-sk-sk_rcvbuf-annotation.patch +net-add-missing-data-race-annotations-around-sk-sk_p.patch +net-add-missing-data-race-annotation-for-sk_ll_usec.patch +net-sched-cls_u32-no-longer-copy-tcf_result-on-updat.patch +net-sched-cls_fw-no-longer-copy-tcf_result-on-update.patch +net-sched-cls_route-no-longer-copy-tcf_result-on-upd.patch +bpf-sockmap-remove-preempt_disable-in-sock_map_sk_ac.patch +driver-core-add-device-probe-log-helper.patch +net-ll_temac-switch-to-use-dev_err_probe-helper.patch +net-ll_temac-fix-error-checking-of-irq_of_parse_and_.patch +net-dcb-choose-correct-policy-to-parse-dcb_attr_bcn.patch +ip6mr-fix-skb_under_panic-in-ip6mr_cache_report.patch +tcp_metrics-fix-addr_same-helper.patch +tcp_metrics-annotate-data-races-around-tm-tcpm_stamp.patch +tcp_metrics-annotate-data-races-around-tm-tcpm_lock.patch +tcp_metrics-annotate-data-races-around-tm-tcpm_vals.patch +tcp_metrics-annotate-data-races-around-tm-tcpm_net.patch +tcp_metrics-fix-data-race-in-tcpm_suck_dst-vs-fastop.patch diff --git a/queue-5.4/tcp_metrics-annotate-data-races-around-tm-tcpm_lock.patch b/queue-5.4/tcp_metrics-annotate-data-races-around-tm-tcpm_lock.patch new file mode 100644 index 00000000000..edc213b3a16 --- /dev/null +++ b/queue-5.4/tcp_metrics-annotate-data-races-around-tm-tcpm_lock.patch @@ -0,0 +1,51 @@ +From 74c204ba460603978f9f32d0e5f2fefca483212b Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 2 Aug 2023 13:14:57 +0000 +Subject: tcp_metrics: annotate data-races around tm->tcpm_lock + +From: Eric Dumazet + +[ Upstream commit 285ce119a3c6c4502585936650143e54c8692788 ] + +tm->tcpm_lock can be read or written locklessly. 
+ +Add needed READ_ONCE()/WRITE_ONCE() to document this. + +Fixes: 51c5d0c4b169 ("tcp: Maintain dynamic metrics in local cache.") +Signed-off-by: Eric Dumazet +Reviewed-by: David Ahern +Reviewed-by: Kuniyuki Iwashima +Link: https://lore.kernel.org/r/20230802131500.1478140-4-edumazet@google.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + net/ipv4/tcp_metrics.c | 6 ++++-- + 1 file changed, 4 insertions(+), 2 deletions(-) + +diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c +index 6a5a6683194c9..2dac82154af85 100644 +--- a/net/ipv4/tcp_metrics.c ++++ b/net/ipv4/tcp_metrics.c +@@ -59,7 +59,8 @@ static inline struct net *tm_net(struct tcp_metrics_block *tm) + static bool tcp_metric_locked(struct tcp_metrics_block *tm, + enum tcp_metric_index idx) + { +- return tm->tcpm_lock & (1 << idx); ++ /* Paired with WRITE_ONCE() in tcpm_suck_dst() */ ++ return READ_ONCE(tm->tcpm_lock) & (1 << idx); + } + + static u32 tcp_metric_get(struct tcp_metrics_block *tm, +@@ -110,7 +111,8 @@ static void tcpm_suck_dst(struct tcp_metrics_block *tm, + val |= 1 << TCP_METRIC_CWND; + if (dst_metric_locked(dst, RTAX_REORDERING)) + val |= 1 << TCP_METRIC_REORDERING; +- tm->tcpm_lock = val; ++ /* Paired with READ_ONCE() in tcp_metric_locked() */ ++ WRITE_ONCE(tm->tcpm_lock, val); + + msval = dst_metric_raw(dst, RTAX_RTT); + tm->tcpm_vals[TCP_METRIC_RTT] = msval * USEC_PER_MSEC; +-- +2.40.1 + diff --git a/queue-5.4/tcp_metrics-annotate-data-races-around-tm-tcpm_net.patch b/queue-5.4/tcp_metrics-annotate-data-races-around-tm-tcpm_net.patch new file mode 100644 index 00000000000..841e7efcf30 --- /dev/null +++ b/queue-5.4/tcp_metrics-annotate-data-races-around-tm-tcpm_net.patch @@ -0,0 +1,66 @@ +From 48d653fc96b8daec9d90fef32b5146001c748981 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 2 Aug 2023 13:14:59 +0000 +Subject: tcp_metrics: annotate data-races around tm->tcpm_net + +From: Eric Dumazet + +[ Upstream commit d5d986ce42c71a7562d32c4e21e026b0f87befec ] + 
+tm->tcpm_net can be read or written locklessly. + +Instead of changing write_pnet() and read_pnet() and potentially +hurt performance, add the needed READ_ONCE()/WRITE_ONCE() +in tm_net() and tcpm_new(). + +Fixes: 849e8a0ca8d5 ("tcp_metrics: Add a field tcpm_net and verify it matches on lookup") +Signed-off-by: Eric Dumazet +Reviewed-by: David Ahern +Reviewed-by: Kuniyuki Iwashima +Link: https://lore.kernel.org/r/20230802131500.1478140-6-edumazet@google.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + net/ipv4/tcp_metrics.c | 11 +++++++---- + 1 file changed, 7 insertions(+), 4 deletions(-) + +diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c +index 29261aa96eb40..3d0452bb6c2a1 100644 +--- a/net/ipv4/tcp_metrics.c ++++ b/net/ipv4/tcp_metrics.c +@@ -40,7 +40,7 @@ struct tcp_fastopen_metrics { + + struct tcp_metrics_block { + struct tcp_metrics_block __rcu *tcpm_next; +- possible_net_t tcpm_net; ++ struct net *tcpm_net; + struct inetpeer_addr tcpm_saddr; + struct inetpeer_addr tcpm_daddr; + unsigned long tcpm_stamp; +@@ -51,9 +51,10 @@ struct tcp_metrics_block { + struct rcu_head rcu_head; + }; + +-static inline struct net *tm_net(struct tcp_metrics_block *tm) ++static inline struct net *tm_net(const struct tcp_metrics_block *tm) + { +- return read_pnet(&tm->tcpm_net); ++ /* Paired with the WRITE_ONCE() in tcpm_new() */ ++ return READ_ONCE(tm->tcpm_net); + } + + static bool tcp_metric_locked(struct tcp_metrics_block *tm, +@@ -197,7 +198,9 @@ static struct tcp_metrics_block *tcpm_new(struct dst_entry *dst, + if (!tm) + goto out_unlock; + } +- write_pnet(&tm->tcpm_net, net); ++ /* Paired with the READ_ONCE() in tm_net() */ ++ WRITE_ONCE(tm->tcpm_net, net); ++ + tm->tcpm_saddr = *saddr; + tm->tcpm_daddr = *daddr; + +-- +2.40.1 + diff --git a/queue-5.4/tcp_metrics-annotate-data-races-around-tm-tcpm_stamp.patch b/queue-5.4/tcp_metrics-annotate-data-races-around-tm-tcpm_stamp.patch new file mode 100644 index 00000000000..211b13416b5 --- 
/dev/null +++ b/queue-5.4/tcp_metrics-annotate-data-races-around-tm-tcpm_stamp.patch @@ -0,0 +1,88 @@ +From 890588969f8887c6c738ab038d1b311948460346 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 2 Aug 2023 13:14:56 +0000 +Subject: tcp_metrics: annotate data-races around tm->tcpm_stamp + +From: Eric Dumazet + +[ Upstream commit 949ad62a5d5311d36fce2e14fe5fed3f936da51c ] + +tm->tcpm_stamp can be read or written locklessly. + +Add needed READ_ONCE()/WRITE_ONCE() to document this. + +Also constify tcpm_check_stamp() dst argument. + +Fixes: 51c5d0c4b169 ("tcp: Maintain dynamic metrics in local cache.") +Signed-off-by: Eric Dumazet +Reviewed-by: David Ahern +Reviewed-by: Kuniyuki Iwashima +Link: https://lore.kernel.org/r/20230802131500.1478140-3-edumazet@google.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + net/ipv4/tcp_metrics.c | 19 +++++++++++++------ + 1 file changed, 13 insertions(+), 6 deletions(-) + +diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c +index f3fb19df72e1c..6a5a6683194c9 100644 +--- a/net/ipv4/tcp_metrics.c ++++ b/net/ipv4/tcp_metrics.c +@@ -97,7 +97,7 @@ static void tcpm_suck_dst(struct tcp_metrics_block *tm, + u32 msval; + u32 val; + +- tm->tcpm_stamp = jiffies; ++ WRITE_ONCE(tm->tcpm_stamp, jiffies); + + val = 0; + if (dst_metric_locked(dst, RTAX_RTT)) +@@ -131,9 +131,15 @@ static void tcpm_suck_dst(struct tcp_metrics_block *tm, + + #define TCP_METRICS_TIMEOUT (60 * 60 * HZ) + +-static void tcpm_check_stamp(struct tcp_metrics_block *tm, struct dst_entry *dst) ++static void tcpm_check_stamp(struct tcp_metrics_block *tm, ++ const struct dst_entry *dst) + { +- if (tm && unlikely(time_after(jiffies, tm->tcpm_stamp + TCP_METRICS_TIMEOUT))) ++ unsigned long limit; ++ ++ if (!tm) ++ return; ++ limit = READ_ONCE(tm->tcpm_stamp) + TCP_METRICS_TIMEOUT; ++ if (unlikely(time_after(jiffies, limit))) + tcpm_suck_dst(tm, dst, false); + } + +@@ -174,7 +180,8 @@ static struct tcp_metrics_block *tcpm_new(struct dst_entry 
*dst, + oldest = deref_locked(tcp_metrics_hash[hash].chain); + for (tm = deref_locked(oldest->tcpm_next); tm; + tm = deref_locked(tm->tcpm_next)) { +- if (time_before(tm->tcpm_stamp, oldest->tcpm_stamp)) ++ if (time_before(READ_ONCE(tm->tcpm_stamp), ++ READ_ONCE(oldest->tcpm_stamp))) + oldest = tm; + } + tm = oldest; +@@ -431,7 +438,7 @@ void tcp_update_metrics(struct sock *sk) + tp->reordering); + } + } +- tm->tcpm_stamp = jiffies; ++ WRITE_ONCE(tm->tcpm_stamp, jiffies); + out_unlock: + rcu_read_unlock(); + } +@@ -642,7 +649,7 @@ static int tcp_metrics_fill_info(struct sk_buff *msg, + } + + if (nla_put_msecs(msg, TCP_METRICS_ATTR_AGE, +- jiffies - tm->tcpm_stamp, ++ jiffies - READ_ONCE(tm->tcpm_stamp), + TCP_METRICS_ATTR_PAD) < 0) + goto nla_put_failure; + +-- +2.40.1 + diff --git a/queue-5.4/tcp_metrics-annotate-data-races-around-tm-tcpm_vals.patch b/queue-5.4/tcp_metrics-annotate-data-races-around-tm-tcpm_vals.patch new file mode 100644 index 00000000000..002c51ab75f --- /dev/null +++ b/queue-5.4/tcp_metrics-annotate-data-races-around-tm-tcpm_vals.patch @@ -0,0 +1,85 @@ +From ae4b0cb37d2715f02184ae4e91ac925e36646ffc Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 2 Aug 2023 13:14:58 +0000 +Subject: tcp_metrics: annotate data-races around tm->tcpm_vals[] + +From: Eric Dumazet + +[ Upstream commit 8c4d04f6b443869d25e59822f7cec88d647028a9 ] + +tm->tcpm_vals[] values can be read or written locklessly. 
+ +Add needed READ_ONCE()/WRITE_ONCE() to document this, +and force use of tcp_metric_get() and tcp_metric_set() + +Fixes: 51c5d0c4b169 ("tcp: Maintain dynamic metrics in local cache.") +Signed-off-by: Eric Dumazet +Reviewed-by: David Ahern +Reviewed-by: Kuniyuki Iwashima +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + net/ipv4/tcp_metrics.c | 23 ++++++++++++++--------- + 1 file changed, 14 insertions(+), 9 deletions(-) + +diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c +index 2dac82154af85..29261aa96eb40 100644 +--- a/net/ipv4/tcp_metrics.c ++++ b/net/ipv4/tcp_metrics.c +@@ -63,17 +63,19 @@ static bool tcp_metric_locked(struct tcp_metrics_block *tm, + return READ_ONCE(tm->tcpm_lock) & (1 << idx); + } + +-static u32 tcp_metric_get(struct tcp_metrics_block *tm, ++static u32 tcp_metric_get(const struct tcp_metrics_block *tm, + enum tcp_metric_index idx) + { +- return tm->tcpm_vals[idx]; ++ /* Paired with WRITE_ONCE() in tcp_metric_set() */ ++ return READ_ONCE(tm->tcpm_vals[idx]); + } + + static void tcp_metric_set(struct tcp_metrics_block *tm, + enum tcp_metric_index idx, + u32 val) + { +- tm->tcpm_vals[idx] = val; ++ /* Paired with READ_ONCE() in tcp_metric_get() */ ++ WRITE_ONCE(tm->tcpm_vals[idx], val); + } + + static bool addr_same(const struct inetpeer_addr *a, +@@ -115,13 +117,16 @@ static void tcpm_suck_dst(struct tcp_metrics_block *tm, + WRITE_ONCE(tm->tcpm_lock, val); + + msval = dst_metric_raw(dst, RTAX_RTT); +- tm->tcpm_vals[TCP_METRIC_RTT] = msval * USEC_PER_MSEC; ++ tcp_metric_set(tm, TCP_METRIC_RTT, msval * USEC_PER_MSEC); + + msval = dst_metric_raw(dst, RTAX_RTTVAR); +- tm->tcpm_vals[TCP_METRIC_RTTVAR] = msval * USEC_PER_MSEC; +- tm->tcpm_vals[TCP_METRIC_SSTHRESH] = dst_metric_raw(dst, RTAX_SSTHRESH); +- tm->tcpm_vals[TCP_METRIC_CWND] = dst_metric_raw(dst, RTAX_CWND); +- tm->tcpm_vals[TCP_METRIC_REORDERING] = dst_metric_raw(dst, RTAX_REORDERING); ++ tcp_metric_set(tm, TCP_METRIC_RTTVAR, msval * USEC_PER_MSEC); ++ 
tcp_metric_set(tm, TCP_METRIC_SSTHRESH, ++ dst_metric_raw(dst, RTAX_SSTHRESH)); ++ tcp_metric_set(tm, TCP_METRIC_CWND, ++ dst_metric_raw(dst, RTAX_CWND)); ++ tcp_metric_set(tm, TCP_METRIC_REORDERING, ++ dst_metric_raw(dst, RTAX_REORDERING)); + if (fastopen_clear) { + tm->tcpm_fastopen.mss = 0; + tm->tcpm_fastopen.syn_loss = 0; +@@ -662,7 +667,7 @@ static int tcp_metrics_fill_info(struct sk_buff *msg, + if (!nest) + goto nla_put_failure; + for (i = 0; i < TCP_METRIC_MAX_KERNEL + 1; i++) { +- u32 val = tm->tcpm_vals[i]; ++ u32 val = tcp_metric_get(tm, i); + + if (!val) + continue; +-- +2.40.1 + diff --git a/queue-5.4/tcp_metrics-fix-addr_same-helper.patch b/queue-5.4/tcp_metrics-fix-addr_same-helper.patch new file mode 100644 index 00000000000..b043a752454 --- /dev/null +++ b/queue-5.4/tcp_metrics-fix-addr_same-helper.patch @@ -0,0 +1,46 @@ +From 826092f3bf5fc15cb80742a236be21e8fe528b07 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 2 Aug 2023 13:14:55 +0000 +Subject: tcp_metrics: fix addr_same() helper + +From: Eric Dumazet + +[ Upstream commit e6638094d7af6c7b9dcca05ad009e79e31b4f670 ] + +Because v4 and v6 families use separate inetpeer trees (respectively +net->ipv4.peers and net->ipv6.peers), inetpeer_addr_cmp(a, b) assumes +a & b share the same family. + +tcp_metrics use a common hash table, where entries can have different +families. + +We must therefore make sure to not call inetpeer_addr_cmp() +if the families do not match. 
+ +Fixes: d39d14ffa24c ("net: Add helper function to compare inetpeer addresses") +Signed-off-by: Eric Dumazet +Reviewed-by: David Ahern +Reviewed-by: Kuniyuki Iwashima +Link: https://lore.kernel.org/r/20230802131500.1478140-2-edumazet@google.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + net/ipv4/tcp_metrics.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c +index 0af6249a993af..f3fb19df72e1c 100644 +--- a/net/ipv4/tcp_metrics.c ++++ b/net/ipv4/tcp_metrics.c +@@ -78,7 +78,7 @@ static void tcp_metric_set(struct tcp_metrics_block *tm, + static bool addr_same(const struct inetpeer_addr *a, + const struct inetpeer_addr *b) + { +- return inetpeer_addr_cmp(a, b) == 0; ++ return (a->family == b->family) && !inetpeer_addr_cmp(a, b); + } + + struct tcpm_hash_bucket { +-- +2.40.1 + diff --git a/queue-5.4/tcp_metrics-fix-data-race-in-tcpm_suck_dst-vs-fastop.patch b/queue-5.4/tcp_metrics-fix-data-race-in-tcpm_suck_dst-vs-fastop.patch new file mode 100644 index 00000000000..4f14ce9ca45 --- /dev/null +++ b/queue-5.4/tcp_metrics-fix-data-race-in-tcpm_suck_dst-vs-fastop.patch @@ -0,0 +1,85 @@ +From 009a6aa28f35122e7ec9c4dad022b440a505a498 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 2 Aug 2023 13:15:00 +0000 +Subject: tcp_metrics: fix data-race in tcpm_suck_dst() vs fastopen + +From: Eric Dumazet + +[ Upstream commit ddf251fa2bc1d3699eec0bae6ed0bc373b8fda79 ] + +Whenever tcpm_new() reclaims an old entry, tcpm_suck_dst() +would overwrite data that could be read from tcp_fastopen_cache_get() +or tcp_metrics_fill_info(). + +We need to acquire fastopen_seqlock to maintain consistency. + +For newly allocated objects, tcpm_new() can switch to kzalloc() +to avoid an extra fastopen_seqlock acquisition. 
+ +Fixes: 1fe4c481ba63 ("net-tcp: Fast Open client - cookie cache") +Signed-off-by: Eric Dumazet +Cc: Yuchung Cheng +Reviewed-by: Kuniyuki Iwashima +Link: https://lore.kernel.org/r/20230802131500.1478140-7-edumazet@google.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + net/ipv4/tcp_metrics.c | 9 +++++---- + 1 file changed, 5 insertions(+), 4 deletions(-) + +diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c +index 3d0452bb6c2a1..e89e19a6852ce 100644 +--- a/net/ipv4/tcp_metrics.c ++++ b/net/ipv4/tcp_metrics.c +@@ -93,6 +93,7 @@ static struct tcpm_hash_bucket *tcp_metrics_hash __read_mostly; + static unsigned int tcp_metrics_hash_log __read_mostly; + + static DEFINE_SPINLOCK(tcp_metrics_lock); ++static DEFINE_SEQLOCK(fastopen_seqlock); + + static void tcpm_suck_dst(struct tcp_metrics_block *tm, + const struct dst_entry *dst, +@@ -129,11 +130,13 @@ static void tcpm_suck_dst(struct tcp_metrics_block *tm, + tcp_metric_set(tm, TCP_METRIC_REORDERING, + dst_metric_raw(dst, RTAX_REORDERING)); + if (fastopen_clear) { ++ write_seqlock(&fastopen_seqlock); + tm->tcpm_fastopen.mss = 0; + tm->tcpm_fastopen.syn_loss = 0; + tm->tcpm_fastopen.try_exp = 0; + tm->tcpm_fastopen.cookie.exp = false; + tm->tcpm_fastopen.cookie.len = 0; ++ write_sequnlock(&fastopen_seqlock); + } + } + +@@ -194,7 +197,7 @@ static struct tcp_metrics_block *tcpm_new(struct dst_entry *dst, + } + tm = oldest; + } else { +- tm = kmalloc(sizeof(*tm), GFP_ATOMIC); ++ tm = kzalloc(sizeof(*tm), GFP_ATOMIC); + if (!tm) + goto out_unlock; + } +@@ -204,7 +207,7 @@ static struct tcp_metrics_block *tcpm_new(struct dst_entry *dst, + tm->tcpm_saddr = *saddr; + tm->tcpm_daddr = *daddr; + +- tcpm_suck_dst(tm, dst, true); ++ tcpm_suck_dst(tm, dst, reclaim); + + if (likely(!reclaim)) { + tm->tcpm_next = tcp_metrics_hash[hash].chain; +@@ -551,8 +554,6 @@ bool tcp_peer_is_proven(struct request_sock *req, struct dst_entry *dst) + return ret; + } + +-static DEFINE_SEQLOCK(fastopen_seqlock); +- + 
void tcp_fastopen_cache_get(struct sock *sk, u16 *mss, + struct tcp_fastopen_cookie *cookie) + { +-- +2.40.1 + diff --git a/queue-5.4/word-at-a-time-use-the-same-return-type-for-has_zero.patch b/queue-5.4/word-at-a-time-use-the-same-return-type-for-has_zero.patch new file mode 100644 index 00000000000..2c0d63b458b --- /dev/null +++ b/queue-5.4/word-at-a-time-use-the-same-return-type-for-has_zero.patch @@ -0,0 +1,74 @@ +From 17a3d2fbb253e6da25631ee9197e405aa671bbf4 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 1 Aug 2023 15:22:17 -0700 +Subject: word-at-a-time: use the same return type for has_zero regardless of + endianness + +From: ndesaulniers@google.com + +[ Upstream commit 79e8328e5acbe691bbde029a52c89d70dcbc22f3 ] + +Compiling big-endian targets with Clang produces the diagnostic: + + fs/namei.c:2173:13: warning: use of bitwise '|' with boolean operands [-Wbitwise-instead-of-logical] + } while (!(has_zero(a, &adata, &constants) | has_zero(b, &bdata, &constants))); + ~^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + || + fs/namei.c:2173:13: note: cast one or both operands to int to silence this warning + +It appears that when has_zero was introduced, two definitions were +produced with different signatures (in particular different return +types). + +Looking at the usage in hash_name() in fs/namei.c, I suspect that +has_zero() is meant to be invoked twice per while loop iteration; using +logical-or would not update `bdata` when `a` did not have zeros. So I +think it's preferred to always return an unsigned long rather than a +bool than update the while loop in hash_name() to use a logical-or +rather than bitwise-or. 
+ +[ Also changed powerpc version to do the same - Linus ] + +Link: https://github.com/ClangBuiltLinux/linux/issues/1832 +Link: https://lore.kernel.org/lkml/20230801-bitwise-v1-1-799bec468dc4@google.com/ +Fixes: 36126f8f2ed8 ("word-at-a-time: make the interfaces truly generic") +Debugged-by: Nathan Chancellor +Signed-off-by: Nick Desaulniers +Acked-by: Heiko Carstens +Cc: Arnd Bergmann +Signed-off-by: Linus Torvalds +Signed-off-by: Sasha Levin +--- + arch/powerpc/include/asm/word-at-a-time.h | 2 +- + include/asm-generic/word-at-a-time.h | 2 +- + 2 files changed, 2 insertions(+), 2 deletions(-) + +diff --git a/arch/powerpc/include/asm/word-at-a-time.h b/arch/powerpc/include/asm/word-at-a-time.h +index f3f4710d4ff52..99129b0cd8b8a 100644 +--- a/arch/powerpc/include/asm/word-at-a-time.h ++++ b/arch/powerpc/include/asm/word-at-a-time.h +@@ -34,7 +34,7 @@ static inline long find_zero(unsigned long mask) + return leading_zero_bits >> 3; + } + +-static inline bool has_zero(unsigned long val, unsigned long *data, const struct word_at_a_time *c) ++static inline unsigned long has_zero(unsigned long val, unsigned long *data, const struct word_at_a_time *c) + { + unsigned long rhs = val | c->low_bits; + *data = rhs; +diff --git a/include/asm-generic/word-at-a-time.h b/include/asm-generic/word-at-a-time.h +index 20c93f08c9933..95a1d214108a5 100644 +--- a/include/asm-generic/word-at-a-time.h ++++ b/include/asm-generic/word-at-a-time.h +@@ -38,7 +38,7 @@ static inline long find_zero(unsigned long mask) + return (mask >> 8) ? byte : byte + 1; + } + +-static inline bool has_zero(unsigned long val, unsigned long *data, const struct word_at_a_time *c) ++static inline unsigned long has_zero(unsigned long val, unsigned long *data, const struct word_at_a_time *c) + { + unsigned long rhs = val | c->low_bits; + *data = rhs; +-- +2.40.1 +