git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
Fixes for 5.4
author Sasha Levin <sashal@kernel.org>
Sat, 5 Aug 2023 20:50:04 +0000 (16:50 -0400)
committer Sasha Levin <sashal@kernel.org>
Sat, 5 Aug 2023 20:50:04 +0000 (16:50 -0400)
Signed-off-by: Sasha Levin <sashal@kernel.org>
30 files changed:
queue-5.4/bpf-sockmap-remove-preempt_disable-in-sock_map_sk_ac.patch [new file with mode: 0644]
queue-5.4/driver-core-add-device-probe-log-helper.patch [new file with mode: 0644]
queue-5.4/ip6mr-fix-skb_under_panic-in-ip6mr_cache_report.patch [new file with mode: 0644]
queue-5.4/kvm-s390-fix-sthyi-error-handling.patch [new file with mode: 0644]
queue-5.4/misdn-hfcpci-fix-potential-deadlock-on-hc-lock.patch [new file with mode: 0644]
queue-5.4/net-add-missing-data-race-annotation-for-sk_ll_usec.patch [new file with mode: 0644]
queue-5.4/net-add-missing-data-race-annotations-around-sk-sk_p.patch [new file with mode: 0644]
queue-5.4/net-add-missing-read_once-sk-sk_rcvbuf-annotation.patch [new file with mode: 0644]
queue-5.4/net-add-missing-read_once-sk-sk_rcvlowat-annotation.patch [new file with mode: 0644]
queue-5.4/net-add-missing-read_once-sk-sk_sndbuf-annotation.patch [new file with mode: 0644]
queue-5.4/net-annotate-data-races-around-sk-sk_max_pacing_rate.patch [new file with mode: 0644]
queue-5.4/net-dcb-choose-correct-policy-to-parse-dcb_attr_bcn.patch [new file with mode: 0644]
queue-5.4/net-ll_temac-fix-error-checking-of-irq_of_parse_and_.patch [new file with mode: 0644]
queue-5.4/net-ll_temac-switch-to-use-dev_err_probe-helper.patch [new file with mode: 0644]
queue-5.4/net-mlx5-dr-fix-memory-leak-in-mlx5dr_cmd_create_ref.patch [new file with mode: 0644]
queue-5.4/net-mlx5e-fix-return-value-check-in-mlx5e_ipsec_remo.patch [new file with mode: 0644]
queue-5.4/net-sched-cls_fw-no-longer-copy-tcf_result-on-update.patch [new file with mode: 0644]
queue-5.4/net-sched-cls_route-no-longer-copy-tcf_result-on-upd.patch [new file with mode: 0644]
queue-5.4/net-sched-cls_u32-fix-match-key-mis-addressing.patch [new file with mode: 0644]
queue-5.4/net-sched-cls_u32-no-longer-copy-tcf_result-on-updat.patch [new file with mode: 0644]
queue-5.4/perf-test-uprobe_from_different_cu-skip-if-there-is-.patch [new file with mode: 0644]
queue-5.4/rtnetlink-let-rtnl_bridge_setlink-checks-ifla_bridge.patch [new file with mode: 0644]
queue-5.4/series
queue-5.4/tcp_metrics-annotate-data-races-around-tm-tcpm_lock.patch [new file with mode: 0644]
queue-5.4/tcp_metrics-annotate-data-races-around-tm-tcpm_net.patch [new file with mode: 0644]
queue-5.4/tcp_metrics-annotate-data-races-around-tm-tcpm_stamp.patch [new file with mode: 0644]
queue-5.4/tcp_metrics-annotate-data-races-around-tm-tcpm_vals.patch [new file with mode: 0644]
queue-5.4/tcp_metrics-fix-addr_same-helper.patch [new file with mode: 0644]
queue-5.4/tcp_metrics-fix-data-race-in-tcpm_suck_dst-vs-fastop.patch [new file with mode: 0644]
queue-5.4/word-at-a-time-use-the-same-return-type-for-has_zero.patch [new file with mode: 0644]

diff --git a/queue-5.4/bpf-sockmap-remove-preempt_disable-in-sock_map_sk_ac.patch b/queue-5.4/bpf-sockmap-remove-preempt_disable-in-sock_map_sk_ac.patch
new file mode 100644 (file)
index 0000000..445536f
--- /dev/null
@@ -0,0 +1,59 @@
+From a645d88fa9de1bfe63729527e48559436ee01df2 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 28 Jul 2023 08:44:11 +0200
+Subject: bpf: sockmap: Remove preempt_disable in sock_map_sk_acquire
+
+From: Tomas Glozar <tglozar@redhat.com>
+
+[ Upstream commit 13d2618b48f15966d1adfe1ff6a1985f5eef40ba ]
+
+Disabling preemption in sock_map_sk_acquire conflicts with GFP_ATOMIC
+allocation later in sk_psock_init_link on PREEMPT_RT kernels, since
+GFP_ATOMIC might sleep on RT (see bpf: Make BPF and PREEMPT_RT co-exist
+patchset notes for details).
+
+This causes calling bpf_map_update_elem on BPF_MAP_TYPE_SOCKMAP maps to
+BUG (sleeping function called from invalid context) on RT kernels.
+
+preempt_disable was introduced together with lock_sk and rcu_read_lock
+in commit 99ba2b5aba24e ("bpf: sockhash, disallow bpf_tcp_close and update
+in parallel"), probably to match disabled migration of BPF programs, and
+is no longer necessary.
+
+Remove preempt_disable to fix BUG in sock_map_update_common on RT.
+
+Signed-off-by: Tomas Glozar <tglozar@redhat.com>
+Reviewed-by: Jakub Sitnicki <jakub@cloudflare.com>
+Link: https://lore.kernel.org/all/20200224140131.461979697@linutronix.de/
+Fixes: 99ba2b5aba24 ("bpf: sockhash, disallow bpf_tcp_close and update in parallel")
+Reviewed-by: John Fastabend <john.fastabend@gmail.com>
+Link: https://lore.kernel.org/r/20230728064411.305576-1-tglozar@redhat.com
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/core/sock_map.c | 2 --
+ 1 file changed, 2 deletions(-)
+
+diff --git a/net/core/sock_map.c b/net/core/sock_map.c
+index 5bce6d4d20573..5b82ff0e2680f 100644
+--- a/net/core/sock_map.c
++++ b/net/core/sock_map.c
+@@ -115,7 +115,6 @@ static void sock_map_sk_acquire(struct sock *sk)
+       __acquires(&sk->sk_lock.slock)
+ {
+       lock_sock(sk);
+-      preempt_disable();
+       rcu_read_lock();
+ }
+@@ -123,7 +122,6 @@ static void sock_map_sk_release(struct sock *sk)
+       __releases(&sk->sk_lock.slock)
+ {
+       rcu_read_unlock();
+-      preempt_enable();
+       release_sock(sk);
+ }
+-- 
+2.40.1
+
diff --git a/queue-5.4/driver-core-add-device-probe-log-helper.patch b/queue-5.4/driver-core-add-device-probe-log-helper.patch
new file mode 100644 (file)
index 0000000..3b299fe
--- /dev/null
@@ -0,0 +1,105 @@
+From 1294b643c934eb581fa0901fd70eaa12a6586afc Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 13 Jul 2020 16:43:21 +0200
+Subject: driver core: add device probe log helper
+
+From: Andrzej Hajda <a.hajda@samsung.com>
+
+[ Upstream commit a787e5400a1ceeb0ef92d71ec43aeb35b1fa1334 ]
+
+During probe, every time a driver gets a resource it should usually check for
+an error, printk some message if it is not -EPROBE_DEFER, and return the error.
+This pattern is simple but requires adding few lines after any resource
+acquisition code, as a result it is often omitted or implemented only
+partially.
+dev_err_probe helps to replace such code sequences with simple call,
+so code:
+       if (err != -EPROBE_DEFER)
+               dev_err(dev, ...);
+       return err;
+becomes:
+       return dev_err_probe(dev, err, ...);
+
+Signed-off-by: Andrzej Hajda <a.hajda@samsung.com>
+Reviewed-by: Rafael J. Wysocki <rafael@kernel.org>
+Reviewed-by: Mark Brown <broonie@kernel.org>
+Reviewed-by: Andy Shevchenko <andy.shevchenko@gmail.com>
+Link: https://lore.kernel.org/r/20200713144324.23654-2-a.hajda@samsung.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Stable-dep-of: ef45e8400f5b ("net: ll_temac: fix error checking of irq_of_parse_and_map()")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/base/core.c    | 42 ++++++++++++++++++++++++++++++++++++++++++
+ include/linux/device.h |  3 +++
+ 2 files changed, 45 insertions(+)
+
+diff --git a/drivers/base/core.c b/drivers/base/core.c
+index f8e157ede44f8..4949aba7e1880 100644
+--- a/drivers/base/core.c
++++ b/drivers/base/core.c
+@@ -3399,6 +3399,48 @@ define_dev_printk_level(_dev_info, KERN_INFO);
+ #endif
++/**
++ * dev_err_probe - probe error check and log helper
++ * @dev: the pointer to the struct device
++ * @err: error value to test
++ * @fmt: printf-style format string
++ * @...: arguments as specified in the format string
++ *
++ * This helper implements common pattern present in probe functions for error
++ * checking: print debug or error message depending if the error value is
++ * -EPROBE_DEFER and propagate error upwards.
++ * It replaces code sequence:
++ *    if (err != -EPROBE_DEFER)
++ *            dev_err(dev, ...);
++ *    else
++ *            dev_dbg(dev, ...);
++ *    return err;
++ * with
++ *    return dev_err_probe(dev, err, ...);
++ *
++ * Returns @err.
++ *
++ */
++int dev_err_probe(const struct device *dev, int err, const char *fmt, ...)
++{
++      struct va_format vaf;
++      va_list args;
++
++      va_start(args, fmt);
++      vaf.fmt = fmt;
++      vaf.va = &args;
++
++      if (err != -EPROBE_DEFER)
++              dev_err(dev, "error %d: %pV", err, &vaf);
++      else
++              dev_dbg(dev, "error %d: %pV", err, &vaf);
++
++      va_end(args);
++
++      return err;
++}
++EXPORT_SYMBOL_GPL(dev_err_probe);
++
+ static inline bool fwnode_is_primary(struct fwnode_handle *fwnode)
+ {
+       return fwnode && !IS_ERR(fwnode->secondary);
+diff --git a/include/linux/device.h b/include/linux/device.h
+index d74275e2047a4..c7be3a8073ec3 100644
+--- a/include/linux/device.h
++++ b/include/linux/device.h
+@@ -1871,6 +1871,9 @@ do {                                                                     \
+       WARN_ONCE(condition, "%s %s: " format, \
+                       dev_driver_string(dev), dev_name(dev), ## arg)
++extern __printf(3, 4)
++int dev_err_probe(const struct device *dev, int err, const char *fmt, ...);
++
+ /* Create alias, so I can be autoloaded. */
+ #define MODULE_ALIAS_CHARDEV(major,minor) \
+       MODULE_ALIAS("char-major-" __stringify(major) "-" __stringify(minor))
+-- 
+2.40.1
+
diff --git a/queue-5.4/ip6mr-fix-skb_under_panic-in-ip6mr_cache_report.patch b/queue-5.4/ip6mr-fix-skb_under_panic-in-ip6mr_cache_report.patch
new file mode 100644 (file)
index 0000000..82814b4
--- /dev/null
@@ -0,0 +1,77 @@
+From 6208e0ab4491793fa444a0e2d46381a6f450b4b9 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 1 Aug 2023 14:43:18 +0800
+Subject: ip6mr: Fix skb_under_panic in ip6mr_cache_report()
+
+From: Yue Haibing <yuehaibing@huawei.com>
+
+[ Upstream commit 30e0191b16e8a58e4620fa3e2839ddc7b9d4281c ]
+
+skbuff: skb_under_panic: text:ffffffff88771f69 len:56 put:-4
+ head:ffff88805f86a800 data:ffff887f5f86a850 tail:0x88 end:0x2c0 dev:pim6reg
+ ------------[ cut here ]------------
+ kernel BUG at net/core/skbuff.c:192!
+ invalid opcode: 0000 [#1] PREEMPT SMP KASAN
+ CPU: 2 PID: 22968 Comm: kworker/2:11 Not tainted 6.5.0-rc3-00044-g0a8db05b571a #236
+ Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.15.0-1 04/01/2014
+ Workqueue: ipv6_addrconf addrconf_dad_work
+ RIP: 0010:skb_panic+0x152/0x1d0
+ Call Trace:
+  <TASK>
+  skb_push+0xc4/0xe0
+  ip6mr_cache_report+0xd69/0x19b0
+  reg_vif_xmit+0x406/0x690
+  dev_hard_start_xmit+0x17e/0x6e0
+  __dev_queue_xmit+0x2d6a/0x3d20
+  vlan_dev_hard_start_xmit+0x3ab/0x5c0
+  dev_hard_start_xmit+0x17e/0x6e0
+  __dev_queue_xmit+0x2d6a/0x3d20
+  neigh_connected_output+0x3ed/0x570
+  ip6_finish_output2+0x5b5/0x1950
+  ip6_finish_output+0x693/0x11c0
+  ip6_output+0x24b/0x880
+  NF_HOOK.constprop.0+0xfd/0x530
+  ndisc_send_skb+0x9db/0x1400
+  ndisc_send_rs+0x12a/0x6c0
+  addrconf_dad_completed+0x3c9/0xea0
+  addrconf_dad_work+0x849/0x1420
+  process_one_work+0xa22/0x16e0
+  worker_thread+0x679/0x10c0
+  ret_from_fork+0x28/0x60
+  ret_from_fork_asm+0x11/0x20
+
+When a vlan device is set up on dev pim6reg, a DAD NS packet may be sent via reg_vif_xmit().
+reg_vif_xmit()
+    ip6mr_cache_report()
+        skb_push(skb, -skb_network_offset(pkt));//skb_network_offset(pkt) is 4
+And skb_push declared as:
+       void *skb_push(struct sk_buff *skb, unsigned int len);
+               skb->data -= len;
+               //0xffff88805f86a84c - 0xfffffffc = 0xffff887f5f86a850
+skb->data is set to 0xffff887f5f86a850, which is an invalid memory address, causing skb_push() to fail.
+
+Fixes: 14fb64e1f449 ("[IPV6] MROUTE: Support PIM-SM (SSM).")
+Signed-off-by: Yue Haibing <yuehaibing@huawei.com>
+Reviewed-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/ipv6/ip6mr.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
+index 6248e00c2bf72..6642bc7b9870f 100644
+--- a/net/ipv6/ip6mr.c
++++ b/net/ipv6/ip6mr.c
+@@ -1065,7 +1065,7 @@ static int ip6mr_cache_report(struct mr_table *mrt, struct sk_buff *pkt,
+                  And all this only to mangle msg->im6_msgtype and
+                  to set msg->im6_mbz to "mbz" :-)
+                */
+-              skb_push(skb, -skb_network_offset(pkt));
++              __skb_pull(skb, skb_network_offset(pkt));
+               skb_push(skb, sizeof(*msg));
+               skb_reset_transport_header(skb);
+-- 
+2.40.1
+
diff --git a/queue-5.4/kvm-s390-fix-sthyi-error-handling.patch b/queue-5.4/kvm-s390-fix-sthyi-error-handling.patch
new file mode 100644 (file)
index 0000000..a1404cb
--- /dev/null
@@ -0,0 +1,78 @@
+From 9cb08a9a1f2ca26200cc1759c9d1572b5b49c0b8 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 27 Jul 2023 20:29:39 +0200
+Subject: KVM: s390: fix sthyi error handling
+
+From: Heiko Carstens <hca@linux.ibm.com>
+
+[ Upstream commit 0c02cc576eac161601927b41634f80bfd55bfa9e ]
+
+Commit 9fb6c9b3fea1 ("s390/sthyi: add cache to store hypervisor info")
+added cache handling for store hypervisor info. This also changed the
+possible return code for sthyi_fill().
+
+Instead of only returning a condition code like the sthyi instruction would
+do, it can now also return a negative error value (-ENOMEM). handle_sthyi()
+was not changed accordingly. In case of an error, the negative error value
+would be incorrectly injected into the guest PSW.
+
+Add proper error handling to prevent this, and update the comment which
+describes the possible return values of sthyi_fill().
+
+Fixes: 9fb6c9b3fea1 ("s390/sthyi: add cache to store hypervisor info")
+Reviewed-by: Christian Borntraeger <borntraeger@linux.ibm.com>
+Link: https://lore.kernel.org/r/20230727182939.2050744-1-hca@linux.ibm.com
+Signed-off-by: Heiko Carstens <hca@linux.ibm.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/s390/kernel/sthyi.c  | 6 +++---
+ arch/s390/kvm/intercept.c | 9 ++++++---
+ 2 files changed, 9 insertions(+), 6 deletions(-)
+
+diff --git a/arch/s390/kernel/sthyi.c b/arch/s390/kernel/sthyi.c
+index 888cc2f166db7..ce6084e28d904 100644
+--- a/arch/s390/kernel/sthyi.c
++++ b/arch/s390/kernel/sthyi.c
+@@ -460,9 +460,9 @@ static int sthyi_update_cache(u64 *rc)
+  *
+  * Fills the destination with system information returned by the STHYI
+  * instruction. The data is generated by emulation or execution of STHYI,
+- * if available. The return value is the condition code that would be
+- * returned, the rc parameter is the return code which is passed in
+- * register R2 + 1.
++ * if available. The return value is either a negative error value or
++ * the condition code that would be returned, the rc parameter is the
++ * return code which is passed in register R2 + 1.
+  */
+ int sthyi_fill(void *dst, u64 *rc)
+ {
+diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c
+index a389fa85cca2d..5450d43d26ea5 100644
+--- a/arch/s390/kvm/intercept.c
++++ b/arch/s390/kvm/intercept.c
+@@ -360,8 +360,8 @@ static int handle_partial_execution(struct kvm_vcpu *vcpu)
+  */
+ int handle_sthyi(struct kvm_vcpu *vcpu)
+ {
+-      int reg1, reg2, r = 0;
+-      u64 code, addr, cc = 0, rc = 0;
++      int reg1, reg2, cc = 0, r = 0;
++      u64 code, addr, rc = 0;
+       struct sthyi_sctns *sctns = NULL;
+       if (!test_kvm_facility(vcpu->kvm, 74))
+@@ -392,7 +392,10 @@ int handle_sthyi(struct kvm_vcpu *vcpu)
+               return -ENOMEM;
+       cc = sthyi_fill(sctns, &rc);
+-
++      if (cc < 0) {
++              free_page((unsigned long)sctns);
++              return cc;
++      }
+ out:
+       if (!cc) {
+               r = write_guest(vcpu, addr, reg2, sctns, PAGE_SIZE);
+-- 
+2.40.1
+
diff --git a/queue-5.4/misdn-hfcpci-fix-potential-deadlock-on-hc-lock.patch b/queue-5.4/misdn-hfcpci-fix-potential-deadlock-on-hc-lock.patch
new file mode 100644 (file)
index 0000000..a959012
--- /dev/null
@@ -0,0 +1,88 @@
+From 53d16071fc47e48ef818e06a35382907195ba225 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 27 Jul 2023 08:56:19 +0000
+Subject: mISDN: hfcpci: Fix potential deadlock on &hc->lock
+
+From: Chengfeng Ye <dg573847474@gmail.com>
+
+[ Upstream commit 56c6be35fcbed54279df0a2c9e60480a61841d6f ]
+
+As &hc->lock is acquired by both the timer _hfcpci_softirq() and the hardirq
+hfcpci_int(), the timer should disable irqs before lock acquisition;
+otherwise deadlock could happen if the timer is preempted by the hard irq.
+
+Possible deadlock scenario:
+hfcpci_softirq() (timer)
+    -> _hfcpci_softirq()
+    -> spin_lock(&hc->lock);
+        <irq interruption>
+        -> hfcpci_int()
+        -> spin_lock(&hc->lock); (deadlock here)
+
+This flaw was found by an experimental static analysis tool I am developing
+for irq-related deadlock.
+
+The tentative patch fixes the potential deadlock by using spin_lock_irq()
+in the timer.
+
+Fixes: b36b654a7e82 ("mISDN: Create /sys/class/mISDN")
+Signed-off-by: Chengfeng Ye <dg573847474@gmail.com>
+Link: https://lore.kernel.org/r/20230727085619.7419-1-dg573847474@gmail.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/isdn/hardware/mISDN/hfcpci.c | 10 +++++-----
+ 1 file changed, 5 insertions(+), 5 deletions(-)
+
+diff --git a/drivers/isdn/hardware/mISDN/hfcpci.c b/drivers/isdn/hardware/mISDN/hfcpci.c
+index 41ff2e3dc8430..0a683a66fc612 100644
+--- a/drivers/isdn/hardware/mISDN/hfcpci.c
++++ b/drivers/isdn/hardware/mISDN/hfcpci.c
+@@ -839,7 +839,7 @@ hfcpci_fill_fifo(struct bchannel *bch)
+               *z1t = cpu_to_le16(new_z1);     /* now send data */
+               if (bch->tx_idx < bch->tx_skb->len)
+                       return;
+-              dev_kfree_skb(bch->tx_skb);
++              dev_kfree_skb_any(bch->tx_skb);
+               if (get_next_bframe(bch))
+                       goto next_t_frame;
+               return;
+@@ -895,7 +895,7 @@ hfcpci_fill_fifo(struct bchannel *bch)
+       }
+       bz->za[new_f1].z1 = cpu_to_le16(new_z1);        /* for next buffer */
+       bz->f1 = new_f1;        /* next frame */
+-      dev_kfree_skb(bch->tx_skb);
++      dev_kfree_skb_any(bch->tx_skb);
+       get_next_bframe(bch);
+ }
+@@ -1119,7 +1119,7 @@ tx_birq(struct bchannel *bch)
+       if (bch->tx_skb && bch->tx_idx < bch->tx_skb->len)
+               hfcpci_fill_fifo(bch);
+       else {
+-              dev_kfree_skb(bch->tx_skb);
++              dev_kfree_skb_any(bch->tx_skb);
+               if (get_next_bframe(bch))
+                       hfcpci_fill_fifo(bch);
+       }
+@@ -2272,7 +2272,7 @@ _hfcpci_softirq(struct device *dev, void *unused)
+               return 0;
+       if (hc->hw.int_m2 & HFCPCI_IRQ_ENABLE) {
+-              spin_lock(&hc->lock);
++              spin_lock_irq(&hc->lock);
+               bch = Sel_BCS(hc, hc->hw.bswapped ? 2 : 1);
+               if (bch && bch->state == ISDN_P_B_RAW) { /* B1 rx&tx */
+                       main_rec_hfcpci(bch);
+@@ -2283,7 +2283,7 @@ _hfcpci_softirq(struct device *dev, void *unused)
+                       main_rec_hfcpci(bch);
+                       tx_birq(bch);
+               }
+-              spin_unlock(&hc->lock);
++              spin_unlock_irq(&hc->lock);
+       }
+       return 0;
+ }
+-- 
+2.40.1
+
diff --git a/queue-5.4/net-add-missing-data-race-annotation-for-sk_ll_usec.patch b/queue-5.4/net-add-missing-data-race-annotation-for-sk_ll_usec.patch
new file mode 100644 (file)
index 0000000..8afd332
--- /dev/null
@@ -0,0 +1,36 @@
+From 8feab57693cfbbe011d879f465bb1ef42603e8ca Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 28 Jul 2023 15:03:17 +0000
+Subject: net: add missing data-race annotation for sk_ll_usec
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit e5f0d2dd3c2faa671711dac6d3ff3cef307bcfe3 ]
+
+In a prior commit I forgot that sk_getsockopt() reads
+sk->sk_ll_usec without holding a lock.
+
+Fixes: 0dbffbb5335a ("net: annotate data race around sk_ll_usec")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/core/sock.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/net/core/sock.c b/net/core/sock.c
+index e1204da609a1b..636427d400d7f 100644
+--- a/net/core/sock.c
++++ b/net/core/sock.c
+@@ -1474,7 +1474,7 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
+ #ifdef CONFIG_NET_RX_BUSY_POLL
+       case SO_BUSY_POLL:
+-              v.val = sk->sk_ll_usec;
++              v.val = READ_ONCE(sk->sk_ll_usec);
+               break;
+ #endif
+-- 
+2.40.1
+
diff --git a/queue-5.4/net-add-missing-data-race-annotations-around-sk-sk_p.patch b/queue-5.4/net-add-missing-data-race-annotations-around-sk-sk_p.patch
new file mode 100644 (file)
index 0000000..7062ada
--- /dev/null
@@ -0,0 +1,63 @@
+From 3e0684dc4a13ae50a1c000ff7f924fb842ea6259 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 28 Jul 2023 15:03:16 +0000
+Subject: net: add missing data-race annotations around sk->sk_peek_off
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit 11695c6e966b0ec7ed1d16777d294cef865a5c91 ]
+
+sk_getsockopt() runs locklessly, thus we need to annotate the read
+of sk->sk_peek_off.
+
+While we are at it, add corresponding annotations to sk_set_peek_off()
+and unix_set_peek_off().
+
+Fixes: b9bb53f3836f ("sock: convert sk_peek_offset functions to WRITE_ONCE")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Cc: Willem de Bruijn <willemb@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/core/sock.c    | 4 ++--
+ net/unix/af_unix.c | 2 +-
+ 2 files changed, 3 insertions(+), 3 deletions(-)
+
+diff --git a/net/core/sock.c b/net/core/sock.c
+index 6d695da921094..e1204da609a1b 100644
+--- a/net/core/sock.c
++++ b/net/core/sock.c
+@@ -1444,7 +1444,7 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
+               if (!sock->ops->set_peek_off)
+                       return -EOPNOTSUPP;
+-              v.val = sk->sk_peek_off;
++              v.val = READ_ONCE(sk->sk_peek_off);
+               break;
+       case SO_NOFCS:
+               v.val = sock_flag(sk, SOCK_NOFCS);
+@@ -2652,7 +2652,7 @@ EXPORT_SYMBOL(__sk_mem_reclaim);
+ int sk_set_peek_off(struct sock *sk, int val)
+ {
+-      sk->sk_peek_off = val;
++      WRITE_ONCE(sk->sk_peek_off, val);
+       return 0;
+ }
+ EXPORT_SYMBOL_GPL(sk_set_peek_off);
+diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
+index 01fd049da104a..f966b64d2939a 100644
+--- a/net/unix/af_unix.c
++++ b/net/unix/af_unix.c
+@@ -701,7 +701,7 @@ static int unix_set_peek_off(struct sock *sk, int val)
+       if (mutex_lock_interruptible(&u->iolock))
+               return -EINTR;
+-      sk->sk_peek_off = val;
++      WRITE_ONCE(sk->sk_peek_off, val);
+       mutex_unlock(&u->iolock);
+       return 0;
+-- 
+2.40.1
+
diff --git a/queue-5.4/net-add-missing-read_once-sk-sk_rcvbuf-annotation.patch b/queue-5.4/net-add-missing-read_once-sk-sk_rcvbuf-annotation.patch
new file mode 100644 (file)
index 0000000..b140684
--- /dev/null
@@ -0,0 +1,36 @@
+From a66bb1ab22fc670307c90897bfb25a83f8d38c14 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 28 Jul 2023 15:03:14 +0000
+Subject: net: add missing READ_ONCE(sk->sk_rcvbuf) annotation
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit b4b553253091cafe9ec38994acf42795e073bef5 ]
+
+In a prior commit, I forgot to change sk_getsockopt()
+when reading sk->sk_rcvbuf locklessly.
+
+Fixes: ebb3b78db7bf ("tcp: annotate sk->sk_rcvbuf lockless reads")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/core/sock.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/net/core/sock.c b/net/core/sock.c
+index e6d26cfba32d5..6d695da921094 100644
+--- a/net/core/sock.c
++++ b/net/core/sock.c
+@@ -1262,7 +1262,7 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
+               break;
+       case SO_RCVBUF:
+-              v.val = sk->sk_rcvbuf;
++              v.val = READ_ONCE(sk->sk_rcvbuf);
+               break;
+       case SO_REUSEADDR:
+-- 
+2.40.1
+
diff --git a/queue-5.4/net-add-missing-read_once-sk-sk_rcvlowat-annotation.patch b/queue-5.4/net-add-missing-read_once-sk-sk_rcvlowat-annotation.patch
new file mode 100644 (file)
index 0000000..25e944f
--- /dev/null
@@ -0,0 +1,36 @@
+From 510bfde141d282599b53223172b4fad554e9e0ea Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 28 Jul 2023 15:03:11 +0000
+Subject: net: add missing READ_ONCE(sk->sk_rcvlowat) annotation
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit e6d12bdb435d23ff6c1890c852d85408a2f496ee ]
+
+In a prior commit, I forgot to change sk_getsockopt()
+when reading sk->sk_rcvlowat locklessly.
+
+Fixes: eac66402d1c3 ("net: annotate sk->sk_rcvlowat lockless reads")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/core/sock.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/net/core/sock.c b/net/core/sock.c
+index 539c39ad1e488..a73111be68581 100644
+--- a/net/core/sock.c
++++ b/net/core/sock.c
+@@ -1350,7 +1350,7 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
+               break;
+       case SO_RCVLOWAT:
+-              v.val = sk->sk_rcvlowat;
++              v.val = READ_ONCE(sk->sk_rcvlowat);
+               break;
+       case SO_SNDLOWAT:
+-- 
+2.40.1
+
diff --git a/queue-5.4/net-add-missing-read_once-sk-sk_sndbuf-annotation.patch b/queue-5.4/net-add-missing-read_once-sk-sk_sndbuf-annotation.patch
new file mode 100644 (file)
index 0000000..77e3a0e
--- /dev/null
@@ -0,0 +1,36 @@
+From 01ab33478ac6646afcb236b3c1ecf6084c4d30c4 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 28 Jul 2023 15:03:13 +0000
+Subject: net: add missing READ_ONCE(sk->sk_sndbuf) annotation
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit 74bc084327c643499474ba75df485607da37dd6e ]
+
+In a prior commit, I forgot to change sk_getsockopt()
+when reading sk->sk_sndbuf locklessly.
+
+Fixes: e292f05e0df7 ("tcp: annotate sk->sk_sndbuf lockless reads")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/core/sock.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/net/core/sock.c b/net/core/sock.c
+index a73111be68581..e6d26cfba32d5 100644
+--- a/net/core/sock.c
++++ b/net/core/sock.c
+@@ -1258,7 +1258,7 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
+               break;
+       case SO_SNDBUF:
+-              v.val = sk->sk_sndbuf;
++              v.val = READ_ONCE(sk->sk_sndbuf);
+               break;
+       case SO_RCVBUF:
+-- 
+2.40.1
+
diff --git a/queue-5.4/net-annotate-data-races-around-sk-sk_max_pacing_rate.patch b/queue-5.4/net-annotate-data-races-around-sk-sk_max_pacing_rate.patch
new file mode 100644 (file)
index 0000000..b9ba531
--- /dev/null
@@ -0,0 +1,54 @@
+From 453396818d23d8b006379c482cb71b839407d1b8 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 28 Jul 2023 15:03:10 +0000
+Subject: net: annotate data-races around sk->sk_max_pacing_rate
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit ea7f45ef77b39e72244d282e47f6cb1ef4135cd2 ]
+
+sk_getsockopt() runs locklessly. This means sk->sk_max_pacing_rate
+can be read while other threads are changing its value.
+
+Fixes: 62748f32d501 ("net: introduce SO_MAX_PACING_RATE")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/core/sock.c | 9 ++++++---
+ 1 file changed, 6 insertions(+), 3 deletions(-)
+
+diff --git a/net/core/sock.c b/net/core/sock.c
+index d55eea5538bce..539c39ad1e488 100644
+--- a/net/core/sock.c
++++ b/net/core/sock.c
+@@ -1117,7 +1117,8 @@ int sock_setsockopt(struct socket *sock, int level, int optname,
+                       cmpxchg(&sk->sk_pacing_status,
+                               SK_PACING_NONE,
+                               SK_PACING_NEEDED);
+-              sk->sk_max_pacing_rate = ulval;
++              /* Pairs with READ_ONCE() from sk_getsockopt() */
++              WRITE_ONCE(sk->sk_max_pacing_rate, ulval);
+               sk->sk_pacing_rate = min(sk->sk_pacing_rate, ulval);
+               break;
+               }
+@@ -1478,12 +1479,14 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
+ #endif
+       case SO_MAX_PACING_RATE:
++              /* The READ_ONCE() pair with the WRITE_ONCE() in sk_setsockopt() */
+               if (sizeof(v.ulval) != sizeof(v.val) && len >= sizeof(v.ulval)) {
+                       lv = sizeof(v.ulval);
+-                      v.ulval = sk->sk_max_pacing_rate;
++                      v.ulval = READ_ONCE(sk->sk_max_pacing_rate);
+               } else {
+                       /* 32bit version */
+-                      v.val = min_t(unsigned long, sk->sk_max_pacing_rate, ~0U);
++                      v.val = min_t(unsigned long, ~0U,
++                                    READ_ONCE(sk->sk_max_pacing_rate));
+               }
+               break;
+-- 
+2.40.1
+
diff --git a/queue-5.4/net-dcb-choose-correct-policy-to-parse-dcb_attr_bcn.patch b/queue-5.4/net-dcb-choose-correct-policy-to-parse-dcb_attr_bcn.patch
new file mode 100644 (file)
index 0000000..eab6b4c
--- /dev/null
@@ -0,0 +1,103 @@
+From a467630d0bf7ccb82a56cd9824e27eb0d26d0534 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 1 Aug 2023 09:32:48 +0800
+Subject: net: dcb: choose correct policy to parse DCB_ATTR_BCN
+
+From: Lin Ma <linma@zju.edu.cn>
+
+[ Upstream commit 31d49ba033095f6e8158c60f69714a500922e0c3 ]
+
+The dcbnl_bcn_setcfg uses an erroneous policy to parse tb[DCB_ATTR_BCN],
+which was introduced in commit 859ee3c43812 ("DCB: Add support for DCB
+BCN"). Please see the comments in the code below:
+
+static int dcbnl_bcn_setcfg(...)
+{
+  ...
+  ret = nla_parse_nested_deprecated(..., dcbnl_pfc_up_nest, .. )
+  // !!! dcbnl_pfc_up_nest for attributes
+  //  DCB_PFC_UP_ATTR_0 to DCB_PFC_UP_ATTR_ALL in enum dcbnl_pfc_up_attrs
+  ...
+  for (i = DCB_BCN_ATTR_RP_0; i <= DCB_BCN_ATTR_RP_7; i++) {
+  // !!! DCB_BCN_ATTR_RP_0 to DCB_BCN_ATTR_RP_7 in enum dcbnl_bcn_attrs
+    ...
+    value_byte = nla_get_u8(data[i]);
+    ...
+  }
+  ...
+  for (i = DCB_BCN_ATTR_BCNA_0; i <= DCB_BCN_ATTR_RI; i++) {
+  // !!! DCB_BCN_ATTR_BCNA_0 to DCB_BCN_ATTR_RI in enum dcbnl_bcn_attrs
+  ...
+    value_int = nla_get_u32(data[i]);
+  ...
+  }
+  ...
+}
+
+That is, the nla_parse_nested_deprecated uses dcbnl_pfc_up_nest
+attributes to parse nlattr defined in dcbnl_pfc_up_attrs. But the
+following access code fetches each nlattr as dcbnl_bcn_attrs attributes.
+By looking up the associated nla_policy for dcbnl_bcn_attrs, we can find
+that the beginning parts of these two policies are the "same".
+
+static const struct nla_policy dcbnl_pfc_up_nest[...] = {
+        [DCB_PFC_UP_ATTR_0]   = {.type = NLA_U8},
+        [DCB_PFC_UP_ATTR_1]   = {.type = NLA_U8},
+        [DCB_PFC_UP_ATTR_2]   = {.type = NLA_U8},
+        [DCB_PFC_UP_ATTR_3]   = {.type = NLA_U8},
+        [DCB_PFC_UP_ATTR_4]   = {.type = NLA_U8},
+        [DCB_PFC_UP_ATTR_5]   = {.type = NLA_U8},
+        [DCB_PFC_UP_ATTR_6]   = {.type = NLA_U8},
+        [DCB_PFC_UP_ATTR_7]   = {.type = NLA_U8},
+        [DCB_PFC_UP_ATTR_ALL] = {.type = NLA_FLAG},
+};
+
+static const struct nla_policy dcbnl_bcn_nest[...] = {
+        [DCB_BCN_ATTR_RP_0]         = {.type = NLA_U8},
+        [DCB_BCN_ATTR_RP_1]         = {.type = NLA_U8},
+        [DCB_BCN_ATTR_RP_2]         = {.type = NLA_U8},
+        [DCB_BCN_ATTR_RP_3]         = {.type = NLA_U8},
+        [DCB_BCN_ATTR_RP_4]         = {.type = NLA_U8},
+        [DCB_BCN_ATTR_RP_5]         = {.type = NLA_U8},
+        [DCB_BCN_ATTR_RP_6]         = {.type = NLA_U8},
+        [DCB_BCN_ATTR_RP_7]         = {.type = NLA_U8},
+        [DCB_BCN_ATTR_RP_ALL]       = {.type = NLA_FLAG},
+        // from here is somewhat different
+        [DCB_BCN_ATTR_BCNA_0]       = {.type = NLA_U32},
+        ...
+        [DCB_BCN_ATTR_ALL]          = {.type = NLA_FLAG},
+};
+
+Therefore, the current code is buggy and this
+nla_parse_nested_deprecated could overflow the dcbnl_pfc_up_nest and use
+the adjacent nla_policy to parse attributes from DCB_BCN_ATTR_BCNA_0.
+
+Hence use the correct policy dcbnl_bcn_nest to parse the nested
+tb[DCB_ATTR_BCN] TLV.
+
+Fixes: 859ee3c43812 ("DCB: Add support for DCB BCN")
+Signed-off-by: Lin Ma <linma@zju.edu.cn>
+Reviewed-by: Simon Horman <horms@kernel.org>
+Link: https://lore.kernel.org/r/20230801013248.87240-1-linma@zju.edu.cn
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/dcb/dcbnl.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/net/dcb/dcbnl.c b/net/dcb/dcbnl.c
+index b53d5e1d026fe..71e97e2a36845 100644
+--- a/net/dcb/dcbnl.c
++++ b/net/dcb/dcbnl.c
+@@ -946,7 +946,7 @@ static int dcbnl_bcn_setcfg(struct net_device *netdev, struct nlmsghdr *nlh,
+               return -EOPNOTSUPP;
+       ret = nla_parse_nested_deprecated(data, DCB_BCN_ATTR_MAX,
+-                                        tb[DCB_ATTR_BCN], dcbnl_pfc_up_nest,
++                                        tb[DCB_ATTR_BCN], dcbnl_bcn_nest,
+                                         NULL);
+       if (ret)
+               return ret;
+-- 
+2.40.1
+
diff --git a/queue-5.4/net-ll_temac-fix-error-checking-of-irq_of_parse_and_.patch b/queue-5.4/net-ll_temac-fix-error-checking-of-irq_of_parse_and_.patch
new file mode 100644 (file)
index 0000000..4b76642
--- /dev/null
@@ -0,0 +1,54 @@
+From 0146920e4977df2a528283ba1c346d18702f23b6 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 31 Jul 2023 10:42:32 +0300
+Subject: net: ll_temac: fix error checking of irq_of_parse_and_map()
+
+From: Dan Carpenter <dan.carpenter@linaro.org>
+
+[ Upstream commit ef45e8400f5bb66b03cc949f76c80e2a118447de ]
+
+Most kernel functions return negative error codes but some irq functions
+return zero on error.  In this code, irq_of_parse_and_map() returns zero
+and platform_get_irq() returns negative error codes.  We need to handle
+both cases appropriately.
+
+Fixes: 8425c41d1ef7 ("net: ll_temac: Extend support to non-device-tree platforms")
+Signed-off-by: Dan Carpenter <dan.carpenter@linaro.org>
+Acked-by: Esben Haabendal <esben@geanix.com>
+Reviewed-by: Yang Yingliang <yangyingliang@huawei.com>
+Reviewed-by: Harini Katakam <harini.katakam@amd.com>
+Link: https://lore.kernel.org/r/3d0aef75-06e0-45a5-a2a6-2cc4738d4143@moroto.mountain
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/xilinx/ll_temac_main.c | 12 ++++++++----
+ 1 file changed, 8 insertions(+), 4 deletions(-)
+
+diff --git a/drivers/net/ethernet/xilinx/ll_temac_main.c b/drivers/net/ethernet/xilinx/ll_temac_main.c
+index 9756d83994fca..86edc95919146 100644
+--- a/drivers/net/ethernet/xilinx/ll_temac_main.c
++++ b/drivers/net/ethernet/xilinx/ll_temac_main.c
+@@ -1481,12 +1481,16 @@ static int temac_probe(struct platform_device *pdev)
+       }
+       /* Error handle returned DMA RX and TX interrupts */
+-      if (lp->rx_irq < 0)
+-              return dev_err_probe(&pdev->dev, lp->rx_irq,
++      if (lp->rx_irq <= 0) {
++              rc = lp->rx_irq ?: -EINVAL;
++              return dev_err_probe(&pdev->dev, rc,
+                                    "could not get DMA RX irq\n");
+-      if (lp->tx_irq < 0)
+-              return dev_err_probe(&pdev->dev, lp->tx_irq,
++      }
++      if (lp->tx_irq <= 0) {
++              rc = lp->tx_irq ?: -EINVAL;
++              return dev_err_probe(&pdev->dev, rc,
+                                    "could not get DMA TX irq\n");
++      }
+       if (temac_np) {
+               /* Retrieve the MAC address */
+-- 
+2.40.1
+
diff --git a/queue-5.4/net-ll_temac-switch-to-use-dev_err_probe-helper.patch b/queue-5.4/net-ll_temac-switch-to-use-dev_err_probe-helper.patch
new file mode 100644 (file)
index 0000000..01c61d7
--- /dev/null
@@ -0,0 +1,50 @@
+From 3b5f7c539ee1ec97d5fe660d8c4c960263578049 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 15 Sep 2022 19:42:14 +0800
+Subject: net: ll_temac: Switch to use dev_err_probe() helper
+
+From: Yang Yingliang <yangyingliang@huawei.com>
+
+[ Upstream commit 75ae8c284c00dc3584b7c173f6fcf96ee15bd02c ]
+
+dev_err() can be replaced with dev_err_probe(), which will check if the
+error code is -EPROBE_DEFER.
+
+Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Stable-dep-of: ef45e8400f5b ("net: ll_temac: fix error checking of irq_of_parse_and_map()")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/xilinx/ll_temac_main.c | 16 ++++++----------
+ 1 file changed, 6 insertions(+), 10 deletions(-)
+
+diff --git a/drivers/net/ethernet/xilinx/ll_temac_main.c b/drivers/net/ethernet/xilinx/ll_temac_main.c
+index a109438f4a78e..9756d83994fca 100644
+--- a/drivers/net/ethernet/xilinx/ll_temac_main.c
++++ b/drivers/net/ethernet/xilinx/ll_temac_main.c
+@@ -1481,16 +1481,12 @@ static int temac_probe(struct platform_device *pdev)
+       }
+       /* Error handle returned DMA RX and TX interrupts */
+-      if (lp->rx_irq < 0) {
+-              if (lp->rx_irq != -EPROBE_DEFER)
+-                      dev_err(&pdev->dev, "could not get DMA RX irq\n");
+-              return lp->rx_irq;
+-      }
+-      if (lp->tx_irq < 0) {
+-              if (lp->tx_irq != -EPROBE_DEFER)
+-                      dev_err(&pdev->dev, "could not get DMA TX irq\n");
+-              return lp->tx_irq;
+-      }
++      if (lp->rx_irq < 0)
++              return dev_err_probe(&pdev->dev, lp->rx_irq,
++                                   "could not get DMA RX irq\n");
++      if (lp->tx_irq < 0)
++              return dev_err_probe(&pdev->dev, lp->tx_irq,
++                                   "could not get DMA TX irq\n");
+       if (temac_np) {
+               /* Retrieve the MAC address */
+-- 
+2.40.1
+
diff --git a/queue-5.4/net-mlx5-dr-fix-memory-leak-in-mlx5dr_cmd_create_ref.patch b/queue-5.4/net-mlx5-dr-fix-memory-leak-in-mlx5dr_cmd_create_ref.patch
new file mode 100644 (file)
index 0000000..6245e79
--- /dev/null
@@ -0,0 +1,44 @@
+From 8dcc22a25e8380ddb4f3de2c569743474f92d001 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 5 Jul 2023 20:15:27 +0800
+Subject: net/mlx5: DR, fix memory leak in mlx5dr_cmd_create_reformat_ctx
+
+From: Zhengchao Shao <shaozhengchao@huawei.com>
+
+[ Upstream commit 5dd77585dd9d0e03dd1bceb95f0269a7eaf6b936 ]
+
+When mlx5_cmd_exec fails in mlx5dr_cmd_create_reformat_ctx, the memory
+pointed to by 'in' is not released, which causes a memory leak. Move the
+memory release after mlx5_cmd_exec.
+
+Fixes: 1d9186476e12 ("net/mlx5: DR, Add direct rule command utilities")
+Signed-off-by: Zhengchao Shao <shaozhengchao@huawei.com>
+Reviewed-by: Leon Romanovsky <leonro@nvidia.com>
+Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/steering/dr_cmd.c | 5 +++--
+ 1 file changed, 3 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_cmd.c
+index 64f6f529f6eb1..45b90c7698787 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_cmd.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_cmd.c
+@@ -423,11 +423,12 @@ int mlx5dr_cmd_create_reformat_ctx(struct mlx5_core_dev *mdev,
+       err = mlx5_cmd_exec(mdev, in, inlen, out, sizeof(out));
+       if (err)
+-              return err;
++              goto err_free_in;
+       *reformat_id = MLX5_GET(alloc_packet_reformat_context_out, out, packet_reformat_id);
+-      kvfree(in);
++err_free_in:
++      kvfree(in);
+       return err;
+ }
+-- 
+2.40.1
+
diff --git a/queue-5.4/net-mlx5e-fix-return-value-check-in-mlx5e_ipsec_remo.patch b/queue-5.4/net-mlx5e-fix-return-value-check-in-mlx5e_ipsec_remo.patch
new file mode 100644 (file)
index 0000000..05e2818
--- /dev/null
@@ -0,0 +1,39 @@
+From 28773084d652361ddb715321b9a8bde21797f5b6 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 25 Jul 2023 14:56:55 +0800
+Subject: net/mlx5e: fix return value check in mlx5e_ipsec_remove_trailer()
+
+From: Yuanjun Gong <ruc_gongyuanjun@163.com>
+
+[ Upstream commit e5bcb7564d3bd0c88613c76963c5349be9c511c5 ]
+
+mlx5e_ipsec_remove_trailer() should return an error code if function
+pskb_trim() returns an unexpected value.
+
+Fixes: 2ac9cfe78223 ("net/mlx5e: IPSec, Add Innova IPSec offload TX data path")
+Signed-off-by: Yuanjun Gong <ruc_gongyuanjun@163.com>
+Reviewed-by: Leon Romanovsky <leonro@nvidia.com>
+Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c | 4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c
+index 0dd17514caae8..d212706f1bdea 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c
+@@ -121,7 +121,9 @@ static int mlx5e_ipsec_remove_trailer(struct sk_buff *skb, struct xfrm_state *x)
+       trailer_len = alen + plen + 2;
+-      pskb_trim(skb, skb->len - trailer_len);
++      ret = pskb_trim(skb, skb->len - trailer_len);
++      if (unlikely(ret))
++              return ret;
+       if (skb->protocol == htons(ETH_P_IP)) {
+               ipv4hdr->tot_len = htons(ntohs(ipv4hdr->tot_len) - trailer_len);
+               ip_send_check(ipv4hdr);
+-- 
+2.40.1
+
diff --git a/queue-5.4/net-sched-cls_fw-no-longer-copy-tcf_result-on-update.patch b/queue-5.4/net-sched-cls_fw-no-longer-copy-tcf_result-on-update.patch
new file mode 100644 (file)
index 0000000..02d2f6c
--- /dev/null
@@ -0,0 +1,50 @@
+From 0f7864e133323a51b83c24ca54fa3a70eaefdfe9 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 29 Jul 2023 08:32:01 -0400
+Subject: net/sched: cls_fw: No longer copy tcf_result on update to avoid
+ use-after-free
+
+From: valis <sec@valis.email>
+
+[ Upstream commit 76e42ae831991c828cffa8c37736ebfb831ad5ec ]
+
+When fw_change() is called on an existing filter, the whole
+tcf_result struct is always copied into the new instance of the filter.
+
+This causes a problem when updating a filter bound to a class,
+as tcf_unbind_filter() is always called on the old instance in the
+success path, decreasing filter_cnt of the still referenced class
+and allowing it to be deleted, leading to a use-after-free.
+
+Fix this by no longer copying the tcf_result struct from the old filter.
+
+Fixes: e35a8ee5993b ("net: sched: fw use RCU")
+Reported-by: valis <sec@valis.email>
+Reported-by: Bing-Jhong Billy Jheng <billy@starlabs.sg>
+Signed-off-by: valis <sec@valis.email>
+Signed-off-by: Jamal Hadi Salim <jhs@mojatatu.com>
+Reviewed-by: Victor Nogueira <victor@mojatatu.com>
+Reviewed-by: Pedro Tammela <pctammela@mojatatu.com>
+Reviewed-by: M A Ramdhan <ramdhan@starlabs.sg>
+Link: https://lore.kernel.org/r/20230729123202.72406-3-jhs@mojatatu.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/sched/cls_fw.c | 1 -
+ 1 file changed, 1 deletion(-)
+
+diff --git a/net/sched/cls_fw.c b/net/sched/cls_fw.c
+index 41f0898a5a565..08c41f1976c47 100644
+--- a/net/sched/cls_fw.c
++++ b/net/sched/cls_fw.c
+@@ -266,7 +266,6 @@ static int fw_change(struct net *net, struct sk_buff *in_skb,
+                       return -ENOBUFS;
+               fnew->id = f->id;
+-              fnew->res = f->res;
+               fnew->ifindex = f->ifindex;
+               fnew->tp = f->tp;
+-- 
+2.40.1
+
diff --git a/queue-5.4/net-sched-cls_route-no-longer-copy-tcf_result-on-upd.patch b/queue-5.4/net-sched-cls_route-no-longer-copy-tcf_result-on-upd.patch
new file mode 100644 (file)
index 0000000..0661715
--- /dev/null
@@ -0,0 +1,50 @@
+From 11fbf7e4c4837579b471ecb7275030d4334b4329 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 29 Jul 2023 08:32:02 -0400
+Subject: net/sched: cls_route: No longer copy tcf_result on update to avoid
+ use-after-free
+
+From: valis <sec@valis.email>
+
+[ Upstream commit b80b829e9e2c1b3f7aae34855e04d8f6ecaf13c8 ]
+
+When route4_change() is called on an existing filter, the whole
+tcf_result struct is always copied into the new instance of the filter.
+
+This causes a problem when updating a filter bound to a class,
+as tcf_unbind_filter() is always called on the old instance in the
+success path, decreasing filter_cnt of the still referenced class
+and allowing it to be deleted, leading to a use-after-free.
+
+Fix this by no longer copying the tcf_result struct from the old filter.
+
+Fixes: 1109c00547fc ("net: sched: RCU cls_route")
+Reported-by: valis <sec@valis.email>
+Reported-by: Bing-Jhong Billy Jheng <billy@starlabs.sg>
+Signed-off-by: valis <sec@valis.email>
+Signed-off-by: Jamal Hadi Salim <jhs@mojatatu.com>
+Reviewed-by: Victor Nogueira <victor@mojatatu.com>
+Reviewed-by: Pedro Tammela <pctammela@mojatatu.com>
+Reviewed-by: M A Ramdhan <ramdhan@starlabs.sg>
+Link: https://lore.kernel.org/r/20230729123202.72406-4-jhs@mojatatu.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/sched/cls_route.c | 1 -
+ 1 file changed, 1 deletion(-)
+
+diff --git a/net/sched/cls_route.c b/net/sched/cls_route.c
+index b775e681cb56e..1ad4b3e60eb3b 100644
+--- a/net/sched/cls_route.c
++++ b/net/sched/cls_route.c
+@@ -511,7 +511,6 @@ static int route4_change(struct net *net, struct sk_buff *in_skb,
+       if (fold) {
+               f->id = fold->id;
+               f->iif = fold->iif;
+-              f->res = fold->res;
+               f->handle = fold->handle;
+               f->tp = fold->tp;
+-- 
+2.40.1
+
diff --git a/queue-5.4/net-sched-cls_u32-fix-match-key-mis-addressing.patch b/queue-5.4/net-sched-cls_u32-fix-match-key-mis-addressing.patch
new file mode 100644 (file)
index 0000000..440b6e9
--- /dev/null
@@ -0,0 +1,145 @@
+From f1928d23ee09cc2a4251c02d68267e3c579c4077 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 26 Jul 2023 09:51:51 -0400
+Subject: net: sched: cls_u32: Fix match key mis-addressing
+
+From: Jamal Hadi Salim <jhs@mojatatu.com>
+
+[ Upstream commit e68409db995380d1badacba41ff24996bd396171 ]
+
+A match entry is uniquely identified with an "address" or "path" in the
+form of: hashtable ID(12b):bucketid(8b):nodeid(12b).
+
+When creating table match entries all of hash table id, bucket id and
+node (match entry id) are needed to be either specified by the user or
+reasonable in-kernel defaults are used. The in-kernel default for a table id is
+0x800(omnipresent root table); for bucketid it is 0x0. Prior to this fix there
+was none for a nodeid i.e. the code assumed that the user passed the correct
+nodeid and if the user passes a nodeid of 0 (as Mingi Cho did) then that is what
+was used. But nodeid of 0 is reserved for identifying the table. This is not
+a problem until we dump. The dump code notices that the nodeid is zero and
+assumes it is referencing a table and therefore references table struct
+tc_u_hnode instead of what was created i.e match entry struct tc_u_knode.
+
+Ming does an equivalent of:
+tc filter add dev dummy0 parent 10: prio 1 handle 0x1000 \
+protocol ip u32 match ip src 10.0.0.1/32 classid 10:1 action ok
+
+Essentially specifying a table id 0, bucketid 1 and nodeid of zero
+Tableid 0 is remapped to the default of 0x800.
+Bucketid 1 is ignored and defaults to 0x00.
+Nodeid was assumed to be what Ming passed - 0x000
+
+dumping before fix shows:
+~$ tc filter ls dev dummy0 parent 10:
+filter protocol ip pref 1 u32 chain 0
+filter protocol ip pref 1 u32 chain 0 fh 800: ht divisor 1
+filter protocol ip pref 1 u32 chain 0 fh 800: ht divisor -30591
+
+Note that the last line reports a table instead of a match entry
+(you can tell this because it says "ht divisor...").
+As a result of reporting the wrong data type (misinterpreting struct
+tc_u_knode as being struct tc_u_hnode) the divisor is reported with value
+of -30591. Ming identified this as part of the heap address
+(physmap_base is 0xffff8880 (-30591 - 1)).
+
+The fix is to ensure that when table entry matches are added and no
+nodeid is specified (i.e nodeid == 0) then we get the next available
+nodeid from the table's pool.
+
+After the fix, this is what the dump shows:
+$ tc filter ls dev dummy0 parent 10:
+filter protocol ip pref 1 u32 chain 0
+filter protocol ip pref 1 u32 chain 0 fh 800: ht divisor 1
+filter protocol ip pref 1 u32 chain 0 fh 800::800 order 2048 key ht 800 bkt 0 flowid 10:1 not_in_hw
+  match 0a000001/ffffffff at 12
+       action order 1: gact action pass
+        random type none pass val 0
+        index 1 ref 1 bind 1
+
+Reported-by: Mingi Cho <mgcho.minic@gmail.com>
+Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
+Signed-off-by: Jamal Hadi Salim <jhs@mojatatu.com>
+Link: https://lore.kernel.org/r/20230726135151.416917-1-jhs@mojatatu.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/sched/cls_u32.c | 56 ++++++++++++++++++++++++++++++++++++++++-----
+ 1 file changed, 50 insertions(+), 6 deletions(-)
+
+diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c
+index e5cc2b4d38d5a..316ccbb01b55f 100644
+--- a/net/sched/cls_u32.c
++++ b/net/sched/cls_u32.c
+@@ -1004,18 +1004,62 @@ static int u32_change(struct net *net, struct sk_buff *in_skb,
+               return -EINVAL;
+       }
++      /* At this point, we need to derive the new handle that will be used to
++       * uniquely map the identity of this table match entry. The
++       * identity of the entry that we need to construct is 32 bits made of:
++       *     htid(12b):bucketid(8b):node/entryid(12b)
++       *
++       * At this point _we have the table(ht)_ in which we will insert this
++       * entry. We carry the table's id in variable "htid".
++       * Note that earlier code picked the ht selection either by a) the user
++       * providing the htid specified via TCA_U32_HASH attribute or b) when
++       * no such attribute is passed then the root ht, is default to at ID
++       * 0x[800][00][000]. Rule: the root table has a single bucket with ID 0.
++       * If OTOH the user passed us the htid, they may also pass a bucketid of
++       * choice. 0 is fine. For example a user htid is 0x[600][01][000] it is
++       * indicating hash bucketid of 1. Rule: the entry/node ID _cannot_ be
++       * passed via the htid, so even if it was non-zero it will be ignored.
++       *
++       * We may also have a handle, if the user passed one. The handle also
++       * carries the same addressing of htid(12b):bucketid(8b):node/entryid(12b).
++       * Rule: the bucketid on the handle is ignored even if one was passed;
++       * rather the value on "htid" is always assumed to be the bucketid.
++       */
+       if (handle) {
++              /* Rule: The htid from handle and tableid from htid must match */
+               if (TC_U32_HTID(handle) && TC_U32_HTID(handle ^ htid)) {
+                       NL_SET_ERR_MSG_MOD(extack, "Handle specified hash table address mismatch");
+                       return -EINVAL;
+               }
+-              handle = htid | TC_U32_NODE(handle);
+-              err = idr_alloc_u32(&ht->handle_idr, NULL, &handle, handle,
+-                                  GFP_KERNEL);
+-              if (err)
+-                      return err;
+-      } else
++              /* Ok, so far we have a valid htid(12b):bucketid(8b) but we
++               * need to finalize the table entry identification with the last
++               * part - the node/entryid(12b)). Rule: Nodeid _cannot be 0_ for
++               * entries. Rule: nodeid of 0 is reserved only for tables(see
++               * earlier code which processes TC_U32_DIVISOR attribute).
++               * Rule: The nodeid can only be derived from the handle (and not
++               * htid).
++               * Rule: if the handle specified zero for the node id example
++               * 0x60000000, then pick a new nodeid from the pool of IDs
++               * this hash table has been allocating from.
++               * If OTOH it is specified (i.e for example the user passed a
++               * handle such as 0x60000123), then we use it generate our final
++               * handle which is used to uniquely identify the match entry.
++               */
++              if (!TC_U32_NODE(handle)) {
++                      handle = gen_new_kid(ht, htid);
++              } else {
++                      handle = htid | TC_U32_NODE(handle);
++                      err = idr_alloc_u32(&ht->handle_idr, NULL, &handle,
++                                          handle, GFP_KERNEL);
++                      if (err)
++                              return err;
++              }
++      } else {
++              /* The user did not give us a handle; lets just generate one
++               * from the table's pool of nodeids.
++               */
+               handle = gen_new_kid(ht, htid);
++      }
+       if (tb[TCA_U32_SEL] == NULL) {
+               NL_SET_ERR_MSG_MOD(extack, "Selector not specified");
+-- 
+2.40.1
+
diff --git a/queue-5.4/net-sched-cls_u32-no-longer-copy-tcf_result-on-updat.patch b/queue-5.4/net-sched-cls_u32-no-longer-copy-tcf_result-on-updat.patch
new file mode 100644 (file)
index 0000000..b7bbc2e
--- /dev/null
@@ -0,0 +1,50 @@
+From 0007f909cecca5fc8d0578376641fe2716721ef3 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 29 Jul 2023 08:32:00 -0400
+Subject: net/sched: cls_u32: No longer copy tcf_result on update to avoid
+ use-after-free
+
+From: valis <sec@valis.email>
+
+[ Upstream commit 3044b16e7c6fe5d24b1cdbcf1bd0a9d92d1ebd81 ]
+
+When u32_change() is called on an existing filter, the whole
+tcf_result struct is always copied into the new instance of the filter.
+
+This causes a problem when updating a filter bound to a class,
+as tcf_unbind_filter() is always called on the old instance in the
+success path, decreasing filter_cnt of the still referenced class
+and allowing it to be deleted, leading to a use-after-free.
+
+Fix this by no longer copying the tcf_result struct from the old filter.
+
+Fixes: de5df63228fc ("net: sched: cls_u32 changes to knode must appear atomic to readers")
+Reported-by: valis <sec@valis.email>
+Reported-by: M A Ramdhan <ramdhan@starlabs.sg>
+Signed-off-by: valis <sec@valis.email>
+Signed-off-by: Jamal Hadi Salim <jhs@mojatatu.com>
+Reviewed-by: Victor Nogueira <victor@mojatatu.com>
+Reviewed-by: Pedro Tammela <pctammela@mojatatu.com>
+Reviewed-by: M A Ramdhan <ramdhan@starlabs.sg>
+Link: https://lore.kernel.org/r/20230729123202.72406-2-jhs@mojatatu.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/sched/cls_u32.c | 1 -
+ 1 file changed, 1 deletion(-)
+
+diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c
+index 316ccbb01b55f..65598207a2fcb 100644
+--- a/net/sched/cls_u32.c
++++ b/net/sched/cls_u32.c
+@@ -814,7 +814,6 @@ static struct tc_u_knode *u32_init_knode(struct net *net, struct tcf_proto *tp,
+       new->ifindex = n->ifindex;
+       new->fshift = n->fshift;
+-      new->res = n->res;
+       new->flags = n->flags;
+       RCU_INIT_POINTER(new->ht_down, ht);
+-- 
+2.40.1
+
diff --git a/queue-5.4/perf-test-uprobe_from_different_cu-skip-if-there-is-.patch b/queue-5.4/perf-test-uprobe_from_different_cu-skip-if-there-is-.patch
new file mode 100644 (file)
index 0000000..db2911b
--- /dev/null
@@ -0,0 +1,66 @@
+From e81737ba90b46165e4cebc56f4732b414145510d Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 28 Jul 2023 17:18:12 +0200
+Subject: perf test uprobe_from_different_cu: Skip if there is no gcc
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Georg Müller <georgmueller@gmx.net>
+
+[ Upstream commit 98ce8e4a9dcfb448b30a2d7a16190f4a00382377 ]
+
+Without gcc, the test will fail.
+
+On cleanup, ignore probe removal errors. Otherwise, in case of an error
+adding the probe, the temporary directory is not removed.
+
+Fixes: 56cbeacf14353057 ("perf probe: Add test for regression introduced by switch to die_get_decl_file()")
+Signed-off-by: Georg Müller <georgmueller@gmx.net>
+Acked-by: Ian Rogers <irogers@google.com>
+Cc: Adrian Hunter <adrian.hunter@intel.com>
+Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
+Cc: Georg Müller <georgmueller@gmx.net>
+Cc: Ingo Molnar <mingo@redhat.com>
+Cc: Jiri Olsa <jolsa@kernel.org>
+Cc: Mark Rutland <mark.rutland@arm.com>
+Cc: Masami Hiramatsu <mhiramat@kernel.org>
+Cc: Namhyung Kim <namhyung@kernel.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Link: https://lore.kernel.org/r/20230728151812.454806-2-georgmueller@gmx.net
+Link: https://lore.kernel.org/r/CAP-5=fUP6UuLgRty3t2=fQsQi3k4hDMz415vWdp1x88QMvZ8ug@mail.gmail.com/
+Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ tools/perf/tests/shell/test_uprobe_from_different_cu.sh | 8 +++++++-
+ 1 file changed, 7 insertions(+), 1 deletion(-)
+
+diff --git a/tools/perf/tests/shell/test_uprobe_from_different_cu.sh b/tools/perf/tests/shell/test_uprobe_from_different_cu.sh
+index 00d2e0e2e0c28..319f36ebb9a40 100644
+--- a/tools/perf/tests/shell/test_uprobe_from_different_cu.sh
++++ b/tools/perf/tests/shell/test_uprobe_from_different_cu.sh
+@@ -4,6 +4,12 @@
+ set -e
++# skip if there's no gcc
++if ! [ -x "$(command -v gcc)" ]; then
++        echo "failed: no gcc compiler"
++        exit 2
++fi
++
+ temp_dir=$(mktemp -d /tmp/perf-uprobe-different-cu-sh.XXXXXXXXXX)
+ cleanup()
+@@ -11,7 +17,7 @@ cleanup()
+       trap - EXIT TERM INT
+       if [[ "${temp_dir}" =~ ^/tmp/perf-uprobe-different-cu-sh.*$ ]]; then
+               echo "--- Cleaning up ---"
+-              perf probe -x ${temp_dir}/testfile -d foo
++              perf probe -x ${temp_dir}/testfile -d foo || true
+               rm -f "${temp_dir}/"*
+               rmdir "${temp_dir}"
+       fi
+-- 
+2.40.1
+
diff --git a/queue-5.4/rtnetlink-let-rtnl_bridge_setlink-checks-ifla_bridge.patch b/queue-5.4/rtnetlink-let-rtnl_bridge_setlink-checks-ifla_bridge.patch
new file mode 100644 (file)
index 0000000..289a1fe
--- /dev/null
@@ -0,0 +1,66 @@
+From 39ea9761c47658832a933b9f8f24fea7d671ee2c Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 26 Jul 2023 15:53:14 +0800
+Subject: rtnetlink: let rtnl_bridge_setlink checks IFLA_BRIDGE_MODE length
+
+From: Lin Ma <linma@zju.edu.cn>
+
+[ Upstream commit d73ef2d69c0dba5f5a1cb9600045c873bab1fb7f ]
+
+There are a total of 9 ndo_bridge_setlink handlers in the current kernel,
+which are 1) bnxt_bridge_setlink, 2) be_ndo_bridge_setlink 3)
+i40e_ndo_bridge_setlink 4) ice_bridge_setlink 5)
+ixgbe_ndo_bridge_setlink 6) mlx5e_bridge_setlink 7)
+nfp_net_bridge_setlink 8) qeth_l2_bridge_setlink 9) br_setlink.
+
+By investigating the code, we find that 1-7 parse and use nlattr
+IFLA_BRIDGE_MODE but 3 and 4 forget to do the nla_len check. This can
+lead to an out-of-attribute read and allow a malformed nlattr (e.g.,
+length 0) to be viewed as a 2 byte integer.
+
+To avoid such issues, including in any ndo_bridge_setlink handlers added
+in the future, this patch adds the nla_len check in rtnl_bridge_setlink
+and does an early error return if the length mismatches. To make it work,
+the break is removed from the parsing of IFLA_BRIDGE_FLAGS to make sure
+that nla_for_each_nested iterates over every attribute.
+
+Fixes: b1edc14a3fbf ("ice: Implement ice_bridge_getlink and ice_bridge_setlink")
+Fixes: 51616018dd1b ("i40e: Add support for getlink, setlink ndo ops")
+Suggested-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Lin Ma <linma@zju.edu.cn>
+Acked-by: Nikolay Aleksandrov <razor@blackwall.org>
+Reviewed-by: Hangbin Liu <liuhangbin@gmail.com>
+Link: https://lore.kernel.org/r/20230726075314.1059224-1-linma@zju.edu.cn
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/core/rtnetlink.c | 8 ++++++--
+ 1 file changed, 6 insertions(+), 2 deletions(-)
+
+diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
+index 1db92a44548f0..3eaf7c706b0ec 100644
+--- a/net/core/rtnetlink.c
++++ b/net/core/rtnetlink.c
+@@ -4590,13 +4590,17 @@ static int rtnl_bridge_setlink(struct sk_buff *skb, struct nlmsghdr *nlh,
+       br_spec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_AF_SPEC);
+       if (br_spec) {
+               nla_for_each_nested(attr, br_spec, rem) {
+-                      if (nla_type(attr) == IFLA_BRIDGE_FLAGS) {
++                      if (nla_type(attr) == IFLA_BRIDGE_FLAGS && !have_flags) {
+                               if (nla_len(attr) < sizeof(flags))
+                                       return -EINVAL;
+                               have_flags = true;
+                               flags = nla_get_u16(attr);
+-                              break;
++                      }
++
++                      if (nla_type(attr) == IFLA_BRIDGE_MODE) {
++                              if (nla_len(attr) < sizeof(u16))
++                                      return -EINVAL;
+                       }
+               }
+       }
+-- 
+2.40.1
+
index f37e1b3e60dadec829a7af82ce8885f2c7f28e2a..4fef9534dc755e90e1f8935743633abc8b4c8c0a 100644 (file)
@@ -90,3 +90,32 @@ arm64-add-ampere1-to-the-spectre-bhb-affected-list.patch
 arm64-fix-bit-shifting-ub-in-the-midr_cpu_model-macro.patch
 perf-fix-function-pointer-case.patch
 loop-select-i-o-scheduler-none-from-inside-add_disk.patch
+word-at-a-time-use-the-same-return-type-for-has_zero.patch
+kvm-s390-fix-sthyi-error-handling.patch
+net-mlx5-dr-fix-memory-leak-in-mlx5dr_cmd_create_ref.patch
+net-mlx5e-fix-return-value-check-in-mlx5e_ipsec_remo.patch
+rtnetlink-let-rtnl_bridge_setlink-checks-ifla_bridge.patch
+perf-test-uprobe_from_different_cu-skip-if-there-is-.patch
+net-sched-cls_u32-fix-match-key-mis-addressing.patch
+misdn-hfcpci-fix-potential-deadlock-on-hc-lock.patch
+net-annotate-data-races-around-sk-sk_max_pacing_rate.patch
+net-add-missing-read_once-sk-sk_rcvlowat-annotation.patch
+net-add-missing-read_once-sk-sk_sndbuf-annotation.patch
+net-add-missing-read_once-sk-sk_rcvbuf-annotation.patch
+net-add-missing-data-race-annotations-around-sk-sk_p.patch
+net-add-missing-data-race-annotation-for-sk_ll_usec.patch
+net-sched-cls_u32-no-longer-copy-tcf_result-on-updat.patch
+net-sched-cls_fw-no-longer-copy-tcf_result-on-update.patch
+net-sched-cls_route-no-longer-copy-tcf_result-on-upd.patch
+bpf-sockmap-remove-preempt_disable-in-sock_map_sk_ac.patch
+driver-core-add-device-probe-log-helper.patch
+net-ll_temac-switch-to-use-dev_err_probe-helper.patch
+net-ll_temac-fix-error-checking-of-irq_of_parse_and_.patch
+net-dcb-choose-correct-policy-to-parse-dcb_attr_bcn.patch
+ip6mr-fix-skb_under_panic-in-ip6mr_cache_report.patch
+tcp_metrics-fix-addr_same-helper.patch
+tcp_metrics-annotate-data-races-around-tm-tcpm_stamp.patch
+tcp_metrics-annotate-data-races-around-tm-tcpm_lock.patch
+tcp_metrics-annotate-data-races-around-tm-tcpm_vals.patch
+tcp_metrics-annotate-data-races-around-tm-tcpm_net.patch
+tcp_metrics-fix-data-race-in-tcpm_suck_dst-vs-fastop.patch
diff --git a/queue-5.4/tcp_metrics-annotate-data-races-around-tm-tcpm_lock.patch b/queue-5.4/tcp_metrics-annotate-data-races-around-tm-tcpm_lock.patch
new file mode 100644 (file)
index 0000000..edc213b
--- /dev/null
@@ -0,0 +1,51 @@
+From 74c204ba460603978f9f32d0e5f2fefca483212b Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 2 Aug 2023 13:14:57 +0000
+Subject: tcp_metrics: annotate data-races around tm->tcpm_lock
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit 285ce119a3c6c4502585936650143e54c8692788 ]
+
+tm->tcpm_lock can be read or written locklessly.
+
+Add needed READ_ONCE()/WRITE_ONCE() to document this.
+
+Fixes: 51c5d0c4b169 ("tcp: Maintain dynamic metrics in local cache.")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Reviewed-by: David Ahern <dsahern@kernel.org>
+Reviewed-by: Kuniyuki Iwashima <kuniyu@amazon.com>
+Link: https://lore.kernel.org/r/20230802131500.1478140-4-edumazet@google.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/ipv4/tcp_metrics.c | 6 ++++--
+ 1 file changed, 4 insertions(+), 2 deletions(-)
+
+diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c
+index 6a5a6683194c9..2dac82154af85 100644
+--- a/net/ipv4/tcp_metrics.c
++++ b/net/ipv4/tcp_metrics.c
+@@ -59,7 +59,8 @@ static inline struct net *tm_net(struct tcp_metrics_block *tm)
+ static bool tcp_metric_locked(struct tcp_metrics_block *tm,
+                             enum tcp_metric_index idx)
+ {
+-      return tm->tcpm_lock & (1 << idx);
++      /* Paired with WRITE_ONCE() in tcpm_suck_dst() */
++      return READ_ONCE(tm->tcpm_lock) & (1 << idx);
+ }
+ static u32 tcp_metric_get(struct tcp_metrics_block *tm,
+@@ -110,7 +111,8 @@ static void tcpm_suck_dst(struct tcp_metrics_block *tm,
+               val |= 1 << TCP_METRIC_CWND;
+       if (dst_metric_locked(dst, RTAX_REORDERING))
+               val |= 1 << TCP_METRIC_REORDERING;
+-      tm->tcpm_lock = val;
++      /* Paired with READ_ONCE() in tcp_metric_locked() */
++      WRITE_ONCE(tm->tcpm_lock, val);
+       msval = dst_metric_raw(dst, RTAX_RTT);
+       tm->tcpm_vals[TCP_METRIC_RTT] = msval * USEC_PER_MSEC;
+-- 
+2.40.1
+
diff --git a/queue-5.4/tcp_metrics-annotate-data-races-around-tm-tcpm_net.patch b/queue-5.4/tcp_metrics-annotate-data-races-around-tm-tcpm_net.patch
new file mode 100644 (file)
index 0000000..841e7ef
--- /dev/null
@@ -0,0 +1,66 @@
+From 48d653fc96b8daec9d90fef32b5146001c748981 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 2 Aug 2023 13:14:59 +0000
+Subject: tcp_metrics: annotate data-races around tm->tcpm_net
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit d5d986ce42c71a7562d32c4e21e026b0f87befec ]
+
+tm->tcpm_net can be read or written locklessly.
+
+Instead of changing write_pnet() and read_pnet() and potentially
+hurting performance, add the needed READ_ONCE()/WRITE_ONCE()
+in tm_net() and tcpm_new().
+
+Fixes: 849e8a0ca8d5 ("tcp_metrics: Add a field tcpm_net and verify it matches on lookup")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Reviewed-by: David Ahern <dsahern@kernel.org>
+Reviewed-by: Kuniyuki Iwashima <kuniyu@amazon.com>
+Link: https://lore.kernel.org/r/20230802131500.1478140-6-edumazet@google.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/ipv4/tcp_metrics.c | 11 +++++++----
+ 1 file changed, 7 insertions(+), 4 deletions(-)
+
+diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c
+index 29261aa96eb40..3d0452bb6c2a1 100644
+--- a/net/ipv4/tcp_metrics.c
++++ b/net/ipv4/tcp_metrics.c
+@@ -40,7 +40,7 @@ struct tcp_fastopen_metrics {
+ struct tcp_metrics_block {
+       struct tcp_metrics_block __rcu  *tcpm_next;
+-      possible_net_t                  tcpm_net;
++      struct net                      *tcpm_net;
+       struct inetpeer_addr            tcpm_saddr;
+       struct inetpeer_addr            tcpm_daddr;
+       unsigned long                   tcpm_stamp;
+@@ -51,9 +51,10 @@ struct tcp_metrics_block {
+       struct rcu_head                 rcu_head;
+ };
+-static inline struct net *tm_net(struct tcp_metrics_block *tm)
++static inline struct net *tm_net(const struct tcp_metrics_block *tm)
+ {
+-      return read_pnet(&tm->tcpm_net);
++      /* Paired with the WRITE_ONCE() in tcpm_new() */
++      return READ_ONCE(tm->tcpm_net);
+ }
+ static bool tcp_metric_locked(struct tcp_metrics_block *tm,
+@@ -197,7 +198,9 @@ static struct tcp_metrics_block *tcpm_new(struct dst_entry *dst,
+               if (!tm)
+                       goto out_unlock;
+       }
+-      write_pnet(&tm->tcpm_net, net);
++      /* Paired with the READ_ONCE() in tm_net() */
++      WRITE_ONCE(tm->tcpm_net, net);
++
+       tm->tcpm_saddr = *saddr;
+       tm->tcpm_daddr = *daddr;
+-- 
+2.40.1
+
diff --git a/queue-5.4/tcp_metrics-annotate-data-races-around-tm-tcpm_stamp.patch b/queue-5.4/tcp_metrics-annotate-data-races-around-tm-tcpm_stamp.patch
new file mode 100644 (file)
index 0000000..211b134
--- /dev/null
@@ -0,0 +1,88 @@
+From 890588969f8887c6c738ab038d1b311948460346 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 2 Aug 2023 13:14:56 +0000
+Subject: tcp_metrics: annotate data-races around tm->tcpm_stamp
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit 949ad62a5d5311d36fce2e14fe5fed3f936da51c ]
+
+tm->tcpm_stamp can be read or written locklessly.
+
+Add needed READ_ONCE()/WRITE_ONCE() to document this.
+
+Also constify tcpm_check_stamp() dst argument.
+
+Fixes: 51c5d0c4b169 ("tcp: Maintain dynamic metrics in local cache.")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Reviewed-by: David Ahern <dsahern@kernel.org>
+Reviewed-by: Kuniyuki Iwashima <kuniyu@amazon.com>
+Link: https://lore.kernel.org/r/20230802131500.1478140-3-edumazet@google.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/ipv4/tcp_metrics.c | 19 +++++++++++++------
+ 1 file changed, 13 insertions(+), 6 deletions(-)
+
+diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c
+index f3fb19df72e1c..6a5a6683194c9 100644
+--- a/net/ipv4/tcp_metrics.c
++++ b/net/ipv4/tcp_metrics.c
+@@ -97,7 +97,7 @@ static void tcpm_suck_dst(struct tcp_metrics_block *tm,
+       u32 msval;
+       u32 val;
+-      tm->tcpm_stamp = jiffies;
++      WRITE_ONCE(tm->tcpm_stamp, jiffies);
+       val = 0;
+       if (dst_metric_locked(dst, RTAX_RTT))
+@@ -131,9 +131,15 @@ static void tcpm_suck_dst(struct tcp_metrics_block *tm,
+ #define TCP_METRICS_TIMEOUT           (60 * 60 * HZ)
+-static void tcpm_check_stamp(struct tcp_metrics_block *tm, struct dst_entry *dst)
++static void tcpm_check_stamp(struct tcp_metrics_block *tm,
++                           const struct dst_entry *dst)
+ {
+-      if (tm && unlikely(time_after(jiffies, tm->tcpm_stamp + TCP_METRICS_TIMEOUT)))
++      unsigned long limit;
++
++      if (!tm)
++              return;
++      limit = READ_ONCE(tm->tcpm_stamp) + TCP_METRICS_TIMEOUT;
++      if (unlikely(time_after(jiffies, limit)))
+               tcpm_suck_dst(tm, dst, false);
+ }
+@@ -174,7 +180,8 @@ static struct tcp_metrics_block *tcpm_new(struct dst_entry *dst,
+               oldest = deref_locked(tcp_metrics_hash[hash].chain);
+               for (tm = deref_locked(oldest->tcpm_next); tm;
+                    tm = deref_locked(tm->tcpm_next)) {
+-                      if (time_before(tm->tcpm_stamp, oldest->tcpm_stamp))
++                      if (time_before(READ_ONCE(tm->tcpm_stamp),
++                                      READ_ONCE(oldest->tcpm_stamp)))
+                               oldest = tm;
+               }
+               tm = oldest;
+@@ -431,7 +438,7 @@ void tcp_update_metrics(struct sock *sk)
+                                              tp->reordering);
+               }
+       }
+-      tm->tcpm_stamp = jiffies;
++      WRITE_ONCE(tm->tcpm_stamp, jiffies);
+ out_unlock:
+       rcu_read_unlock();
+ }
+@@ -642,7 +649,7 @@ static int tcp_metrics_fill_info(struct sk_buff *msg,
+       }
+       if (nla_put_msecs(msg, TCP_METRICS_ATTR_AGE,
+-                        jiffies - tm->tcpm_stamp,
++                        jiffies - READ_ONCE(tm->tcpm_stamp),
+                         TCP_METRICS_ATTR_PAD) < 0)
+               goto nla_put_failure;
+-- 
+2.40.1
+
diff --git a/queue-5.4/tcp_metrics-annotate-data-races-around-tm-tcpm_vals.patch b/queue-5.4/tcp_metrics-annotate-data-races-around-tm-tcpm_vals.patch
new file mode 100644 (file)
index 0000000..002c51a
--- /dev/null
@@ -0,0 +1,85 @@
+From ae4b0cb37d2715f02184ae4e91ac925e36646ffc Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 2 Aug 2023 13:14:58 +0000
+Subject: tcp_metrics: annotate data-races around tm->tcpm_vals[]
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit 8c4d04f6b443869d25e59822f7cec88d647028a9 ]
+
+tm->tcpm_vals[] values can be read or written locklessly.
+
+Add needed READ_ONCE()/WRITE_ONCE() to document this,
+and force the use of tcp_metric_get() and tcp_metric_set().
+
+Fixes: 51c5d0c4b169 ("tcp: Maintain dynamic metrics in local cache.")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Reviewed-by: David Ahern <dsahern@kernel.org>
+Reviewed-by: Kuniyuki Iwashima <kuniyu@amazon.com>
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/ipv4/tcp_metrics.c | 23 ++++++++++++++---------
+ 1 file changed, 14 insertions(+), 9 deletions(-)
+
+diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c
+index 2dac82154af85..29261aa96eb40 100644
+--- a/net/ipv4/tcp_metrics.c
++++ b/net/ipv4/tcp_metrics.c
+@@ -63,17 +63,19 @@ static bool tcp_metric_locked(struct tcp_metrics_block *tm,
+       return READ_ONCE(tm->tcpm_lock) & (1 << idx);
+ }
+-static u32 tcp_metric_get(struct tcp_metrics_block *tm,
++static u32 tcp_metric_get(const struct tcp_metrics_block *tm,
+                         enum tcp_metric_index idx)
+ {
+-      return tm->tcpm_vals[idx];
++      /* Paired with WRITE_ONCE() in tcp_metric_set() */
++      return READ_ONCE(tm->tcpm_vals[idx]);
+ }
+ static void tcp_metric_set(struct tcp_metrics_block *tm,
+                          enum tcp_metric_index idx,
+                          u32 val)
+ {
+-      tm->tcpm_vals[idx] = val;
++      /* Paired with READ_ONCE() in tcp_metric_get() */
++      WRITE_ONCE(tm->tcpm_vals[idx], val);
+ }
+ static bool addr_same(const struct inetpeer_addr *a,
+@@ -115,13 +117,16 @@ static void tcpm_suck_dst(struct tcp_metrics_block *tm,
+       WRITE_ONCE(tm->tcpm_lock, val);
+       msval = dst_metric_raw(dst, RTAX_RTT);
+-      tm->tcpm_vals[TCP_METRIC_RTT] = msval * USEC_PER_MSEC;
++      tcp_metric_set(tm, TCP_METRIC_RTT, msval * USEC_PER_MSEC);
+       msval = dst_metric_raw(dst, RTAX_RTTVAR);
+-      tm->tcpm_vals[TCP_METRIC_RTTVAR] = msval * USEC_PER_MSEC;
+-      tm->tcpm_vals[TCP_METRIC_SSTHRESH] = dst_metric_raw(dst, RTAX_SSTHRESH);
+-      tm->tcpm_vals[TCP_METRIC_CWND] = dst_metric_raw(dst, RTAX_CWND);
+-      tm->tcpm_vals[TCP_METRIC_REORDERING] = dst_metric_raw(dst, RTAX_REORDERING);
++      tcp_metric_set(tm, TCP_METRIC_RTTVAR, msval * USEC_PER_MSEC);
++      tcp_metric_set(tm, TCP_METRIC_SSTHRESH,
++                     dst_metric_raw(dst, RTAX_SSTHRESH));
++      tcp_metric_set(tm, TCP_METRIC_CWND,
++                     dst_metric_raw(dst, RTAX_CWND));
++      tcp_metric_set(tm, TCP_METRIC_REORDERING,
++                     dst_metric_raw(dst, RTAX_REORDERING));
+       if (fastopen_clear) {
+               tm->tcpm_fastopen.mss = 0;
+               tm->tcpm_fastopen.syn_loss = 0;
+@@ -662,7 +667,7 @@ static int tcp_metrics_fill_info(struct sk_buff *msg,
+               if (!nest)
+                       goto nla_put_failure;
+               for (i = 0; i < TCP_METRIC_MAX_KERNEL + 1; i++) {
+-                      u32 val = tm->tcpm_vals[i];
++                      u32 val = tcp_metric_get(tm, i);
+                       if (!val)
+                               continue;
+-- 
+2.40.1
+
diff --git a/queue-5.4/tcp_metrics-fix-addr_same-helper.patch b/queue-5.4/tcp_metrics-fix-addr_same-helper.patch
new file mode 100644 (file)
index 0000000..b043a75
--- /dev/null
@@ -0,0 +1,46 @@
+From 826092f3bf5fc15cb80742a236be21e8fe528b07 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 2 Aug 2023 13:14:55 +0000
+Subject: tcp_metrics: fix addr_same() helper
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit e6638094d7af6c7b9dcca05ad009e79e31b4f670 ]
+
+Because v4 and v6 families use separate inetpeer trees (respectively
+net->ipv4.peers and net->ipv6.peers), inetpeer_addr_cmp(a, b) assumes
+a & b share the same family.
+
+tcp_metrics use a common hash table, where entries can have different
+families.
+
+We must therefore make sure to not call inetpeer_addr_cmp()
+if the families do not match.
+
+Fixes: d39d14ffa24c ("net: Add helper function to compare inetpeer addresses")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Reviewed-by: David Ahern <dsahern@kernel.org>
+Reviewed-by: Kuniyuki Iwashima <kuniyu@amazon.com>
+Link: https://lore.kernel.org/r/20230802131500.1478140-2-edumazet@google.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/ipv4/tcp_metrics.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c
+index 0af6249a993af..f3fb19df72e1c 100644
+--- a/net/ipv4/tcp_metrics.c
++++ b/net/ipv4/tcp_metrics.c
+@@ -78,7 +78,7 @@ static void tcp_metric_set(struct tcp_metrics_block *tm,
+ static bool addr_same(const struct inetpeer_addr *a,
+                     const struct inetpeer_addr *b)
+ {
+-      return inetpeer_addr_cmp(a, b) == 0;
++      return (a->family == b->family) && !inetpeer_addr_cmp(a, b);
+ }
+ struct tcpm_hash_bucket {
+-- 
+2.40.1
+
diff --git a/queue-5.4/tcp_metrics-fix-data-race-in-tcpm_suck_dst-vs-fastop.patch b/queue-5.4/tcp_metrics-fix-data-race-in-tcpm_suck_dst-vs-fastop.patch
new file mode 100644 (file)
index 0000000..4f14ce9
--- /dev/null
@@ -0,0 +1,85 @@
+From 009a6aa28f35122e7ec9c4dad022b440a505a498 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 2 Aug 2023 13:15:00 +0000
+Subject: tcp_metrics: fix data-race in tcpm_suck_dst() vs fastopen
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit ddf251fa2bc1d3699eec0bae6ed0bc373b8fda79 ]
+
+Whenever tcpm_new() reclaims an old entry, tcpm_suck_dst()
+would overwrite data that could be read from tcp_fastopen_cache_get()
+or tcp_metrics_fill_info().
+
+We need to acquire fastopen_seqlock to maintain consistency.
+
+For newly allocated objects, tcpm_new() can switch to kzalloc()
+to avoid an extra fastopen_seqlock acquisition.
+
+Fixes: 1fe4c481ba63 ("net-tcp: Fast Open client - cookie cache")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Cc: Yuchung Cheng <ycheng@google.com>
+Reviewed-by: Kuniyuki Iwashima <kuniyu@amazon.com>
+Link: https://lore.kernel.org/r/20230802131500.1478140-7-edumazet@google.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/ipv4/tcp_metrics.c | 9 +++++----
+ 1 file changed, 5 insertions(+), 4 deletions(-)
+
+diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c
+index 3d0452bb6c2a1..e89e19a6852ce 100644
+--- a/net/ipv4/tcp_metrics.c
++++ b/net/ipv4/tcp_metrics.c
+@@ -93,6 +93,7 @@ static struct tcpm_hash_bucket       *tcp_metrics_hash __read_mostly;
+ static unsigned int           tcp_metrics_hash_log __read_mostly;
+ static DEFINE_SPINLOCK(tcp_metrics_lock);
++static DEFINE_SEQLOCK(fastopen_seqlock);
+ static void tcpm_suck_dst(struct tcp_metrics_block *tm,
+                         const struct dst_entry *dst,
+@@ -129,11 +130,13 @@ static void tcpm_suck_dst(struct tcp_metrics_block *tm,
+       tcp_metric_set(tm, TCP_METRIC_REORDERING,
+                      dst_metric_raw(dst, RTAX_REORDERING));
+       if (fastopen_clear) {
++              write_seqlock(&fastopen_seqlock);
+               tm->tcpm_fastopen.mss = 0;
+               tm->tcpm_fastopen.syn_loss = 0;
+               tm->tcpm_fastopen.try_exp = 0;
+               tm->tcpm_fastopen.cookie.exp = false;
+               tm->tcpm_fastopen.cookie.len = 0;
++              write_sequnlock(&fastopen_seqlock);
+       }
+ }
+@@ -194,7 +197,7 @@ static struct tcp_metrics_block *tcpm_new(struct dst_entry *dst,
+               }
+               tm = oldest;
+       } else {
+-              tm = kmalloc(sizeof(*tm), GFP_ATOMIC);
++              tm = kzalloc(sizeof(*tm), GFP_ATOMIC);
+               if (!tm)
+                       goto out_unlock;
+       }
+@@ -204,7 +207,7 @@ static struct tcp_metrics_block *tcpm_new(struct dst_entry *dst,
+       tm->tcpm_saddr = *saddr;
+       tm->tcpm_daddr = *daddr;
+-      tcpm_suck_dst(tm, dst, true);
++      tcpm_suck_dst(tm, dst, reclaim);
+       if (likely(!reclaim)) {
+               tm->tcpm_next = tcp_metrics_hash[hash].chain;
+@@ -551,8 +554,6 @@ bool tcp_peer_is_proven(struct request_sock *req, struct dst_entry *dst)
+       return ret;
+ }
+-static DEFINE_SEQLOCK(fastopen_seqlock);
+-
+ void tcp_fastopen_cache_get(struct sock *sk, u16 *mss,
+                           struct tcp_fastopen_cookie *cookie)
+ {
+-- 
+2.40.1
+
diff --git a/queue-5.4/word-at-a-time-use-the-same-return-type-for-has_zero.patch b/queue-5.4/word-at-a-time-use-the-same-return-type-for-has_zero.patch
new file mode 100644 (file)
index 0000000..2c0d63b
--- /dev/null
@@ -0,0 +1,74 @@
+From 17a3d2fbb253e6da25631ee9197e405aa671bbf4 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 1 Aug 2023 15:22:17 -0700
+Subject: word-at-a-time: use the same return type for has_zero regardless of
+ endianness
+
+From: ndesaulniers@google.com <ndesaulniers@google.com>
+
+[ Upstream commit 79e8328e5acbe691bbde029a52c89d70dcbc22f3 ]
+
+Compiling big-endian targets with Clang produces the diagnostic:
+
+  fs/namei.c:2173:13: warning: use of bitwise '|' with boolean operands [-Wbitwise-instead-of-logical]
+       } while (!(has_zero(a, &adata, &constants) | has_zero(b, &bdata, &constants)));
+                 ~^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+                                               ||
+  fs/namei.c:2173:13: note: cast one or both operands to int to silence this warning
+
+It appears that when has_zero was introduced, two definitions were
+produced with different signatures (in particular different return
+types).
+
+Looking at the usage in hash_name() in fs/namei.c, I suspect that
+has_zero() is meant to be invoked twice per while loop iteration; a
+short-circuiting logical-or would skip the second call, and so not
+update `bdata`, whenever `a` did have zeros.  So I think it is better
+to always return an unsigned long rather than a bool, instead of
+changing the while loop in hash_name() to use logical-or.
+
+[ Also changed powerpc version to do the same  - Linus ]
+
+Link: https://github.com/ClangBuiltLinux/linux/issues/1832
+Link: https://lore.kernel.org/lkml/20230801-bitwise-v1-1-799bec468dc4@google.com/
+Fixes: 36126f8f2ed8 ("word-at-a-time: make the interfaces truly generic")
+Debugged-by: Nathan Chancellor <nathan@kernel.org>
+Signed-off-by: Nick Desaulniers <ndesaulniers@google.com>
+Acked-by: Heiko Carstens <hca@linux.ibm.com>
+Cc: Arnd Bergmann <arnd@arndb.de>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/powerpc/include/asm/word-at-a-time.h | 2 +-
+ include/asm-generic/word-at-a-time.h      | 2 +-
+ 2 files changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/arch/powerpc/include/asm/word-at-a-time.h b/arch/powerpc/include/asm/word-at-a-time.h
+index f3f4710d4ff52..99129b0cd8b8a 100644
+--- a/arch/powerpc/include/asm/word-at-a-time.h
++++ b/arch/powerpc/include/asm/word-at-a-time.h
+@@ -34,7 +34,7 @@ static inline long find_zero(unsigned long mask)
+       return leading_zero_bits >> 3;
+ }
+-static inline bool has_zero(unsigned long val, unsigned long *data, const struct word_at_a_time *c)
++static inline unsigned long has_zero(unsigned long val, unsigned long *data, const struct word_at_a_time *c)
+ {
+       unsigned long rhs = val | c->low_bits;
+       *data = rhs;
+diff --git a/include/asm-generic/word-at-a-time.h b/include/asm-generic/word-at-a-time.h
+index 20c93f08c9933..95a1d214108a5 100644
+--- a/include/asm-generic/word-at-a-time.h
++++ b/include/asm-generic/word-at-a-time.h
+@@ -38,7 +38,7 @@ static inline long find_zero(unsigned long mask)
+       return (mask >> 8) ? byte : byte + 1;
+ }
+-static inline bool has_zero(unsigned long val, unsigned long *data, const struct word_at_a_time *c)
++static inline unsigned long has_zero(unsigned long val, unsigned long *data, const struct word_at_a_time *c)
+ {
+       unsigned long rhs = val | c->low_bits;
+       *data = rhs;
+-- 
+2.40.1
+