--- /dev/null
+From a645d88fa9de1bfe63729527e48559436ee01df2 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 28 Jul 2023 08:44:11 +0200
+Subject: bpf: sockmap: Remove preempt_disable in sock_map_sk_acquire
+
+From: Tomas Glozar <tglozar@redhat.com>
+
+[ Upstream commit 13d2618b48f15966d1adfe1ff6a1985f5eef40ba ]
+
+Disabling preemption in sock_map_sk_acquire conflicts with GFP_ATOMIC
+allocation later in sk_psock_init_link on PREEMPT_RT kernels, since
+GFP_ATOMIC might sleep on RT (see bpf: Make BPF and PREEMPT_RT co-exist
+patchset notes for details).
+
+This causes calling bpf_map_update_elem on BPF_MAP_TYPE_SOCKMAP maps to
+BUG (sleeping function called from invalid context) on RT kernels.
+
+preempt_disable was introduced together with lock_sk and rcu_read_lock
+in commit 99ba2b5aba24e ("bpf: sockhash, disallow bpf_tcp_close and update
+in parallel"), probably to match disabled migration of BPF programs, and
+is no longer necessary.
+
+Remove preempt_disable to fix BUG in sock_map_update_common on RT.
+
+Signed-off-by: Tomas Glozar <tglozar@redhat.com>
+Reviewed-by: Jakub Sitnicki <jakub@cloudflare.com>
+Link: https://lore.kernel.org/all/20200224140131.461979697@linutronix.de/
+Fixes: 99ba2b5aba24 ("bpf: sockhash, disallow bpf_tcp_close and update in parallel")
+Reviewed-by: John Fastabend <john.fastabend@gmail.com>
+Link: https://lore.kernel.org/r/20230728064411.305576-1-tglozar@redhat.com
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/core/sock_map.c | 2 --
+ 1 file changed, 2 deletions(-)
+
+diff --git a/net/core/sock_map.c b/net/core/sock_map.c
+index 5bce6d4d20573..5b82ff0e2680f 100644
+--- a/net/core/sock_map.c
++++ b/net/core/sock_map.c
+@@ -115,7 +115,6 @@ static void sock_map_sk_acquire(struct sock *sk)
+ __acquires(&sk->sk_lock.slock)
+ {
+ lock_sock(sk);
+- preempt_disable();
+ rcu_read_lock();
+ }
+
+@@ -123,7 +122,6 @@ static void sock_map_sk_release(struct sock *sk)
+ __releases(&sk->sk_lock.slock)
+ {
+ rcu_read_unlock();
+- preempt_enable();
+ release_sock(sk);
+ }
+
+--
+2.40.1
+
--- /dev/null
+From 1294b643c934eb581fa0901fd70eaa12a6586afc Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 13 Jul 2020 16:43:21 +0200
+Subject: driver core: add device probe log helper
+
+From: Andrzej Hajda <a.hajda@samsung.com>
+
+[ Upstream commit a787e5400a1ceeb0ef92d71ec43aeb35b1fa1334 ]
+
+During probe, every time a driver gets a resource it should usually check for
+an error, print a message if it is not -EPROBE_DEFER, and return the error.
+This pattern is simple but requires adding few lines after any resource
+acquisition code, as a result it is often omitted or implemented only
+partially.
+dev_err_probe helps to replace such code sequences with simple call,
+so code:
+ if (err != -EPROBE_DEFER)
+ dev_err(dev, ...);
+ return err;
+becomes:
+ return dev_err_probe(dev, err, ...);
+
+Signed-off-by: Andrzej Hajda <a.hajda@samsung.com>
+Reviewed-by: Rafael J. Wysocki <rafael@kernel.org>
+Reviewed-by: Mark Brown <broonie@kernel.org>
+Reviewed-by: Andy Shevchenko <andy.shevchenko@gmail.com>
+Link: https://lore.kernel.org/r/20200713144324.23654-2-a.hajda@samsung.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Stable-dep-of: ef45e8400f5b ("net: ll_temac: fix error checking of irq_of_parse_and_map()")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/base/core.c | 42 ++++++++++++++++++++++++++++++++++++++++++
+ include/linux/device.h | 3 +++
+ 2 files changed, 45 insertions(+)
+
+diff --git a/drivers/base/core.c b/drivers/base/core.c
+index f8e157ede44f8..4949aba7e1880 100644
+--- a/drivers/base/core.c
++++ b/drivers/base/core.c
+@@ -3399,6 +3399,48 @@ define_dev_printk_level(_dev_info, KERN_INFO);
+
+ #endif
+
++/**
++ * dev_err_probe - probe error check and log helper
++ * @dev: the pointer to the struct device
++ * @err: error value to test
++ * @fmt: printf-style format string
++ * @...: arguments as specified in the format string
++ *
++ * This helper implements common pattern present in probe functions for error
++ * checking: print debug or error message depending if the error value is
++ * -EPROBE_DEFER and propagate error upwards.
++ * It replaces code sequence:
++ * if (err != -EPROBE_DEFER)
++ * dev_err(dev, ...);
++ * else
++ * dev_dbg(dev, ...);
++ * return err;
++ * with
++ * return dev_err_probe(dev, err, ...);
++ *
++ * Returns @err.
++ *
++ */
++int dev_err_probe(const struct device *dev, int err, const char *fmt, ...)
++{
++ struct va_format vaf;
++ va_list args;
++
++ va_start(args, fmt);
++ vaf.fmt = fmt;
++ vaf.va = &args;
++
++ if (err != -EPROBE_DEFER)
++ dev_err(dev, "error %d: %pV", err, &vaf);
++ else
++ dev_dbg(dev, "error %d: %pV", err, &vaf);
++
++ va_end(args);
++
++ return err;
++}
++EXPORT_SYMBOL_GPL(dev_err_probe);
++
+ static inline bool fwnode_is_primary(struct fwnode_handle *fwnode)
+ {
+ return fwnode && !IS_ERR(fwnode->secondary);
+diff --git a/include/linux/device.h b/include/linux/device.h
+index d74275e2047a4..c7be3a8073ec3 100644
+--- a/include/linux/device.h
++++ b/include/linux/device.h
+@@ -1871,6 +1871,9 @@ do { \
+ WARN_ONCE(condition, "%s %s: " format, \
+ dev_driver_string(dev), dev_name(dev), ## arg)
+
++extern __printf(3, 4)
++int dev_err_probe(const struct device *dev, int err, const char *fmt, ...);
++
+ /* Create alias, so I can be autoloaded. */
+ #define MODULE_ALIAS_CHARDEV(major,minor) \
+ MODULE_ALIAS("char-major-" __stringify(major) "-" __stringify(minor))
+--
+2.40.1
+
--- /dev/null
+From 6208e0ab4491793fa444a0e2d46381a6f450b4b9 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 1 Aug 2023 14:43:18 +0800
+Subject: ip6mr: Fix skb_under_panic in ip6mr_cache_report()
+
+From: Yue Haibing <yuehaibing@huawei.com>
+
+[ Upstream commit 30e0191b16e8a58e4620fa3e2839ddc7b9d4281c ]
+
+skbuff: skb_under_panic: text:ffffffff88771f69 len:56 put:-4
+ head:ffff88805f86a800 data:ffff887f5f86a850 tail:0x88 end:0x2c0 dev:pim6reg
+ ------------[ cut here ]------------
+ kernel BUG at net/core/skbuff.c:192!
+ invalid opcode: 0000 [#1] PREEMPT SMP KASAN
+ CPU: 2 PID: 22968 Comm: kworker/2:11 Not tainted 6.5.0-rc3-00044-g0a8db05b571a #236
+ Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.15.0-1 04/01/2014
+ Workqueue: ipv6_addrconf addrconf_dad_work
+ RIP: 0010:skb_panic+0x152/0x1d0
+ Call Trace:
+ <TASK>
+ skb_push+0xc4/0xe0
+ ip6mr_cache_report+0xd69/0x19b0
+ reg_vif_xmit+0x406/0x690
+ dev_hard_start_xmit+0x17e/0x6e0
+ __dev_queue_xmit+0x2d6a/0x3d20
+ vlan_dev_hard_start_xmit+0x3ab/0x5c0
+ dev_hard_start_xmit+0x17e/0x6e0
+ __dev_queue_xmit+0x2d6a/0x3d20
+ neigh_connected_output+0x3ed/0x570
+ ip6_finish_output2+0x5b5/0x1950
+ ip6_finish_output+0x693/0x11c0
+ ip6_output+0x24b/0x880
+ NF_HOOK.constprop.0+0xfd/0x530
+ ndisc_send_skb+0x9db/0x1400
+ ndisc_send_rs+0x12a/0x6c0
+ addrconf_dad_completed+0x3c9/0xea0
+ addrconf_dad_work+0x849/0x1420
+ process_one_work+0xa22/0x16e0
+ worker_thread+0x679/0x10c0
+ ret_from_fork+0x28/0x60
+ ret_from_fork_asm+0x11/0x20
+
+When a vlan device is set up on dev pim6reg, a DAD NS packet may be sent via reg_vif_xmit().
+reg_vif_xmit()
+ ip6mr_cache_report()
+ skb_push(skb, -skb_network_offset(pkt));//skb_network_offset(pkt) is 4
+And skb_push declared as:
+ void *skb_push(struct sk_buff *skb, unsigned int len);
+ skb->data -= len;
+ //0xffff88805f86a84c - 0xfffffffc = 0xffff887f5f86a850
+skb->data is set to 0xffff887f5f86a850, which is an invalid memory address, causing skb_push() to fail.
+
+Fixes: 14fb64e1f449 ("[IPV6] MROUTE: Support PIM-SM (SSM).")
+Signed-off-by: Yue Haibing <yuehaibing@huawei.com>
+Reviewed-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/ipv6/ip6mr.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
+index 6248e00c2bf72..6642bc7b9870f 100644
+--- a/net/ipv6/ip6mr.c
++++ b/net/ipv6/ip6mr.c
+@@ -1065,7 +1065,7 @@ static int ip6mr_cache_report(struct mr_table *mrt, struct sk_buff *pkt,
+ And all this only to mangle msg->im6_msgtype and
+ to set msg->im6_mbz to "mbz" :-)
+ */
+- skb_push(skb, -skb_network_offset(pkt));
++ __skb_pull(skb, skb_network_offset(pkt));
+
+ skb_push(skb, sizeof(*msg));
+ skb_reset_transport_header(skb);
+--
+2.40.1
+
--- /dev/null
+From 9cb08a9a1f2ca26200cc1759c9d1572b5b49c0b8 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 27 Jul 2023 20:29:39 +0200
+Subject: KVM: s390: fix sthyi error handling
+
+From: Heiko Carstens <hca@linux.ibm.com>
+
+[ Upstream commit 0c02cc576eac161601927b41634f80bfd55bfa9e ]
+
+Commit 9fb6c9b3fea1 ("s390/sthyi: add cache to store hypervisor info")
+added cache handling for store hypervisor info. This also changed the
+possible return code for sthyi_fill().
+
+Instead of only returning a condition code like the sthyi instruction would
+do, it can now also return a negative error value (-ENOMEM). handle_sthyi()
+was not changed accordingly. In case of an error, the negative error value
+would be incorrectly injected into the guest PSW.
+
+Add proper error handling to prevent this, and update the comment which
+describes the possible return values of sthyi_fill().
+
+Fixes: 9fb6c9b3fea1 ("s390/sthyi: add cache to store hypervisor info")
+Reviewed-by: Christian Borntraeger <borntraeger@linux.ibm.com>
+Link: https://lore.kernel.org/r/20230727182939.2050744-1-hca@linux.ibm.com
+Signed-off-by: Heiko Carstens <hca@linux.ibm.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/s390/kernel/sthyi.c | 6 +++---
+ arch/s390/kvm/intercept.c | 9 ++++++---
+ 2 files changed, 9 insertions(+), 6 deletions(-)
+
+diff --git a/arch/s390/kernel/sthyi.c b/arch/s390/kernel/sthyi.c
+index 888cc2f166db7..ce6084e28d904 100644
+--- a/arch/s390/kernel/sthyi.c
++++ b/arch/s390/kernel/sthyi.c
+@@ -460,9 +460,9 @@ static int sthyi_update_cache(u64 *rc)
+ *
+ * Fills the destination with system information returned by the STHYI
+ * instruction. The data is generated by emulation or execution of STHYI,
+- * if available. The return value is the condition code that would be
+- * returned, the rc parameter is the return code which is passed in
+- * register R2 + 1.
++ * if available. The return value is either a negative error value or
++ * the condition code that would be returned, the rc parameter is the
++ * return code which is passed in register R2 + 1.
+ */
+ int sthyi_fill(void *dst, u64 *rc)
+ {
+diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c
+index a389fa85cca2d..5450d43d26ea5 100644
+--- a/arch/s390/kvm/intercept.c
++++ b/arch/s390/kvm/intercept.c
+@@ -360,8 +360,8 @@ static int handle_partial_execution(struct kvm_vcpu *vcpu)
+ */
+ int handle_sthyi(struct kvm_vcpu *vcpu)
+ {
+- int reg1, reg2, r = 0;
+- u64 code, addr, cc = 0, rc = 0;
++ int reg1, reg2, cc = 0, r = 0;
++ u64 code, addr, rc = 0;
+ struct sthyi_sctns *sctns = NULL;
+
+ if (!test_kvm_facility(vcpu->kvm, 74))
+@@ -392,7 +392,10 @@ int handle_sthyi(struct kvm_vcpu *vcpu)
+ return -ENOMEM;
+
+ cc = sthyi_fill(sctns, &rc);
+-
++ if (cc < 0) {
++ free_page((unsigned long)sctns);
++ return cc;
++ }
+ out:
+ if (!cc) {
+ r = write_guest(vcpu, addr, reg2, sctns, PAGE_SIZE);
+--
+2.40.1
+
--- /dev/null
+From 53d16071fc47e48ef818e06a35382907195ba225 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 27 Jul 2023 08:56:19 +0000
+Subject: mISDN: hfcpci: Fix potential deadlock on &hc->lock
+
+From: Chengfeng Ye <dg573847474@gmail.com>
+
+[ Upstream commit 56c6be35fcbed54279df0a2c9e60480a61841d6f ]
+
+As &hc->lock is acquired by both timer _hfcpci_softirq() and hardirq
+hfcpci_int(), the timer should disable irq before lock acquisition
+otherwise deadlock could happen if the timer is preempted by the hard irq.
+
+Possible deadlock scenario:
+hfcpci_softirq() (timer)
+ -> _hfcpci_softirq()
+ -> spin_lock(&hc->lock);
+ <irq interruption>
+ -> hfcpci_int()
+ -> spin_lock(&hc->lock); (deadlock here)
+
+This flaw was found by an experimental static analysis tool I am developing
+for irq-related deadlock.
+
+The tentative patch fixes the potential deadlock by spin_lock_irq()
+in timer.
+
+Fixes: b36b654a7e82 ("mISDN: Create /sys/class/mISDN")
+Signed-off-by: Chengfeng Ye <dg573847474@gmail.com>
+Link: https://lore.kernel.org/r/20230727085619.7419-1-dg573847474@gmail.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/isdn/hardware/mISDN/hfcpci.c | 10 +++++-----
+ 1 file changed, 5 insertions(+), 5 deletions(-)
+
+diff --git a/drivers/isdn/hardware/mISDN/hfcpci.c b/drivers/isdn/hardware/mISDN/hfcpci.c
+index 41ff2e3dc8430..0a683a66fc612 100644
+--- a/drivers/isdn/hardware/mISDN/hfcpci.c
++++ b/drivers/isdn/hardware/mISDN/hfcpci.c
+@@ -839,7 +839,7 @@ hfcpci_fill_fifo(struct bchannel *bch)
+ *z1t = cpu_to_le16(new_z1); /* now send data */
+ if (bch->tx_idx < bch->tx_skb->len)
+ return;
+- dev_kfree_skb(bch->tx_skb);
++ dev_kfree_skb_any(bch->tx_skb);
+ if (get_next_bframe(bch))
+ goto next_t_frame;
+ return;
+@@ -895,7 +895,7 @@ hfcpci_fill_fifo(struct bchannel *bch)
+ }
+ bz->za[new_f1].z1 = cpu_to_le16(new_z1); /* for next buffer */
+ bz->f1 = new_f1; /* next frame */
+- dev_kfree_skb(bch->tx_skb);
++ dev_kfree_skb_any(bch->tx_skb);
+ get_next_bframe(bch);
+ }
+
+@@ -1119,7 +1119,7 @@ tx_birq(struct bchannel *bch)
+ if (bch->tx_skb && bch->tx_idx < bch->tx_skb->len)
+ hfcpci_fill_fifo(bch);
+ else {
+- dev_kfree_skb(bch->tx_skb);
++ dev_kfree_skb_any(bch->tx_skb);
+ if (get_next_bframe(bch))
+ hfcpci_fill_fifo(bch);
+ }
+@@ -2272,7 +2272,7 @@ _hfcpci_softirq(struct device *dev, void *unused)
+ return 0;
+
+ if (hc->hw.int_m2 & HFCPCI_IRQ_ENABLE) {
+- spin_lock(&hc->lock);
++ spin_lock_irq(&hc->lock);
+ bch = Sel_BCS(hc, hc->hw.bswapped ? 2 : 1);
+ if (bch && bch->state == ISDN_P_B_RAW) { /* B1 rx&tx */
+ main_rec_hfcpci(bch);
+@@ -2283,7 +2283,7 @@ _hfcpci_softirq(struct device *dev, void *unused)
+ main_rec_hfcpci(bch);
+ tx_birq(bch);
+ }
+- spin_unlock(&hc->lock);
++ spin_unlock_irq(&hc->lock);
+ }
+ return 0;
+ }
+--
+2.40.1
+
--- /dev/null
+From 8feab57693cfbbe011d879f465bb1ef42603e8ca Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 28 Jul 2023 15:03:17 +0000
+Subject: net: add missing data-race annotation for sk_ll_usec
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit e5f0d2dd3c2faa671711dac6d3ff3cef307bcfe3 ]
+
+In a prior commit I forgot that sk_getsockopt() reads
+sk->sk_ll_usec without holding a lock.
+
+Fixes: 0dbffbb5335a ("net: annotate data race around sk_ll_usec")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/core/sock.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/net/core/sock.c b/net/core/sock.c
+index e1204da609a1b..636427d400d7f 100644
+--- a/net/core/sock.c
++++ b/net/core/sock.c
+@@ -1474,7 +1474,7 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
+
+ #ifdef CONFIG_NET_RX_BUSY_POLL
+ case SO_BUSY_POLL:
+- v.val = sk->sk_ll_usec;
++ v.val = READ_ONCE(sk->sk_ll_usec);
+ break;
+ #endif
+
+--
+2.40.1
+
--- /dev/null
+From 3e0684dc4a13ae50a1c000ff7f924fb842ea6259 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 28 Jul 2023 15:03:16 +0000
+Subject: net: add missing data-race annotations around sk->sk_peek_off
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit 11695c6e966b0ec7ed1d16777d294cef865a5c91 ]
+
+sk_getsockopt() runs locklessly, thus we need to annotate the read
+of sk->sk_peek_off.
+
+While we are at it, add corresponding annotations to sk_set_peek_off()
+and unix_set_peek_off().
+
+Fixes: b9bb53f3836f ("sock: convert sk_peek_offset functions to WRITE_ONCE")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Cc: Willem de Bruijn <willemb@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/core/sock.c | 4 ++--
+ net/unix/af_unix.c | 2 +-
+ 2 files changed, 3 insertions(+), 3 deletions(-)
+
+diff --git a/net/core/sock.c b/net/core/sock.c
+index 6d695da921094..e1204da609a1b 100644
+--- a/net/core/sock.c
++++ b/net/core/sock.c
+@@ -1444,7 +1444,7 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
+ if (!sock->ops->set_peek_off)
+ return -EOPNOTSUPP;
+
+- v.val = sk->sk_peek_off;
++ v.val = READ_ONCE(sk->sk_peek_off);
+ break;
+ case SO_NOFCS:
+ v.val = sock_flag(sk, SOCK_NOFCS);
+@@ -2652,7 +2652,7 @@ EXPORT_SYMBOL(__sk_mem_reclaim);
+
+ int sk_set_peek_off(struct sock *sk, int val)
+ {
+- sk->sk_peek_off = val;
++ WRITE_ONCE(sk->sk_peek_off, val);
+ return 0;
+ }
+ EXPORT_SYMBOL_GPL(sk_set_peek_off);
+diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
+index 01fd049da104a..f966b64d2939a 100644
+--- a/net/unix/af_unix.c
++++ b/net/unix/af_unix.c
+@@ -701,7 +701,7 @@ static int unix_set_peek_off(struct sock *sk, int val)
+ if (mutex_lock_interruptible(&u->iolock))
+ return -EINTR;
+
+- sk->sk_peek_off = val;
++ WRITE_ONCE(sk->sk_peek_off, val);
+ mutex_unlock(&u->iolock);
+
+ return 0;
+--
+2.40.1
+
--- /dev/null
+From a66bb1ab22fc670307c90897bfb25a83f8d38c14 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 28 Jul 2023 15:03:14 +0000
+Subject: net: add missing READ_ONCE(sk->sk_rcvbuf) annotation
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit b4b553253091cafe9ec38994acf42795e073bef5 ]
+
+In a prior commit, I forgot to change sk_getsockopt()
+when reading sk->sk_rcvbuf locklessly.
+
+Fixes: ebb3b78db7bf ("tcp: annotate sk->sk_rcvbuf lockless reads")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/core/sock.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/net/core/sock.c b/net/core/sock.c
+index e6d26cfba32d5..6d695da921094 100644
+--- a/net/core/sock.c
++++ b/net/core/sock.c
+@@ -1262,7 +1262,7 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
+ break;
+
+ case SO_RCVBUF:
+- v.val = sk->sk_rcvbuf;
++ v.val = READ_ONCE(sk->sk_rcvbuf);
+ break;
+
+ case SO_REUSEADDR:
+--
+2.40.1
+
--- /dev/null
+From 510bfde141d282599b53223172b4fad554e9e0ea Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 28 Jul 2023 15:03:11 +0000
+Subject: net: add missing READ_ONCE(sk->sk_rcvlowat) annotation
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit e6d12bdb435d23ff6c1890c852d85408a2f496ee ]
+
+In a prior commit, I forgot to change sk_getsockopt()
+when reading sk->sk_rcvlowat locklessly.
+
+Fixes: eac66402d1c3 ("net: annotate sk->sk_rcvlowat lockless reads")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/core/sock.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/net/core/sock.c b/net/core/sock.c
+index 539c39ad1e488..a73111be68581 100644
+--- a/net/core/sock.c
++++ b/net/core/sock.c
+@@ -1350,7 +1350,7 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
+ break;
+
+ case SO_RCVLOWAT:
+- v.val = sk->sk_rcvlowat;
++ v.val = READ_ONCE(sk->sk_rcvlowat);
+ break;
+
+ case SO_SNDLOWAT:
+--
+2.40.1
+
--- /dev/null
+From 01ab33478ac6646afcb236b3c1ecf6084c4d30c4 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 28 Jul 2023 15:03:13 +0000
+Subject: net: add missing READ_ONCE(sk->sk_sndbuf) annotation
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit 74bc084327c643499474ba75df485607da37dd6e ]
+
+In a prior commit, I forgot to change sk_getsockopt()
+when reading sk->sk_sndbuf locklessly.
+
+Fixes: e292f05e0df7 ("tcp: annotate sk->sk_sndbuf lockless reads")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/core/sock.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/net/core/sock.c b/net/core/sock.c
+index a73111be68581..e6d26cfba32d5 100644
+--- a/net/core/sock.c
++++ b/net/core/sock.c
+@@ -1258,7 +1258,7 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
+ break;
+
+ case SO_SNDBUF:
+- v.val = sk->sk_sndbuf;
++ v.val = READ_ONCE(sk->sk_sndbuf);
+ break;
+
+ case SO_RCVBUF:
+--
+2.40.1
+
--- /dev/null
+From 453396818d23d8b006379c482cb71b839407d1b8 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 28 Jul 2023 15:03:10 +0000
+Subject: net: annotate data-races around sk->sk_max_pacing_rate
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit ea7f45ef77b39e72244d282e47f6cb1ef4135cd2 ]
+
+sk_getsockopt() runs locklessly. This means sk->sk_max_pacing_rate
+can be read while other threads are changing its value.
+
+Fixes: 62748f32d501 ("net: introduce SO_MAX_PACING_RATE")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/core/sock.c | 9 ++++++---
+ 1 file changed, 6 insertions(+), 3 deletions(-)
+
+diff --git a/net/core/sock.c b/net/core/sock.c
+index d55eea5538bce..539c39ad1e488 100644
+--- a/net/core/sock.c
++++ b/net/core/sock.c
+@@ -1117,7 +1117,8 @@ int sock_setsockopt(struct socket *sock, int level, int optname,
+ cmpxchg(&sk->sk_pacing_status,
+ SK_PACING_NONE,
+ SK_PACING_NEEDED);
+- sk->sk_max_pacing_rate = ulval;
++ /* Pairs with READ_ONCE() from sk_getsockopt() */
++ WRITE_ONCE(sk->sk_max_pacing_rate, ulval);
+ sk->sk_pacing_rate = min(sk->sk_pacing_rate, ulval);
+ break;
+ }
+@@ -1478,12 +1479,14 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
+ #endif
+
+ case SO_MAX_PACING_RATE:
++ /* The READ_ONCE() pair with the WRITE_ONCE() in sk_setsockopt() */
+ if (sizeof(v.ulval) != sizeof(v.val) && len >= sizeof(v.ulval)) {
+ lv = sizeof(v.ulval);
+- v.ulval = sk->sk_max_pacing_rate;
++ v.ulval = READ_ONCE(sk->sk_max_pacing_rate);
+ } else {
+ /* 32bit version */
+- v.val = min_t(unsigned long, sk->sk_max_pacing_rate, ~0U);
++ v.val = min_t(unsigned long, ~0U,
++ READ_ONCE(sk->sk_max_pacing_rate));
+ }
+ break;
+
+--
+2.40.1
+
--- /dev/null
+From a467630d0bf7ccb82a56cd9824e27eb0d26d0534 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 1 Aug 2023 09:32:48 +0800
+Subject: net: dcb: choose correct policy to parse DCB_ATTR_BCN
+
+From: Lin Ma <linma@zju.edu.cn>
+
+[ Upstream commit 31d49ba033095f6e8158c60f69714a500922e0c3 ]
+
+The dcbnl_bcn_setcfg uses erroneous policy to parse tb[DCB_ATTR_BCN],
+which is introduced in commit 859ee3c43812 ("DCB: Add support for DCB
+BCN"). Please see the comment in below code
+
+static int dcbnl_bcn_setcfg(...)
+{
+ ...
+ ret = nla_parse_nested_deprecated(..., dcbnl_pfc_up_nest, .. )
+ // !!! dcbnl_pfc_up_nest for attributes
+ // DCB_PFC_UP_ATTR_0 to DCB_PFC_UP_ATTR_ALL in enum dcbnl_pfc_up_attrs
+ ...
+ for (i = DCB_BCN_ATTR_RP_0; i <= DCB_BCN_ATTR_RP_7; i++) {
+ // !!! DCB_BCN_ATTR_RP_0 to DCB_BCN_ATTR_RP_7 in enum dcbnl_bcn_attrs
+ ...
+ value_byte = nla_get_u8(data[i]);
+ ...
+ }
+ ...
+ for (i = DCB_BCN_ATTR_BCNA_0; i <= DCB_BCN_ATTR_RI; i++) {
+ // !!! DCB_BCN_ATTR_BCNA_0 to DCB_BCN_ATTR_RI in enum dcbnl_bcn_attrs
+ ...
+ value_int = nla_get_u32(data[i]);
+ ...
+ }
+ ...
+}
+
+That is, the nla_parse_nested_deprecated uses dcbnl_pfc_up_nest
+attributes to parse nlattr defined in dcbnl_pfc_up_attrs. But the
+following access code fetch each nlattr as dcbnl_bcn_attrs attributes.
+By looking up the associated nla_policy for dcbnl_bcn_attrs. We can find
+the beginning part of these two policies are "same".
+
+static const struct nla_policy dcbnl_pfc_up_nest[...] = {
+ [DCB_PFC_UP_ATTR_0] = {.type = NLA_U8},
+ [DCB_PFC_UP_ATTR_1] = {.type = NLA_U8},
+ [DCB_PFC_UP_ATTR_2] = {.type = NLA_U8},
+ [DCB_PFC_UP_ATTR_3] = {.type = NLA_U8},
+ [DCB_PFC_UP_ATTR_4] = {.type = NLA_U8},
+ [DCB_PFC_UP_ATTR_5] = {.type = NLA_U8},
+ [DCB_PFC_UP_ATTR_6] = {.type = NLA_U8},
+ [DCB_PFC_UP_ATTR_7] = {.type = NLA_U8},
+ [DCB_PFC_UP_ATTR_ALL] = {.type = NLA_FLAG},
+};
+
+static const struct nla_policy dcbnl_bcn_nest[...] = {
+ [DCB_BCN_ATTR_RP_0] = {.type = NLA_U8},
+ [DCB_BCN_ATTR_RP_1] = {.type = NLA_U8},
+ [DCB_BCN_ATTR_RP_2] = {.type = NLA_U8},
+ [DCB_BCN_ATTR_RP_3] = {.type = NLA_U8},
+ [DCB_BCN_ATTR_RP_4] = {.type = NLA_U8},
+ [DCB_BCN_ATTR_RP_5] = {.type = NLA_U8},
+ [DCB_BCN_ATTR_RP_6] = {.type = NLA_U8},
+ [DCB_BCN_ATTR_RP_7] = {.type = NLA_U8},
+ [DCB_BCN_ATTR_RP_ALL] = {.type = NLA_FLAG},
+ // from here is somewhat different
+ [DCB_BCN_ATTR_BCNA_0] = {.type = NLA_U32},
+ ...
+ [DCB_BCN_ATTR_ALL] = {.type = NLA_FLAG},
+};
+
+Therefore, the current code is buggy and this
+nla_parse_nested_deprecated could overflow the dcbnl_pfc_up_nest and use
+the adjacent nla_policy to parse attributes from DCB_BCN_ATTR_BCNA_0.
+
+Hence use the correct policy dcbnl_bcn_nest to parse the nested
+tb[DCB_ATTR_BCN] TLV.
+
+Fixes: 859ee3c43812 ("DCB: Add support for DCB BCN")
+Signed-off-by: Lin Ma <linma@zju.edu.cn>
+Reviewed-by: Simon Horman <horms@kernel.org>
+Link: https://lore.kernel.org/r/20230801013248.87240-1-linma@zju.edu.cn
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/dcb/dcbnl.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/net/dcb/dcbnl.c b/net/dcb/dcbnl.c
+index b53d5e1d026fe..71e97e2a36845 100644
+--- a/net/dcb/dcbnl.c
++++ b/net/dcb/dcbnl.c
+@@ -946,7 +946,7 @@ static int dcbnl_bcn_setcfg(struct net_device *netdev, struct nlmsghdr *nlh,
+ return -EOPNOTSUPP;
+
+ ret = nla_parse_nested_deprecated(data, DCB_BCN_ATTR_MAX,
+- tb[DCB_ATTR_BCN], dcbnl_pfc_up_nest,
++ tb[DCB_ATTR_BCN], dcbnl_bcn_nest,
+ NULL);
+ if (ret)
+ return ret;
+--
+2.40.1
+
--- /dev/null
+From 0146920e4977df2a528283ba1c346d18702f23b6 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 31 Jul 2023 10:42:32 +0300
+Subject: net: ll_temac: fix error checking of irq_of_parse_and_map()
+
+From: Dan Carpenter <dan.carpenter@linaro.org>
+
+[ Upstream commit ef45e8400f5bb66b03cc949f76c80e2a118447de ]
+
+Most kernel functions return negative error codes but some irq functions
+return zero on error. In this code irq_of_parse_and_map(), returns zero
+and platform_get_irq() returns negative error codes. We need to handle
+both cases appropriately.
+
+Fixes: 8425c41d1ef7 ("net: ll_temac: Extend support to non-device-tree platforms")
+Signed-off-by: Dan Carpenter <dan.carpenter@linaro.org>
+Acked-by: Esben Haabendal <esben@geanix.com>
+Reviewed-by: Yang Yingliang <yangyingliang@huawei.com>
+Reviewed-by: Harini Katakam <harini.katakam@amd.com>
+Link: https://lore.kernel.org/r/3d0aef75-06e0-45a5-a2a6-2cc4738d4143@moroto.mountain
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/xilinx/ll_temac_main.c | 12 ++++++++----
+ 1 file changed, 8 insertions(+), 4 deletions(-)
+
+diff --git a/drivers/net/ethernet/xilinx/ll_temac_main.c b/drivers/net/ethernet/xilinx/ll_temac_main.c
+index 9756d83994fca..86edc95919146 100644
+--- a/drivers/net/ethernet/xilinx/ll_temac_main.c
++++ b/drivers/net/ethernet/xilinx/ll_temac_main.c
+@@ -1481,12 +1481,16 @@ static int temac_probe(struct platform_device *pdev)
+ }
+
+ /* Error handle returned DMA RX and TX interrupts */
+- if (lp->rx_irq < 0)
+- return dev_err_probe(&pdev->dev, lp->rx_irq,
++ if (lp->rx_irq <= 0) {
++ rc = lp->rx_irq ?: -EINVAL;
++ return dev_err_probe(&pdev->dev, rc,
+ "could not get DMA RX irq\n");
+- if (lp->tx_irq < 0)
+- return dev_err_probe(&pdev->dev, lp->tx_irq,
++ }
++ if (lp->tx_irq <= 0) {
++ rc = lp->tx_irq ?: -EINVAL;
++ return dev_err_probe(&pdev->dev, rc,
+ "could not get DMA TX irq\n");
++ }
+
+ if (temac_np) {
+ /* Retrieve the MAC address */
+--
+2.40.1
+
--- /dev/null
+From 3b5f7c539ee1ec97d5fe660d8c4c960263578049 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 15 Sep 2022 19:42:14 +0800
+Subject: net: ll_temac: Switch to use dev_err_probe() helper
+
+From: Yang Yingliang <yangyingliang@huawei.com>
+
+[ Upstream commit 75ae8c284c00dc3584b7c173f6fcf96ee15bd02c ]
+
+dev_err() can be replaced with dev_err_probe() which will check if error
+code is -EPROBE_DEFER.
+
+Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Stable-dep-of: ef45e8400f5b ("net: ll_temac: fix error checking of irq_of_parse_and_map()")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/xilinx/ll_temac_main.c | 16 ++++++----------
+ 1 file changed, 6 insertions(+), 10 deletions(-)
+
+diff --git a/drivers/net/ethernet/xilinx/ll_temac_main.c b/drivers/net/ethernet/xilinx/ll_temac_main.c
+index a109438f4a78e..9756d83994fca 100644
+--- a/drivers/net/ethernet/xilinx/ll_temac_main.c
++++ b/drivers/net/ethernet/xilinx/ll_temac_main.c
+@@ -1481,16 +1481,12 @@ static int temac_probe(struct platform_device *pdev)
+ }
+
+ /* Error handle returned DMA RX and TX interrupts */
+- if (lp->rx_irq < 0) {
+- if (lp->rx_irq != -EPROBE_DEFER)
+- dev_err(&pdev->dev, "could not get DMA RX irq\n");
+- return lp->rx_irq;
+- }
+- if (lp->tx_irq < 0) {
+- if (lp->tx_irq != -EPROBE_DEFER)
+- dev_err(&pdev->dev, "could not get DMA TX irq\n");
+- return lp->tx_irq;
+- }
++ if (lp->rx_irq < 0)
++ return dev_err_probe(&pdev->dev, lp->rx_irq,
++ "could not get DMA RX irq\n");
++ if (lp->tx_irq < 0)
++ return dev_err_probe(&pdev->dev, lp->tx_irq,
++ "could not get DMA TX irq\n");
+
+ if (temac_np) {
+ /* Retrieve the MAC address */
+--
+2.40.1
+
--- /dev/null
+From 8dcc22a25e8380ddb4f3de2c569743474f92d001 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 5 Jul 2023 20:15:27 +0800
+Subject: net/mlx5: DR, fix memory leak in mlx5dr_cmd_create_reformat_ctx
+
+From: Zhengchao Shao <shaozhengchao@huawei.com>
+
+[ Upstream commit 5dd77585dd9d0e03dd1bceb95f0269a7eaf6b936 ]
+
+When mlx5_cmd_exec fails in mlx5dr_cmd_create_reformat_ctx, the memory
+pointed to by 'in' is not released, which causes a memory leak. Move the
+memory release after mlx5_cmd_exec.
+
+Fixes: 1d9186476e12 ("net/mlx5: DR, Add direct rule command utilities")
+Signed-off-by: Zhengchao Shao <shaozhengchao@huawei.com>
+Reviewed-by: Leon Romanovsky <leonro@nvidia.com>
+Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/steering/dr_cmd.c | 5 +++--
+ 1 file changed, 3 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_cmd.c
+index 64f6f529f6eb1..45b90c7698787 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_cmd.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_cmd.c
+@@ -423,11 +423,12 @@ int mlx5dr_cmd_create_reformat_ctx(struct mlx5_core_dev *mdev,
+
+ err = mlx5_cmd_exec(mdev, in, inlen, out, sizeof(out));
+ if (err)
+- return err;
++ goto err_free_in;
+
+ *reformat_id = MLX5_GET(alloc_packet_reformat_context_out, out, packet_reformat_id);
+- kvfree(in);
+
++err_free_in:
++ kvfree(in);
+ return err;
+ }
+
+--
+2.40.1
+
--- /dev/null
+From 28773084d652361ddb715321b9a8bde21797f5b6 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 25 Jul 2023 14:56:55 +0800
+Subject: net/mlx5e: fix return value check in mlx5e_ipsec_remove_trailer()
+
+From: Yuanjun Gong <ruc_gongyuanjun@163.com>
+
+[ Upstream commit e5bcb7564d3bd0c88613c76963c5349be9c511c5 ]
+
+mlx5e_ipsec_remove_trailer() should return an error code if function
+pskb_trim() returns an unexpected value.
+
+Fixes: 2ac9cfe78223 ("net/mlx5e: IPSec, Add Innova IPSec offload TX data path")
+Signed-off-by: Yuanjun Gong <ruc_gongyuanjun@163.com>
+Reviewed-by: Leon Romanovsky <leonro@nvidia.com>
+Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c | 4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c
+index 0dd17514caae8..d212706f1bdea 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c
+@@ -121,7 +121,9 @@ static int mlx5e_ipsec_remove_trailer(struct sk_buff *skb, struct xfrm_state *x)
+
+ trailer_len = alen + plen + 2;
+
+- pskb_trim(skb, skb->len - trailer_len);
++ ret = pskb_trim(skb, skb->len - trailer_len);
++ if (unlikely(ret))
++ return ret;
+ if (skb->protocol == htons(ETH_P_IP)) {
+ ipv4hdr->tot_len = htons(ntohs(ipv4hdr->tot_len) - trailer_len);
+ ip_send_check(ipv4hdr);
+--
+2.40.1
+
--- /dev/null
+From 0f7864e133323a51b83c24ca54fa3a70eaefdfe9 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 29 Jul 2023 08:32:01 -0400
+Subject: net/sched: cls_fw: No longer copy tcf_result on update to avoid
+ use-after-free
+
+From: valis <sec@valis.email>
+
+[ Upstream commit 76e42ae831991c828cffa8c37736ebfb831ad5ec ]
+
+When fw_change() is called on an existing filter, the whole
+tcf_result struct is always copied into the new instance of the filter.
+
+This causes a problem when updating a filter bound to a class,
+as tcf_unbind_filter() is always called on the old instance in the
+success path, decreasing filter_cnt of the still referenced class
+and allowing it to be deleted, leading to a use-after-free.
+
+Fix this by no longer copying the tcf_result struct from the old filter.
+
+Fixes: e35a8ee5993b ("net: sched: fw use RCU")
+Reported-by: valis <sec@valis.email>
+Reported-by: Bing-Jhong Billy Jheng <billy@starlabs.sg>
+Signed-off-by: valis <sec@valis.email>
+Signed-off-by: Jamal Hadi Salim <jhs@mojatatu.com>
+Reviewed-by: Victor Nogueira <victor@mojatatu.com>
+Reviewed-by: Pedro Tammela <pctammela@mojatatu.com>
+Reviewed-by: M A Ramdhan <ramdhan@starlabs.sg>
+Link: https://lore.kernel.org/r/20230729123202.72406-3-jhs@mojatatu.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/sched/cls_fw.c | 1 -
+ 1 file changed, 1 deletion(-)
+
+diff --git a/net/sched/cls_fw.c b/net/sched/cls_fw.c
+index 41f0898a5a565..08c41f1976c47 100644
+--- a/net/sched/cls_fw.c
++++ b/net/sched/cls_fw.c
+@@ -266,7 +266,6 @@ static int fw_change(struct net *net, struct sk_buff *in_skb,
+ return -ENOBUFS;
+
+ fnew->id = f->id;
+- fnew->res = f->res;
+ fnew->ifindex = f->ifindex;
+ fnew->tp = f->tp;
+
+--
+2.40.1
+
--- /dev/null
+From 11fbf7e4c4837579b471ecb7275030d4334b4329 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 29 Jul 2023 08:32:02 -0400
+Subject: net/sched: cls_route: No longer copy tcf_result on update to avoid
+ use-after-free
+
+From: valis <sec@valis.email>
+
+[ Upstream commit b80b829e9e2c1b3f7aae34855e04d8f6ecaf13c8 ]
+
+When route4_change() is called on an existing filter, the whole
+tcf_result struct is always copied into the new instance of the filter.
+
+This causes a problem when updating a filter bound to a class,
+as tcf_unbind_filter() is always called on the old instance in the
+success path, decreasing filter_cnt of the still referenced class
+and allowing it to be deleted, leading to a use-after-free.
+
+Fix this by no longer copying the tcf_result struct from the old filter.
+
+Fixes: 1109c00547fc ("net: sched: RCU cls_route")
+Reported-by: valis <sec@valis.email>
+Reported-by: Bing-Jhong Billy Jheng <billy@starlabs.sg>
+Signed-off-by: valis <sec@valis.email>
+Signed-off-by: Jamal Hadi Salim <jhs@mojatatu.com>
+Reviewed-by: Victor Nogueira <victor@mojatatu.com>
+Reviewed-by: Pedro Tammela <pctammela@mojatatu.com>
+Reviewed-by: M A Ramdhan <ramdhan@starlabs.sg>
+Link: https://lore.kernel.org/r/20230729123202.72406-4-jhs@mojatatu.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/sched/cls_route.c | 1 -
+ 1 file changed, 1 deletion(-)
+
+diff --git a/net/sched/cls_route.c b/net/sched/cls_route.c
+index b775e681cb56e..1ad4b3e60eb3b 100644
+--- a/net/sched/cls_route.c
++++ b/net/sched/cls_route.c
+@@ -511,7 +511,6 @@ static int route4_change(struct net *net, struct sk_buff *in_skb,
+ if (fold) {
+ f->id = fold->id;
+ f->iif = fold->iif;
+- f->res = fold->res;
+ f->handle = fold->handle;
+
+ f->tp = fold->tp;
+--
+2.40.1
+
--- /dev/null
+From f1928d23ee09cc2a4251c02d68267e3c579c4077 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 26 Jul 2023 09:51:51 -0400
+Subject: net: sched: cls_u32: Fix match key mis-addressing
+
+From: Jamal Hadi Salim <jhs@mojatatu.com>
+
+[ Upstream commit e68409db995380d1badacba41ff24996bd396171 ]
+
+A match entry is uniquely identified with an "address" or "path" in the
+form of: hashtable ID(12b):bucketid(8b):nodeid(12b).
+
+When creating table match entries, the hash table id, bucket id and node id
+(match entry id) must each either be specified by the user or take a
+reasonable in-kernel default. The in-kernel default for a table id is
+0x800(omnipresent root table); for bucketid it is 0x0. Prior to this fix there
+was none for a nodeid i.e. the code assumed that the user passed the correct
+nodeid and if the user passes a nodeid of 0 (as Mingi Cho did) then that is what
+was used. But nodeid of 0 is reserved for identifying the table. This is not
+a problem until we dump. The dump code notices that the nodeid is zero and
+assumes it is referencing a table and therefore references table struct
+tc_u_hnode instead of what was created i.e match entry struct tc_u_knode.
+
+Ming does an equivalent of:
+tc filter add dev dummy0 parent 10: prio 1 handle 0x1000 \
+protocol ip u32 match ip src 10.0.0.1/32 classid 10:1 action ok
+
+Essentially specifying a table id 0, bucketid 1 and nodeid of zero
+Tableid 0 is remapped to the default of 0x800.
+Bucketid 1 is ignored and defaults to 0x00.
+Nodeid was assumed to be what Ming passed - 0x000
+
+dumping before fix shows:
+~$ tc filter ls dev dummy0 parent 10:
+filter protocol ip pref 1 u32 chain 0
+filter protocol ip pref 1 u32 chain 0 fh 800: ht divisor 1
+filter protocol ip pref 1 u32 chain 0 fh 800: ht divisor -30591
+
+Note that the last line reports a table instead of a match entry
+(you can tell this because it says "ht divisor...").
+As a result of reporting the wrong data type (misinterpreting struct
+tc_u_knode as being struct tc_u_hnode) the divisor is reported with value
+of -30591. Ming identified this as part of the heap address
+(physmap_base is 0xffff8880 (-30591 - 1)).
+
+The fix is to ensure that when table entry matches are added and no
+nodeid is specified (i.e nodeid == 0) then we get the next available
+nodeid from the table's pool.
+
+After the fix, this is what the dump shows:
+$ tc filter ls dev dummy0 parent 10:
+filter protocol ip pref 1 u32 chain 0
+filter protocol ip pref 1 u32 chain 0 fh 800: ht divisor 1
+filter protocol ip pref 1 u32 chain 0 fh 800::800 order 2048 key ht 800 bkt 0 flowid 10:1 not_in_hw
+ match 0a000001/ffffffff at 12
+ action order 1: gact action pass
+ random type none pass val 0
+ index 1 ref 1 bind 1
+
+Reported-by: Mingi Cho <mgcho.minic@gmail.com>
+Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
+Signed-off-by: Jamal Hadi Salim <jhs@mojatatu.com>
+Link: https://lore.kernel.org/r/20230726135151.416917-1-jhs@mojatatu.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/sched/cls_u32.c | 56 ++++++++++++++++++++++++++++++++++++++++-----
+ 1 file changed, 50 insertions(+), 6 deletions(-)
+
+diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c
+index e5cc2b4d38d5a..316ccbb01b55f 100644
+--- a/net/sched/cls_u32.c
++++ b/net/sched/cls_u32.c
+@@ -1004,18 +1004,62 @@ static int u32_change(struct net *net, struct sk_buff *in_skb,
+ return -EINVAL;
+ }
+
++ /* At this point, we need to derive the new handle that will be used to
++ * uniquely map the identity of this table match entry. The
++ * identity of the entry that we need to construct is 32 bits made of:
++ * htid(12b):bucketid(8b):node/entryid(12b)
++ *
++ * At this point _we have the table(ht)_ in which we will insert this
++ * entry. We carry the table's id in variable "htid".
++ * Note that earlier code picked the ht selection either by a) the user
++ * providing the htid specified via TCA_U32_HASH attribute or b) when
++ * no such attribute is passed then the root ht, is default to at ID
++ * 0x[800][00][000]. Rule: the root table has a single bucket with ID 0.
++ * If OTOH the user passed us the htid, they may also pass a bucketid of
++ * choice. 0 is fine. For example a user htid is 0x[600][01][000] it is
++ * indicating hash bucketid of 1. Rule: the entry/node ID _cannot_ be
++ * passed via the htid, so even if it was non-zero it will be ignored.
++ *
++ * We may also have a handle, if the user passed one. The handle also
++ * carries the same addressing of htid(12b):bucketid(8b):node/entryid(12b).
++ * Rule: the bucketid on the handle is ignored even if one was passed;
++ * rather the value on "htid" is always assumed to be the bucketid.
++ */
+ if (handle) {
++ /* Rule: The htid from handle and tableid from htid must match */
+ if (TC_U32_HTID(handle) && TC_U32_HTID(handle ^ htid)) {
+ NL_SET_ERR_MSG_MOD(extack, "Handle specified hash table address mismatch");
+ return -EINVAL;
+ }
+- handle = htid | TC_U32_NODE(handle);
+- err = idr_alloc_u32(&ht->handle_idr, NULL, &handle, handle,
+- GFP_KERNEL);
+- if (err)
+- return err;
+- } else
++ /* Ok, so far we have a valid htid(12b):bucketid(8b) but we
++ * need to finalize the table entry identification with the last
++ * part - the node/entryid(12b)). Rule: Nodeid _cannot be 0_ for
++ * entries. Rule: nodeid of 0 is reserved only for tables(see
++ * earlier code which processes TC_U32_DIVISOR attribute).
++ * Rule: The nodeid can only be derived from the handle (and not
++ * htid).
++ * Rule: if the handle specified zero for the node id example
++ * 0x60000000, then pick a new nodeid from the pool of IDs
++ * this hash table has been allocating from.
++ * If OTOH it is specified (i.e for example the user passed a
++ * handle such as 0x60000123), then we use it generate our final
++ * handle which is used to uniquely identify the match entry.
++ */
++ if (!TC_U32_NODE(handle)) {
++ handle = gen_new_kid(ht, htid);
++ } else {
++ handle = htid | TC_U32_NODE(handle);
++ err = idr_alloc_u32(&ht->handle_idr, NULL, &handle,
++ handle, GFP_KERNEL);
++ if (err)
++ return err;
++ }
++ } else {
++ /* The user did not give us a handle; lets just generate one
++ * from the table's pool of nodeids.
++ */
+ handle = gen_new_kid(ht, htid);
++ }
+
+ if (tb[TCA_U32_SEL] == NULL) {
+ NL_SET_ERR_MSG_MOD(extack, "Selector not specified");
+--
+2.40.1
+
--- /dev/null
+From 0007f909cecca5fc8d0578376641fe2716721ef3 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 29 Jul 2023 08:32:00 -0400
+Subject: net/sched: cls_u32: No longer copy tcf_result on update to avoid
+ use-after-free
+
+From: valis <sec@valis.email>
+
+[ Upstream commit 3044b16e7c6fe5d24b1cdbcf1bd0a9d92d1ebd81 ]
+
+When u32_change() is called on an existing filter, the whole
+tcf_result struct is always copied into the new instance of the filter.
+
+This causes a problem when updating a filter bound to a class,
+as tcf_unbind_filter() is always called on the old instance in the
+success path, decreasing filter_cnt of the still referenced class
+and allowing it to be deleted, leading to a use-after-free.
+
+Fix this by no longer copying the tcf_result struct from the old filter.
+
+Fixes: de5df63228fc ("net: sched: cls_u32 changes to knode must appear atomic to readers")
+Reported-by: valis <sec@valis.email>
+Reported-by: M A Ramdhan <ramdhan@starlabs.sg>
+Signed-off-by: valis <sec@valis.email>
+Signed-off-by: Jamal Hadi Salim <jhs@mojatatu.com>
+Reviewed-by: Victor Nogueira <victor@mojatatu.com>
+Reviewed-by: Pedro Tammela <pctammela@mojatatu.com>
+Reviewed-by: M A Ramdhan <ramdhan@starlabs.sg>
+Link: https://lore.kernel.org/r/20230729123202.72406-2-jhs@mojatatu.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/sched/cls_u32.c | 1 -
+ 1 file changed, 1 deletion(-)
+
+diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c
+index 316ccbb01b55f..65598207a2fcb 100644
+--- a/net/sched/cls_u32.c
++++ b/net/sched/cls_u32.c
+@@ -814,7 +814,6 @@ static struct tc_u_knode *u32_init_knode(struct net *net, struct tcf_proto *tp,
+
+ new->ifindex = n->ifindex;
+ new->fshift = n->fshift;
+- new->res = n->res;
+ new->flags = n->flags;
+ RCU_INIT_POINTER(new->ht_down, ht);
+
+--
+2.40.1
+
--- /dev/null
+From e81737ba90b46165e4cebc56f4732b414145510d Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 28 Jul 2023 17:18:12 +0200
+Subject: perf test uprobe_from_different_cu: Skip if there is no gcc
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Georg Müller <georgmueller@gmx.net>
+
+[ Upstream commit 98ce8e4a9dcfb448b30a2d7a16190f4a00382377 ]
+
+Without gcc, the test will fail.
+
+On cleanup, ignore probe removal errors. Otherwise, in case of an error
+adding the probe, the temporary directory is not removed.
+
+Fixes: 56cbeacf14353057 ("perf probe: Add test for regression introduced by switch to die_get_decl_file()")
+Signed-off-by: Georg Müller <georgmueller@gmx.net>
+Acked-by: Ian Rogers <irogers@google.com>
+Cc: Adrian Hunter <adrian.hunter@intel.com>
+Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
+Cc: Georg Müller <georgmueller@gmx.net>
+Cc: Ingo Molnar <mingo@redhat.com>
+Cc: Jiri Olsa <jolsa@kernel.org>
+Cc: Mark Rutland <mark.rutland@arm.com>
+Cc: Masami Hiramatsu <mhiramat@kernel.org>
+Cc: Namhyung Kim <namhyung@kernel.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Link: https://lore.kernel.org/r/20230728151812.454806-2-georgmueller@gmx.net
+Link: https://lore.kernel.org/r/CAP-5=fUP6UuLgRty3t2=fQsQi3k4hDMz415vWdp1x88QMvZ8ug@mail.gmail.com/
+Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ tools/perf/tests/shell/test_uprobe_from_different_cu.sh | 8 +++++++-
+ 1 file changed, 7 insertions(+), 1 deletion(-)
+
+diff --git a/tools/perf/tests/shell/test_uprobe_from_different_cu.sh b/tools/perf/tests/shell/test_uprobe_from_different_cu.sh
+index 00d2e0e2e0c28..319f36ebb9a40 100644
+--- a/tools/perf/tests/shell/test_uprobe_from_different_cu.sh
++++ b/tools/perf/tests/shell/test_uprobe_from_different_cu.sh
+@@ -4,6 +4,12 @@
+
+ set -e
+
++# skip if there's no gcc
++if ! [ -x "$(command -v gcc)" ]; then
++ echo "failed: no gcc compiler"
++ exit 2
++fi
++
+ temp_dir=$(mktemp -d /tmp/perf-uprobe-different-cu-sh.XXXXXXXXXX)
+
+ cleanup()
+@@ -11,7 +17,7 @@ cleanup()
+ trap - EXIT TERM INT
+ if [[ "${temp_dir}" =~ ^/tmp/perf-uprobe-different-cu-sh.*$ ]]; then
+ echo "--- Cleaning up ---"
+- perf probe -x ${temp_dir}/testfile -d foo
++ perf probe -x ${temp_dir}/testfile -d foo || true
+ rm -f "${temp_dir}/"*
+ rmdir "${temp_dir}"
+ fi
+--
+2.40.1
+
--- /dev/null
+From 39ea9761c47658832a933b9f8f24fea7d671ee2c Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 26 Jul 2023 15:53:14 +0800
+Subject: rtnetlink: let rtnl_bridge_setlink checks IFLA_BRIDGE_MODE length
+
+From: Lin Ma <linma@zju.edu.cn>
+
+[ Upstream commit d73ef2d69c0dba5f5a1cb9600045c873bab1fb7f ]
+
+There are 9 ndo_bridge_setlink handlers in total in the current kernel,
+which are 1) bnxt_bridge_setlink, 2) be_ndo_bridge_setlink 3)
+i40e_ndo_bridge_setlink 4) ice_bridge_setlink 5)
+ixgbe_ndo_bridge_setlink 6) mlx5e_bridge_setlink 7)
+nfp_net_bridge_setlink 8) qeth_l2_bridge_setlink 9) br_setlink.
+
+By investigating the code, we find that 1-7 parse and use nlattr
+IFLA_BRIDGE_MODE but 3 and 4 forget to do the nla_len check. This can
+lead to an out-of-attribute read and allow a malformed nlattr (e.g.,
+length 0) to be viewed as a 2 byte integer.
+
+To avoid such issues, including in any ndo_bridge_setlink handlers added
+in the future, this patch adds the nla_len check in rtnl_bridge_setlink
+and returns an error early if the length mismatches. To make this work,
+the break is removed from the parsing of IFLA_BRIDGE_FLAGS so that
+nla_for_each_nested iterates over every attribute.
+
+Fixes: b1edc14a3fbf ("ice: Implement ice_bridge_getlink and ice_bridge_setlink")
+Fixes: 51616018dd1b ("i40e: Add support for getlink, setlink ndo ops")
+Suggested-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Lin Ma <linma@zju.edu.cn>
+Acked-by: Nikolay Aleksandrov <razor@blackwall.org>
+Reviewed-by: Hangbin Liu <liuhangbin@gmail.com>
+Link: https://lore.kernel.org/r/20230726075314.1059224-1-linma@zju.edu.cn
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/core/rtnetlink.c | 8 ++++++--
+ 1 file changed, 6 insertions(+), 2 deletions(-)
+
+diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
+index 1db92a44548f0..3eaf7c706b0ec 100644
+--- a/net/core/rtnetlink.c
++++ b/net/core/rtnetlink.c
+@@ -4590,13 +4590,17 @@ static int rtnl_bridge_setlink(struct sk_buff *skb, struct nlmsghdr *nlh,
+ br_spec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_AF_SPEC);
+ if (br_spec) {
+ nla_for_each_nested(attr, br_spec, rem) {
+- if (nla_type(attr) == IFLA_BRIDGE_FLAGS) {
++ if (nla_type(attr) == IFLA_BRIDGE_FLAGS && !have_flags) {
+ if (nla_len(attr) < sizeof(flags))
+ return -EINVAL;
+
+ have_flags = true;
+ flags = nla_get_u16(attr);
+- break;
++ }
++
++ if (nla_type(attr) == IFLA_BRIDGE_MODE) {
++ if (nla_len(attr) < sizeof(u16))
++ return -EINVAL;
+ }
+ }
+ }
+--
+2.40.1
+
arm64-fix-bit-shifting-ub-in-the-midr_cpu_model-macro.patch
perf-fix-function-pointer-case.patch
loop-select-i-o-scheduler-none-from-inside-add_disk.patch
+word-at-a-time-use-the-same-return-type-for-has_zero.patch
+kvm-s390-fix-sthyi-error-handling.patch
+net-mlx5-dr-fix-memory-leak-in-mlx5dr_cmd_create_ref.patch
+net-mlx5e-fix-return-value-check-in-mlx5e_ipsec_remo.patch
+rtnetlink-let-rtnl_bridge_setlink-checks-ifla_bridge.patch
+perf-test-uprobe_from_different_cu-skip-if-there-is-.patch
+net-sched-cls_u32-fix-match-key-mis-addressing.patch
+misdn-hfcpci-fix-potential-deadlock-on-hc-lock.patch
+net-annotate-data-races-around-sk-sk_max_pacing_rate.patch
+net-add-missing-read_once-sk-sk_rcvlowat-annotation.patch
+net-add-missing-read_once-sk-sk_sndbuf-annotation.patch
+net-add-missing-read_once-sk-sk_rcvbuf-annotation.patch
+net-add-missing-data-race-annotations-around-sk-sk_p.patch
+net-add-missing-data-race-annotation-for-sk_ll_usec.patch
+net-sched-cls_u32-no-longer-copy-tcf_result-on-updat.patch
+net-sched-cls_fw-no-longer-copy-tcf_result-on-update.patch
+net-sched-cls_route-no-longer-copy-tcf_result-on-upd.patch
+bpf-sockmap-remove-preempt_disable-in-sock_map_sk_ac.patch
+driver-core-add-device-probe-log-helper.patch
+net-ll_temac-switch-to-use-dev_err_probe-helper.patch
+net-ll_temac-fix-error-checking-of-irq_of_parse_and_.patch
+net-dcb-choose-correct-policy-to-parse-dcb_attr_bcn.patch
+ip6mr-fix-skb_under_panic-in-ip6mr_cache_report.patch
+tcp_metrics-fix-addr_same-helper.patch
+tcp_metrics-annotate-data-races-around-tm-tcpm_stamp.patch
+tcp_metrics-annotate-data-races-around-tm-tcpm_lock.patch
+tcp_metrics-annotate-data-races-around-tm-tcpm_vals.patch
+tcp_metrics-annotate-data-races-around-tm-tcpm_net.patch
+tcp_metrics-fix-data-race-in-tcpm_suck_dst-vs-fastop.patch
--- /dev/null
+From 74c204ba460603978f9f32d0e5f2fefca483212b Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 2 Aug 2023 13:14:57 +0000
+Subject: tcp_metrics: annotate data-races around tm->tcpm_lock
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit 285ce119a3c6c4502585936650143e54c8692788 ]
+
+tm->tcpm_lock can be read or written locklessly.
+
+Add needed READ_ONCE()/WRITE_ONCE() to document this.
+
+Fixes: 51c5d0c4b169 ("tcp: Maintain dynamic metrics in local cache.")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Reviewed-by: David Ahern <dsahern@kernel.org>
+Reviewed-by: Kuniyuki Iwashima <kuniyu@amazon.com>
+Link: https://lore.kernel.org/r/20230802131500.1478140-4-edumazet@google.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/ipv4/tcp_metrics.c | 6 ++++--
+ 1 file changed, 4 insertions(+), 2 deletions(-)
+
+diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c
+index 6a5a6683194c9..2dac82154af85 100644
+--- a/net/ipv4/tcp_metrics.c
++++ b/net/ipv4/tcp_metrics.c
+@@ -59,7 +59,8 @@ static inline struct net *tm_net(struct tcp_metrics_block *tm)
+ static bool tcp_metric_locked(struct tcp_metrics_block *tm,
+ enum tcp_metric_index idx)
+ {
+- return tm->tcpm_lock & (1 << idx);
++ /* Paired with WRITE_ONCE() in tcpm_suck_dst() */
++ return READ_ONCE(tm->tcpm_lock) & (1 << idx);
+ }
+
+ static u32 tcp_metric_get(struct tcp_metrics_block *tm,
+@@ -110,7 +111,8 @@ static void tcpm_suck_dst(struct tcp_metrics_block *tm,
+ val |= 1 << TCP_METRIC_CWND;
+ if (dst_metric_locked(dst, RTAX_REORDERING))
+ val |= 1 << TCP_METRIC_REORDERING;
+- tm->tcpm_lock = val;
++ /* Paired with READ_ONCE() in tcp_metric_locked() */
++ WRITE_ONCE(tm->tcpm_lock, val);
+
+ msval = dst_metric_raw(dst, RTAX_RTT);
+ tm->tcpm_vals[TCP_METRIC_RTT] = msval * USEC_PER_MSEC;
+--
+2.40.1
+
--- /dev/null
+From 48d653fc96b8daec9d90fef32b5146001c748981 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 2 Aug 2023 13:14:59 +0000
+Subject: tcp_metrics: annotate data-races around tm->tcpm_net
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit d5d986ce42c71a7562d32c4e21e026b0f87befec ]
+
+tm->tcpm_net can be read or written locklessly.
+
+Instead of changing write_pnet() and read_pnet() and potentially
+hurting performance, add the needed READ_ONCE()/WRITE_ONCE()
+in tm_net() and tcpm_new().
+
+Fixes: 849e8a0ca8d5 ("tcp_metrics: Add a field tcpm_net and verify it matches on lookup")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Reviewed-by: David Ahern <dsahern@kernel.org>
+Reviewed-by: Kuniyuki Iwashima <kuniyu@amazon.com>
+Link: https://lore.kernel.org/r/20230802131500.1478140-6-edumazet@google.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/ipv4/tcp_metrics.c | 11 +++++++----
+ 1 file changed, 7 insertions(+), 4 deletions(-)
+
+diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c
+index 29261aa96eb40..3d0452bb6c2a1 100644
+--- a/net/ipv4/tcp_metrics.c
++++ b/net/ipv4/tcp_metrics.c
+@@ -40,7 +40,7 @@ struct tcp_fastopen_metrics {
+
+ struct tcp_metrics_block {
+ struct tcp_metrics_block __rcu *tcpm_next;
+- possible_net_t tcpm_net;
++ struct net *tcpm_net;
+ struct inetpeer_addr tcpm_saddr;
+ struct inetpeer_addr tcpm_daddr;
+ unsigned long tcpm_stamp;
+@@ -51,9 +51,10 @@ struct tcp_metrics_block {
+ struct rcu_head rcu_head;
+ };
+
+-static inline struct net *tm_net(struct tcp_metrics_block *tm)
++static inline struct net *tm_net(const struct tcp_metrics_block *tm)
+ {
+- return read_pnet(&tm->tcpm_net);
++ /* Paired with the WRITE_ONCE() in tcpm_new() */
++ return READ_ONCE(tm->tcpm_net);
+ }
+
+ static bool tcp_metric_locked(struct tcp_metrics_block *tm,
+@@ -197,7 +198,9 @@ static struct tcp_metrics_block *tcpm_new(struct dst_entry *dst,
+ if (!tm)
+ goto out_unlock;
+ }
+- write_pnet(&tm->tcpm_net, net);
++ /* Paired with the READ_ONCE() in tm_net() */
++ WRITE_ONCE(tm->tcpm_net, net);
++
+ tm->tcpm_saddr = *saddr;
+ tm->tcpm_daddr = *daddr;
+
+--
+2.40.1
+
--- /dev/null
+From 890588969f8887c6c738ab038d1b311948460346 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 2 Aug 2023 13:14:56 +0000
+Subject: tcp_metrics: annotate data-races around tm->tcpm_stamp
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit 949ad62a5d5311d36fce2e14fe5fed3f936da51c ]
+
+tm->tcpm_stamp can be read or written locklessly.
+
+Add needed READ_ONCE()/WRITE_ONCE() to document this.
+
+Also constify tcpm_check_stamp() dst argument.
+
+Fixes: 51c5d0c4b169 ("tcp: Maintain dynamic metrics in local cache.")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Reviewed-by: David Ahern <dsahern@kernel.org>
+Reviewed-by: Kuniyuki Iwashima <kuniyu@amazon.com>
+Link: https://lore.kernel.org/r/20230802131500.1478140-3-edumazet@google.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/ipv4/tcp_metrics.c | 19 +++++++++++++------
+ 1 file changed, 13 insertions(+), 6 deletions(-)
+
+diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c
+index f3fb19df72e1c..6a5a6683194c9 100644
+--- a/net/ipv4/tcp_metrics.c
++++ b/net/ipv4/tcp_metrics.c
+@@ -97,7 +97,7 @@ static void tcpm_suck_dst(struct tcp_metrics_block *tm,
+ u32 msval;
+ u32 val;
+
+- tm->tcpm_stamp = jiffies;
++ WRITE_ONCE(tm->tcpm_stamp, jiffies);
+
+ val = 0;
+ if (dst_metric_locked(dst, RTAX_RTT))
+@@ -131,9 +131,15 @@ static void tcpm_suck_dst(struct tcp_metrics_block *tm,
+
+ #define TCP_METRICS_TIMEOUT (60 * 60 * HZ)
+
+-static void tcpm_check_stamp(struct tcp_metrics_block *tm, struct dst_entry *dst)
++static void tcpm_check_stamp(struct tcp_metrics_block *tm,
++ const struct dst_entry *dst)
+ {
+- if (tm && unlikely(time_after(jiffies, tm->tcpm_stamp + TCP_METRICS_TIMEOUT)))
++ unsigned long limit;
++
++ if (!tm)
++ return;
++ limit = READ_ONCE(tm->tcpm_stamp) + TCP_METRICS_TIMEOUT;
++ if (unlikely(time_after(jiffies, limit)))
+ tcpm_suck_dst(tm, dst, false);
+ }
+
+@@ -174,7 +180,8 @@ static struct tcp_metrics_block *tcpm_new(struct dst_entry *dst,
+ oldest = deref_locked(tcp_metrics_hash[hash].chain);
+ for (tm = deref_locked(oldest->tcpm_next); tm;
+ tm = deref_locked(tm->tcpm_next)) {
+- if (time_before(tm->tcpm_stamp, oldest->tcpm_stamp))
++ if (time_before(READ_ONCE(tm->tcpm_stamp),
++ READ_ONCE(oldest->tcpm_stamp)))
+ oldest = tm;
+ }
+ tm = oldest;
+@@ -431,7 +438,7 @@ void tcp_update_metrics(struct sock *sk)
+ tp->reordering);
+ }
+ }
+- tm->tcpm_stamp = jiffies;
++ WRITE_ONCE(tm->tcpm_stamp, jiffies);
+ out_unlock:
+ rcu_read_unlock();
+ }
+@@ -642,7 +649,7 @@ static int tcp_metrics_fill_info(struct sk_buff *msg,
+ }
+
+ if (nla_put_msecs(msg, TCP_METRICS_ATTR_AGE,
+- jiffies - tm->tcpm_stamp,
++ jiffies - READ_ONCE(tm->tcpm_stamp),
+ TCP_METRICS_ATTR_PAD) < 0)
+ goto nla_put_failure;
+
+--
+2.40.1
+
--- /dev/null
+From ae4b0cb37d2715f02184ae4e91ac925e36646ffc Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 2 Aug 2023 13:14:58 +0000
+Subject: tcp_metrics: annotate data-races around tm->tcpm_vals[]
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit 8c4d04f6b443869d25e59822f7cec88d647028a9 ]
+
+tm->tcpm_vals[] values can be read or written locklessly.
+
+Add needed READ_ONCE()/WRITE_ONCE() to document this,
+and force use of tcp_metric_get() and tcp_metric_set().
+
+Fixes: 51c5d0c4b169 ("tcp: Maintain dynamic metrics in local cache.")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Reviewed-by: David Ahern <dsahern@kernel.org>
+Reviewed-by: Kuniyuki Iwashima <kuniyu@amazon.com>
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/ipv4/tcp_metrics.c | 23 ++++++++++++++---------
+ 1 file changed, 14 insertions(+), 9 deletions(-)
+
+diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c
+index 2dac82154af85..29261aa96eb40 100644
+--- a/net/ipv4/tcp_metrics.c
++++ b/net/ipv4/tcp_metrics.c
+@@ -63,17 +63,19 @@ static bool tcp_metric_locked(struct tcp_metrics_block *tm,
+ return READ_ONCE(tm->tcpm_lock) & (1 << idx);
+ }
+
+-static u32 tcp_metric_get(struct tcp_metrics_block *tm,
++static u32 tcp_metric_get(const struct tcp_metrics_block *tm,
+ enum tcp_metric_index idx)
+ {
+- return tm->tcpm_vals[idx];
++ /* Paired with WRITE_ONCE() in tcp_metric_set() */
++ return READ_ONCE(tm->tcpm_vals[idx]);
+ }
+
+ static void tcp_metric_set(struct tcp_metrics_block *tm,
+ enum tcp_metric_index idx,
+ u32 val)
+ {
+- tm->tcpm_vals[idx] = val;
++ /* Paired with READ_ONCE() in tcp_metric_get() */
++ WRITE_ONCE(tm->tcpm_vals[idx], val);
+ }
+
+ static bool addr_same(const struct inetpeer_addr *a,
+@@ -115,13 +117,16 @@ static void tcpm_suck_dst(struct tcp_metrics_block *tm,
+ WRITE_ONCE(tm->tcpm_lock, val);
+
+ msval = dst_metric_raw(dst, RTAX_RTT);
+- tm->tcpm_vals[TCP_METRIC_RTT] = msval * USEC_PER_MSEC;
++ tcp_metric_set(tm, TCP_METRIC_RTT, msval * USEC_PER_MSEC);
+
+ msval = dst_metric_raw(dst, RTAX_RTTVAR);
+- tm->tcpm_vals[TCP_METRIC_RTTVAR] = msval * USEC_PER_MSEC;
+- tm->tcpm_vals[TCP_METRIC_SSTHRESH] = dst_metric_raw(dst, RTAX_SSTHRESH);
+- tm->tcpm_vals[TCP_METRIC_CWND] = dst_metric_raw(dst, RTAX_CWND);
+- tm->tcpm_vals[TCP_METRIC_REORDERING] = dst_metric_raw(dst, RTAX_REORDERING);
++ tcp_metric_set(tm, TCP_METRIC_RTTVAR, msval * USEC_PER_MSEC);
++ tcp_metric_set(tm, TCP_METRIC_SSTHRESH,
++ dst_metric_raw(dst, RTAX_SSTHRESH));
++ tcp_metric_set(tm, TCP_METRIC_CWND,
++ dst_metric_raw(dst, RTAX_CWND));
++ tcp_metric_set(tm, TCP_METRIC_REORDERING,
++ dst_metric_raw(dst, RTAX_REORDERING));
+ if (fastopen_clear) {
+ tm->tcpm_fastopen.mss = 0;
+ tm->tcpm_fastopen.syn_loss = 0;
+@@ -662,7 +667,7 @@ static int tcp_metrics_fill_info(struct sk_buff *msg,
+ if (!nest)
+ goto nla_put_failure;
+ for (i = 0; i < TCP_METRIC_MAX_KERNEL + 1; i++) {
+- u32 val = tm->tcpm_vals[i];
++ u32 val = tcp_metric_get(tm, i);
+
+ if (!val)
+ continue;
+--
+2.40.1
+
--- /dev/null
+From 826092f3bf5fc15cb80742a236be21e8fe528b07 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 2 Aug 2023 13:14:55 +0000
+Subject: tcp_metrics: fix addr_same() helper
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit e6638094d7af6c7b9dcca05ad009e79e31b4f670 ]
+
+Because v4 and v6 families use separate inetpeer trees (respectively
+net->ipv4.peers and net->ipv6.peers), inetpeer_addr_cmp(a, b) assumes
+a & b share the same family.
+
+tcp_metrics use a common hash table, where entries can have different
+families.
+
+We must therefore make sure to not call inetpeer_addr_cmp()
+if the families do not match.
+
+Fixes: d39d14ffa24c ("net: Add helper function to compare inetpeer addresses")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Reviewed-by: David Ahern <dsahern@kernel.org>
+Reviewed-by: Kuniyuki Iwashima <kuniyu@amazon.com>
+Link: https://lore.kernel.org/r/20230802131500.1478140-2-edumazet@google.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/ipv4/tcp_metrics.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c
+index 0af6249a993af..f3fb19df72e1c 100644
+--- a/net/ipv4/tcp_metrics.c
++++ b/net/ipv4/tcp_metrics.c
+@@ -78,7 +78,7 @@ static void tcp_metric_set(struct tcp_metrics_block *tm,
+ static bool addr_same(const struct inetpeer_addr *a,
+ const struct inetpeer_addr *b)
+ {
+- return inetpeer_addr_cmp(a, b) == 0;
++ return (a->family == b->family) && !inetpeer_addr_cmp(a, b);
+ }
+
+ struct tcpm_hash_bucket {
+--
+2.40.1
+
--- /dev/null
+From 009a6aa28f35122e7ec9c4dad022b440a505a498 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 2 Aug 2023 13:15:00 +0000
+Subject: tcp_metrics: fix data-race in tcpm_suck_dst() vs fastopen
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit ddf251fa2bc1d3699eec0bae6ed0bc373b8fda79 ]
+
+Whenever tcpm_new() reclaims an old entry, tcpm_suck_dst()
+would overwrite data that could be read from tcp_fastopen_cache_get()
+or tcp_metrics_fill_info().
+
+We need to acquire fastopen_seqlock to maintain consistency.
+
+For newly allocated objects, tcpm_new() can switch to kzalloc()
+to avoid an extra fastopen_seqlock acquisition.
+
+Fixes: 1fe4c481ba63 ("net-tcp: Fast Open client - cookie cache")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Cc: Yuchung Cheng <ycheng@google.com>
+Reviewed-by: Kuniyuki Iwashima <kuniyu@amazon.com>
+Link: https://lore.kernel.org/r/20230802131500.1478140-7-edumazet@google.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/ipv4/tcp_metrics.c | 9 +++++----
+ 1 file changed, 5 insertions(+), 4 deletions(-)
+
+diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c
+index 3d0452bb6c2a1..e89e19a6852ce 100644
+--- a/net/ipv4/tcp_metrics.c
++++ b/net/ipv4/tcp_metrics.c
+@@ -93,6 +93,7 @@ static struct tcpm_hash_bucket *tcp_metrics_hash __read_mostly;
+ static unsigned int tcp_metrics_hash_log __read_mostly;
+
+ static DEFINE_SPINLOCK(tcp_metrics_lock);
++static DEFINE_SEQLOCK(fastopen_seqlock);
+
+ static void tcpm_suck_dst(struct tcp_metrics_block *tm,
+ const struct dst_entry *dst,
+@@ -129,11 +130,13 @@ static void tcpm_suck_dst(struct tcp_metrics_block *tm,
+ tcp_metric_set(tm, TCP_METRIC_REORDERING,
+ dst_metric_raw(dst, RTAX_REORDERING));
+ if (fastopen_clear) {
++ write_seqlock(&fastopen_seqlock);
+ tm->tcpm_fastopen.mss = 0;
+ tm->tcpm_fastopen.syn_loss = 0;
+ tm->tcpm_fastopen.try_exp = 0;
+ tm->tcpm_fastopen.cookie.exp = false;
+ tm->tcpm_fastopen.cookie.len = 0;
++ write_sequnlock(&fastopen_seqlock);
+ }
+ }
+
+@@ -194,7 +197,7 @@ static struct tcp_metrics_block *tcpm_new(struct dst_entry *dst,
+ }
+ tm = oldest;
+ } else {
+- tm = kmalloc(sizeof(*tm), GFP_ATOMIC);
++ tm = kzalloc(sizeof(*tm), GFP_ATOMIC);
+ if (!tm)
+ goto out_unlock;
+ }
+@@ -204,7 +207,7 @@ static struct tcp_metrics_block *tcpm_new(struct dst_entry *dst,
+ tm->tcpm_saddr = *saddr;
+ tm->tcpm_daddr = *daddr;
+
+- tcpm_suck_dst(tm, dst, true);
++ tcpm_suck_dst(tm, dst, reclaim);
+
+ if (likely(!reclaim)) {
+ tm->tcpm_next = tcp_metrics_hash[hash].chain;
+@@ -551,8 +554,6 @@ bool tcp_peer_is_proven(struct request_sock *req, struct dst_entry *dst)
+ return ret;
+ }
+
+-static DEFINE_SEQLOCK(fastopen_seqlock);
+-
+ void tcp_fastopen_cache_get(struct sock *sk, u16 *mss,
+ struct tcp_fastopen_cookie *cookie)
+ {
+--
+2.40.1
+
--- /dev/null
+From 17a3d2fbb253e6da25631ee9197e405aa671bbf4 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 1 Aug 2023 15:22:17 -0700
+Subject: word-at-a-time: use the same return type for has_zero regardless of
+ endianness
+
+From: ndesaulniers@google.com <ndesaulniers@google.com>
+
+[ Upstream commit 79e8328e5acbe691bbde029a52c89d70dcbc22f3 ]
+
+Compiling big-endian targets with Clang produces the diagnostic:
+
+ fs/namei.c:2173:13: warning: use of bitwise '|' with boolean operands [-Wbitwise-instead-of-logical]
+ } while (!(has_zero(a, &adata, &constants) | has_zero(b, &bdata, &constants)));
+ ~^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ ||
+ fs/namei.c:2173:13: note: cast one or both operands to int to silence this warning
+
+It appears that when has_zero was introduced, two definitions were
+produced with different signatures (in particular different return
+types).
+
+Looking at the usage in hash_name() in fs/namei.c, I suspect that
+has_zero() is meant to be invoked twice per while loop iteration; using
+logical-or would not update `bdata` when `a` did not have zeros. So I
+think it's preferable to always return an unsigned long rather than a
+bool, instead of updating the while loop in hash_name() to use a
+logical-or rather than a bitwise-or.
+
+[ Also changed powerpc version to do the same - Linus ]
+
+Link: https://github.com/ClangBuiltLinux/linux/issues/1832
+Link: https://lore.kernel.org/lkml/20230801-bitwise-v1-1-799bec468dc4@google.com/
+Fixes: 36126f8f2ed8 ("word-at-a-time: make the interfaces truly generic")
+Debugged-by: Nathan Chancellor <nathan@kernel.org>
+Signed-off-by: Nick Desaulniers <ndesaulniers@google.com>
+Acked-by: Heiko Carstens <hca@linux.ibm.com>
+Cc: Arnd Bergmann <arnd@arndb.de>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/powerpc/include/asm/word-at-a-time.h | 2 +-
+ include/asm-generic/word-at-a-time.h | 2 +-
+ 2 files changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/arch/powerpc/include/asm/word-at-a-time.h b/arch/powerpc/include/asm/word-at-a-time.h
+index f3f4710d4ff52..99129b0cd8b8a 100644
+--- a/arch/powerpc/include/asm/word-at-a-time.h
++++ b/arch/powerpc/include/asm/word-at-a-time.h
+@@ -34,7 +34,7 @@ static inline long find_zero(unsigned long mask)
+ return leading_zero_bits >> 3;
+ }
+
+-static inline bool has_zero(unsigned long val, unsigned long *data, const struct word_at_a_time *c)
++static inline unsigned long has_zero(unsigned long val, unsigned long *data, const struct word_at_a_time *c)
+ {
+ unsigned long rhs = val | c->low_bits;
+ *data = rhs;
+diff --git a/include/asm-generic/word-at-a-time.h b/include/asm-generic/word-at-a-time.h
+index 20c93f08c9933..95a1d214108a5 100644
+--- a/include/asm-generic/word-at-a-time.h
++++ b/include/asm-generic/word-at-a-time.h
+@@ -38,7 +38,7 @@ static inline long find_zero(unsigned long mask)
+ return (mask >> 8) ? byte : byte + 1;
+ }
+
+-static inline bool has_zero(unsigned long val, unsigned long *data, const struct word_at_a_time *c)
++static inline unsigned long has_zero(unsigned long val, unsigned long *data, const struct word_at_a_time *c)
+ {
+ unsigned long rhs = val | c->low_bits;
+ *data = rhs;
+--
+2.40.1
+