--- /dev/null
+From ba9e2ef98d7b71254487dcca7051b49ea764e93b Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 22 Jul 2022 13:07:11 +0200
+Subject: asm-generic: remove a broken and needless ifdef conditional
+
+From: Lukas Bulwahn <lukas.bulwahn@gmail.com>
+
+[ Upstream commit e2a619ca0b38f2114347b7078b8a67d72d457a3d ]
+
+Commit 527701eda5f1 ("lib: Add a generic version of devmem_is_allowed()")
+introduces the config symbol GENERIC_LIB_DEVMEM_IS_ALLOWED, but then
+falsely refers to CONFIG_GENERIC_DEVMEM_IS_ALLOWED (note the missing LIB
+in the reference) in ./include/asm-generic/io.h.
+
+Luckily, ./scripts/checkkconfigsymbols.py warns on non-existing configs:
+
+GENERIC_DEVMEM_IS_ALLOWED
+Referencing files: include/asm-generic/io.h
+
+The actual fix, though, is simply to not to make this function declaration
+dependent on any kernel config. For architectures that intend to use
+the generic version, the arch's 'select GENERIC_LIB_DEVMEM_IS_ALLOWED' will
+lead to picking the function definition, and for other architectures, this
+function is simply defined elsewhere.
+
+The wrong '#ifndef' on a non-existing config symbol also always had the
+same effect (although more by mistake than by intent). So, there is no
+functional change.
+
+Remove this broken and needless ifdef conditional.
+
+Fixes: 527701eda5f1 ("lib: Add a generic version of devmem_is_allowed()")
+Signed-off-by: Lukas Bulwahn <lukas.bulwahn@gmail.com>
+Signed-off-by: Arnd Bergmann <arnd@arndb.de>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/asm-generic/io.h | 2 --
+ 1 file changed, 2 deletions(-)
+
+diff --git a/include/asm-generic/io.h b/include/asm-generic/io.h
+index 7ce93aaf69f8..98954dda5734 100644
+--- a/include/asm-generic/io.h
++++ b/include/asm-generic/io.h
+@@ -1125,9 +1125,7 @@ static inline void memcpy_toio(volatile void __iomem *addr, const void *buffer,
+ }
+ #endif
+
+-#ifndef CONFIG_GENERIC_DEVMEM_IS_ALLOWED
+ extern int devmem_is_allowed(unsigned long pfn);
+-#endif
+
+ #endif /* __KERNEL__ */
+
+--
+2.35.1
+
--- /dev/null
+From 2dc82bcc5bb40f4cf6076f892ace8fdfa3041183 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 21 Jul 2022 10:35:46 -0400
+Subject: Documentation: fix sctp_wmem in ip-sysctl.rst
+
+From: Xin Long <lucien.xin@gmail.com>
+
+[ Upstream commit aa709da0e032cee7c202047ecd75f437bb0126ed ]
+
+Since commit 1033990ac5b2 ("sctp: implement memory accounting on tx path"),
+SCTP has supported memory accounting on tx path where 'sctp_wmem' is used
+by sk_wmem_schedule(). So we should fix the description for this option in
+ip-sysctl.rst accordingly.
+
+v1->v2:
+ - Improve the description as Marcelo suggested.
+
+Fixes: 1033990ac5b2 ("sctp: implement memory accounting on tx path")
+Signed-off-by: Xin Long <lucien.xin@gmail.com>
+Acked-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ Documentation/networking/ip-sysctl.rst | 9 ++++++++-
+ 1 file changed, 8 insertions(+), 1 deletion(-)
+
+diff --git a/Documentation/networking/ip-sysctl.rst b/Documentation/networking/ip-sysctl.rst
+index b8b67041f955..ba0e8e6337c0 100644
+--- a/Documentation/networking/ip-sysctl.rst
++++ b/Documentation/networking/ip-sysctl.rst
+@@ -2808,7 +2808,14 @@ sctp_rmem - vector of 3 INTEGERs: min, default, max
+ Default: 4K
+
+ sctp_wmem - vector of 3 INTEGERs: min, default, max
+- Currently this tunable has no effect.
++ Only the first value ("min") is used, "default" and "max" are
++ ignored.
++
++ min: Minimum size of send buffer that can be used by SCTP sockets.
++ It is guaranteed to each SCTP socket (but not association) even
++ under moderate memory pressure.
++
++ Default: 4K
+
+ addr_scope_policy - INTEGER
+ Control IPv4 address scoping - draft-stewart-tsvwg-sctp-ipv4-00
+--
+2.35.1
+
--- /dev/null
+From abb2ff0414e76999b74ab47eeba4bd461a796d5f Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 22 Jul 2022 10:54:01 -0700
+Subject: i40e: Fix interface init with MSI interrupts (no MSI-X)
+
+From: Michal Maloszewski <michal.maloszewski@intel.com>
+
+[ Upstream commit 5fcbb711024aac6d4db385623e6f2fdf019f7782 ]
+
+Fix the inability to bring an interface up on a setup with
+only MSI interrupts enabled (no MSI-X).
+Solution is to add a default number of QPs = 1. This is enough,
+since without MSI-X support driver enables only a basic feature set.
+
+Fixes: bc6d33c8d93f ("i40e: Fix the number of queues available to be mapped for use")
+Signed-off-by: Dawid Lukwinski <dawid.lukwinski@intel.com>
+Signed-off-by: Michal Maloszewski <michal.maloszewski@intel.com>
+Tested-by: Dave Switzer <david.switzer@intel.com>
+Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
+Link: https://lore.kernel.org/r/20220722175401.112572-1-anthony.l.nguyen@intel.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/intel/i40e/i40e_main.c | 4 ++++
+ 1 file changed, 4 insertions(+)
+
+diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
+index c801b128e5b2..b07d55c99317 100644
+--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
++++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
+@@ -1908,11 +1908,15 @@ static void i40e_vsi_setup_queue_map(struct i40e_vsi *vsi,
+ * non-zero req_queue_pairs says that user requested a new
+ * queue count via ethtool's set_channels, so use this
+ * value for queues distribution across traffic classes
++ * We need at least one queue pair for the interface
++ * to be usable as we see in else statement.
+ */
+ if (vsi->req_queue_pairs > 0)
+ vsi->num_queue_pairs = vsi->req_queue_pairs;
+ else if (pf->flags & I40E_FLAG_MSIX_ENABLED)
+ vsi->num_queue_pairs = pf->num_lan_msix;
++ else
++ vsi->num_queue_pairs = 1;
+ }
+
+ /* Number of queues per enabled TC */
+--
+2.35.1
+
--- /dev/null
+From 9df4f4593611ea640dfab3ff20f6ee5a5dd41485 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 7 Jul 2022 12:20:42 +0200
+Subject: ice: check (DD | EOF) bits on Rx descriptor rather than (EOP | RS)
+
+From: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
+
+[ Upstream commit 283d736ff7c7e96ac5b32c6c0de40372f8eb171e ]
+
+Tx side sets EOP and RS bits on descriptors to indicate that a
+particular descriptor is the last one and needs to generate an irq when
+it was sent. These bits should not be checked on completion path
+regardless whether it's the Tx or the Rx. DD bit serves this purpose and
+it indicates that a particular descriptor is either for Rx or was
+successfully Txed. EOF is also set as loopback test does not xmit
+fragmented frames.
+
+Look at (DD | EOF) bits setting in ice_lbtest_receive_frames() instead
+of EOP and RS pair.
+
+Fixes: 0e674aeb0b77 ("ice: Add handler for ethtool selftest")
+Signed-off-by: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
+Tested-by: George Kuruvinakunnel <george.kuruvinakunnel@intel.com>
+Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/intel/ice/ice_ethtool.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool.c b/drivers/net/ethernet/intel/ice/ice_ethtool.c
+index 982db894754f..9b9c2b885486 100644
+--- a/drivers/net/ethernet/intel/ice/ice_ethtool.c
++++ b/drivers/net/ethernet/intel/ice/ice_ethtool.c
+@@ -651,7 +651,8 @@ static int ice_lbtest_receive_frames(struct ice_ring *rx_ring)
+ rx_desc = ICE_RX_DESC(rx_ring, i);
+
+ if (!(rx_desc->wb.status_error0 &
+- cpu_to_le16(ICE_TX_DESC_CMD_EOP | ICE_TX_DESC_CMD_RS)))
++ (cpu_to_le16(BIT(ICE_RX_FLEX_DESC_STATUS0_DD_S)) |
++ cpu_to_le16(BIT(ICE_RX_FLEX_DESC_STATUS0_EOF_S)))))
+ continue;
+
+ rx_buf = &rx_ring->rx_buf[i];
+--
+2.35.1
+
--- /dev/null
+From ef5016a05107a9ae75d07a6689f3e249bcbf0772 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 7 Jul 2022 12:20:43 +0200
+Subject: ice: do not setup vlan for loopback VSI
+
+From: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
+
+[ Upstream commit cc019545a238518fa9da1e2a889f6e1bb1005a63 ]
+
+Currently loopback test is failing due to the error returned from
+ice_vsi_vlan_setup(). Skip calling it when preparing loopback VSI.
+
+Fixes: 0e674aeb0b77 ("ice: Add handler for ethtool selftest")
+Signed-off-by: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
+Tested-by: George Kuruvinakunnel <george.kuruvinakunnel@intel.com>
+Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/intel/ice/ice_main.c | 8 +++++---
+ 1 file changed, 5 insertions(+), 3 deletions(-)
+
+diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c
+index 188abf36a5b2..b9d45c7dbef1 100644
+--- a/drivers/net/ethernet/intel/ice/ice_main.c
++++ b/drivers/net/ethernet/intel/ice/ice_main.c
+@@ -5481,10 +5481,12 @@ int ice_vsi_cfg(struct ice_vsi *vsi)
+ if (vsi->netdev) {
+ ice_set_rx_mode(vsi->netdev);
+
+- err = ice_vsi_vlan_setup(vsi);
++ if (vsi->type != ICE_VSI_LB) {
++ err = ice_vsi_vlan_setup(vsi);
+
+- if (err)
+- return err;
++ if (err)
++ return err;
++ }
+ }
+ ice_vsi_cfg_dcb_rings(vsi);
+
+--
+2.35.1
+
--- /dev/null
+From 6876cbac8e50d133f62830c030fc2c5947133c45 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 15 Jul 2022 10:17:44 -0700
+Subject: igmp: Fix data-races around sysctl_igmp_qrv.
+
+From: Kuniyuki Iwashima <kuniyu@amazon.com>
+
+[ Upstream commit 8ebcc62c738f68688ee7c6fec2efe5bc6d3d7e60 ]
+
+While reading sysctl_igmp_qrv, it can be changed concurrently.
+Thus, we need to add READ_ONCE() to its readers.
+
+This test can be packed into a helper, so such changes will be in the
+follow-up series after net is merged into net-next.
+
+ qrv ?: READ_ONCE(net->ipv4.sysctl_igmp_qrv);
+
+Fixes: a9fe8e29945d ("ipv4: implement igmp_qrv sysctl to tune igmp robustness variable")
+Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/ipv4/igmp.c | 24 +++++++++++++-----------
+ 1 file changed, 13 insertions(+), 11 deletions(-)
+
+diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
+index 9f4674244aff..e07d10b2c486 100644
+--- a/net/ipv4/igmp.c
++++ b/net/ipv4/igmp.c
+@@ -827,7 +827,7 @@ static void igmp_ifc_event(struct in_device *in_dev)
+ struct net *net = dev_net(in_dev->dev);
+ if (IGMP_V1_SEEN(in_dev) || IGMP_V2_SEEN(in_dev))
+ return;
+- WRITE_ONCE(in_dev->mr_ifc_count, in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv);
++ WRITE_ONCE(in_dev->mr_ifc_count, in_dev->mr_qrv ?: READ_ONCE(net->ipv4.sysctl_igmp_qrv));
+ igmp_ifc_start_timer(in_dev, 1);
+ }
+
+@@ -1009,7 +1009,7 @@ static bool igmp_heard_query(struct in_device *in_dev, struct sk_buff *skb,
+ * received value was zero, use the default or statically
+ * configured value.
+ */
+- in_dev->mr_qrv = ih3->qrv ?: net->ipv4.sysctl_igmp_qrv;
++ in_dev->mr_qrv = ih3->qrv ?: READ_ONCE(net->ipv4.sysctl_igmp_qrv);
+ in_dev->mr_qi = IGMPV3_QQIC(ih3->qqic)*HZ ?: IGMP_QUERY_INTERVAL;
+
+ /* RFC3376, 8.3. Query Response Interval:
+@@ -1189,7 +1189,7 @@ static void igmpv3_add_delrec(struct in_device *in_dev, struct ip_mc_list *im,
+ pmc->interface = im->interface;
+ in_dev_hold(in_dev);
+ pmc->multiaddr = im->multiaddr;
+- pmc->crcount = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv;
++ pmc->crcount = in_dev->mr_qrv ?: READ_ONCE(net->ipv4.sysctl_igmp_qrv);
+ pmc->sfmode = im->sfmode;
+ if (pmc->sfmode == MCAST_INCLUDE) {
+ struct ip_sf_list *psf;
+@@ -1240,9 +1240,11 @@ static void igmpv3_del_delrec(struct in_device *in_dev, struct ip_mc_list *im)
+ swap(im->tomb, pmc->tomb);
+ swap(im->sources, pmc->sources);
+ for (psf = im->sources; psf; psf = psf->sf_next)
+- psf->sf_crcount = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv;
++ psf->sf_crcount = in_dev->mr_qrv ?:
++ READ_ONCE(net->ipv4.sysctl_igmp_qrv);
+ } else {
+- im->crcount = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv;
++ im->crcount = in_dev->mr_qrv ?:
++ READ_ONCE(net->ipv4.sysctl_igmp_qrv);
+ }
+ in_dev_put(pmc->interface);
+ kfree_pmc(pmc);
+@@ -1349,7 +1351,7 @@ static void igmp_group_added(struct ip_mc_list *im)
+ if (in_dev->dead)
+ return;
+
+- im->unsolicit_count = net->ipv4.sysctl_igmp_qrv;
++ im->unsolicit_count = READ_ONCE(net->ipv4.sysctl_igmp_qrv);
+ if (IGMP_V1_SEEN(in_dev) || IGMP_V2_SEEN(in_dev)) {
+ spin_lock_bh(&im->lock);
+ igmp_start_timer(im, IGMP_INITIAL_REPORT_DELAY);
+@@ -1363,7 +1365,7 @@ static void igmp_group_added(struct ip_mc_list *im)
+ * IN() to IN(A).
+ */
+ if (im->sfmode == MCAST_EXCLUDE)
+- im->crcount = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv;
++ im->crcount = in_dev->mr_qrv ?: READ_ONCE(net->ipv4.sysctl_igmp_qrv);
+
+ igmp_ifc_event(in_dev);
+ #endif
+@@ -1754,7 +1756,7 @@ static void ip_mc_reset(struct in_device *in_dev)
+
+ in_dev->mr_qi = IGMP_QUERY_INTERVAL;
+ in_dev->mr_qri = IGMP_QUERY_RESPONSE_INTERVAL;
+- in_dev->mr_qrv = net->ipv4.sysctl_igmp_qrv;
++ in_dev->mr_qrv = READ_ONCE(net->ipv4.sysctl_igmp_qrv);
+ }
+ #else
+ static void ip_mc_reset(struct in_device *in_dev)
+@@ -1888,7 +1890,7 @@ static int ip_mc_del1_src(struct ip_mc_list *pmc, int sfmode,
+ #ifdef CONFIG_IP_MULTICAST
+ if (psf->sf_oldin &&
+ !IGMP_V1_SEEN(in_dev) && !IGMP_V2_SEEN(in_dev)) {
+- psf->sf_crcount = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv;
++ psf->sf_crcount = in_dev->mr_qrv ?: READ_ONCE(net->ipv4.sysctl_igmp_qrv);
+ psf->sf_next = pmc->tomb;
+ pmc->tomb = psf;
+ rv = 1;
+@@ -1952,7 +1954,7 @@ static int ip_mc_del_src(struct in_device *in_dev, __be32 *pmca, int sfmode,
+ /* filter mode change */
+ pmc->sfmode = MCAST_INCLUDE;
+ #ifdef CONFIG_IP_MULTICAST
+- pmc->crcount = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv;
++ pmc->crcount = in_dev->mr_qrv ?: READ_ONCE(net->ipv4.sysctl_igmp_qrv);
+ WRITE_ONCE(in_dev->mr_ifc_count, pmc->crcount);
+ for (psf = pmc->sources; psf; psf = psf->sf_next)
+ psf->sf_crcount = 0;
+@@ -2131,7 +2133,7 @@ static int ip_mc_add_src(struct in_device *in_dev, __be32 *pmca, int sfmode,
+ #ifdef CONFIG_IP_MULTICAST
+ /* else no filters; keep old mode for reports */
+
+- pmc->crcount = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv;
++ pmc->crcount = in_dev->mr_qrv ?: READ_ONCE(net->ipv4.sysctl_igmp_qrv);
+ WRITE_ONCE(in_dev->mr_ifc_count, pmc->crcount);
+ for (psf = pmc->sources; psf; psf = psf->sf_next)
+ psf->sf_crcount = 0;
+--
+2.35.1
+
--- /dev/null
+From 9aa6f4cced2c928ce2f202bef7c6a419240f5e60 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 22 Jul 2022 11:22:05 -0700
+Subject: ipv4: Fix data-races around sysctl_fib_notify_on_flag_change.
+
+From: Kuniyuki Iwashima <kuniyu@amazon.com>
+
+[ Upstream commit 96b9bd8c6d125490f9adfb57d387ef81a55a103e ]
+
+While reading sysctl_fib_notify_on_flag_change, it can be changed
+concurrently. Thus, we need to add READ_ONCE() to its readers.
+
+Fixes: 680aea08e78c ("net: ipv4: Emit notification when fib hardware flags are changed")
+Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/ipv4/fib_trie.c | 7 +++++--
+ 1 file changed, 5 insertions(+), 2 deletions(-)
+
+diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
+index a9cd9c2bd84e..19c6e7b93d3d 100644
+--- a/net/ipv4/fib_trie.c
++++ b/net/ipv4/fib_trie.c
+@@ -1037,6 +1037,7 @@ fib_find_matching_alias(struct net *net, const struct fib_rt_info *fri)
+
+ void fib_alias_hw_flags_set(struct net *net, const struct fib_rt_info *fri)
+ {
++ u8 fib_notify_on_flag_change;
+ struct fib_alias *fa_match;
+ struct sk_buff *skb;
+ int err;
+@@ -1058,14 +1059,16 @@ void fib_alias_hw_flags_set(struct net *net, const struct fib_rt_info *fri)
+ WRITE_ONCE(fa_match->offload, fri->offload);
+ WRITE_ONCE(fa_match->trap, fri->trap);
+
++ fib_notify_on_flag_change = READ_ONCE(net->ipv4.sysctl_fib_notify_on_flag_change);
++
+ /* 2 means send notifications only if offload_failed was changed. */
+- if (net->ipv4.sysctl_fib_notify_on_flag_change == 2 &&
++ if (fib_notify_on_flag_change == 2 &&
+ READ_ONCE(fa_match->offload_failed) == fri->offload_failed)
+ goto out;
+
+ WRITE_ONCE(fa_match->offload_failed, fri->offload_failed);
+
+- if (!net->ipv4.sysctl_fib_notify_on_flag_change)
++ if (!fib_notify_on_flag_change)
+ goto out;
+
+ skb = nlmsg_new(fib_nlmsg_size(fa_match->fa_info), GFP_ATOMIC);
+--
+2.35.1
+
--- /dev/null
+From 71714997e0c6d4b2bbdcaae339b2498d17d1c689 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 28 Jul 2022 09:33:07 +0800
+Subject: ipv6/addrconf: fix a null-ptr-deref bug for ip6_ptr
+
+From: Ziyang Xuan <william.xuanziyang@huawei.com>
+
+[ Upstream commit 85f0173df35e5462d89947135a6a5599c6c3ef6f ]
+
+Change net device's MTU to smaller than IPV6_MIN_MTU or unregister
+device while matching route. That may trigger null-ptr-deref bug
+for ip6_ptr probability as following.
+
+=========================================================
+BUG: KASAN: null-ptr-deref in find_match.part.0+0x70/0x134
+Read of size 4 at addr 0000000000000308 by task ping6/263
+
+CPU: 2 PID: 263 Comm: ping6 Not tainted 5.19.0-rc7+ #14
+Call trace:
+ dump_backtrace+0x1a8/0x230
+ show_stack+0x20/0x70
+ dump_stack_lvl+0x68/0x84
+ print_report+0xc4/0x120
+ kasan_report+0x84/0x120
+ __asan_load4+0x94/0xd0
+ find_match.part.0+0x70/0x134
+ __find_rr_leaf+0x408/0x470
+ fib6_table_lookup+0x264/0x540
+ ip6_pol_route+0xf4/0x260
+ ip6_pol_route_output+0x58/0x70
+ fib6_rule_lookup+0x1a8/0x330
+ ip6_route_output_flags_noref+0xd8/0x1a0
+ ip6_route_output_flags+0x58/0x160
+ ip6_dst_lookup_tail+0x5b4/0x85c
+ ip6_dst_lookup_flow+0x98/0x120
+ rawv6_sendmsg+0x49c/0xc70
+ inet_sendmsg+0x68/0x94
+
+Reproducer as following:
+Firstly, prepare conditions:
+$ip netns add ns1
+$ip netns add ns2
+$ip link add veth1 type veth peer name veth2
+$ip link set veth1 netns ns1
+$ip link set veth2 netns ns2
+$ip netns exec ns1 ip -6 addr add 2001:0db8:0:f101::1/64 dev veth1
+$ip netns exec ns2 ip -6 addr add 2001:0db8:0:f101::2/64 dev veth2
+$ip netns exec ns1 ifconfig veth1 up
+$ip netns exec ns2 ifconfig veth2 up
+$ip netns exec ns1 ip -6 route add 2000::/64 dev veth1 metric 1
+$ip netns exec ns2 ip -6 route add 2001::/64 dev veth2 metric 1
+
+Secondly, execute the following two commands in two ssh windows
+respectively:
+$ip netns exec ns1 sh
+$while true; do ip -6 addr add 2001:0db8:0:f101::1/64 dev veth1; ip -6 route add 2000::/64 dev veth1 metric 1; ping6 2000::2; done
+
+$ip netns exec ns1 sh
+$while true; do ip link set veth1 mtu 1000; ip link set veth1 mtu 1500; sleep 5; done
+
+It is because ip6_ptr has been assigned to NULL in addrconf_ifdown() firstly,
+then ip6_ignore_linkdown() accesses ip6_ptr directly without NULL check.
+
+ cpu0 cpu1
+fib6_table_lookup
+__find_rr_leaf
+ addrconf_notify [ NETDEV_CHANGEMTU ]
+ addrconf_ifdown
+ RCU_INIT_POINTER(dev->ip6_ptr, NULL)
+find_match
+ip6_ignore_linkdown
+
+So we can add NULL check for ip6_ptr before using in ip6_ignore_linkdown() to
+fix the null-ptr-deref bug.
+
+Fixes: dcd1f572954f ("net/ipv6: Remove fib6_idev")
+Signed-off-by: Ziyang Xuan <william.xuanziyang@huawei.com>
+Reviewed-by: David Ahern <dsahern@kernel.org>
+Link: https://lore.kernel.org/r/20220728013307.656257-1-william.xuanziyang@huawei.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/net/addrconf.h | 3 +++
+ 1 file changed, 3 insertions(+)
+
+diff --git a/include/net/addrconf.h b/include/net/addrconf.h
+index 59940e230b78..53627afab104 100644
+--- a/include/net/addrconf.h
++++ b/include/net/addrconf.h
+@@ -403,6 +403,9 @@ static inline bool ip6_ignore_linkdown(const struct net_device *dev)
+ {
+ const struct inet6_dev *idev = __in6_dev_get(dev);
+
++ if (unlikely(!idev))
++ return true;
++
+ return !!idev->cnf.ignore_routes_with_linkdown;
+ }
+
+--
+2.35.1
+
--- /dev/null
+From 081e5abb4ae9e32d3d465ab28d17d9ae7a168959 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 22 Jul 2022 11:16:30 +0200
+Subject: macsec: always read MACSEC_SA_ATTR_PN as a u64
+
+From: Sabrina Dubroca <sd@queasysnail.net>
+
+[ Upstream commit c630d1fe6219769049c87d1a6a0e9a6de55328a1 ]
+
+Currently, MACSEC_SA_ATTR_PN is handled inconsistently, sometimes as a
+u32, sometimes forced into a u64 without checking the actual length of
+the attribute. Instead, we can use nla_get_u64 everywhere, which will
+read up to 64 bits into a u64, capped by the actual length of the
+attribute coming from userspace.
+
+This fixes several issues:
+ - the check in validate_add_rxsa doesn't work with 32-bit attributes
+ - the checks in validate_add_txsa and validate_upd_sa incorrectly
+ reject X << 32 (with X != 0)
+
+Fixes: 48ef50fa866a ("macsec: Netlink support of XPN cipher suites (IEEE 802.1AEbw)")
+Signed-off-by: Sabrina Dubroca <sd@queasysnail.net>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/macsec.c | 6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c
+index 1f2eb576533c..3e74dcc1f875 100644
+--- a/drivers/net/macsec.c
++++ b/drivers/net/macsec.c
+@@ -1696,7 +1696,7 @@ static bool validate_add_rxsa(struct nlattr **attrs)
+ return false;
+
+ if (attrs[MACSEC_SA_ATTR_PN] &&
+- *(u64 *)nla_data(attrs[MACSEC_SA_ATTR_PN]) == 0)
++ nla_get_u64(attrs[MACSEC_SA_ATTR_PN]) == 0)
+ return false;
+
+ if (attrs[MACSEC_SA_ATTR_ACTIVE]) {
+@@ -1939,7 +1939,7 @@ static bool validate_add_txsa(struct nlattr **attrs)
+ if (nla_get_u8(attrs[MACSEC_SA_ATTR_AN]) >= MACSEC_NUM_AN)
+ return false;
+
+- if (nla_get_u32(attrs[MACSEC_SA_ATTR_PN]) == 0)
++ if (nla_get_u64(attrs[MACSEC_SA_ATTR_PN]) == 0)
+ return false;
+
+ if (attrs[MACSEC_SA_ATTR_ACTIVE]) {
+@@ -2293,7 +2293,7 @@ static bool validate_upd_sa(struct nlattr **attrs)
+ if (nla_get_u8(attrs[MACSEC_SA_ATTR_AN]) >= MACSEC_NUM_AN)
+ return false;
+
+- if (attrs[MACSEC_SA_ATTR_PN] && nla_get_u32(attrs[MACSEC_SA_ATTR_PN]) == 0)
++ if (attrs[MACSEC_SA_ATTR_PN] && nla_get_u64(attrs[MACSEC_SA_ATTR_PN]) == 0)
+ return false;
+
+ if (attrs[MACSEC_SA_ATTR_ACTIVE]) {
+--
+2.35.1
+
--- /dev/null
+From 8539634486e6ae2111a44ae0f8fc36b4a1f84782 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 22 Jul 2022 11:16:28 +0200
+Subject: macsec: fix error message in macsec_add_rxsa and _txsa
+
+From: Sabrina Dubroca <sd@queasysnail.net>
+
+[ Upstream commit 3240eac4ff20e51b87600dbd586ed814daf313db ]
+
+The expected length is MACSEC_SALT_LEN, not MACSEC_SA_ATTR_SALT.
+
+Fixes: 48ef50fa866a ("macsec: Netlink support of XPN cipher suites (IEEE 802.1AEbw)")
+Signed-off-by: Sabrina Dubroca <sd@queasysnail.net>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/macsec.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c
+index f72d4380374d..9ede0d7cd0b5 100644
+--- a/drivers/net/macsec.c
++++ b/drivers/net/macsec.c
+@@ -1768,7 +1768,7 @@ static int macsec_add_rxsa(struct sk_buff *skb, struct genl_info *info)
+ if (nla_len(tb_sa[MACSEC_SA_ATTR_SALT]) != MACSEC_SALT_LEN) {
+ pr_notice("macsec: nl: add_rxsa: bad salt length: %d != %d\n",
+ nla_len(tb_sa[MACSEC_SA_ATTR_SALT]),
+- MACSEC_SA_ATTR_SALT);
++ MACSEC_SALT_LEN);
+ rtnl_unlock();
+ return -EINVAL;
+ }
+@@ -2010,7 +2010,7 @@ static int macsec_add_txsa(struct sk_buff *skb, struct genl_info *info)
+ if (nla_len(tb_sa[MACSEC_SA_ATTR_SALT]) != MACSEC_SALT_LEN) {
+ pr_notice("macsec: nl: add_txsa: bad salt length: %d != %d\n",
+ nla_len(tb_sa[MACSEC_SA_ATTR_SALT]),
+- MACSEC_SA_ATTR_SALT);
++ MACSEC_SALT_LEN);
+ rtnl_unlock();
+ return -EINVAL;
+ }
+--
+2.35.1
+
--- /dev/null
+From 414439ad3b6120d214261de3ba8fdcd99b3ee897 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 22 Jul 2022 11:16:27 +0200
+Subject: macsec: fix NULL deref in macsec_add_rxsa
+
+From: Sabrina Dubroca <sd@queasysnail.net>
+
+[ Upstream commit f46040eeaf2e523a4096199fd93a11e794818009 ]
+
+Commit 48ef50fa866a added a test on tb_sa[MACSEC_SA_ATTR_PN], but
+nothing guarantees that it's not NULL at this point. The same code was
+added to macsec_add_txsa, but there it's not a problem because
+validate_add_txsa checks that the MACSEC_SA_ATTR_PN attribute is
+present.
+
+Note: it's not possible to reproduce with iproute, because iproute
+doesn't allow creating an SA without specifying the PN.
+
+Fixes: 48ef50fa866a ("macsec: Netlink support of XPN cipher suites (IEEE 802.1AEbw)")
+Link: https://bugzilla.kernel.org/show_bug.cgi?id=208315
+Reported-by: Frantisek Sumsal <fsumsal@redhat.com>
+Signed-off-by: Sabrina Dubroca <sd@queasysnail.net>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/macsec.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c
+index e53b40359fd1..f72d4380374d 100644
+--- a/drivers/net/macsec.c
++++ b/drivers/net/macsec.c
+@@ -1751,7 +1751,8 @@ static int macsec_add_rxsa(struct sk_buff *skb, struct genl_info *info)
+ }
+
+ pn_len = secy->xpn ? MACSEC_XPN_PN_LEN : MACSEC_DEFAULT_PN_LEN;
+- if (nla_len(tb_sa[MACSEC_SA_ATTR_PN]) != pn_len) {
++ if (tb_sa[MACSEC_SA_ATTR_PN] &&
++ nla_len(tb_sa[MACSEC_SA_ATTR_PN]) != pn_len) {
+ pr_notice("macsec: nl: add_rxsa: bad pn length: %d != %d\n",
+ nla_len(tb_sa[MACSEC_SA_ATTR_PN]), pn_len);
+ rtnl_unlock();
+--
+2.35.1
+
--- /dev/null
+From 25efaa6a023c1491338eb330272d4839419e2058 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 22 Jul 2022 11:16:29 +0200
+Subject: macsec: limit replay window size with XPN
+
+From: Sabrina Dubroca <sd@queasysnail.net>
+
+[ Upstream commit b07a0e2044057f201d694ab474f5c42a02b6465b ]
+
+IEEE 802.1AEbw-2013 (section 10.7.8) specifies that the maximum value
+of the replay window is 2^30-1, to help with recovery of the upper
+bits of the PN.
+
+To avoid leaving the existing macsec device in an inconsistent state
+if this test fails during changelink, reuse the cleanup mechanism
+introduced for HW offload. This wasn't needed until now because
+macsec_changelink_common could not fail during changelink, as
+modifying the cipher suite was not allowed.
+
+Finally, this must happen after handling IFLA_MACSEC_CIPHER_SUITE so
+that secy->xpn is set.
+
+Fixes: 48ef50fa866a ("macsec: Netlink support of XPN cipher suites (IEEE 802.1AEbw)")
+Signed-off-by: Sabrina Dubroca <sd@queasysnail.net>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/macsec.c | 16 ++++++++++++----
+ 1 file changed, 12 insertions(+), 4 deletions(-)
+
+diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c
+index 9ede0d7cd0b5..1f2eb576533c 100644
+--- a/drivers/net/macsec.c
++++ b/drivers/net/macsec.c
+@@ -241,6 +241,7 @@ static struct macsec_cb *macsec_skb_cb(struct sk_buff *skb)
+ #define DEFAULT_SEND_SCI true
+ #define DEFAULT_ENCRYPT false
+ #define DEFAULT_ENCODING_SA 0
++#define MACSEC_XPN_MAX_REPLAY_WINDOW (((1 << 30) - 1))
+
+ static bool send_sci(const struct macsec_secy *secy)
+ {
+@@ -3739,9 +3740,6 @@ static int macsec_changelink_common(struct net_device *dev,
+ secy->operational = tx_sa && tx_sa->active;
+ }
+
+- if (data[IFLA_MACSEC_WINDOW])
+- secy->replay_window = nla_get_u32(data[IFLA_MACSEC_WINDOW]);
+-
+ if (data[IFLA_MACSEC_ENCRYPT])
+ tx_sc->encrypt = !!nla_get_u8(data[IFLA_MACSEC_ENCRYPT]);
+
+@@ -3787,6 +3785,16 @@ static int macsec_changelink_common(struct net_device *dev,
+ }
+ }
+
++ if (data[IFLA_MACSEC_WINDOW]) {
++ secy->replay_window = nla_get_u32(data[IFLA_MACSEC_WINDOW]);
++
++ /* IEEE 802.1AEbw-2013 10.7.8 - maximum replay window
++ * for XPN cipher suites */
++ if (secy->xpn &&
++ secy->replay_window > MACSEC_XPN_MAX_REPLAY_WINDOW)
++ return -EINVAL;
++ }
++
+ return 0;
+ }
+
+@@ -3816,7 +3824,7 @@ static int macsec_changelink(struct net_device *dev, struct nlattr *tb[],
+
+ ret = macsec_changelink_common(dev, data);
+ if (ret)
+- return ret;
++ goto cleanup;
+
+ /* If h/w offloading is available, propagate to the device */
+ if (macsec_is_offloaded(macsec)) {
+--
+2.35.1
+
--- /dev/null
+From b5fa6dd358db1cb266dc248d241e66bc021864e0 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 22 Jul 2022 11:22:00 -0700
+Subject: net: Fix data-races around sysctl_[rw]mem(_offset)?.
+
+From: Kuniyuki Iwashima <kuniyu@amazon.com>
+
+[ Upstream commit 02739545951ad4c1215160db7fbf9b7a918d3c0b ]
+
+While reading these sysctl variables, they can be changed concurrently.
+Thus, we need to add READ_ONCE() to their readers.
+
+ - .sysctl_rmem
+ - .sysctl_rwmem
+ - .sysctl_rmem_offset
+ - .sysctl_wmem_offset
+ - sysctl_tcp_rmem[1, 2]
+ - sysctl_tcp_wmem[1, 2]
+ - sysctl_decnet_rmem[1]
+ - sysctl_decnet_wmem[1]
+ - sysctl_tipc_rmem[1]
+
+Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
+Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/net/sock.h | 8 ++++----
+ net/decnet/af_decnet.c | 4 ++--
+ net/ipv4/tcp.c | 6 +++---
+ net/ipv4/tcp_input.c | 13 +++++++------
+ net/ipv4/tcp_output.c | 2 +-
+ net/mptcp/protocol.c | 6 +++---
+ net/tipc/socket.c | 2 +-
+ 7 files changed, 21 insertions(+), 20 deletions(-)
+
+diff --git a/include/net/sock.h b/include/net/sock.h
+index 96f51d4b1649..819c53965ef3 100644
+--- a/include/net/sock.h
++++ b/include/net/sock.h
+@@ -2765,18 +2765,18 @@ static inline int sk_get_wmem0(const struct sock *sk, const struct proto *proto)
+ {
+ /* Does this proto have per netns sysctl_wmem ? */
+ if (proto->sysctl_wmem_offset)
+- return *(int *)((void *)sock_net(sk) + proto->sysctl_wmem_offset);
++ return READ_ONCE(*(int *)((void *)sock_net(sk) + proto->sysctl_wmem_offset));
+
+- return *proto->sysctl_wmem;
++ return READ_ONCE(*proto->sysctl_wmem);
+ }
+
+ static inline int sk_get_rmem0(const struct sock *sk, const struct proto *proto)
+ {
+ /* Does this proto have per netns sysctl_rmem ? */
+ if (proto->sysctl_rmem_offset)
+- return *(int *)((void *)sock_net(sk) + proto->sysctl_rmem_offset);
++ return READ_ONCE(*(int *)((void *)sock_net(sk) + proto->sysctl_rmem_offset));
+
+- return *proto->sysctl_rmem;
++ return READ_ONCE(*proto->sysctl_rmem);
+ }
+
+ /* Default TCP Small queue budget is ~1 ms of data (1sec >> 10)
+diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c
+index dc92a67baea3..7d542eb46172 100644
+--- a/net/decnet/af_decnet.c
++++ b/net/decnet/af_decnet.c
+@@ -480,8 +480,8 @@ static struct sock *dn_alloc_sock(struct net *net, struct socket *sock, gfp_t gf
+ sk->sk_family = PF_DECnet;
+ sk->sk_protocol = 0;
+ sk->sk_allocation = gfp;
+- sk->sk_sndbuf = sysctl_decnet_wmem[1];
+- sk->sk_rcvbuf = sysctl_decnet_rmem[1];
++ sk->sk_sndbuf = READ_ONCE(sysctl_decnet_wmem[1]);
++ sk->sk_rcvbuf = READ_ONCE(sysctl_decnet_rmem[1]);
+
+ /* Initialization of DECnet Session Control Port */
+ scp = DN_SK(sk);
+diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
+index 7ba9059c263a..2097eeaf30a6 100644
+--- a/net/ipv4/tcp.c
++++ b/net/ipv4/tcp.c
+@@ -458,8 +458,8 @@ void tcp_init_sock(struct sock *sk)
+
+ icsk->icsk_sync_mss = tcp_sync_mss;
+
+- WRITE_ONCE(sk->sk_sndbuf, sock_net(sk)->ipv4.sysctl_tcp_wmem[1]);
+- WRITE_ONCE(sk->sk_rcvbuf, sock_net(sk)->ipv4.sysctl_tcp_rmem[1]);
++ WRITE_ONCE(sk->sk_sndbuf, READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_wmem[1]));
++ WRITE_ONCE(sk->sk_rcvbuf, READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_rmem[1]));
+
+ sk_sockets_allocated_inc(sk);
+ sk->sk_route_forced_caps = NETIF_F_GSO;
+@@ -1722,7 +1722,7 @@ int tcp_set_rcvlowat(struct sock *sk, int val)
+ if (sk->sk_userlocks & SOCK_RCVBUF_LOCK)
+ cap = sk->sk_rcvbuf >> 1;
+ else
+- cap = sock_net(sk)->ipv4.sysctl_tcp_rmem[2] >> 1;
++ cap = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_rmem[2]) >> 1;
+ val = min(val, cap);
+ WRITE_ONCE(sk->sk_rcvlowat, val ? : 1);
+
+diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
+index 566745f527fe..e007bdc20e82 100644
+--- a/net/ipv4/tcp_input.c
++++ b/net/ipv4/tcp_input.c
+@@ -426,7 +426,7 @@ static void tcp_sndbuf_expand(struct sock *sk)
+
+ if (sk->sk_sndbuf < sndmem)
+ WRITE_ONCE(sk->sk_sndbuf,
+- min(sndmem, sock_net(sk)->ipv4.sysctl_tcp_wmem[2]));
++ min(sndmem, READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_wmem[2])));
+ }
+
+ /* 2. Tuning advertised window (window_clamp, rcv_ssthresh)
+@@ -461,7 +461,7 @@ static int __tcp_grow_window(const struct sock *sk, const struct sk_buff *skb,
+ struct tcp_sock *tp = tcp_sk(sk);
+ /* Optimize this! */
+ int truesize = tcp_win_from_space(sk, skbtruesize) >> 1;
+- int window = tcp_win_from_space(sk, sock_net(sk)->ipv4.sysctl_tcp_rmem[2]) >> 1;
++ int window = tcp_win_from_space(sk, READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_rmem[2])) >> 1;
+
+ while (tp->rcv_ssthresh <= window) {
+ if (truesize <= skb->len)
+@@ -566,16 +566,17 @@ static void tcp_clamp_window(struct sock *sk)
+ struct tcp_sock *tp = tcp_sk(sk);
+ struct inet_connection_sock *icsk = inet_csk(sk);
+ struct net *net = sock_net(sk);
++ int rmem2;
+
+ icsk->icsk_ack.quick = 0;
++ rmem2 = READ_ONCE(net->ipv4.sysctl_tcp_rmem[2]);
+
+- if (sk->sk_rcvbuf < net->ipv4.sysctl_tcp_rmem[2] &&
++ if (sk->sk_rcvbuf < rmem2 &&
+ !(sk->sk_userlocks & SOCK_RCVBUF_LOCK) &&
+ !tcp_under_memory_pressure(sk) &&
+ sk_memory_allocated(sk) < sk_prot_mem_limits(sk, 0)) {
+ WRITE_ONCE(sk->sk_rcvbuf,
+- min(atomic_read(&sk->sk_rmem_alloc),
+- net->ipv4.sysctl_tcp_rmem[2]));
++ min(atomic_read(&sk->sk_rmem_alloc), rmem2));
+ }
+ if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf)
+ tp->rcv_ssthresh = min(tp->window_clamp, 2U * tp->advmss);
+@@ -737,7 +738,7 @@ void tcp_rcv_space_adjust(struct sock *sk)
+
+ do_div(rcvwin, tp->advmss);
+ rcvbuf = min_t(u64, rcvwin * rcvmem,
+- sock_net(sk)->ipv4.sysctl_tcp_rmem[2]);
++ READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_rmem[2]));
+ if (rcvbuf > sk->sk_rcvbuf) {
+ WRITE_ONCE(sk->sk_rcvbuf, rcvbuf);
+
+diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
+index 51f31311fdb6..9c9a0f7a3dee 100644
+--- a/net/ipv4/tcp_output.c
++++ b/net/ipv4/tcp_output.c
+@@ -238,7 +238,7 @@ void tcp_select_initial_window(const struct sock *sk, int __space, __u32 mss,
+ *rcv_wscale = 0;
+ if (wscale_ok) {
+ /* Set window scaling on max possible window */
+- space = max_t(u32, space, sock_net(sk)->ipv4.sysctl_tcp_rmem[2]);
++ space = max_t(u32, space, READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_rmem[2]));
+ space = max_t(u32, space, sysctl_rmem_max);
+ space = min_t(u32, space, *window_clamp);
+ *rcv_wscale = clamp_t(int, ilog2(space) - 15,
+diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
+index 01ede89e3c46..7f96e0c42a09 100644
+--- a/net/mptcp/protocol.c
++++ b/net/mptcp/protocol.c
+@@ -1899,7 +1899,7 @@ static void mptcp_rcv_space_adjust(struct mptcp_sock *msk, int copied)
+
+ do_div(rcvwin, advmss);
+ rcvbuf = min_t(u64, rcvwin * rcvmem,
+- sock_net(sk)->ipv4.sysctl_tcp_rmem[2]);
++ READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_rmem[2]));
+
+ if (rcvbuf > sk->sk_rcvbuf) {
+ u32 window_clamp;
+@@ -2532,8 +2532,8 @@ static int mptcp_init_sock(struct sock *sk)
+ icsk->icsk_ca_ops = NULL;
+
+ sk_sockets_allocated_inc(sk);
+- sk->sk_rcvbuf = sock_net(sk)->ipv4.sysctl_tcp_rmem[1];
+- sk->sk_sndbuf = sock_net(sk)->ipv4.sysctl_tcp_wmem[1];
++ sk->sk_rcvbuf = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_rmem[1]);
++ sk->sk_sndbuf = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_wmem[1]);
+
+ return 0;
+ }
+diff --git a/net/tipc/socket.c b/net/tipc/socket.c
+index 43509c7e90fc..f1c3b8eb4b3d 100644
+--- a/net/tipc/socket.c
++++ b/net/tipc/socket.c
+@@ -517,7 +517,7 @@ static int tipc_sk_create(struct net *net, struct socket *sock,
+ timer_setup(&sk->sk_timer, tipc_sk_timeout, 0);
+ sk->sk_shutdown = 0;
+ sk->sk_backlog_rcv = tipc_sk_backlog_rcv;
+- sk->sk_rcvbuf = sysctl_tipc_rmem[1];
++ sk->sk_rcvbuf = READ_ONCE(sysctl_tipc_rmem[1]);
+ sk->sk_data_ready = tipc_data_ready;
+ sk->sk_write_space = tipc_write_space;
+ sk->sk_destruct = tipc_sock_destruct;
+--
+2.35.1
+
--- /dev/null
+From f1ecee4a3ddd64f73a30dc33bf38ec8ea0ecbd2a Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 22 Jul 2022 17:29:02 +0800
+Subject: net: macsec: fix potential resource leak in macsec_add_rxsa() and
+ macsec_add_txsa()
+
+From: Jianglei Nie <niejianglei2021@163.com>
+
+[ Upstream commit c7b205fbbf3cffa374721bb7623f7aa8c46074f1 ]
+
+init_rx_sa() allocates relevant resource for rx_sa->stats and rx_sa->
+key.tfm with alloc_percpu() and macsec_alloc_tfm(). When some error
+occurs after init_rx_sa() is called in macsec_add_rxsa(), the function
+released rx_sa with kfree() without releasing rx_sa->stats and rx_sa->
+key.tfm, which will lead to a resource leak.
+
+We should call macsec_rxsa_put() instead of kfree() to decrease the ref
+count of rx_sa and release the relevant resource if the refcount is 0.
+The same bug exists in macsec_add_txsa() for tx_sa as well. This patch
+fixes the above two bugs.
+
+Fixes: 3cf3227a21d1 ("net: macsec: hardware offloading infrastructure")
+Signed-off-by: Jianglei Nie <niejianglei2021@163.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/macsec.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c
+index 3e74dcc1f875..354890948f8a 100644
+--- a/drivers/net/macsec.c
++++ b/drivers/net/macsec.c
+@@ -1842,7 +1842,7 @@ static int macsec_add_rxsa(struct sk_buff *skb, struct genl_info *info)
+ return 0;
+
+ cleanup:
+- kfree(rx_sa);
++ macsec_rxsa_put(rx_sa);
+ rtnl_unlock();
+ return err;
+ }
+@@ -2085,7 +2085,7 @@ static int macsec_add_txsa(struct sk_buff *skb, struct genl_info *info)
+
+ cleanup:
+ secy->operational = was_operational;
+- kfree(tx_sa);
++ macsec_txsa_put(tx_sa);
+ rtnl_unlock();
+ return err;
+ }
+--
+2.35.1
+
--- /dev/null
+From 7a044e3d41151e64b7892601fd0f0c4a7477df81 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 22 Jul 2022 17:06:35 +0000
+Subject: net: mld: fix reference count leak in mld_{query | report}_work()
+
+From: Taehee Yoo <ap420073@gmail.com>
+
+[ Upstream commit 3e7d18b9dca388940a19cae30bfc1f76dccd8c28 ]
+
+mld_{query | report}_work() processes queued events.
+If there are too many events in the queue, it re-queue a work.
+And then, it returns without in6_dev_put().
+But if queuing is failed, it should call in6_dev_put(), but it doesn't.
+So, a reference count leak would occur.
+
+THREAD0 THREAD1
+mld_report_work()
+ spin_lock_bh()
+ if (!mod_delayed_work())
+ in6_dev_hold();
+ spin_unlock_bh()
+ spin_lock_bh()
+ schedule_delayed_work()
+ spin_unlock_bh()
+
+Script to reproduce(by Hangbin Liu):
+ ip netns add ns1
+ ip netns add ns2
+ ip netns exec ns1 sysctl -w net.ipv6.conf.all.force_mld_version=1
+ ip netns exec ns2 sysctl -w net.ipv6.conf.all.force_mld_version=1
+
+ ip -n ns1 link add veth0 type veth peer name veth0 netns ns2
+ ip -n ns1 link set veth0 up
+ ip -n ns2 link set veth0 up
+
+ for i in `seq 50`; do
+ for j in `seq 100`; do
+ ip -n ns1 addr add 2021:${i}::${j}/64 dev veth0
+ ip -n ns2 addr add 2022:${i}::${j}/64 dev veth0
+ done
+ done
+ modprobe -r veth
+ ip -a netns del
+
+splat looks like:
+ unregister_netdevice: waiting for veth0 to become free. Usage count = 2
+ leaked reference.
+ ipv6_add_dev+0x324/0xec0
+ addrconf_notify+0x481/0xd10
+ raw_notifier_call_chain+0xe3/0x120
+ call_netdevice_notifiers+0x106/0x160
+ register_netdevice+0x114c/0x16b0
+ veth_newlink+0x48b/0xa50 [veth]
+ rtnl_newlink+0x11a2/0x1a40
+ rtnetlink_rcv_msg+0x63f/0xc00
+ netlink_rcv_skb+0x1df/0x3e0
+ netlink_unicast+0x5de/0x850
+ netlink_sendmsg+0x6c9/0xa90
+ ____sys_sendmsg+0x76a/0x780
+ __sys_sendmsg+0x27c/0x340
+ do_syscall_64+0x43/0x90
+ entry_SYSCALL_64_after_hwframe+0x63/0xcd
+
+Tested-by: Hangbin Liu <liuhangbin@gmail.com>
+Fixes: f185de28d9ae ("mld: add new workqueues for process mld events")
+Signed-off-by: Taehee Yoo <ap420073@gmail.com>
+Reviewed-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/ipv6/mcast.c | 14 ++++++++------
+ 1 file changed, 8 insertions(+), 6 deletions(-)
+
+diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
+index 7f695c39d9a8..87c699d57b36 100644
+--- a/net/ipv6/mcast.c
++++ b/net/ipv6/mcast.c
+@@ -1522,7 +1522,6 @@ static void mld_query_work(struct work_struct *work)
+
+ if (++cnt >= MLD_MAX_QUEUE) {
+ rework = true;
+- schedule_delayed_work(&idev->mc_query_work, 0);
+ break;
+ }
+ }
+@@ -1533,8 +1532,10 @@ static void mld_query_work(struct work_struct *work)
+ __mld_query_work(skb);
+ mutex_unlock(&idev->mc_lock);
+
+- if (!rework)
+- in6_dev_put(idev);
++ if (rework && queue_delayed_work(mld_wq, &idev->mc_query_work, 0))
++ return;
++
++ in6_dev_put(idev);
+ }
+
+ /* called with rcu_read_lock() */
+@@ -1624,7 +1625,6 @@ static void mld_report_work(struct work_struct *work)
+
+ if (++cnt >= MLD_MAX_QUEUE) {
+ rework = true;
+- schedule_delayed_work(&idev->mc_report_work, 0);
+ break;
+ }
+ }
+@@ -1635,8 +1635,10 @@ static void mld_report_work(struct work_struct *work)
+ __mld_report_work(skb);
+ mutex_unlock(&idev->mc_lock);
+
+- if (!rework)
+- in6_dev_put(idev);
++ if (rework && queue_delayed_work(mld_wq, &idev->mc_report_work, 0))
++ return;
++
++ in6_dev_put(idev);
+ }
+
+ static bool is_in(struct ifmcaddr6 *pmc, struct ip6_sf_list *psf, int type,
+--
+2.35.1
+
--- /dev/null
+From f5e90e5ec1e647d90d283e99ba9e3a2f84f312d5 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 20 Jul 2022 14:20:57 +0300
+Subject: net: pcs: xpcs: propagate xpcs_read error to xpcs_get_state_c37_sgmii
+
+From: Vladimir Oltean <vladimir.oltean@nxp.com>
+
+[ Upstream commit 27161db0904ee48e59140aa8d0835939a666c1f1 ]
+
+While phylink_pcs_ops :: pcs_get_state does return void, xpcs_get_state()
+does check for a non-zero return code from xpcs_get_state_c37_sgmii()
+and prints that as a message to the kernel log.
+
+However, a non-zero return code from xpcs_read() is translated into
+"return false" (i.e. zero as int) and the I/O error is therefore not
+printed. Fix that.
+
+Fixes: b97b5331b8ab ("net: pcs: add C37 SGMII AN support for intel mGbE controller")
+Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
+Link: https://lore.kernel.org/r/20220720112057.3504398-1-vladimir.oltean@nxp.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/pcs/pcs-xpcs.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/drivers/net/pcs/pcs-xpcs.c b/drivers/net/pcs/pcs-xpcs.c
+index 7de631f5356f..fd4cbf8a55ad 100644
+--- a/drivers/net/pcs/pcs-xpcs.c
++++ b/drivers/net/pcs/pcs-xpcs.c
+@@ -890,7 +890,7 @@ static int xpcs_get_state_c37_sgmii(struct dw_xpcs *xpcs,
+ */
+ ret = xpcs_read(xpcs, MDIO_MMD_VEND2, DW_VR_MII_AN_INTR_STS);
+ if (ret < 0)
+- return false;
++ return ret;
+
+ if (ret & DW_VR_MII_C37_ANSGM_SP_LNKSTS) {
+ int speed_value;
+--
+2.35.1
+
--- /dev/null
+From 85b30d6c4b4c5891b9c1df8e41f2e0e08f4360f8 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 27 Jul 2022 18:22:20 -0700
+Subject: net: ping6: Fix memleak in ipv6_renew_options().
+
+From: Kuniyuki Iwashima <kuniyu@amazon.com>
+
+[ Upstream commit e27326009a3d247b831eda38878c777f6f4eb3d1 ]
+
+When we close ping6 sockets, some resources are left unfreed because
+pingv6_prot is missing sk->sk_prot->destroy(). As reported by
+syzbot [0], just three syscalls leak 96 bytes and easily cause OOM.
+
+ struct ipv6_sr_hdr *hdr;
+ char data[24] = {0};
+ int fd;
+
+ hdr = (struct ipv6_sr_hdr *)data;
+ hdr->hdrlen = 2;
+ hdr->type = IPV6_SRCRT_TYPE_4;
+
+ fd = socket(AF_INET6, SOCK_DGRAM, NEXTHDR_ICMP);
+ setsockopt(fd, IPPROTO_IPV6, IPV6_RTHDR, data, 24);
+ close(fd);
+
+To fix memory leaks, let's add a destroy function.
+
+Note the socket() syscall checks if the GID is within the range of
+net.ipv4.ping_group_range. The default value is [1, 0] so that no
+GID meets the condition (1 <= GID <= 0). Thus, the local DoS does
+not succeed until we change the default value. However, at least
+Ubuntu/Fedora/RHEL loosen it.
+
+ $ cat /usr/lib/sysctl.d/50-default.conf
+ ...
+ -net.ipv4.ping_group_range = 0 2147483647
+
+Also, there could be another path reported with these options, and
+some of them require CAP_NET_RAW.
+
+ setsockopt
+ IPV6_ADDRFORM (inet6_sk(sk)->pktoptions)
+ IPV6_RECVPATHMTU (inet6_sk(sk)->rxpmtu)
+ IPV6_HOPOPTS (inet6_sk(sk)->opt)
+ IPV6_RTHDRDSTOPTS (inet6_sk(sk)->opt)
+ IPV6_RTHDR (inet6_sk(sk)->opt)
+ IPV6_DSTOPTS (inet6_sk(sk)->opt)
+ IPV6_2292PKTOPTIONS (inet6_sk(sk)->opt)
+
+ getsockopt
+ IPV6_FLOWLABEL_MGR (inet6_sk(sk)->ipv6_fl_list)
+
+For the record, I left a different splat with syzbot's one.
+
+ unreferenced object 0xffff888006270c60 (size 96):
+ comm "repro2", pid 231, jiffies 4294696626 (age 13.118s)
+ hex dump (first 32 bytes):
+ 01 00 00 00 44 00 00 00 00 00 00 00 00 00 00 00 ....D...........
+ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................
+ backtrace:
+ [<00000000f6bc7ea9>] sock_kmalloc (net/core/sock.c:2564 net/core/sock.c:2554)
+ [<000000006d699550>] do_ipv6_setsockopt.constprop.0 (net/ipv6/ipv6_sockglue.c:715)
+ [<00000000c3c3b1f5>] ipv6_setsockopt (net/ipv6/ipv6_sockglue.c:1024)
+ [<000000007096a025>] __sys_setsockopt (net/socket.c:2254)
+ [<000000003a8ff47b>] __x64_sys_setsockopt (net/socket.c:2265 net/socket.c:2262 net/socket.c:2262)
+ [<000000007c409dcb>] do_syscall_64 (arch/x86/entry/common.c:50 arch/x86/entry/common.c:80)
+ [<00000000e939c4a9>] entry_SYSCALL_64_after_hwframe (arch/x86/entry/entry_64.S:120)
+
+[0]: https://syzkaller.appspot.com/bug?extid=a8430774139ec3ab7176
+
+Fixes: 6d0bfe226116 ("net: ipv6: Add IPv6 support to the ping socket.")
+Reported-by: syzbot+a8430774139ec3ab7176@syzkaller.appspotmail.com
+Reported-by: Ayushman Dutta <ayudutta@amazon.com>
+Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
+Reviewed-by: David Ahern <dsahern@kernel.org>
+Reviewed-by: Eric Dumazet <edumazet@google.com>
+Link: https://lore.kernel.org/r/20220728012220.46918-1-kuniyu@amazon.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/ipv6/ping.c | 6 ++++++
+ 1 file changed, 6 insertions(+)
+
+diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c
+index 6ac88fe24a8e..135e3a060caa 100644
+--- a/net/ipv6/ping.c
++++ b/net/ipv6/ping.c
+@@ -22,6 +22,11 @@
+ #include <linux/proc_fs.h>
+ #include <net/ping.h>
+
++static void ping_v6_destroy(struct sock *sk)
++{
++ inet6_destroy_sock(sk);
++}
++
+ /* Compatibility glue so we can support IPv6 when it's compiled as a module */
+ static int dummy_ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len,
+ int *addr_len)
+@@ -166,6 +171,7 @@ struct proto pingv6_prot = {
+ .owner = THIS_MODULE,
+ .init = ping_init_sock,
+ .close = ping_close,
++ .destroy = ping_v6_destroy,
+ .connect = ip6_datagram_connect_v6_only,
+ .disconnect = __udp_disconnect,
+ .setsockopt = ipv6_setsockopt,
+--
+2.35.1
+
--- /dev/null
+From b986ef8c75eb0129ac75c5196992a37541149183 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 20 Jul 2022 21:10:03 +0800
+Subject: net: sungem_phy: Add of_node_put() for reference returned by
+ of_get_parent()
+
+From: Liang He <windhl@126.com>
+
+[ Upstream commit ebbbe23fdf6070e31509638df3321688358cc211 ]
+
+In bcm5421_init(), we should call of_node_put() for the reference
+returned by of_get_parent() which has increased the refcount.
+
+Fixes: 3c326fe9cb7a ("[PATCH] ppc64: Add new PHY to sungem")
+Signed-off-by: Liang He <windhl@126.com>
+Link: https://lore.kernel.org/r/20220720131003.1287426-1-windhl@126.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/sungem_phy.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/drivers/net/sungem_phy.c b/drivers/net/sungem_phy.c
+index 291fa449993f..45f295403cb5 100644
+--- a/drivers/net/sungem_phy.c
++++ b/drivers/net/sungem_phy.c
+@@ -454,6 +454,7 @@ static int bcm5421_init(struct mii_phy* phy)
+ int can_low_power = 1;
+ if (np == NULL || of_get_property(np, "no-autolowpower", NULL))
+ can_low_power = 0;
++ of_node_put(np);
+ if (can_low_power) {
+ /* Enable automatic low-power */
+ sungem_phy_write(phy, 0x1c, 0x9002);
+--
+2.35.1
+
--- /dev/null
+From e1bf6422281150d1b6fb11cbaeb4d17c644404c8 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 21 Jul 2022 12:11:27 +0300
+Subject: net/tls: Remove the context from the list in tls_device_down
+
+From: Maxim Mikityanskiy <maximmi@nvidia.com>
+
+[ Upstream commit f6336724a4d4220c89a4ec38bca84b03b178b1a3 ]
+
+tls_device_down takes a reference on all contexts it's going to move to
+the degraded state (software fallback). If sk_destruct runs afterwards,
+it can reduce the reference counter back to 1 and return early without
+destroying the context. Then tls_device_down will release the reference
+it took and call tls_device_free_ctx. However, the context will still
+stay in tls_device_down_list forever. The list will contain an item,
+memory for which is released, making a memory corruption possible.
+
+Fix the above bug by properly removing the context from all lists before
+any call to tls_device_free_ctx.
+
+Fixes: 3740651bf7e2 ("tls: Fix context leak on tls_device_down")
+Signed-off-by: Maxim Mikityanskiy <maximmi@nvidia.com>
+Reviewed-by: Tariq Toukan <tariqt@nvidia.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/tls/tls_device.c | 7 ++++++-
+ 1 file changed, 6 insertions(+), 1 deletion(-)
+
+diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c
+index 4e33150cfb9e..cf75969375cf 100644
+--- a/net/tls/tls_device.c
++++ b/net/tls/tls_device.c
+@@ -1351,8 +1351,13 @@ static int tls_device_down(struct net_device *netdev)
+ * by tls_device_free_ctx. rx_conf and tx_conf stay in TLS_HW.
+ * Now release the ref taken above.
+ */
+- if (refcount_dec_and_test(&ctx->refcount))
++ if (refcount_dec_and_test(&ctx->refcount)) {
++ /* sk_destruct ran after tls_device_down took a ref, and
++ * it returned early. Complete the destruction here.
++ */
++ list_del(&ctx->list);
+ tls_device_free_ctx(ctx);
++ }
+ }
+
+ up_write(&device_offload_lock);
+--
+2.35.1
+
--- /dev/null
+From 573a6eb0d3b6dc31b9f97275d07a98dcfebf6c8a Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 26 Jul 2022 12:42:06 +0200
+Subject: netfilter: nf_queue: do not allow packet truncation below transport
+ header offset
+
+From: Florian Westphal <fw@strlen.de>
+
+[ Upstream commit 99a63d36cb3ed5ca3aa6fcb64cffbeaf3b0fb164 ]
+
+Domingo Dirutigliano and Nicola Guerrera report kernel panic when
+sending nf_queue verdict with 1-byte nfta_payload attribute.
+
+The IP/IPv6 stack pulls the IP(v6) header from the packet after the
+input hook.
+
+If user truncates the packet below the header size, this skb_pull() will
+result in a malformed skb (skb->len < 0).
+
+Fixes: 7af4cc3fa158 ("[NETFILTER]: Add "nfnetlink_queue" netfilter queue handler over nfnetlink")
+Reported-by: Domingo Dirutigliano <pwnzer0tt1@proton.me>
+Signed-off-by: Florian Westphal <fw@strlen.de>
+Reviewed-by: Pablo Neira Ayuso <pablo@netfilter.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/netfilter/nfnetlink_queue.c | 7 ++++++-
+ 1 file changed, 6 insertions(+), 1 deletion(-)
+
+diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c
+index 8787d0613ad8..5329ebf19a18 100644
+--- a/net/netfilter/nfnetlink_queue.c
++++ b/net/netfilter/nfnetlink_queue.c
+@@ -836,11 +836,16 @@ nfqnl_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum)
+ }
+
+ static int
+-nfqnl_mangle(void *data, int data_len, struct nf_queue_entry *e, int diff)
++nfqnl_mangle(void *data, unsigned int data_len, struct nf_queue_entry *e, int diff)
+ {
+ struct sk_buff *nskb;
+
+ if (diff < 0) {
++ unsigned int min_len = skb_transport_offset(e->skb);
++
++ if (data_len < min_len)
++ return -EINVAL;
++
+ if (pskb_trim(e->skb, data_len))
+ return -ENOMEM;
+ } else if (diff > 0) {
+--
+2.35.1
+
--- /dev/null
+From e1fc39bcf4c9b83ef7a2c9c35e442f3c69a74d91 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sun, 24 Jul 2022 13:51:13 +0530
+Subject: octeontx2-pf: cn10k: Fix egress ratelimit configuration
+
+From: Sunil Goutham <sgoutham@marvell.com>
+
+[ Upstream commit b354eaeec8637d87003945439209251d76a2bb95 ]
+
+NIX_AF_TLXX_PIR/CIR register format has changed from OcteonTx2
+to CN10K. CN10K supports larger burst size. Fix burst exponent
+and burst mantissa configuration for CN10K.
+
+Also fixed 'maxrate' from u32 to u64 since 'police.rate_bytes_ps'
+passed by stack is also u64.
+
+Fixes: e638a83f167e ("octeontx2-pf: TC_MATCHALL egress ratelimiting offload")
+Signed-off-by: Sunil Goutham <sgoutham@marvell.com>
+Signed-off-by: Subbaraya Sundeep <sbhatta@marvell.com>
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ .../ethernet/marvell/octeontx2/nic/otx2_tc.c | 76 ++++++++++++++-----
+ 1 file changed, 55 insertions(+), 21 deletions(-)
+
+diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c
+index 626961a41089..ff569e261be4 100644
+--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c
++++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c
+@@ -28,6 +28,9 @@
+ #define MAX_RATE_EXPONENT 0x0FULL
+ #define MAX_RATE_MANTISSA 0xFFULL
+
++#define CN10K_MAX_BURST_MANTISSA 0x7FFFULL
++#define CN10K_MAX_BURST_SIZE 8453888ULL
++
+ /* Bitfields in NIX_TLX_PIR register */
+ #define TLX_RATE_MANTISSA GENMASK_ULL(8, 1)
+ #define TLX_RATE_EXPONENT GENMASK_ULL(12, 9)
+@@ -35,6 +38,9 @@
+ #define TLX_BURST_MANTISSA GENMASK_ULL(36, 29)
+ #define TLX_BURST_EXPONENT GENMASK_ULL(40, 37)
+
++#define CN10K_TLX_BURST_MANTISSA GENMASK_ULL(43, 29)
++#define CN10K_TLX_BURST_EXPONENT GENMASK_ULL(47, 44)
++
+ struct otx2_tc_flow_stats {
+ u64 bytes;
+ u64 pkts;
+@@ -77,33 +83,42 @@ int otx2_tc_alloc_ent_bitmap(struct otx2_nic *nic)
+ }
+ EXPORT_SYMBOL(otx2_tc_alloc_ent_bitmap);
+
+-static void otx2_get_egress_burst_cfg(u32 burst, u32 *burst_exp,
+- u32 *burst_mantissa)
++static void otx2_get_egress_burst_cfg(struct otx2_nic *nic, u32 burst,
++ u32 *burst_exp, u32 *burst_mantissa)
+ {
++ int max_burst, max_mantissa;
+ unsigned int tmp;
+
++ if (is_dev_otx2(nic->pdev)) {
++ max_burst = MAX_BURST_SIZE;
++ max_mantissa = MAX_BURST_MANTISSA;
++ } else {
++ max_burst = CN10K_MAX_BURST_SIZE;
++ max_mantissa = CN10K_MAX_BURST_MANTISSA;
++ }
++
+ /* Burst is calculated as
+ * ((256 + BURST_MANTISSA) << (1 + BURST_EXPONENT)) / 256
+ * Max supported burst size is 130,816 bytes.
+ */
+- burst = min_t(u32, burst, MAX_BURST_SIZE);
++ burst = min_t(u32, burst, max_burst);
+ if (burst) {
+ *burst_exp = ilog2(burst) ? ilog2(burst) - 1 : 0;
+ tmp = burst - rounddown_pow_of_two(burst);
+- if (burst < MAX_BURST_MANTISSA)
++ if (burst < max_mantissa)
+ *burst_mantissa = tmp * 2;
+ else
+ *burst_mantissa = tmp / (1ULL << (*burst_exp - 7));
+ } else {
+ *burst_exp = MAX_BURST_EXPONENT;
+- *burst_mantissa = MAX_BURST_MANTISSA;
++ *burst_mantissa = max_mantissa;
+ }
+ }
+
+-static void otx2_get_egress_rate_cfg(u32 maxrate, u32 *exp,
++static void otx2_get_egress_rate_cfg(u64 maxrate, u32 *exp,
+ u32 *mantissa, u32 *div_exp)
+ {
+- unsigned int tmp;
++ u64 tmp;
+
+ /* Rate calculation by hardware
+ *
+@@ -132,21 +147,44 @@ static void otx2_get_egress_rate_cfg(u32 maxrate, u32 *exp,
+ }
+ }
+
+-static int otx2_set_matchall_egress_rate(struct otx2_nic *nic, u32 burst, u32 maxrate)
++static u64 otx2_get_txschq_rate_regval(struct otx2_nic *nic,
++ u64 maxrate, u32 burst)
+ {
+- struct otx2_hw *hw = &nic->hw;
+- struct nix_txschq_config *req;
+ u32 burst_exp, burst_mantissa;
+ u32 exp, mantissa, div_exp;
++ u64 regval = 0;
++
++ /* Get exponent and mantissa values from the desired rate */
++ otx2_get_egress_burst_cfg(nic, burst, &burst_exp, &burst_mantissa);
++ otx2_get_egress_rate_cfg(maxrate, &exp, &mantissa, &div_exp);
++
++ if (is_dev_otx2(nic->pdev)) {
++ regval = FIELD_PREP(TLX_BURST_EXPONENT, (u64)burst_exp) |
++ FIELD_PREP(TLX_BURST_MANTISSA, (u64)burst_mantissa) |
++ FIELD_PREP(TLX_RATE_DIVIDER_EXPONENT, div_exp) |
++ FIELD_PREP(TLX_RATE_EXPONENT, exp) |
++ FIELD_PREP(TLX_RATE_MANTISSA, mantissa) | BIT_ULL(0);
++ } else {
++ regval = FIELD_PREP(CN10K_TLX_BURST_EXPONENT, (u64)burst_exp) |
++ FIELD_PREP(CN10K_TLX_BURST_MANTISSA, (u64)burst_mantissa) |
++ FIELD_PREP(TLX_RATE_DIVIDER_EXPONENT, div_exp) |
++ FIELD_PREP(TLX_RATE_EXPONENT, exp) |
++ FIELD_PREP(TLX_RATE_MANTISSA, mantissa) | BIT_ULL(0);
++ }
++
++ return regval;
++}
++
++static int otx2_set_matchall_egress_rate(struct otx2_nic *nic,
++ u32 burst, u64 maxrate)
++{
++ struct otx2_hw *hw = &nic->hw;
++ struct nix_txschq_config *req;
+ int txschq, err;
+
+ /* All SQs share the same TL4, so pick the first scheduler */
+ txschq = hw->txschq_list[NIX_TXSCH_LVL_TL4][0];
+
+- /* Get exponent and mantissa values from the desired rate */
+- otx2_get_egress_burst_cfg(burst, &burst_exp, &burst_mantissa);
+- otx2_get_egress_rate_cfg(maxrate, &exp, &mantissa, &div_exp);
+-
+ mutex_lock(&nic->mbox.lock);
+ req = otx2_mbox_alloc_msg_nix_txschq_cfg(&nic->mbox);
+ if (!req) {
+@@ -157,11 +195,7 @@ static int otx2_set_matchall_egress_rate(struct otx2_nic *nic, u32 burst, u32 ma
+ req->lvl = NIX_TXSCH_LVL_TL4;
+ req->num_regs = 1;
+ req->reg[0] = NIX_AF_TL4X_PIR(txschq);
+- req->regval[0] = FIELD_PREP(TLX_BURST_EXPONENT, burst_exp) |
+- FIELD_PREP(TLX_BURST_MANTISSA, burst_mantissa) |
+- FIELD_PREP(TLX_RATE_DIVIDER_EXPONENT, div_exp) |
+- FIELD_PREP(TLX_RATE_EXPONENT, exp) |
+- FIELD_PREP(TLX_RATE_MANTISSA, mantissa) | BIT_ULL(0);
++ req->regval[0] = otx2_get_txschq_rate_regval(nic, maxrate, burst);
+
+ err = otx2_sync_mbox_msg(&nic->mbox);
+ mutex_unlock(&nic->mbox.lock);
+@@ -196,7 +230,7 @@ static int otx2_tc_egress_matchall_install(struct otx2_nic *nic,
+ struct netlink_ext_ack *extack = cls->common.extack;
+ struct flow_action *actions = &cls->rule->action;
+ struct flow_action_entry *entry;
+- u32 rate;
++ u64 rate;
+ int err;
+
+ err = otx2_tc_validate_flow(nic, actions, extack);
+@@ -218,7 +252,7 @@ static int otx2_tc_egress_matchall_install(struct otx2_nic *nic,
+ }
+ /* Convert bytes per second to Mbps */
+ rate = entry->police.rate_bytes_ps * 8;
+- rate = max_t(u32, rate / 1000000, 1);
++ rate = max_t(u64, rate / 1000000, 1);
+ err = otx2_set_matchall_egress_rate(nic, entry->police.burst, rate);
+ if (err)
+ return err;
+--
+2.35.1
+
--- /dev/null
+From c2604438cc7c262202cdf9f88f33229a28bcddb2 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sun, 24 Jul 2022 13:51:14 +0530
+Subject: octeontx2-pf: Fix UDP/TCP src and dst port tc filters
+
+From: Subbaraya Sundeep <sbhatta@marvell.com>
+
+[ Upstream commit 59e1be6f83b928a04189bbf3ab683a1fc6248db3 ]
+
+Check the mask for non-zero value before installing tc filters
+for L4 source and destination ports. Otherwise installing a
+filter for source port installs destination port too and
+vice-versa.
+
+Fixes: 1d4d9e42c240 ("octeontx2-pf: Add tc flower hardware offload on ingress traffic")
+Signed-off-by: Subbaraya Sundeep <sbhatta@marvell.com>
+Signed-off-by: Sunil Goutham <sgoutham@marvell.com>
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ .../ethernet/marvell/octeontx2/nic/otx2_tc.c | 30 +++++++++++--------
+ 1 file changed, 18 insertions(+), 12 deletions(-)
+
+diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c
+index ff569e261be4..75388a65f349 100644
+--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c
++++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c
+@@ -605,21 +605,27 @@ static int otx2_tc_prepare_flow(struct otx2_nic *nic, struct otx2_tc_flow *node,
+
+ flow_spec->dport = match.key->dst;
+ flow_mask->dport = match.mask->dst;
+- if (ip_proto == IPPROTO_UDP)
+- req->features |= BIT_ULL(NPC_DPORT_UDP);
+- else if (ip_proto == IPPROTO_TCP)
+- req->features |= BIT_ULL(NPC_DPORT_TCP);
+- else if (ip_proto == IPPROTO_SCTP)
+- req->features |= BIT_ULL(NPC_DPORT_SCTP);
++
++ if (flow_mask->dport) {
++ if (ip_proto == IPPROTO_UDP)
++ req->features |= BIT_ULL(NPC_DPORT_UDP);
++ else if (ip_proto == IPPROTO_TCP)
++ req->features |= BIT_ULL(NPC_DPORT_TCP);
++ else if (ip_proto == IPPROTO_SCTP)
++ req->features |= BIT_ULL(NPC_DPORT_SCTP);
++ }
+
+ flow_spec->sport = match.key->src;
+ flow_mask->sport = match.mask->src;
+- if (ip_proto == IPPROTO_UDP)
+- req->features |= BIT_ULL(NPC_SPORT_UDP);
+- else if (ip_proto == IPPROTO_TCP)
+- req->features |= BIT_ULL(NPC_SPORT_TCP);
+- else if (ip_proto == IPPROTO_SCTP)
+- req->features |= BIT_ULL(NPC_SPORT_SCTP);
++
++ if (flow_mask->sport) {
++ if (ip_proto == IPPROTO_UDP)
++ req->features |= BIT_ULL(NPC_SPORT_UDP);
++ else if (ip_proto == IPPROTO_TCP)
++ req->features |= BIT_ULL(NPC_SPORT_TCP);
++ else if (ip_proto == IPPROTO_SCTP)
++ req->features |= BIT_ULL(NPC_SPORT_SCTP);
++ }
+ }
+
+ return otx2_tc_parse_actions(nic, &rule->action, req, f, node);
+--
+2.35.1
+
--- /dev/null
+From 47347b30c7057567055b6144446ddea16815dacf Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sun, 24 Jul 2022 14:00:12 +0800
+Subject: perf symbol: Correct address for bss symbols
+
+From: Leo Yan <leo.yan@linaro.org>
+
+[ Upstream commit 2d86612aacb7805f72873691a2644d7279ed0630 ]
+
+When using 'perf mem' and 'perf c2c', an issue is observed that tool
+reports the wrong offset for global data symbols. This is a common
+issue on both x86 and Arm64 platforms.
+
+Let's see an example, for a test program, below is the disassembly for
+its .bss section which is dumped with objdump:
+
+ ...
+
+ Disassembly of section .bss:
+
+ 0000000000004040 <completed.0>:
+ ...
+
+ 0000000000004080 <buf1>:
+ ...
+
+ 00000000000040c0 <buf2>:
+ ...
+
+ 0000000000004100 <thread>:
+ ...
+
+First we used 'perf mem record' to run the test program and then used
+'perf --debug verbose=4 mem report' to observe what's the symbol info
+for 'buf1' and 'buf2' structures.
+
+ # ./perf mem record -e ldlat-loads,ldlat-stores -- false_sharing.exe 8
+ # ./perf --debug verbose=4 mem report
+ ...
+ dso__load_sym_internal: adjusting symbol: st_value: 0x40c0 sh_addr: 0x4040 sh_offset: 0x3028
+ symbol__new: buf2 0x30a8-0x30e8
+ ...
+ dso__load_sym_internal: adjusting symbol: st_value: 0x4080 sh_addr: 0x4040 sh_offset: 0x3028
+ symbol__new: buf1 0x3068-0x30a8
+ ...
+
+The perf tool relies on libelf to parse symbols, in executable and
+shared object files, 'st_value' holds a virtual address; 'sh_addr' is
+the address at which section's first byte should reside in memory, and
+'sh_offset' is the byte offset from the beginning of the file to the
+first byte in the section. The perf tool uses below formula to convert
+a symbol's memory address to a file address:
+
+ file_address = st_value - sh_addr + sh_offset
+ ^
+ ` Memory address
+
+We can see the final adjusted address ranges for buf1 and buf2 are
+[0x30a8-0x30e8) and [0x3068-0x30a8) respectively, apparently this is
+incorrect, in the code, the structure for 'buf1' and 'buf2' specifies
+compiler attribute with 64-byte alignment.
+
+The problem happens for 'sh_offset', libelf returns it as 0x3028 which
+is not 64-byte aligned, combining with disassembly, it's likely libelf
+doesn't respect the alignment for .bss section, therefore, it doesn't
+return the aligned value for 'sh_offset'.
+
+Suggested by Fangrui Song, ELF file contains program header which
+contains PT_LOAD segments, the fields p_vaddr and p_offset in PT_LOAD
+segments contain the execution info. A better choice for converting
+memory address to file address is using the formula:
+
+ file_address = st_value - p_vaddr + p_offset
+
+This patch introduces elf_read_program_header() which returns the
+program header based on the passed 'st_value', then it uses the formula
+above to calculate the symbol file address; and the debugging log is
+updated accordingly.
+
+After applying the change:
+
+ # ./perf --debug verbose=4 mem report
+ ...
+ dso__load_sym_internal: adjusting symbol: st_value: 0x40c0 p_vaddr: 0x3d28 p_offset: 0x2d28
+ symbol__new: buf2 0x30c0-0x3100
+ ...
+ dso__load_sym_internal: adjusting symbol: st_value: 0x4080 p_vaddr: 0x3d28 p_offset: 0x2d28
+ symbol__new: buf1 0x3080-0x30c0
+ ...
+
+Fixes: f17e04afaff84b5c ("perf report: Fix ELF symbol parsing")
+Reported-by: Chang Rui <changruinj@gmail.com>
+Suggested-by: Fangrui Song <maskray@google.com>
+Signed-off-by: Leo Yan <leo.yan@linaro.org>
+Acked-by: Namhyung Kim <namhyung@kernel.org>
+Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
+Cc: Ian Rogers <irogers@google.com>
+Cc: Ingo Molnar <mingo@redhat.com>
+Cc: Jiri Olsa <jolsa@kernel.org>
+Cc: Mark Rutland <mark.rutland@arm.com>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Link: https://lore.kernel.org/r/20220724060013.171050-2-leo.yan@linaro.org
+Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ tools/perf/util/symbol-elf.c | 45 ++++++++++++++++++++++++++++++++----
+ 1 file changed, 41 insertions(+), 4 deletions(-)
+
+diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c
+index ecd377938eea..ef6ced5c5746 100644
+--- a/tools/perf/util/symbol-elf.c
++++ b/tools/perf/util/symbol-elf.c
+@@ -233,6 +233,33 @@ Elf_Scn *elf_section_by_name(Elf *elf, GElf_Ehdr *ep,
+ return NULL;
+ }
+
++static int elf_read_program_header(Elf *elf, u64 vaddr, GElf_Phdr *phdr)
++{
++ size_t i, phdrnum;
++ u64 sz;
++
++ if (elf_getphdrnum(elf, &phdrnum))
++ return -1;
++
++ for (i = 0; i < phdrnum; i++) {
++ if (gelf_getphdr(elf, i, phdr) == NULL)
++ return -1;
++
++ if (phdr->p_type != PT_LOAD)
++ continue;
++
++ sz = max(phdr->p_memsz, phdr->p_filesz);
++ if (!sz)
++ continue;
++
++ if (vaddr >= phdr->p_vaddr && (vaddr < phdr->p_vaddr + sz))
++ return 0;
++ }
++
++ /* Not found any valid program header */
++ return -1;
++}
++
+ static bool want_demangle(bool is_kernel_sym)
+ {
+ return is_kernel_sym ? symbol_conf.demangle_kernel : symbol_conf.demangle;
+@@ -1209,6 +1236,7 @@ dso__load_sym_internal(struct dso *dso, struct map *map, struct symsrc *syms_ss,
+ sym.st_value);
+ used_opd = true;
+ }
++
+ /*
+ * When loading symbols in a data mapping, ABS symbols (which
+ * has a value of SHN_ABS in its st_shndx) failed at
+@@ -1262,11 +1290,20 @@ dso__load_sym_internal(struct dso *dso, struct map *map, struct symsrc *syms_ss,
+ goto out_elf_end;
+ } else if ((used_opd && runtime_ss->adjust_symbols) ||
+ (!used_opd && syms_ss->adjust_symbols)) {
++ GElf_Phdr phdr;
++
++ if (elf_read_program_header(syms_ss->elf,
++ (u64)sym.st_value, &phdr)) {
++ pr_warning("%s: failed to find program header for "
++ "symbol: %s st_value: %#" PRIx64 "\n",
++ __func__, elf_name, (u64)sym.st_value);
++ continue;
++ }
+ pr_debug4("%s: adjusting symbol: st_value: %#" PRIx64 " "
+- "sh_addr: %#" PRIx64 " sh_offset: %#" PRIx64 "\n", __func__,
+- (u64)sym.st_value, (u64)shdr.sh_addr,
+- (u64)shdr.sh_offset);
+- sym.st_value -= shdr.sh_addr - shdr.sh_offset;
++ "p_vaddr: %#" PRIx64 " p_offset: %#" PRIx64 "\n",
++ __func__, (u64)sym.st_value, (u64)phdr.p_vaddr,
++ (u64)phdr.p_offset);
++ sym.st_value -= phdr.p_vaddr - phdr.p_offset;
+ }
+
+ demangled = demangle_sym(dso, kmodule, elf_name);
+--
+2.35.1
+
--- /dev/null
+From 8f85722e7d52d3f218ef18d54b8a84fb170f4f22 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 21 Jul 2022 20:44:04 +0000
+Subject: Revert "tcp: change pingpong threshold to 3"
+
+From: Wei Wang <weiwan@google.com>
+
+[ Upstream commit 4d8f24eeedc58d5f87b650ddda73c16e8ba56559 ]
+
+This reverts commit 4a41f453bedfd5e9cd040bad509d9da49feb3e2c.
+
+This to-be-reverted commit was meant to apply a stricter rule for the
+stack to enter pingpong mode. However, the condition used to check for
+interactive session "before(tp->lsndtime, icsk->icsk_ack.lrcvtime)" is
+jiffy based and might be too coarse, which delays the stack entering
+pingpong mode.
+We revert this patch so that we no longer use the above condition to
+determine interactive session, and also reduce pingpong threshold to 1.
+
+Fixes: 4a41f453bedf ("tcp: change pingpong threshold to 3")
+Reported-by: LemmyHuang <hlm3280@163.com>
+Suggested-by: Neal Cardwell <ncardwell@google.com>
+Signed-off-by: Wei Wang <weiwan@google.com>
+Acked-by: Neal Cardwell <ncardwell@google.com>
+Reviewed-by: Eric Dumazet <edumazet@google.com>
+Link: https://lore.kernel.org/r/20220721204404.388396-1-weiwan@google.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/net/inet_connection_sock.h | 10 +---------
+ net/ipv4/tcp_output.c | 15 ++++++---------
+ 2 files changed, 7 insertions(+), 18 deletions(-)
+
+diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h
+index fa6a87246a7b..695ed45841f0 100644
+--- a/include/net/inet_connection_sock.h
++++ b/include/net/inet_connection_sock.h
+@@ -315,7 +315,7 @@ void inet_csk_update_fastreuse(struct inet_bind_bucket *tb,
+
+ struct dst_entry *inet_csk_update_pmtu(struct sock *sk, u32 mtu);
+
+-#define TCP_PINGPONG_THRESH 3
++#define TCP_PINGPONG_THRESH 1
+
+ static inline void inet_csk_enter_pingpong_mode(struct sock *sk)
+ {
+@@ -332,14 +332,6 @@ static inline bool inet_csk_in_pingpong_mode(struct sock *sk)
+ return inet_csk(sk)->icsk_ack.pingpong >= TCP_PINGPONG_THRESH;
+ }
+
+-static inline void inet_csk_inc_pingpong_cnt(struct sock *sk)
+-{
+- struct inet_connection_sock *icsk = inet_csk(sk);
+-
+- if (icsk->icsk_ack.pingpong < U8_MAX)
+- icsk->icsk_ack.pingpong++;
+-}
+-
+ static inline bool inet_csk_has_ulp(struct sock *sk)
+ {
+ return inet_sk(sk)->is_icsk && !!inet_csk(sk)->icsk_ulp_ops;
+diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
+index 3a84553fb4ed..51f31311fdb6 100644
+--- a/net/ipv4/tcp_output.c
++++ b/net/ipv4/tcp_output.c
+@@ -167,16 +167,13 @@ static void tcp_event_data_sent(struct tcp_sock *tp,
+ if (tcp_packets_in_flight(tp) == 0)
+ tcp_ca_event(sk, CA_EVENT_TX_START);
+
+- /* If this is the first data packet sent in response to the
+- * previous received data,
+- * and it is a reply for ato after last received packet,
+- * increase pingpong count.
+- */
+- if (before(tp->lsndtime, icsk->icsk_ack.lrcvtime) &&
+- (u32)(now - icsk->icsk_ack.lrcvtime) < icsk->icsk_ack.ato)
+- inet_csk_inc_pingpong_cnt(sk);
+-
+ tp->lsndtime = now;
++
++ /* If it is a reply for ato after last received
++ * packet, enter pingpong mode.
++ */
++ if ((u32)(now - icsk->icsk_ack.lrcvtime) < icsk->icsk_ack.ato)
++ inet_csk_enter_pingpong_mode(sk);
+ }
+
+ /* Account for an ACK we sent. */
+--
+2.35.1
+
--- /dev/null
+From 9b15aec0a530a3401f9c95b7cbfa9863e92f1217 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 13 Jul 2022 15:17:21 +0200
+Subject: s390/archrandom: prevent CPACF trng invocations in interrupt context
+
+From: Harald Freudenberger <freude@linux.ibm.com>
+
+[ Upstream commit 918e75f77af7d2e049bb70469ec0a2c12782d96a ]
+
+This patch slightly reworks the s390 arch_get_random_seed_{int,long}
+implementation: Make sure the CPACF trng instruction is never
+called in any interrupt context. This is done by adding an
+additional condition in_task().
+
+Justification:
+
+There are some constrains to satisfy for the invocation of the
+arch_get_random_seed_{int,long}() functions:
+- They should provide good random data during kernel initialization.
+- They should not be called in interrupt context as the TRNG
+  instruction is relatively heavy weight and may, for example,
+  cause some network loads to time out.
+
+However, it was not clear what kind of interrupt context is exactly
+encountered during kernel init or network traffic eventually calling
+arch_get_random_seed_long().
+
+After some days of investigations it is clear that the s390
+start_kernel function is not running in any interrupt context and
+so the trng is called:
+
+Jul 11 18:33:39 t35lp54 kernel: [<00000001064e90ca>] arch_get_random_seed_long.part.0+0x32/0x70
+Jul 11 18:33:39 t35lp54 kernel: [<000000010715f246>] random_init+0xf6/0x238
+Jul 11 18:33:39 t35lp54 kernel: [<000000010712545c>] start_kernel+0x4a4/0x628
+Jul 11 18:33:39 t35lp54 kernel: [<000000010590402a>] startup_continue+0x2a/0x40
+
+The condition in_task() is true and the CPACF trng provides random data
+during kernel startup.
+
+The network traffic however, is more difficult. A typical call stack
+looks like this:
+
+Jul 06 17:37:07 t35lp54 kernel: [<000000008b5600fc>] extract_entropy.constprop.0+0x23c/0x240
+Jul 06 17:37:07 t35lp54 kernel: [<000000008b560136>] crng_reseed+0x36/0xd8
+Jul 06 17:37:07 t35lp54 kernel: [<000000008b5604b8>] crng_make_state+0x78/0x340
+Jul 06 17:37:07 t35lp54 kernel: [<000000008b5607e0>] _get_random_bytes+0x60/0xf8
+Jul 06 17:37:07 t35lp54 kernel: [<000000008b56108a>] get_random_u32+0xda/0x248
+Jul 06 17:37:07 t35lp54 kernel: [<000000008aefe7a8>] kfence_guarded_alloc+0x48/0x4b8
+Jul 06 17:37:07 t35lp54 kernel: [<000000008aeff35e>] __kfence_alloc+0x18e/0x1b8
+Jul 06 17:37:07 t35lp54 kernel: [<000000008aef7f10>] __kmalloc_node_track_caller+0x368/0x4d8
+Jul 06 17:37:07 t35lp54 kernel: [<000000008b611eac>] kmalloc_reserve+0x44/0xa0
+Jul 06 17:37:07 t35lp54 kernel: [<000000008b611f98>] __alloc_skb+0x90/0x178
+Jul 06 17:37:07 t35lp54 kernel: [<000000008b6120dc>] __napi_alloc_skb+0x5c/0x118
+Jul 06 17:37:07 t35lp54 kernel: [<000000008b8f06b4>] qeth_extract_skb+0x13c/0x680
+Jul 06 17:37:07 t35lp54 kernel: [<000000008b8f6526>] qeth_poll+0x256/0x3f8
+Jul 06 17:37:07 t35lp54 kernel: [<000000008b63d76e>] __napi_poll.constprop.0+0x46/0x2f8
+Jul 06 17:37:07 t35lp54 kernel: [<000000008b63dbec>] net_rx_action+0x1cc/0x408
+Jul 06 17:37:07 t35lp54 kernel: [<000000008b937302>] __do_softirq+0x132/0x6b0
+Jul 06 17:37:07 t35lp54 kernel: [<000000008abf46ce>] __irq_exit_rcu+0x13e/0x170
+Jul 06 17:37:07 t35lp54 kernel: [<000000008abf531a>] irq_exit_rcu+0x22/0x50
+Jul 06 17:37:07 t35lp54 kernel: [<000000008b922506>] do_io_irq+0xe6/0x198
+Jul 06 17:37:07 t35lp54 kernel: [<000000008b935826>] io_int_handler+0xd6/0x110
+Jul 06 17:37:07 t35lp54 kernel: [<000000008b9358a6>] psw_idle_exit+0x0/0xa
+Jul 06 17:37:07 t35lp54 kernel: ([<000000008ab9c59a>] arch_cpu_idle+0x52/0xe0)
+Jul 06 17:37:07 t35lp54 kernel: [<000000008b933cfe>] default_idle_call+0x6e/0xd0
+Jul 06 17:37:07 t35lp54 kernel: [<000000008ac59f4e>] do_idle+0xf6/0x1b0
+Jul 06 17:37:07 t35lp54 kernel: [<000000008ac5a28e>] cpu_startup_entry+0x36/0x40
+Jul 06 17:37:07 t35lp54 kernel: [<000000008abb0d90>] smp_start_secondary+0x148/0x158
+Jul 06 17:37:07 t35lp54 kernel: [<000000008b935b9e>] restart_int_handler+0x6e/0x90
+
+which confirms that the call is in softirq context. So in_task() covers exactly
+the cases where we want to have CPACF trng called: not in nmi, not in hard irq,
+not in soft irq but in normal task context and during kernel init.
+
+Signed-off-by: Harald Freudenberger <freude@linux.ibm.com>
+Acked-by: Jason A. Donenfeld <Jason@zx2c4.com>
+Reviewed-by: Juergen Christ <jchrist@linux.ibm.com>
+Link: https://lore.kernel.org/r/20220713131721.257907-1-freude@linux.ibm.com
+Fixes: e4f74400308c ("s390/archrandom: simplify back to earlier design and initialize earlier")
+[agordeev@linux.ibm.com changed desc, added Fixes and Link, removed -stable]
+Signed-off-by: Alexander Gordeev <agordeev@linux.ibm.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/s390/include/asm/archrandom.h | 9 ++++++---
+ 1 file changed, 6 insertions(+), 3 deletions(-)
+
+diff --git a/arch/s390/include/asm/archrandom.h b/arch/s390/include/asm/archrandom.h
+index 2c6e1c6ecbe7..4120c428dc37 100644
+--- a/arch/s390/include/asm/archrandom.h
++++ b/arch/s390/include/asm/archrandom.h
+@@ -2,7 +2,7 @@
+ /*
+ * Kernel interface for the s390 arch_random_* functions
+ *
+- * Copyright IBM Corp. 2017, 2020
++ * Copyright IBM Corp. 2017, 2022
+ *
+ * Author: Harald Freudenberger <freude@de.ibm.com>
+ *
+@@ -14,6 +14,7 @@
+ #ifdef CONFIG_ARCH_RANDOM
+
+ #include <linux/static_key.h>
++#include <linux/preempt.h>
+ #include <linux/atomic.h>
+ #include <asm/cpacf.h>
+
+@@ -32,7 +33,8 @@ static inline bool __must_check arch_get_random_int(unsigned int *v)
+
+ static inline bool __must_check arch_get_random_seed_long(unsigned long *v)
+ {
+- if (static_branch_likely(&s390_arch_random_available)) {
++ if (static_branch_likely(&s390_arch_random_available) &&
++ in_task()) {
+ cpacf_trng(NULL, 0, (u8 *)v, sizeof(*v));
+ atomic64_add(sizeof(*v), &s390_arch_random_counter);
+ return true;
+@@ -42,7 +44,8 @@ static inline bool __must_check arch_get_random_seed_long(unsigned long *v)
+
+ static inline bool __must_check arch_get_random_seed_int(unsigned int *v)
+ {
+- if (static_branch_likely(&s390_arch_random_available)) {
++ if (static_branch_likely(&s390_arch_random_available) &&
++ in_task()) {
+ cpacf_trng(NULL, 0, (u8 *)v, sizeof(*v));
+ atomic64_add(sizeof(*v), &s390_arch_random_counter);
+ return true;
+--
+2.35.1
+
--- /dev/null
+From 2852775159d84567f50f76f5f682f72d328646ce Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 20 Jul 2022 10:51:20 +0800
+Subject: scsi: core: Fix warning in scsi_alloc_sgtables()
+
+From: Jason Yan <yanaijie@huawei.com>
+
+[ Upstream commit d9a434fa0c12ed5f7afe1e9dd30003ab5d059b85 ]
+
+As explained in SG_IO howto[1]:
+
+"If iovec_count is non-zero then 'dxfer_len' should be equal to the sum of
+iov_len lengths. If not, the minimum of the two is the transfer length."
+
+When iovec_count is non-zero and dxfer_len is zero, sg_io() just
+generated a null bio, and finally caused the warning below. To fix it, skip
+generating a bio for this request if dxfer_len is zero.
+
+[1] https://tldp.org/HOWTO/SCSI-Generic-HOWTO/x198.html
+
+WARNING: CPU: 2 PID: 3643 at drivers/scsi/scsi_lib.c:1032 scsi_alloc_sgtables+0xc7d/0xf70 drivers/scsi/scsi_lib.c:1032
+Modules linked in:
+
+CPU: 2 PID: 3643 Comm: syz-executor397 Not tainted
+5.17.0-rc3-syzkaller-00316-gb81b1829e7e3 #0
+Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.14.0-204/01/2014
+RIP: 0010:scsi_alloc_sgtables+0xc7d/0xf70 drivers/scsi/scsi_lib.c:1032
+Code: e7 fc 31 ff 44 89 f6 e8 c1 4e e7 fc 45 85 f6 0f 84 1a f5 ff ff e8
+93 4c e7 fc 83 c5 01 0f b7 ed e9 0f f5 ff ff e8 83 4c e7 fc <0f> 0b 41
+ bc 0a 00 00 00 e9 2b fb ff ff 41 bc 09 00 00 00 e9 20 fb
+RSP: 0018:ffffc90000d07558 EFLAGS: 00010293
+RAX: 0000000000000000 RBX: ffff88801bfc96a0 RCX: 0000000000000000
+RDX: ffff88801c876000 RSI: ffffffff849060bd RDI: 0000000000000003
+RBP: 0000000000000000 R08: 0000000000000000 R09: 0000000000000000
+R10: ffffffff849055b9 R11: 0000000000000000 R12: ffff888012b8c000
+R13: ffff88801bfc9580 R14: 0000000000000000 R15: ffff88801432c000
+FS: 00007effdec8e700(0000) GS:ffff88802cc00000(0000)
+knlGS:0000000000000000
+CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+CR2: 00007effdec6d718 CR3: 00000000206d6000 CR4: 0000000000150ee0
+DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
+DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
+Call Trace:
+ <TASK>
+ scsi_setup_scsi_cmnd drivers/scsi/scsi_lib.c:1219 [inline]
+ scsi_prepare_cmd drivers/scsi/scsi_lib.c:1614 [inline]
+ scsi_queue_rq+0x283e/0x3630 drivers/scsi/scsi_lib.c:1730
+ blk_mq_dispatch_rq_list+0x6ea/0x22e0 block/blk-mq.c:1851
+ __blk_mq_sched_dispatch_requests+0x20b/0x410 block/blk-mq-sched.c:299
+ blk_mq_sched_dispatch_requests+0xfb/0x180 block/blk-mq-sched.c:332
+ __blk_mq_run_hw_queue+0xf9/0x350 block/blk-mq.c:1968
+ __blk_mq_delay_run_hw_queue+0x5b6/0x6c0 block/blk-mq.c:2045
+ blk_mq_run_hw_queue+0x30f/0x480 block/blk-mq.c:2096
+ blk_mq_sched_insert_request+0x340/0x440 block/blk-mq-sched.c:451
+ blk_execute_rq+0xcc/0x340 block/blk-mq.c:1231
+ sg_io+0x67c/0x1210 drivers/scsi/scsi_ioctl.c:485
+ scsi_ioctl_sg_io drivers/scsi/scsi_ioctl.c:866 [inline]
+ scsi_ioctl+0xa66/0x1560 drivers/scsi/scsi_ioctl.c:921
+ sd_ioctl+0x199/0x2a0 drivers/scsi/sd.c:1576
+ blkdev_ioctl+0x37a/0x800 block/ioctl.c:588
+ vfs_ioctl fs/ioctl.c:51 [inline]
+ __do_sys_ioctl fs/ioctl.c:874 [inline]
+ __se_sys_ioctl fs/ioctl.c:860 [inline]
+ __x64_sys_ioctl+0x193/0x200 fs/ioctl.c:860
+ do_syscall_x64 arch/x86/entry/common.c:50 [inline]
+ do_syscall_64+0x35/0xb0 arch/x86/entry/common.c:80
+ entry_SYSCALL_64_after_hwframe+0x44/0xae
+RIP: 0033:0x7effdecdc5d9
+Code: 28 00 00 00 75 05 48 83 c4 28 c3 e8 81 14 00 00 90 48 89 f8 48 89
+f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01
+f0 ff ff 73 01 c3 48 c7 c1 b8 ff ff ff f7 d8 64 89 01 48
+RSP: 002b:00007effdec8e2f8 EFLAGS: 00000246 ORIG_RAX: 0000000000000010
+RAX: ffffffffffffffda RBX: 00007effded664c0 RCX: 00007effdecdc5d9
+RDX: 0000000020002300 RSI: 0000000000002285 RDI: 0000000000000004
+RBP: 00007effded34034 R08: 0000000000000000 R09: 0000000000000000
+R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000003
+R13: 00007effded34054 R14: 2f30656c69662f2e R15: 00007effded664c8
+
+Link: https://lore.kernel.org/r/20220720025120.3226770-1-yanaijie@huawei.com
+Fixes: 25636e282fe9 ("block: fix SG_IO vector request data length handling")
+Reported-by: syzbot+d44b35ecfb807e5af0b5@syzkaller.appspotmail.com
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Reviewed-by: Bart Van Assche <bvanassche@acm.org>
+Signed-off-by: Jason Yan <yanaijie@huawei.com>
+Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/scsi/scsi_ioctl.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/drivers/scsi/scsi_ioctl.c b/drivers/scsi/scsi_ioctl.c
+index a06c61f22742..6e2f82152b4a 100644
+--- a/drivers/scsi/scsi_ioctl.c
++++ b/drivers/scsi/scsi_ioctl.c
+@@ -457,7 +457,7 @@ static int sg_io(struct scsi_device *sdev, struct gendisk *disk,
+ goto out_free_cdb;
+
+ ret = 0;
+- if (hdr->iovec_count) {
++ if (hdr->iovec_count && hdr->dxfer_len) {
+ struct iov_iter i;
+ struct iovec *iov = NULL;
+
+--
+2.35.1
+
--- /dev/null
+From b4d21334619c4ef06ef1832f7db1b552e104b4d9 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 22 Jul 2022 10:24:48 -0400
+Subject: scsi: mpt3sas: Stop fw fault watchdog work item during system
+ shutdown
+
+From: David Jeffery <djeffery@redhat.com>
+
+[ Upstream commit 0fde22c5420ed258ee538a760291c2f3935f6a01 ]
+
+During system shutdown or reboot, mpt3sas will reset the firmware back to
+ready state. However, the driver leaves running a watchdog work item
+intended to keep the firmware in operational state. This causes a second,
+unneeded reset on shutdown and moves the firmware back to operational
+instead of in ready state as intended. And if the mpt3sas_fwfault_debug
+module parameter is set, this extra reset also panics the system.
+
+mpt3sas's scsih_shutdown needs to stop the watchdog before resetting the
+firmware back to ready state.
+
+Link: https://lore.kernel.org/r/20220722142448.6289-1-djeffery@redhat.com
+Fixes: fae21608c31c ("scsi: mpt3sas: Transition IOC to Ready state during shutdown")
+Tested-by: Laurence Oberman <loberman@redhat.com>
+Acked-by: Sreekanth Reddy <sreekanth.reddy@broadcom.com>
+Signed-off-by: David Jeffery <djeffery@redhat.com>
+Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/scsi/mpt3sas/mpt3sas_scsih.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/drivers/scsi/mpt3sas/mpt3sas_scsih.c b/drivers/scsi/mpt3sas/mpt3sas_scsih.c
+index af275ac42795..5351959fbaba 100644
+--- a/drivers/scsi/mpt3sas/mpt3sas_scsih.c
++++ b/drivers/scsi/mpt3sas/mpt3sas_scsih.c
+@@ -11386,6 +11386,7 @@ scsih_shutdown(struct pci_dev *pdev)
+ _scsih_ir_shutdown(ioc);
+ _scsih_nvme_shutdown(ioc);
+ mpt3sas_base_mask_interrupts(ioc);
++ mpt3sas_base_stop_watchdog(ioc);
+ ioc->shost_recovery = 1;
+ mpt3sas_base_make_ioc_ready(ioc, SOFT_RESET);
+ ioc->shost_recovery = 0;
+--
+2.35.1
+
--- /dev/null
+From b30e11a395cf847b4266f0f0971244256bc61cd9 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 19 Jul 2022 15:15:29 +0800
+Subject: scsi: ufs: host: Hold reference returned by of_parse_phandle()
+
+From: Liang He <windhl@126.com>
+
+[ Upstream commit a3435afba87dc6cd83f5595e7607f3c40f93ef01 ]
+
+In ufshcd_populate_vreg(), we should hold the reference returned by
+of_parse_phandle() and then use it to call of_node_put() for refcount
+balance.
+
+Link: https://lore.kernel.org/r/20220719071529.1081166-1-windhl@126.com
+Fixes: aa4976130934 ("ufs: Add regulator enable support")
+Reviewed-by: Bart Van Assche <bvanassche@acm.org>
+Signed-off-by: Liang He <windhl@126.com>
+Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/scsi/ufs/ufshcd-pltfrm.c | 15 +++++++++++++--
+ 1 file changed, 13 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/scsi/ufs/ufshcd-pltfrm.c b/drivers/scsi/ufs/ufshcd-pltfrm.c
+index 87975d1a21c8..adc302b1a57a 100644
+--- a/drivers/scsi/ufs/ufshcd-pltfrm.c
++++ b/drivers/scsi/ufs/ufshcd-pltfrm.c
+@@ -107,9 +107,20 @@ static int ufshcd_parse_clock_info(struct ufs_hba *hba)
+ return ret;
+ }
+
++static bool phandle_exists(const struct device_node *np,
++ const char *phandle_name, int index)
++{
++ struct device_node *parse_np = of_parse_phandle(np, phandle_name, index);
++
++ if (parse_np)
++ of_node_put(parse_np);
++
++ return parse_np != NULL;
++}
++
+ #define MAX_PROP_SIZE 32
+ static int ufshcd_populate_vreg(struct device *dev, const char *name,
+- struct ufs_vreg **out_vreg)
++ struct ufs_vreg **out_vreg)
+ {
+ char prop_name[MAX_PROP_SIZE];
+ struct ufs_vreg *vreg = NULL;
+@@ -121,7 +132,7 @@ static int ufshcd_populate_vreg(struct device *dev, const char *name,
+ }
+
+ snprintf(prop_name, MAX_PROP_SIZE, "%s-supply", name);
+- if (!of_parse_phandle(np, prop_name, 0)) {
++ if (!phandle_exists(np, prop_name, 0)) {
+ dev_info(dev, "%s: Unable to find %s regulator, assuming enabled\n",
+ __func__, prop_name);
+ goto out;
+--
+2.35.1
+
--- /dev/null
+From 05bc0f8f5e2083a8ed36ff7f1514d05da48dc4a7 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 23 Jul 2022 09:58:09 +0800
+Subject: sctp: fix sleep in atomic context bug in timer handlers
+
+From: Duoming Zhou <duoming@zju.edu.cn>
+
+[ Upstream commit b89fc26f741d9f9efb51cba3e9b241cf1380ec5a ]
+
+There are sleep in atomic context bugs in timer handlers of sctp
+such as sctp_generate_t3_rtx_event(), sctp_generate_probe_event(),
+sctp_generate_t1_init_event(), sctp_generate_timeout_event() and so on.
+
+The root cause is sctp_sched_prio_init_sid() with GFP_KERNEL parameter
+that may sleep could be called by different timer handlers which is in
+interrupt context.
+
+One of the call paths that could trigger bug is shown below:
+
+ (interrupt context)
+sctp_generate_probe_event
+ sctp_do_sm
+ sctp_side_effects
+ sctp_cmd_interpreter
+ sctp_outq_teardown
+ sctp_outq_init
+ sctp_sched_set_sched
+ n->init_sid(..,GFP_KERNEL)
+ sctp_sched_prio_init_sid //may sleep
+
+This patch changes gfp_t parameter of init_sid in sctp_sched_set_sched()
+from GFP_KERNEL to GFP_ATOMIC in order to prevent sleep in atomic
+context bugs.
+
+Fixes: 5bbbbe32a431 ("sctp: introduce stream scheduler foundations")
+Signed-off-by: Duoming Zhou <duoming@zju.edu.cn>
+Acked-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
+Link: https://lore.kernel.org/r/20220723015809.11553-1-duoming@zju.edu.cn
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/sctp/stream_sched.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/net/sctp/stream_sched.c b/net/sctp/stream_sched.c
+index 99e5f69fbb74..a2e1d34f52c5 100644
+--- a/net/sctp/stream_sched.c
++++ b/net/sctp/stream_sched.c
+@@ -163,7 +163,7 @@ int sctp_sched_set_sched(struct sctp_association *asoc,
+ if (!SCTP_SO(&asoc->stream, i)->ext)
+ continue;
+
+- ret = n->init_sid(&asoc->stream, i, GFP_KERNEL);
++ ret = n->init_sid(&asoc->stream, i, GFP_ATOMIC);
+ if (ret)
+ goto err;
+ }
+--
+2.35.1
+
--- /dev/null
+From 75448a2fa4330709ebd12f9c467aa925fc030e5e Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 25 Jul 2022 18:11:06 -0400
+Subject: sctp: leave the err path free in sctp_stream_init to sctp_stream_free
+
+From: Xin Long <lucien.xin@gmail.com>
+
+[ Upstream commit 181d8d2066c000ba0a0e6940a7ad80f1a0e68e9d ]
+
+A NULL pointer dereference was reported by Wei Chen:
+
+ BUG: kernel NULL pointer dereference, address: 0000000000000000
+ RIP: 0010:__list_del_entry_valid+0x26/0x80
+ Call Trace:
+ <TASK>
+ sctp_sched_dequeue_common+0x1c/0x90
+ sctp_sched_prio_dequeue+0x67/0x80
+ __sctp_outq_teardown+0x299/0x380
+ sctp_outq_free+0x15/0x20
+ sctp_association_free+0xc3/0x440
+ sctp_do_sm+0x1ca7/0x2210
+ sctp_assoc_bh_rcv+0x1f6/0x340
+
+This happens when calling sctp_sendmsg without connecting to server first.
+In this case, a data chunk already queues up in send queue of client side
+when processing the INIT_ACK from server in sctp_process_init() where it
+calls sctp_stream_init() to alloc stream_in. If it fails to alloc stream_in
+all stream_out will be freed in sctp_stream_init's err path. Then in the
+asoc freeing it will crash when dequeuing this data chunk as stream_out
+is missing.
+
+As we can't free stream out before dequeuing all data from send queue, and
+this patch is to fix it by moving the err path stream_out/in freeing in
+sctp_stream_init() to sctp_stream_free() which is eventually called when
+freeing the asoc in sctp_association_free(). This fix also makes the code
+in sctp_process_init() more clear.
+
+Note that in sctp_association_init() when it fails in sctp_stream_init(),
+sctp_association_free() will not be called, and in that case it should
+go to 'stream_free' err path to free stream instead of 'fail_init'.
+
+Fixes: 5bbbbe32a431 ("sctp: introduce stream scheduler foundations")
+Reported-by: Wei Chen <harperchen1110@gmail.com>
+Signed-off-by: Xin Long <lucien.xin@gmail.com>
+Link: https://lore.kernel.org/r/831a3dc100c4908ff76e5bcc363be97f2778bc0b.1658787066.git.lucien.xin@gmail.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/sctp/associola.c | 5 ++---
+ net/sctp/stream.c | 19 +++----------------
+ 2 files changed, 5 insertions(+), 19 deletions(-)
+
+diff --git a/net/sctp/associola.c b/net/sctp/associola.c
+index be29da09cc7a..3460abceba44 100644
+--- a/net/sctp/associola.c
++++ b/net/sctp/associola.c
+@@ -229,9 +229,8 @@ static struct sctp_association *sctp_association_init(
+ if (!sctp_ulpq_init(&asoc->ulpq, asoc))
+ goto fail_init;
+
+- if (sctp_stream_init(&asoc->stream, asoc->c.sinit_num_ostreams,
+- 0, gfp))
+- goto fail_init;
++ if (sctp_stream_init(&asoc->stream, asoc->c.sinit_num_ostreams, 0, gfp))
++ goto stream_free;
+
+ /* Initialize default path MTU. */
+ asoc->pathmtu = sp->pathmtu;
+diff --git a/net/sctp/stream.c b/net/sctp/stream.c
+index 6dc95dcc0ff4..ef9fceadef8d 100644
+--- a/net/sctp/stream.c
++++ b/net/sctp/stream.c
+@@ -137,7 +137,7 @@ int sctp_stream_init(struct sctp_stream *stream, __u16 outcnt, __u16 incnt,
+
+ ret = sctp_stream_alloc_out(stream, outcnt, gfp);
+ if (ret)
+- goto out_err;
++ return ret;
+
+ for (i = 0; i < stream->outcnt; i++)
+ SCTP_SO(stream, i)->state = SCTP_STREAM_OPEN;
+@@ -145,22 +145,9 @@ int sctp_stream_init(struct sctp_stream *stream, __u16 outcnt, __u16 incnt,
+ handle_in:
+ sctp_stream_interleave_init(stream);
+ if (!incnt)
+- goto out;
+-
+- ret = sctp_stream_alloc_in(stream, incnt, gfp);
+- if (ret)
+- goto in_err;
+-
+- goto out;
++ return 0;
+
+-in_err:
+- sched->free(stream);
+- genradix_free(&stream->in);
+-out_err:
+- genradix_free(&stream->out);
+- stream->outcnt = 0;
+-out:
+- return ret;
++ return sctp_stream_alloc_in(stream, incnt, gfp);
+ }
+
+ int sctp_stream_init_ext(struct sctp_stream *stream, __u16 sid)
+--
+2.35.1
+
net-ping6-fix-memleak-in-ipv6_renew_options.patch
ipv6-addrconf-fix-a-null-ptr-deref-bug-for-ip6_ptr.patch
net-tls-remove-the-context-from-the-list-in-tls_device_down.patch
+igmp-fix-data-races-around-sysctl_igmp_qrv.patch
+s390-archrandom-prevent-cpacf-trng-invocations-in-in.patch
+net-pcs-xpcs-propagate-xpcs_read-error-to-xpcs_get_s.patch
+net-sungem_phy-add-of_node_put-for-reference-returne.patch
+tcp-fix-data-races-around-sysctl_tcp_dsack.patch-17026
+tcp-fix-a-data-race-around-sysctl_tcp_app_win.patch-22294
+tcp-fix-a-data-race-around-sysctl_tcp_adv_win_scale.patch-19790
+tcp-fix-a-data-race-around-sysctl_tcp_frto.patch-3670
+tcp-fix-a-data-race-around-sysctl_tcp_nometrics_save.patch-5497
+tcp-fix-data-races-around-sysctl_tcp_no_ssthresh_met.patch
+tcp-fix-data-races-around-sysctl_tcp_moderate_rcvbuf.patch-32656
+tcp-fix-a-data-race-around-sysctl_tcp_limit_output_b.patch
+tcp-fix-a-data-race-around-sysctl_tcp_challenge_ack_.patch
+tcp-fix-a-data-race-around-sysctl_tcp_min_tso_segs.patch
+tcp-fix-a-data-race-around-sysctl_tcp_min_rtt_wlen.patch
+tcp-fix-a-data-race-around-sysctl_tcp_autocorking.patch
+tcp-fix-a-data-race-around-sysctl_tcp_invalid_rateli.patch
+asm-generic-remove-a-broken-and-needless-ifdef-condi.patch
+revert-tcp-change-pingpong-threshold-to-3.patch-30941
+net-tls-remove-the-context-from-the-list-in-tls_devi.patch
+documentation-fix-sctp_wmem-in-ip-sysctl.rst.patch
+macsec-fix-null-deref-in-macsec_add_rxsa.patch
+macsec-fix-error-message-in-macsec_add_rxsa-and-_txs.patch
+macsec-limit-replay-window-size-with-xpn.patch
+macsec-always-read-macsec_sa_attr_pn-as-a-u64.patch
+net-macsec-fix-potential-resource-leak-in-macsec_add.patch
+net-mld-fix-reference-count-leak-in-mld_-query-repor.patch
+tcp-fix-data-races-around-sk_pacing_rate.patch
+net-fix-data-races-around-sysctl_-rw-mem-_offset.patch
+tcp-fix-a-data-race-around-sysctl_tcp_comp_sack_dela.patch
+tcp-fix-a-data-race-around-sysctl_tcp_comp_sack_slac.patch
+tcp-fix-a-data-race-around-sysctl_tcp_comp_sack_nr.patch
+tcp-fix-data-races-around-sysctl_tcp_reflect_tos.patch
+ipv4-fix-data-races-around-sysctl_fib_notify_on_flag.patch
+i40e-fix-interface-init-with-msi-interrupts-no-msi-x.patch
+sctp-fix-sleep-in-atomic-context-bug-in-timer-handle.patch
+octeontx2-pf-cn10k-fix-egress-ratelimit-configuratio.patch
+octeontx2-pf-fix-udp-tcp-src-and-dst-port-tc-filters.patch-781
+netfilter-nf_queue-do-not-allow-packet-truncation-be.patch
+ice-check-dd-eof-bits-on-rx-descriptor-rather-than-e.patch
+ice-do-not-setup-vlan-for-loopback-vsi.patch-1510
+scsi-mpt3sas-stop-fw-fault-watchdog-work-item-during.patch
+scsi-ufs-host-hold-reference-returned-by-of_parse_ph.patch
+scsi-core-fix-warning-in-scsi_alloc_sgtables.patch-8274
+virtio-net-fix-the-race-between-refill-work-and-clos.patch
+perf-symbol-correct-address-for-bss-symbols.patch
+sfc-disable-softirqs-for-ptp-tx.patch
+sctp-leave-the-err-path-free-in-sctp_stream_init-to-.patch
+watch_queue-fix-missing-rcu-annotation.patch-18505
+watch_queue-fix-missing-locking-in-add_watch_to_obje.patch
+net-ping6-fix-memleak-in-ipv6_renew_options.patch-12523
+ipv6-addrconf-fix-a-null-ptr-deref-bug-for-ip6_ptr.patch-17245
--- /dev/null
+From ab14b4d9c053d14bda83bf8b9a4e08617547b767 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 26 Jul 2022 08:45:04 +0200
+Subject: sfc: disable softirqs for ptp TX
+
+From: Alejandro Lucero <alejandro.lucero-palau@amd.com>
+
+[ Upstream commit 67c3b611d92fc238c43734878bc3e232ab570c79 ]
+
+Sending a PTP packet can imply to use the normal TX driver datapath but
+invoked from the driver's ptp worker. The kernel generic TX code
+disables softirqs and preemption before calling specific driver TX code,
+but the ptp worker does not. Although current ptp driver functionality
+does not require it, there are several reasons for doing so:
+
+ 1) The invoked code is always executed with softirqs disabled for non
+ PTP packets.
+ 2) Better if a ptp packet transmission is not interrupted by softirq
+ handling which could lead to high latencies.
+ 3) netdev_xmit_more used by the TX code requires preemption to be
+ disabled.
+
+Indeed a solution for dealing with kernel preemption state based on static
+kernel configuration is not possible since the introduction of dynamic
+preemption level configuration at boot time using the static calls
+functionality.
+
+Fixes: f79c957a0b537 ("drivers: net: sfc: use netdev_xmit_more helper")
+Signed-off-by: Alejandro Lucero <alejandro.lucero-palau@amd.com>
+Link: https://lore.kernel.org/r/20220726064504.49613-1-alejandro.lucero-palau@amd.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/sfc/ptp.c | 22 ++++++++++++++++++++++
+ 1 file changed, 22 insertions(+)
+
+diff --git a/drivers/net/ethernet/sfc/ptp.c b/drivers/net/ethernet/sfc/ptp.c
+index 725b0f38813a..a2b4e3befa59 100644
+--- a/drivers/net/ethernet/sfc/ptp.c
++++ b/drivers/net/ethernet/sfc/ptp.c
+@@ -1100,7 +1100,29 @@ static void efx_ptp_xmit_skb_queue(struct efx_nic *efx, struct sk_buff *skb)
+
+ tx_queue = efx_channel_get_tx_queue(ptp_data->channel, type);
+ if (tx_queue && tx_queue->timestamping) {
++ /* This code invokes normal driver TX code which is always
++ * protected from softirqs when called from generic TX code,
++ * which in turn disables preemption. Look at __dev_queue_xmit
++ * which uses rcu_read_lock_bh disabling preemption for RCU
++ * plus disabling softirqs. We do not need RCU reader
++ * protection here.
++ *
++ * Although it is theoretically safe for current PTP TX/RX code
++ * running without disabling softirqs, there are three good
++ * reasond for doing so:
++ *
++ * 1) The code invoked is mainly implemented for non-PTP
++ * packets and it is always executed with softirqs
++ * disabled.
++ * 2) This being a single PTP packet, better to not
++ * interrupt its processing by softirqs which can lead
++ * to high latencies.
++ * 3) netdev_xmit_more checks preemption is disabled and
++ * triggers a BUG_ON if not.
++ */
++ local_bh_disable();
+ efx_enqueue_skb(tx_queue, skb);
++ local_bh_enable();
+ } else {
+ WARN_ONCE(1, "PTP channel has no timestamped tx queue\n");
+ dev_kfree_skb_any(skb);
+--
+2.35.1
+
--- /dev/null
+From 2c2c4964d511d85932c83e2cc5ff64cb8ae5c52e Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 20 Jul 2022 09:50:14 -0700
+Subject: tcp: Fix a data-race around sysctl_tcp_adv_win_scale.
+
+From: Kuniyuki Iwashima <kuniyu@amazon.com>
+
+[ Upstream commit 36eeee75ef0157e42fb6593dcc65daab289b559e ]
+
+While reading sysctl_tcp_adv_win_scale, it can be changed concurrently.
+Thus, we need to add READ_ONCE() to its reader.
+
+Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
+Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/net/tcp.h | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/include/net/tcp.h b/include/net/tcp.h
+index 8ce8aafeef0f..76b0d7f2b967 100644
+--- a/include/net/tcp.h
++++ b/include/net/tcp.h
+@@ -1406,7 +1406,7 @@ void tcp_select_initial_window(const struct sock *sk, int __space,
+
+ static inline int tcp_win_from_space(const struct sock *sk, int space)
+ {
+- int tcp_adv_win_scale = sock_net(sk)->ipv4.sysctl_tcp_adv_win_scale;
++ int tcp_adv_win_scale = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_adv_win_scale);
+
+ return tcp_adv_win_scale <= 0 ?
+ (space>>(-tcp_adv_win_scale)) :
+--
+2.35.1
+
--- /dev/null
+From 01d0355715b3e8b0718bc8922ecd294148810b21 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 20 Jul 2022 09:50:13 -0700
+Subject: tcp: Fix a data-race around sysctl_tcp_app_win.
+
+From: Kuniyuki Iwashima <kuniyu@amazon.com>
+
+[ Upstream commit 02ca527ac5581cf56749db9fd03d854e842253dd ]
+
+While reading sysctl_tcp_app_win, it can be changed concurrently.
+Thus, we need to add READ_ONCE() to its reader.
+
+Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
+Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/ipv4/tcp_input.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
+index e066c527a723..1c940517f5f5 100644
+--- a/net/ipv4/tcp_input.c
++++ b/net/ipv4/tcp_input.c
+@@ -526,7 +526,7 @@ static void tcp_grow_window(struct sock *sk, const struct sk_buff *skb,
+ */
+ static void tcp_init_buffer_space(struct sock *sk)
+ {
+- int tcp_app_win = sock_net(sk)->ipv4.sysctl_tcp_app_win;
++ int tcp_app_win = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_app_win);
+ struct tcp_sock *tp = tcp_sk(sk);
+ int maxwin;
+
+--
+2.35.1
+
--- /dev/null
+From 935b89148da41562bb34b63fc5d5bc9cd8206477 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 20 Jul 2022 09:50:25 -0700
+Subject: tcp: Fix a data-race around sysctl_tcp_autocorking.
+
+From: Kuniyuki Iwashima <kuniyu@amazon.com>
+
+[ Upstream commit 85225e6f0a76e6745bc841c9f25169c509b573d8 ]
+
+While reading sysctl_tcp_autocorking, it can be changed concurrently.
+Thus, we need to add READ_ONCE() to its reader.
+
+Fixes: f54b311142a9 ("tcp: auto corking")
+Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/ipv4/tcp.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
+index 1abdb8712655..7ba9059c263a 100644
+--- a/net/ipv4/tcp.c
++++ b/net/ipv4/tcp.c
+@@ -694,7 +694,7 @@ static bool tcp_should_autocork(struct sock *sk, struct sk_buff *skb,
+ int size_goal)
+ {
+ return skb->len < size_goal &&
+- sock_net(sk)->ipv4.sysctl_tcp_autocorking &&
++ READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_autocorking) &&
+ !tcp_rtx_queue_empty(sk) &&
+ refcount_read(&sk->sk_wmem_alloc) > skb->truesize;
+ }
+--
+2.35.1
+
--- /dev/null
+From 820771349c258fcb893ad8ff9992b0cc1d27de45 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 20 Jul 2022 09:50:21 -0700
+Subject: tcp: Fix a data-race around sysctl_tcp_challenge_ack_limit.
+
+From: Kuniyuki Iwashima <kuniyu@amazon.com>
+
+[ Upstream commit db3815a2fa691da145cfbe834584f31ad75df9ff ]
+
+While reading sysctl_tcp_challenge_ack_limit, it can be changed
+concurrently. Thus, we need to add READ_ONCE() to its reader.
+
+Fixes: 282f23c6ee34 ("tcp: implement RFC 5961 3.2")
+Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/ipv4/tcp_input.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
+index 426f8fe02850..a5357ebfbcc0 100644
+--- a/net/ipv4/tcp_input.c
++++ b/net/ipv4/tcp_input.c
+@@ -3622,7 +3622,7 @@ static void tcp_send_challenge_ack(struct sock *sk, const struct sk_buff *skb)
+ /* Then check host-wide RFC 5961 rate limit. */
+ now = jiffies / HZ;
+ if (now != challenge_timestamp) {
+- u32 ack_limit = net->ipv4.sysctl_tcp_challenge_ack_limit;
++ u32 ack_limit = READ_ONCE(net->ipv4.sysctl_tcp_challenge_ack_limit);
+ u32 half = (ack_limit + 1) >> 1;
+
+ challenge_timestamp = now;
+--
+2.35.1
+
--- /dev/null
+From bdefd924f38e2c716687fca5a9dde96310d3da23 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 22 Jul 2022 11:22:01 -0700
+Subject: tcp: Fix a data-race around sysctl_tcp_comp_sack_delay_ns.
+
+From: Kuniyuki Iwashima <kuniyu@amazon.com>
+
+[ Upstream commit 4866b2b0f7672b6d760c4b8ece6fb56f965dcc8a ]
+
+While reading sysctl_tcp_comp_sack_delay_ns, it can be changed
+concurrently. Thus, we need to add READ_ONCE() to its reader.
+
+Fixes: 6d82aa242092 ("tcp: add tcp_comp_sack_delay_ns sysctl")
+Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/ipv4/tcp_input.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
+index e007bdc20e82..486ca1d5b436 100644
+--- a/net/ipv4/tcp_input.c
++++ b/net/ipv4/tcp_input.c
+@@ -5512,7 +5512,8 @@ static void __tcp_ack_snd_check(struct sock *sk, int ofo_possible)
+ if (tp->srtt_us && tp->srtt_us < rtt)
+ rtt = tp->srtt_us;
+
+- delay = min_t(unsigned long, sock_net(sk)->ipv4.sysctl_tcp_comp_sack_delay_ns,
++ delay = min_t(unsigned long,
++ READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_comp_sack_delay_ns),
+ rtt * (NSEC_PER_USEC >> 3)/20);
+ sock_hold(sk);
+ hrtimer_start_range_ns(&tp->compressed_ack_timer, ns_to_ktime(delay),
+--
+2.35.1
+
--- /dev/null
+From e0220c760de403bd66edcf660c26d8142a0ea7b5 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 22 Jul 2022 11:22:03 -0700
+Subject: tcp: Fix a data-race around sysctl_tcp_comp_sack_nr.
+
+From: Kuniyuki Iwashima <kuniyu@amazon.com>
+
+[ Upstream commit 79f55473bfc8ac51bd6572929a679eeb4da22251 ]
+
+While reading sysctl_tcp_comp_sack_nr, it can be changed concurrently.
+Thus, we need to add READ_ONCE() to its reader.
+
+Fixes: 9c21d2fc41c0 ("tcp: add tcp_comp_sack_nr sysctl")
+Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/ipv4/tcp_input.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
+index 7b593865b4ae..a33e6aa42a4c 100644
+--- a/net/ipv4/tcp_input.c
++++ b/net/ipv4/tcp_input.c
+@@ -5491,7 +5491,7 @@ static void __tcp_ack_snd_check(struct sock *sk, int ofo_possible)
+ }
+
+ if (!tcp_is_sack(tp) ||
+- tp->compressed_ack >= sock_net(sk)->ipv4.sysctl_tcp_comp_sack_nr)
++ tp->compressed_ack >= READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_comp_sack_nr))
+ goto send_now;
+
+ if (tp->compressed_ack_rcv_nxt != tp->rcv_nxt) {
+--
+2.35.1
+
--- /dev/null
+From d01503cad72cd3674173d67418ac2e164136ede7 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 22 Jul 2022 11:22:02 -0700
+Subject: tcp: Fix a data-race around sysctl_tcp_comp_sack_slack_ns.
+
+From: Kuniyuki Iwashima <kuniyu@amazon.com>
+
+[ Upstream commit 22396941a7f343d704738360f9ef0e6576489d43 ]
+
+While reading sysctl_tcp_comp_sack_slack_ns, it can be changed
+concurrently. Thus, we need to add READ_ONCE() to its reader.
+
+Fixes: a70437cc09a1 ("tcp: add hrtimer slack to sack compression")
+Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/ipv4/tcp_input.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
+index 486ca1d5b436..7b593865b4ae 100644
+--- a/net/ipv4/tcp_input.c
++++ b/net/ipv4/tcp_input.c
+@@ -5517,7 +5517,7 @@ static void __tcp_ack_snd_check(struct sock *sk, int ofo_possible)
+ rtt * (NSEC_PER_USEC >> 3)/20);
+ sock_hold(sk);
+ hrtimer_start_range_ns(&tp->compressed_ack_timer, ns_to_ktime(delay),
+- sock_net(sk)->ipv4.sysctl_tcp_comp_sack_slack_ns,
++ READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_comp_sack_slack_ns),
+ HRTIMER_MODE_REL_PINNED_SOFT);
+ }
+
+--
+2.35.1
+
--- /dev/null
+From 64fd28e6e558585b066d00ac8cd29d08a2eb58b1 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 20 Jul 2022 09:50:15 -0700
+Subject: tcp: Fix a data-race around sysctl_tcp_frto.
+
+From: Kuniyuki Iwashima <kuniyu@amazon.com>
+
+[ Upstream commit 706c6202a3589f290e1ef9be0584a8f4a3cc0507 ]
+
+While reading sysctl_tcp_frto, it can be changed concurrently.
+Thus, we need to add READ_ONCE() to its reader.
+
+Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
+Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/ipv4/tcp_input.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
+index 1c940517f5f5..b9fd51826aea 100644
+--- a/net/ipv4/tcp_input.c
++++ b/net/ipv4/tcp_input.c
+@@ -2167,7 +2167,7 @@ void tcp_enter_loss(struct sock *sk)
+ * loss recovery is underway except recurring timeout(s) on
+ * the same SND.UNA (sec 3.2). Disable F-RTO on path MTU probing
+ */
+- tp->frto = net->ipv4.sysctl_tcp_frto &&
++ tp->frto = READ_ONCE(net->ipv4.sysctl_tcp_frto) &&
+ (new_recovery || icsk->icsk_retransmits) &&
+ !inet_csk(sk)->icsk_mtup.probe_size;
+ }
+--
+2.35.1
+
--- /dev/null
+From 1d33fed2a2ede95683224e4c14a1d86273544711 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 20 Jul 2022 09:50:26 -0700
+Subject: tcp: Fix a data-race around sysctl_tcp_invalid_ratelimit.
+
+From: Kuniyuki Iwashima <kuniyu@amazon.com>
+
+[ Upstream commit 2afdbe7b8de84c28e219073a6661080e1b3ded48 ]
+
+While reading sysctl_tcp_invalid_ratelimit, it can be changed
+concurrently. Thus, we need to add READ_ONCE() to its reader.
+
+Fixes: 032ee4236954 ("tcp: helpers to mitigate ACK loops by rate-limiting out-of-window dupacks")
+Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/ipv4/tcp_input.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
+index b925c766f1d2..018be3f346e6 100644
+--- a/net/ipv4/tcp_input.c
++++ b/net/ipv4/tcp_input.c
+@@ -3574,7 +3574,8 @@ static bool __tcp_oow_rate_limited(struct net *net, int mib_idx,
+ if (*last_oow_ack_time) {
+ s32 elapsed = (s32)(tcp_jiffies32 - *last_oow_ack_time);
+
+- if (0 <= elapsed && elapsed < net->ipv4.sysctl_tcp_invalid_ratelimit) {
++ if (0 <= elapsed &&
++ elapsed < READ_ONCE(net->ipv4.sysctl_tcp_invalid_ratelimit)) {
+ NET_INC_STATS(net, mib_idx);
+ return true; /* rate-limited: don't send yet! */
+ }
+--
+2.35.1
+
--- /dev/null
+From 68b671415407d91c1e4a99211604b999304afc85 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 20 Jul 2022 09:50:20 -0700
+Subject: tcp: Fix a data-race around sysctl_tcp_limit_output_bytes.
+
+From: Kuniyuki Iwashima <kuniyu@amazon.com>
+
+[ Upstream commit 9fb90193fbd66b4c5409ef729fd081861f8b6351 ]
+
+While reading sysctl_tcp_limit_output_bytes, it can be changed
+concurrently. Thus, we need to add READ_ONCE() to its reader.
+
+Fixes: 46d3ceabd8d9 ("tcp: TCP Small Queues")
+Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/ipv4/tcp_output.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
+index caf9283f9b0f..8b6d89bb2d36 100644
+--- a/net/ipv4/tcp_output.c
++++ b/net/ipv4/tcp_output.c
+@@ -2506,7 +2506,7 @@ static bool tcp_small_queue_check(struct sock *sk, const struct sk_buff *skb,
+ sk->sk_pacing_rate >> READ_ONCE(sk->sk_pacing_shift));
+ if (sk->sk_pacing_status == SK_PACING_NONE)
+ limit = min_t(unsigned long, limit,
+- sock_net(sk)->ipv4.sysctl_tcp_limit_output_bytes);
++ READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_limit_output_bytes));
+ limit <<= factor;
+
+ if (static_branch_unlikely(&tcp_tx_delay_enabled) &&
+--
+2.35.1
+
--- /dev/null
+From 1a6202a794340f35493415e040698c5eef71a1a4 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 20 Jul 2022 09:50:24 -0700
+Subject: tcp: Fix a data-race around sysctl_tcp_min_rtt_wlen.
+
+From: Kuniyuki Iwashima <kuniyu@amazon.com>
+
+[ Upstream commit 1330ffacd05fc9ac4159d19286ce119e22450ed2 ]
+
+While reading sysctl_tcp_min_rtt_wlen, it can be changed concurrently.
+Thus, we need to add READ_ONCE() to its reader.
+
+Fixes: f672258391b4 ("tcp: track min RTT using windowed min-filter")
+Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/ipv4/tcp_input.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
+index a5357ebfbcc0..b925c766f1d2 100644
+--- a/net/ipv4/tcp_input.c
++++ b/net/ipv4/tcp_input.c
+@@ -3050,7 +3050,7 @@ static void tcp_fastretrans_alert(struct sock *sk, const u32 prior_snd_una,
+
+ static void tcp_update_rtt_min(struct sock *sk, u32 rtt_us, const int flag)
+ {
+- u32 wlen = sock_net(sk)->ipv4.sysctl_tcp_min_rtt_wlen * HZ;
++ u32 wlen = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_min_rtt_wlen) * HZ;
+ struct tcp_sock *tp = tcp_sk(sk);
+
+ if ((flag & FLAG_ACK_MAYBE_DELAYED) && rtt_us > tcp_min_rtt(tp)) {
+--
+2.35.1
+
--- /dev/null
+From 4ffa907eee81733f9847740d879df8380366c7a1 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 20 Jul 2022 09:50:22 -0700
+Subject: tcp: Fix a data-race around sysctl_tcp_min_tso_segs.
+
+From: Kuniyuki Iwashima <kuniyu@amazon.com>
+
+[ Upstream commit e0bb4ab9dfddd872622239f49fb2bd403b70853b ]
+
+While reading sysctl_tcp_min_tso_segs, it can be changed concurrently.
+Thus, we need to add READ_ONCE() to its reader.
+
+Fixes: 95bd09eb2750 ("tcp: TSO packets automatic sizing")
+Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/ipv4/tcp_output.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
+index 8b6d89bb2d36..3a84553fb4ed 100644
+--- a/net/ipv4/tcp_output.c
++++ b/net/ipv4/tcp_output.c
+@@ -1989,7 +1989,7 @@ static u32 tcp_tso_segs(struct sock *sk, unsigned int mss_now)
+
+ min_tso = ca_ops->min_tso_segs ?
+ ca_ops->min_tso_segs(sk) :
+- sock_net(sk)->ipv4.sysctl_tcp_min_tso_segs;
++ READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_min_tso_segs);
+
+ tso_segs = tcp_tso_autosize(sk, mss_now, min_tso);
+ return min_t(u32, tso_segs, sk->sk_gso_max_segs);
+--
+2.35.1
+
--- /dev/null
+From 72a1d5ff658140f598f89c99c368e997e4dd6ae7 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 20 Jul 2022 09:50:16 -0700
+Subject: tcp: Fix a data-race around sysctl_tcp_nometrics_save.
+
+From: Kuniyuki Iwashima <kuniyu@amazon.com>
+
+[ Upstream commit 8499a2454d9e8a55ce616ede9f9580f36fd5b0f3 ]
+
+While reading sysctl_tcp_nometrics_save, it can be changed concurrently.
+Thus, we need to add READ_ONCE() to its reader.
+
+Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
+Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/ipv4/tcp_metrics.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c
+index a501150deaa3..9dcc418a26f2 100644
+--- a/net/ipv4/tcp_metrics.c
++++ b/net/ipv4/tcp_metrics.c
+@@ -329,7 +329,7 @@ void tcp_update_metrics(struct sock *sk)
+ int m;
+
+ sk_dst_confirm(sk);
+- if (net->ipv4.sysctl_tcp_nometrics_save || !dst)
++ if (READ_ONCE(net->ipv4.sysctl_tcp_nometrics_save) || !dst)
+ return;
+
+ rcu_read_lock();
+--
+2.35.1
+
--- /dev/null
+From 4f19f4db66e4df97f9b37509bbe6497cffa150c7 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 22 Jul 2022 11:21:59 -0700
+Subject: tcp: Fix data-races around sk_pacing_rate.
+
+From: Kuniyuki Iwashima <kuniyu@amazon.com>
+
+[ Upstream commit 59bf6c65a09fff74215517aecffbbdcd67df76e3 ]
+
+While reading sysctl_tcp_pacing_(ss|ca)_ratio, they can be changed
+concurrently. Thus, we need to add READ_ONCE() to their readers.
+
+Fixes: 43e122b014c9 ("tcp: refine pacing rate determination")
+Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/ipv4/tcp_input.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
+index 018be3f346e6..566745f527fe 100644
+--- a/net/ipv4/tcp_input.c
++++ b/net/ipv4/tcp_input.c
+@@ -902,9 +902,9 @@ static void tcp_update_pacing_rate(struct sock *sk)
+ * end of slow start and should slow down.
+ */
+ if (tcp_snd_cwnd(tp) < tp->snd_ssthresh / 2)
+- rate *= sock_net(sk)->ipv4.sysctl_tcp_pacing_ss_ratio;
++ rate *= READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_pacing_ss_ratio);
+ else
+- rate *= sock_net(sk)->ipv4.sysctl_tcp_pacing_ca_ratio;
++ rate *= READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_pacing_ca_ratio);
+
+ rate *= max(tcp_snd_cwnd(tp), tp->packets_out);
+
+--
+2.35.1
+
--- /dev/null
+From f9e076cbb094b959e6e2304894fff853a915d812 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 20 Jul 2022 09:50:12 -0700
+Subject: tcp: Fix data-races around sysctl_tcp_dsack.
+
+From: Kuniyuki Iwashima <kuniyu@amazon.com>
+
+[ Upstream commit 58ebb1c8b35a8ef38cd6927431e0fa7b173a632d ]
+
+While reading sysctl_tcp_dsack, it can be changed concurrently.
+Thus, we need to add READ_ONCE() to its readers.
+
+Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
+Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/ipv4/tcp_input.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
+index 2d21d8bf3b8c..e066c527a723 100644
+--- a/net/ipv4/tcp_input.c
++++ b/net/ipv4/tcp_input.c
+@@ -4419,7 +4419,7 @@ static void tcp_dsack_set(struct sock *sk, u32 seq, u32 end_seq)
+ {
+ struct tcp_sock *tp = tcp_sk(sk);
+
+- if (tcp_is_sack(tp) && sock_net(sk)->ipv4.sysctl_tcp_dsack) {
++ if (tcp_is_sack(tp) && READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_dsack)) {
+ int mib_idx;
+
+ if (before(seq, tp->rcv_nxt))
+@@ -4466,7 +4466,7 @@ static void tcp_send_dupack(struct sock *sk, const struct sk_buff *skb)
+ NET_INC_STATS(sock_net(sk), LINUX_MIB_DELAYEDACKLOST);
+ tcp_enter_quickack_mode(sk, TCP_MAX_QUICKACKS);
+
+- if (tcp_is_sack(tp) && sock_net(sk)->ipv4.sysctl_tcp_dsack) {
++ if (tcp_is_sack(tp) && READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_dsack)) {
+ u32 end_seq = TCP_SKB_CB(skb)->end_seq;
+
+ tcp_rcv_spurious_retrans(sk, skb);
+--
+2.35.1
+
--- /dev/null
+From b3a4a46c8114c2565fd8b6758f66f760b12588da Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 20 Jul 2022 09:50:18 -0700
+Subject: tcp: Fix data-races around sysctl_tcp_moderate_rcvbuf.
+
+From: Kuniyuki Iwashima <kuniyu@amazon.com>
+
+[ Upstream commit 780476488844e070580bfc9e3bc7832ec1cea883 ]
+
+While reading sysctl_tcp_moderate_rcvbuf, it can be changed
+concurrently. Thus, we need to add READ_ONCE() to its readers.
+
+Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
+Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/ipv4/tcp_input.c | 2 +-
+ net/mptcp/protocol.c | 2 +-
+ 2 files changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
+index b9fd51826aea..426f8fe02850 100644
+--- a/net/ipv4/tcp_input.c
++++ b/net/ipv4/tcp_input.c
+@@ -716,7 +716,7 @@ void tcp_rcv_space_adjust(struct sock *sk)
+ * <prev RTT . ><current RTT .. ><next RTT .... >
+ */
+
+- if (sock_net(sk)->ipv4.sysctl_tcp_moderate_rcvbuf &&
++ if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_moderate_rcvbuf) &&
+ !(sk->sk_userlocks & SOCK_RCVBUF_LOCK)) {
+ int rcvmem, rcvbuf;
+ u64 rcvwin, grow;
+diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
+index d6def23b8cba..01ede89e3c46 100644
+--- a/net/mptcp/protocol.c
++++ b/net/mptcp/protocol.c
+@@ -1881,7 +1881,7 @@ static void mptcp_rcv_space_adjust(struct mptcp_sock *msk, int copied)
+ if (msk->rcvq_space.copied <= msk->rcvq_space.space)
+ goto new_measure;
+
+- if (sock_net(sk)->ipv4.sysctl_tcp_moderate_rcvbuf &&
++ if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_moderate_rcvbuf) &&
+ !(sk->sk_userlocks & SOCK_RCVBUF_LOCK)) {
+ int rcvmem, rcvbuf;
+ u64 rcvwin, grow;
+--
+2.35.1
+
--- /dev/null
+From 6b07ef9a9ac13f3e43fb3b713c23345768eb51d6 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 20 Jul 2022 09:50:17 -0700
+Subject: tcp: Fix data-races around sysctl_tcp_no_ssthresh_metrics_save.
+
+From: Kuniyuki Iwashima <kuniyu@amazon.com>
+
+[ Upstream commit ab1ba21b523ab496b1a4a8e396333b24b0a18f9a ]
+
+While reading sysctl_tcp_no_ssthresh_metrics_save, it can be changed
+concurrently. Thus, we need to add READ_ONCE() to its readers.
+
+Fixes: 65e6d90168f3 ("net-tcp: Disable TCP ssthresh metrics cache by default")
+Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/ipv4/tcp_metrics.c | 8 ++++----
+ 1 file changed, 4 insertions(+), 4 deletions(-)
+
+diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c
+index 9dcc418a26f2..d58e672be31c 100644
+--- a/net/ipv4/tcp_metrics.c
++++ b/net/ipv4/tcp_metrics.c
+@@ -385,7 +385,7 @@ void tcp_update_metrics(struct sock *sk)
+
+ if (tcp_in_initial_slowstart(tp)) {
+ /* Slow start still did not finish. */
+- if (!net->ipv4.sysctl_tcp_no_ssthresh_metrics_save &&
++ if (!READ_ONCE(net->ipv4.sysctl_tcp_no_ssthresh_metrics_save) &&
+ !tcp_metric_locked(tm, TCP_METRIC_SSTHRESH)) {
+ val = tcp_metric_get(tm, TCP_METRIC_SSTHRESH);
+ if (val && (tcp_snd_cwnd(tp) >> 1) > val)
+@@ -401,7 +401,7 @@ void tcp_update_metrics(struct sock *sk)
+ } else if (!tcp_in_slow_start(tp) &&
+ icsk->icsk_ca_state == TCP_CA_Open) {
+ /* Cong. avoidance phase, cwnd is reliable. */
+- if (!net->ipv4.sysctl_tcp_no_ssthresh_metrics_save &&
++ if (!READ_ONCE(net->ipv4.sysctl_tcp_no_ssthresh_metrics_save) &&
+ !tcp_metric_locked(tm, TCP_METRIC_SSTHRESH))
+ tcp_metric_set(tm, TCP_METRIC_SSTHRESH,
+ max(tcp_snd_cwnd(tp) >> 1, tp->snd_ssthresh));
+@@ -418,7 +418,7 @@ void tcp_update_metrics(struct sock *sk)
+ tcp_metric_set(tm, TCP_METRIC_CWND,
+ (val + tp->snd_ssthresh) >> 1);
+ }
+- if (!net->ipv4.sysctl_tcp_no_ssthresh_metrics_save &&
++ if (!READ_ONCE(net->ipv4.sysctl_tcp_no_ssthresh_metrics_save) &&
+ !tcp_metric_locked(tm, TCP_METRIC_SSTHRESH)) {
+ val = tcp_metric_get(tm, TCP_METRIC_SSTHRESH);
+ if (val && tp->snd_ssthresh > val)
+@@ -463,7 +463,7 @@ void tcp_init_metrics(struct sock *sk)
+ if (tcp_metric_locked(tm, TCP_METRIC_CWND))
+ tp->snd_cwnd_clamp = tcp_metric_get(tm, TCP_METRIC_CWND);
+
+- val = net->ipv4.sysctl_tcp_no_ssthresh_metrics_save ?
++ val = READ_ONCE(net->ipv4.sysctl_tcp_no_ssthresh_metrics_save) ?
+ 0 : tcp_metric_get(tm, TCP_METRIC_SSTHRESH);
+ if (val) {
+ tp->snd_ssthresh = val;
+--
+2.35.1
+
--- /dev/null
+From 813c29e1b67ed114af8c31db5698f4506e3f650b Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 22 Jul 2022 11:22:04 -0700
+Subject: tcp: Fix data-races around sysctl_tcp_reflect_tos.
+
+From: Kuniyuki Iwashima <kuniyu@amazon.com>
+
+[ Upstream commit 870e3a634b6a6cb1543b359007aca73fe6a03ac5 ]
+
+While reading sysctl_tcp_reflect_tos, it can be changed concurrently.
+Thus, we need to add READ_ONCE() to its readers.
+
+Fixes: ac8f1710c12b ("tcp: reflect tos value received in SYN to the socket")
+Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
+Acked-by: Wei Wang <weiwan@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/ipv4/tcp_ipv4.c | 4 ++--
+ net/ipv6/tcp_ipv6.c | 4 ++--
+ 2 files changed, 4 insertions(+), 4 deletions(-)
+
+diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
+index fba02cf6b468..dae0776c4948 100644
+--- a/net/ipv4/tcp_ipv4.c
++++ b/net/ipv4/tcp_ipv4.c
+@@ -1004,7 +1004,7 @@ static int tcp_v4_send_synack(const struct sock *sk, struct dst_entry *dst,
+ if (skb) {
+ __tcp_v4_send_check(skb, ireq->ir_loc_addr, ireq->ir_rmt_addr);
+
+- tos = sock_net(sk)->ipv4.sysctl_tcp_reflect_tos ?
++ tos = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reflect_tos) ?
+ (tcp_rsk(req)->syn_tos & ~INET_ECN_MASK) |
+ (inet_sk(sk)->tos & INET_ECN_MASK) :
+ inet_sk(sk)->tos;
+@@ -1590,7 +1590,7 @@ struct sock *tcp_v4_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
+ /* Set ToS of the new socket based upon the value of incoming SYN.
+ * ECT bits are set later in tcp_init_transfer().
+ */
+- if (sock_net(sk)->ipv4.sysctl_tcp_reflect_tos)
++ if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reflect_tos))
+ newinet->tos = tcp_rsk(req)->syn_tos & ~INET_ECN_MASK;
+
+ if (!dst) {
+diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
+index beaa0c2ada23..8ab39cf57d43 100644
+--- a/net/ipv6/tcp_ipv6.c
++++ b/net/ipv6/tcp_ipv6.c
+@@ -542,7 +542,7 @@ static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst,
+ if (np->repflow && ireq->pktopts)
+ fl6->flowlabel = ip6_flowlabel(ipv6_hdr(ireq->pktopts));
+
+- tclass = sock_net(sk)->ipv4.sysctl_tcp_reflect_tos ?
++ tclass = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reflect_tos) ?
+ (tcp_rsk(req)->syn_tos & ~INET_ECN_MASK) |
+ (np->tclass & INET_ECN_MASK) :
+ np->tclass;
+@@ -1364,7 +1364,7 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *
+ /* Set ToS of the new socket based upon the value of incoming SYN.
+ * ECT bits are set later in tcp_init_transfer().
+ */
+- if (sock_net(sk)->ipv4.sysctl_tcp_reflect_tos)
++ if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reflect_tos))
+ newnp->tclass = tcp_rsk(req)->syn_tos & ~INET_ECN_MASK;
+
+ /* Clone native IPv6 options from listening socket (if any)
+--
+2.35.1
+
--- /dev/null
+From ef017bfd0c1275b4874825a97bff716abdb151ad Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 25 Jul 2022 15:21:59 +0800
+Subject: virtio-net: fix the race between refill work and close
+
+From: Jason Wang <jasowang@redhat.com>
+
+[ Upstream commit 5a159128faff151b7fe5f4eb0f310b1e0a2d56bf ]
+
+We try using cancel_delayed_work_sync() to prevent the work from
+enabling NAPI. This is insufficient since we don't disable the source
+of the refill work scheduling. This means an NAPI poll callback after
+cancel_delayed_work_sync() can schedule the refill work then can
+re-enable the NAPI that leads to use-after-free [1].
+
+Since the work can enable NAPI, we can't simply disable NAPI before
+calling cancel_delayed_work_sync(). So fix this by introducing a
+dedicated boolean to control whether or not the work could be
+scheduled from NAPI.
+
+[1]
+==================================================================
+BUG: KASAN: use-after-free in refill_work+0x43/0xd4
+Read of size 2 at addr ffff88810562c92e by task kworker/2:1/42
+
+CPU: 2 PID: 42 Comm: kworker/2:1 Not tainted 5.19.0-rc1+ #480
+Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.16.0-0-gd239552ce722-prebuilt.qemu.org 04/01/2014
+Workqueue: events refill_work
+Call Trace:
+ <TASK>
+ dump_stack_lvl+0x34/0x44
+ print_report.cold+0xbb/0x6ac
+ ? _printk+0xad/0xde
+ ? refill_work+0x43/0xd4
+ kasan_report+0xa8/0x130
+ ? refill_work+0x43/0xd4
+ refill_work+0x43/0xd4
+ process_one_work+0x43d/0x780
+ worker_thread+0x2a0/0x6f0
+ ? process_one_work+0x780/0x780
+ kthread+0x167/0x1a0
+ ? kthread_exit+0x50/0x50
+ ret_from_fork+0x22/0x30
+ </TASK>
+...
+
+Fixes: b2baed69e605c ("virtio_net: set/cancel work on ndo_open/ndo_stop")
+Signed-off-by: Jason Wang <jasowang@redhat.com>
+Acked-by: Michael S. Tsirkin <mst@redhat.com>
+Reviewed-by: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/virtio_net.c | 37 ++++++++++++++++++++++++++++++++++---
+ 1 file changed, 34 insertions(+), 3 deletions(-)
+
+diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
+index 318c681ad63e..53cefad2a79d 100644
+--- a/drivers/net/virtio_net.c
++++ b/drivers/net/virtio_net.c
+@@ -213,9 +213,15 @@ struct virtnet_info {
+ /* Packet virtio header size */
+ u8 hdr_len;
+
+- /* Work struct for refilling if we run low on memory. */
++ /* Work struct for delayed refilling if we run low on memory. */
+ struct delayed_work refill;
+
++ /* Is delayed refill enabled? */
++ bool refill_enabled;
++
++ /* The lock to synchronize the access to refill_enabled */
++ spinlock_t refill_lock;
++
+ /* Work struct for config space updates */
+ struct work_struct config_work;
+
+@@ -319,6 +325,20 @@ static struct page *get_a_page(struct receive_queue *rq, gfp_t gfp_mask)
+ return p;
+ }
+
++static void enable_delayed_refill(struct virtnet_info *vi)
++{
++ spin_lock_bh(&vi->refill_lock);
++ vi->refill_enabled = true;
++ spin_unlock_bh(&vi->refill_lock);
++}
++
++static void disable_delayed_refill(struct virtnet_info *vi)
++{
++ spin_lock_bh(&vi->refill_lock);
++ vi->refill_enabled = false;
++ spin_unlock_bh(&vi->refill_lock);
++}
++
+ static void virtqueue_napi_schedule(struct napi_struct *napi,
+ struct virtqueue *vq)
+ {
+@@ -1454,8 +1474,12 @@ static int virtnet_receive(struct receive_queue *rq, int budget,
+ }
+
+ if (rq->vq->num_free > min((unsigned int)budget, virtqueue_get_vring_size(rq->vq)) / 2) {
+- if (!try_fill_recv(vi, rq, GFP_ATOMIC))
+- schedule_delayed_work(&vi->refill, 0);
++ if (!try_fill_recv(vi, rq, GFP_ATOMIC)) {
++ spin_lock(&vi->refill_lock);
++ if (vi->refill_enabled)
++ schedule_delayed_work(&vi->refill, 0);
++ spin_unlock(&vi->refill_lock);
++ }
+ }
+
+ u64_stats_update_begin(&rq->stats.syncp);
+@@ -1578,6 +1602,8 @@ static int virtnet_open(struct net_device *dev)
+ struct virtnet_info *vi = netdev_priv(dev);
+ int i, err;
+
++ enable_delayed_refill(vi);
++
+ for (i = 0; i < vi->max_queue_pairs; i++) {
+ if (i < vi->curr_queue_pairs)
+ /* Make sure we have some buffers: if oom use wq. */
+@@ -1958,6 +1984,8 @@ static int virtnet_close(struct net_device *dev)
+ struct virtnet_info *vi = netdev_priv(dev);
+ int i;
+
++ /* Make sure NAPI doesn't schedule refill work */
++ disable_delayed_refill(vi);
+ /* Make sure refill_work doesn't re-enable napi! */
+ cancel_delayed_work_sync(&vi->refill);
+
+@@ -2455,6 +2483,8 @@ static int virtnet_restore_up(struct virtio_device *vdev)
+
+ virtio_device_ready(vdev);
+
++ enable_delayed_refill(vi);
++
+ if (netif_running(vi->dev)) {
+ err = virtnet_open(vi->dev);
+ if (err)
+@@ -3162,6 +3192,7 @@ static int virtnet_probe(struct virtio_device *vdev)
+ vdev->priv = vi;
+
+ INIT_WORK(&vi->config_work, virtnet_config_changed_work);
++ spin_lock_init(&vi->refill_lock);
+
+ /* If we can receive ANY GSO packets, we must allocate large ones. */
+ if (virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_TSO4) ||
+--
+2.35.1
+
--- /dev/null
+From bfe5eaf5aac9d468598ad292627d44632264d877 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 28 Jul 2022 10:31:12 +0100
+Subject: watch_queue: Fix missing locking in add_watch_to_object()
+
+From: Linus Torvalds <torvalds@linux-foundation.org>
+
+[ Upstream commit e64ab2dbd882933b65cd82ff6235d705ad65dbb6 ]
+
+If a watch is being added to a queue, it needs to guard against
+interference from addition of a new watch, manual removal of a watch and
+removal of a watch due to some other queue being destroyed.
+
+KEYCTL_WATCH_KEY guards against this for the same {key,queue} pair by
+holding the key->sem writelocked and by holding refs on both the key and
+the queue - but that doesn't prevent interaction from other {key,queue}
+pairs.
+
+While add_watch_to_object() does take the spinlock on the event queue,
+it doesn't take the lock on the source's watch list. The assumption was
+that the caller would prevent that (say by taking key->sem) - but that
+doesn't prevent interference from the destruction of another queue.
+
+Fix this by locking the watcher list in add_watch_to_object().
+
+Fixes: c73be61cede5 ("pipe: Add general notification queue support")
+Reported-by: syzbot+03d7b43290037d1f87ca@syzkaller.appspotmail.com
+Signed-off-by: David Howells <dhowells@redhat.com>
+cc: keyrings@vger.kernel.org
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/watch_queue.c | 58 +++++++++++++++++++++++++++-----------------
+ 1 file changed, 36 insertions(+), 22 deletions(-)
+
+diff --git a/kernel/watch_queue.c b/kernel/watch_queue.c
+index 7019d337ce86..1059ef6c3711 100644
+--- a/kernel/watch_queue.c
++++ b/kernel/watch_queue.c
+@@ -457,6 +457,33 @@ void init_watch(struct watch *watch, struct watch_queue *wqueue)
+ rcu_assign_pointer(watch->queue, wqueue);
+ }
+
++static int add_one_watch(struct watch *watch, struct watch_list *wlist, struct watch_queue *wqueue)
++{
++ const struct cred *cred;
++ struct watch *w;
++
++ hlist_for_each_entry(w, &wlist->watchers, list_node) {
++ struct watch_queue *wq = rcu_access_pointer(w->queue);
++ if (wqueue == wq && watch->id == w->id)
++ return -EBUSY;
++ }
++
++ cred = current_cred();
++ if (atomic_inc_return(&cred->user->nr_watches) > task_rlimit(current, RLIMIT_NOFILE)) {
++ atomic_dec(&cred->user->nr_watches);
++ return -EAGAIN;
++ }
++
++ watch->cred = get_cred(cred);
++ rcu_assign_pointer(watch->watch_list, wlist);
++
++ kref_get(&wqueue->usage);
++ kref_get(&watch->usage);
++ hlist_add_head(&watch->queue_node, &wqueue->watches);
++ hlist_add_head_rcu(&watch->list_node, &wlist->watchers);
++ return 0;
++}
++
+ /**
+ * add_watch_to_object - Add a watch on an object to a watch list
+ * @watch: The watch to add
+@@ -471,34 +498,21 @@ void init_watch(struct watch *watch, struct watch_queue *wqueue)
+ */
+ int add_watch_to_object(struct watch *watch, struct watch_list *wlist)
+ {
+- struct watch_queue *wqueue = rcu_access_pointer(watch->queue);
+- struct watch *w;
+-
+- hlist_for_each_entry(w, &wlist->watchers, list_node) {
+- struct watch_queue *wq = rcu_access_pointer(w->queue);
+- if (wqueue == wq && watch->id == w->id)
+- return -EBUSY;
+- }
+-
+- watch->cred = get_current_cred();
+- rcu_assign_pointer(watch->watch_list, wlist);
++ struct watch_queue *wqueue;
++ int ret = -ENOENT;
+
+- if (atomic_inc_return(&watch->cred->user->nr_watches) >
+- task_rlimit(current, RLIMIT_NOFILE)) {
+- atomic_dec(&watch->cred->user->nr_watches);
+- put_cred(watch->cred);
+- return -EAGAIN;
+- }
++ rcu_read_lock();
+
++ wqueue = rcu_access_pointer(watch->queue);
+ if (lock_wqueue(wqueue)) {
+- kref_get(&wqueue->usage);
+- kref_get(&watch->usage);
+- hlist_add_head(&watch->queue_node, &wqueue->watches);
++ spin_lock(&wlist->lock);
++ ret = add_one_watch(watch, wlist, wqueue);
++ spin_unlock(&wlist->lock);
+ unlock_wqueue(wqueue);
+ }
+
+- hlist_add_head_rcu(&watch->list_node, &wlist->watchers);
+- return 0;
++ rcu_read_unlock();
++ return ret;
+ }
+ EXPORT_SYMBOL(add_watch_to_object);
+
+--
+2.35.1
+
--- /dev/null
+From efb849e71a853a285d2b1728572cb644e20913e9 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 28 Jul 2022 10:31:06 +0100
+Subject: watch_queue: Fix missing rcu annotation
+
+From: David Howells <dhowells@redhat.com>
+
+[ Upstream commit e0339f036ef4beb9b20f0b6532a1e0ece7f594c6 ]
+
+Since __post_watch_notification() walks wlist->watchers with only the
+RCU read lock held, we need to use RCU methods to add to the list (we
+already use RCU methods to remove from the list).
+
+Fix add_watch_to_object() to use hlist_add_head_rcu() instead of
+hlist_add_head() for that list.
+
+Fixes: c73be61cede5 ("pipe: Add general notification queue support")
+Signed-off-by: David Howells <dhowells@redhat.com>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/watch_queue.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/kernel/watch_queue.c b/kernel/watch_queue.c
+index debebcd2664e..7019d337ce86 100644
+--- a/kernel/watch_queue.c
++++ b/kernel/watch_queue.c
+@@ -497,7 +497,7 @@ int add_watch_to_object(struct watch *watch, struct watch_list *wlist)
+ unlock_wqueue(wqueue);
+ }
+
+- hlist_add_head(&watch->list_node, &wlist->watchers);
++ hlist_add_head_rcu(&watch->list_node, &wlist->watchers);
+ return 0;
+ }
+ EXPORT_SYMBOL(add_watch_to_object);
+--
+2.35.1
+