--- /dev/null
+From 407d51997d610cc6d9c828fa8881ea99848b284a Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 21 Jul 2022 10:35:46 -0400
+Subject: Documentation: fix sctp_wmem in ip-sysctl.rst
+
+From: Xin Long <lucien.xin@gmail.com>
+
+[ Upstream commit aa709da0e032cee7c202047ecd75f437bb0126ed ]
+
+Since commit 1033990ac5b2 ("sctp: implement memory accounting on tx path"),
+SCTP has supported memory accounting on tx path where 'sctp_wmem' is used
+by sk_wmem_schedule(). So we should fix the description for this option in
+ip-sysctl.rst accordingly.
+
+v1->v2:
+ - Improve the description as Marcelo suggested.
+
+Fixes: 1033990ac5b2 ("sctp: implement memory accounting on tx path")
+Signed-off-by: Xin Long <lucien.xin@gmail.com>
+Acked-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ Documentation/networking/ip-sysctl.rst | 9 ++++++++-
+ 1 file changed, 8 insertions(+), 1 deletion(-)
+
+diff --git a/Documentation/networking/ip-sysctl.rst b/Documentation/networking/ip-sysctl.rst
+index 8899b474edbf..e29017d4d7a2 100644
+--- a/Documentation/networking/ip-sysctl.rst
++++ b/Documentation/networking/ip-sysctl.rst
+@@ -2848,7 +2848,14 @@ sctp_rmem - vector of 3 INTEGERs: min, default, max
+ Default: 4K
+
+ sctp_wmem - vector of 3 INTEGERs: min, default, max
+- Currently this tunable has no effect.
++ Only the first value ("min") is used, "default" and "max" are
++ ignored.
++
++ min: Minimum size of send buffer that can be used by SCTP sockets.
++ It is guaranteed to each SCTP socket (but not association) even
++ under moderate memory pressure.
++
++ Default: 4K
+
+ addr_scope_policy - INTEGER
+ Control IPv4 address scoping - draft-stewart-tsvwg-sctp-ipv4-00
+--
+2.35.1
+
--- /dev/null
+From 05fa122d8f66e657cb0fc88add89518817dcdee5 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 22 Jul 2022 10:54:01 -0700
+Subject: i40e: Fix interface init with MSI interrupts (no MSI-X)
+
+From: Michal Maloszewski <michal.maloszewski@intel.com>
+
+[ Upstream commit 5fcbb711024aac6d4db385623e6f2fdf019f7782 ]
+
+Fix the inability to bring an interface up on a setup with
+only MSI interrupts enabled (no MSI-X).
+Solution is to add a default number of QPs = 1. This is enough,
+since without MSI-X support driver enables only a basic feature set.
+
+Fixes: bc6d33c8d93f ("i40e: Fix the number of queues available to be mapped for use")
+Signed-off-by: Dawid Lukwinski <dawid.lukwinski@intel.com>
+Signed-off-by: Michal Maloszewski <michal.maloszewski@intel.com>
+Tested-by: Dave Switzer <david.switzer@intel.com>
+Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
+Link: https://lore.kernel.org/r/20220722175401.112572-1-anthony.l.nguyen@intel.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/intel/i40e/i40e_main.c | 4 ++++
+ 1 file changed, 4 insertions(+)
+
+diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
+index 6f01bffd7e5c..9471f47089b2 100644
+--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
++++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
+@@ -1920,11 +1920,15 @@ static void i40e_vsi_setup_queue_map(struct i40e_vsi *vsi,
+ * non-zero req_queue_pairs says that user requested a new
+ * queue count via ethtool's set_channels, so use this
+ * value for queues distribution across traffic classes
++ * We need at least one queue pair for the interface
++ * to be usable as we see in else statement.
+ */
+ if (vsi->req_queue_pairs > 0)
+ vsi->num_queue_pairs = vsi->req_queue_pairs;
+ else if (pf->flags & I40E_FLAG_MSIX_ENABLED)
+ vsi->num_queue_pairs = pf->num_lan_msix;
++ else
++ vsi->num_queue_pairs = 1;
+ }
+
+ /* Number of queues per enabled TC */
+--
+2.35.1
+
--- /dev/null
+From 14d9c1b800454ad8885b03a9c8ac5afc9f07e813 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 22 Jul 2022 11:22:05 -0700
+Subject: ipv4: Fix data-races around sysctl_fib_notify_on_flag_change.
+
+From: Kuniyuki Iwashima <kuniyu@amazon.com>
+
+[ Upstream commit 96b9bd8c6d125490f9adfb57d387ef81a55a103e ]
+
+While reading sysctl_fib_notify_on_flag_change, it can be changed
+concurrently. Thus, we need to add READ_ONCE() to its readers.
+
+Fixes: 680aea08e78c ("net: ipv4: Emit notification when fib hardware flags are changed")
+Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/ipv4/fib_trie.c | 7 +++++--
+ 1 file changed, 5 insertions(+), 2 deletions(-)
+
+diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
+index 43a496272227..c1b53854047b 100644
+--- a/net/ipv4/fib_trie.c
++++ b/net/ipv4/fib_trie.c
+@@ -1042,6 +1042,7 @@ fib_find_matching_alias(struct net *net, const struct fib_rt_info *fri)
+
+ void fib_alias_hw_flags_set(struct net *net, const struct fib_rt_info *fri)
+ {
++ u8 fib_notify_on_flag_change;
+ struct fib_alias *fa_match;
+ struct sk_buff *skb;
+ int err;
+@@ -1063,14 +1064,16 @@ void fib_alias_hw_flags_set(struct net *net, const struct fib_rt_info *fri)
+ WRITE_ONCE(fa_match->offload, fri->offload);
+ WRITE_ONCE(fa_match->trap, fri->trap);
+
++ fib_notify_on_flag_change = READ_ONCE(net->ipv4.sysctl_fib_notify_on_flag_change);
++
+ /* 2 means send notifications only if offload_failed was changed. */
+- if (net->ipv4.sysctl_fib_notify_on_flag_change == 2 &&
++ if (fib_notify_on_flag_change == 2 &&
+ READ_ONCE(fa_match->offload_failed) == fri->offload_failed)
+ goto out;
+
+ WRITE_ONCE(fa_match->offload_failed, fri->offload_failed);
+
+- if (!net->ipv4.sysctl_fib_notify_on_flag_change)
++ if (!fib_notify_on_flag_change)
+ goto out;
+
+ skb = nlmsg_new(fib_nlmsg_size(fa_match->fa_info), GFP_ATOMIC);
+--
+2.35.1
+
--- /dev/null
+From de3e71dc13c0475f43550af3feec3f5cd1fff72c Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 22 Jul 2022 11:16:30 +0200
+Subject: macsec: always read MACSEC_SA_ATTR_PN as a u64
+
+From: Sabrina Dubroca <sd@queasysnail.net>
+
+[ Upstream commit c630d1fe6219769049c87d1a6a0e9a6de55328a1 ]
+
+Currently, MACSEC_SA_ATTR_PN is handled inconsistently, sometimes as a
+u32, sometimes forced into a u64 without checking the actual length of
+the attribute. Instead, we can use nla_get_u64 everywhere, which will
+read up to 64 bits into a u64, capped by the actual length of the
+attribute coming from userspace.
+
+This fixes several issues:
+ - the check in validate_add_rxsa doesn't work with 32-bit attributes
+ - the checks in validate_add_txsa and validate_upd_sa incorrectly
+ reject X << 32 (with X != 0)
+
+Fixes: 48ef50fa866a ("macsec: Netlink support of XPN cipher suites (IEEE 802.1AEbw)")
+Signed-off-by: Sabrina Dubroca <sd@queasysnail.net>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/macsec.c | 6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c
+index b3834e353c22..95578f04f212 100644
+--- a/drivers/net/macsec.c
++++ b/drivers/net/macsec.c
+@@ -1698,7 +1698,7 @@ static bool validate_add_rxsa(struct nlattr **attrs)
+ return false;
+
+ if (attrs[MACSEC_SA_ATTR_PN] &&
+- *(u64 *)nla_data(attrs[MACSEC_SA_ATTR_PN]) == 0)
++ nla_get_u64(attrs[MACSEC_SA_ATTR_PN]) == 0)
+ return false;
+
+ if (attrs[MACSEC_SA_ATTR_ACTIVE]) {
+@@ -1941,7 +1941,7 @@ static bool validate_add_txsa(struct nlattr **attrs)
+ if (nla_get_u8(attrs[MACSEC_SA_ATTR_AN]) >= MACSEC_NUM_AN)
+ return false;
+
+- if (nla_get_u32(attrs[MACSEC_SA_ATTR_PN]) == 0)
++ if (nla_get_u64(attrs[MACSEC_SA_ATTR_PN]) == 0)
+ return false;
+
+ if (attrs[MACSEC_SA_ATTR_ACTIVE]) {
+@@ -2295,7 +2295,7 @@ static bool validate_upd_sa(struct nlattr **attrs)
+ if (nla_get_u8(attrs[MACSEC_SA_ATTR_AN]) >= MACSEC_NUM_AN)
+ return false;
+
+- if (attrs[MACSEC_SA_ATTR_PN] && nla_get_u32(attrs[MACSEC_SA_ATTR_PN]) == 0)
++ if (attrs[MACSEC_SA_ATTR_PN] && nla_get_u64(attrs[MACSEC_SA_ATTR_PN]) == 0)
+ return false;
+
+ if (attrs[MACSEC_SA_ATTR_ACTIVE]) {
+--
+2.35.1
+
--- /dev/null
+From e53ed1206d6ed5463fbd3e146f28f13fc16358c8 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 22 Jul 2022 11:16:28 +0200
+Subject: macsec: fix error message in macsec_add_rxsa and _txsa
+
+From: Sabrina Dubroca <sd@queasysnail.net>
+
+[ Upstream commit 3240eac4ff20e51b87600dbd586ed814daf313db ]
+
+The expected length is MACSEC_SALT_LEN, not MACSEC_SA_ATTR_SALT.
+
+Fixes: 48ef50fa866a ("macsec: Netlink support of XPN cipher suites (IEEE 802.1AEbw)")
+Signed-off-by: Sabrina Dubroca <sd@queasysnail.net>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/macsec.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c
+index 769a1eca6bd8..634452d3ecc5 100644
+--- a/drivers/net/macsec.c
++++ b/drivers/net/macsec.c
+@@ -1770,7 +1770,7 @@ static int macsec_add_rxsa(struct sk_buff *skb, struct genl_info *info)
+ if (nla_len(tb_sa[MACSEC_SA_ATTR_SALT]) != MACSEC_SALT_LEN) {
+ pr_notice("macsec: nl: add_rxsa: bad salt length: %d != %d\n",
+ nla_len(tb_sa[MACSEC_SA_ATTR_SALT]),
+- MACSEC_SA_ATTR_SALT);
++ MACSEC_SALT_LEN);
+ rtnl_unlock();
+ return -EINVAL;
+ }
+@@ -2012,7 +2012,7 @@ static int macsec_add_txsa(struct sk_buff *skb, struct genl_info *info)
+ if (nla_len(tb_sa[MACSEC_SA_ATTR_SALT]) != MACSEC_SALT_LEN) {
+ pr_notice("macsec: nl: add_txsa: bad salt length: %d != %d\n",
+ nla_len(tb_sa[MACSEC_SA_ATTR_SALT]),
+- MACSEC_SA_ATTR_SALT);
++ MACSEC_SALT_LEN);
+ rtnl_unlock();
+ return -EINVAL;
+ }
+--
+2.35.1
+
--- /dev/null
+From c8087e08cedefd6ad82ce66bcafb7dba58474712 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 22 Jul 2022 11:16:27 +0200
+Subject: macsec: fix NULL deref in macsec_add_rxsa
+
+From: Sabrina Dubroca <sd@queasysnail.net>
+
+[ Upstream commit f46040eeaf2e523a4096199fd93a11e794818009 ]
+
+Commit 48ef50fa866a added a test on tb_sa[MACSEC_SA_ATTR_PN], but
+nothing guarantees that it's not NULL at this point. The same code was
+added to macsec_add_txsa, but there it's not a problem because
+validate_add_txsa checks that the MACSEC_SA_ATTR_PN attribute is
+present.
+
+Note: it's not possible to reproduce with iproute, because iproute
+doesn't allow creating an SA without specifying the PN.
+
+Fixes: 48ef50fa866a ("macsec: Netlink support of XPN cipher suites (IEEE 802.1AEbw)")
+Link: https://bugzilla.kernel.org/show_bug.cgi?id=208315
+Reported-by: Frantisek Sumsal <fsumsal@redhat.com>
+Signed-off-by: Sabrina Dubroca <sd@queasysnail.net>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/macsec.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c
+index 817577e713d7..769a1eca6bd8 100644
+--- a/drivers/net/macsec.c
++++ b/drivers/net/macsec.c
+@@ -1753,7 +1753,8 @@ static int macsec_add_rxsa(struct sk_buff *skb, struct genl_info *info)
+ }
+
+ pn_len = secy->xpn ? MACSEC_XPN_PN_LEN : MACSEC_DEFAULT_PN_LEN;
+- if (nla_len(tb_sa[MACSEC_SA_ATTR_PN]) != pn_len) {
++ if (tb_sa[MACSEC_SA_ATTR_PN] &&
++ nla_len(tb_sa[MACSEC_SA_ATTR_PN]) != pn_len) {
+ pr_notice("macsec: nl: add_rxsa: bad pn length: %d != %d\n",
+ nla_len(tb_sa[MACSEC_SA_ATTR_PN]), pn_len);
+ rtnl_unlock();
+--
+2.35.1
+
--- /dev/null
+From 5fb6923084846a2bd14faa0eeff50e07078145b5 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 22 Jul 2022 11:16:29 +0200
+Subject: macsec: limit replay window size with XPN
+
+From: Sabrina Dubroca <sd@queasysnail.net>
+
+[ Upstream commit b07a0e2044057f201d694ab474f5c42a02b6465b ]
+
+IEEE 802.1AEbw-2013 (section 10.7.8) specifies that the maximum value
+of the replay window is 2^30-1, to help with recovery of the upper
+bits of the PN.
+
+To avoid leaving the existing macsec device in an inconsistent state
+if this test fails during changelink, reuse the cleanup mechanism
+introduced for HW offload. This wasn't needed until now because
+macsec_changelink_common could not fail during changelink, as
+modifying the cipher suite was not allowed.
+
+Finally, this must happen after handling IFLA_MACSEC_CIPHER_SUITE so
+that secy->xpn is set.
+
+Fixes: 48ef50fa866a ("macsec: Netlink support of XPN cipher suites (IEEE 802.1AEbw)")
+Signed-off-by: Sabrina Dubroca <sd@queasysnail.net>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/macsec.c | 16 ++++++++++++----
+ 1 file changed, 12 insertions(+), 4 deletions(-)
+
+diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c
+index 634452d3ecc5..b3834e353c22 100644
+--- a/drivers/net/macsec.c
++++ b/drivers/net/macsec.c
+@@ -243,6 +243,7 @@ static struct macsec_cb *macsec_skb_cb(struct sk_buff *skb)
+ #define DEFAULT_SEND_SCI true
+ #define DEFAULT_ENCRYPT false
+ #define DEFAULT_ENCODING_SA 0
++#define MACSEC_XPN_MAX_REPLAY_WINDOW (((1 << 30) - 1))
+
+ static bool send_sci(const struct macsec_secy *secy)
+ {
+@@ -3746,9 +3747,6 @@ static int macsec_changelink_common(struct net_device *dev,
+ secy->operational = tx_sa && tx_sa->active;
+ }
+
+- if (data[IFLA_MACSEC_WINDOW])
+- secy->replay_window = nla_get_u32(data[IFLA_MACSEC_WINDOW]);
+-
+ if (data[IFLA_MACSEC_ENCRYPT])
+ tx_sc->encrypt = !!nla_get_u8(data[IFLA_MACSEC_ENCRYPT]);
+
+@@ -3794,6 +3792,16 @@ static int macsec_changelink_common(struct net_device *dev,
+ }
+ }
+
++ if (data[IFLA_MACSEC_WINDOW]) {
++ secy->replay_window = nla_get_u32(data[IFLA_MACSEC_WINDOW]);
++
++ /* IEEE 802.1AEbw-2013 10.7.8 - maximum replay window
++ * for XPN cipher suites */
++ if (secy->xpn &&
++ secy->replay_window > MACSEC_XPN_MAX_REPLAY_WINDOW)
++ return -EINVAL;
++ }
++
+ return 0;
+ }
+
+@@ -3823,7 +3831,7 @@ static int macsec_changelink(struct net_device *dev, struct nlattr *tb[],
+
+ ret = macsec_changelink_common(dev, data);
+ if (ret)
+- return ret;
++ goto cleanup;
+
+ /* If h/w offloading is available, propagate to the device */
+ if (macsec_is_offloaded(macsec)) {
+--
+2.35.1
+
--- /dev/null
+From 767f53c37c140978535d3e6ac6a78f0232e8b90b Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 2 Apr 2022 08:15:16 -0400
+Subject: mlxsw: spectrum_router: simplify list unwinding
+
+From: Tom Rix <trix@redhat.com>
+
+[ Upstream commit 6f2f36e5f932c58e370bff79aba7f05963ea1c2a ]
+
+The setting of i here
+err_nexthop6_group_get:
+ i = nrt6;
+Is redundant, i is already nrt6. So remove
+this statement.
+
+The for loop for the unwinding
+err_rt6_create:
+ for (i--; i >= 0; i--) {
+Is equivelent to
+ for (; i > 0; i--) {
+
+Two consecutive labels can be reduced to one.
+
+Signed-off-by: Tom Rix <trix@redhat.com>
+Reviewed-by: Ido Schimmel <idosch@nvidia.com>
+Link: https://lore.kernel.org/r/20220402121516.2750284-1-trix@redhat.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ .../ethernet/mellanox/mlxsw/spectrum_router.c | 20 ++++++++-----------
+ 1 file changed, 8 insertions(+), 12 deletions(-)
+
+diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
+index c00d6c4ed37c..245d36696486 100644
+--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
++++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
+@@ -7022,7 +7022,7 @@ mlxsw_sp_fib6_entry_nexthop_add(struct mlxsw_sp *mlxsw_sp,
+ mlxsw_sp_rt6 = mlxsw_sp_rt6_create(rt_arr[i]);
+ if (IS_ERR(mlxsw_sp_rt6)) {
+ err = PTR_ERR(mlxsw_sp_rt6);
+- goto err_rt6_create;
++ goto err_rt6_unwind;
+ }
+
+ list_add_tail(&mlxsw_sp_rt6->list, &fib6_entry->rt6_list);
+@@ -7031,14 +7031,12 @@ mlxsw_sp_fib6_entry_nexthop_add(struct mlxsw_sp *mlxsw_sp,
+
+ err = mlxsw_sp_nexthop6_group_update(mlxsw_sp, op_ctx, fib6_entry);
+ if (err)
+- goto err_nexthop6_group_update;
++ goto err_rt6_unwind;
+
+ return 0;
+
+-err_nexthop6_group_update:
+- i = nrt6;
+-err_rt6_create:
+- for (i--; i >= 0; i--) {
++err_rt6_unwind:
++ for (; i > 0; i--) {
+ fib6_entry->nrt6--;
+ mlxsw_sp_rt6 = list_last_entry(&fib6_entry->rt6_list,
+ struct mlxsw_sp_rt6, list);
+@@ -7166,7 +7164,7 @@ mlxsw_sp_fib6_entry_create(struct mlxsw_sp *mlxsw_sp,
+ mlxsw_sp_rt6 = mlxsw_sp_rt6_create(rt_arr[i]);
+ if (IS_ERR(mlxsw_sp_rt6)) {
+ err = PTR_ERR(mlxsw_sp_rt6);
+- goto err_rt6_create;
++ goto err_rt6_unwind;
+ }
+ list_add_tail(&mlxsw_sp_rt6->list, &fib6_entry->rt6_list);
+ fib6_entry->nrt6++;
+@@ -7174,7 +7172,7 @@ mlxsw_sp_fib6_entry_create(struct mlxsw_sp *mlxsw_sp,
+
+ err = mlxsw_sp_nexthop6_group_get(mlxsw_sp, fib6_entry);
+ if (err)
+- goto err_nexthop6_group_get;
++ goto err_rt6_unwind;
+
+ err = mlxsw_sp_nexthop_group_vr_link(fib_entry->nh_group,
+ fib_node->fib);
+@@ -7193,10 +7191,8 @@ mlxsw_sp_fib6_entry_create(struct mlxsw_sp *mlxsw_sp,
+ mlxsw_sp_nexthop_group_vr_unlink(fib_entry->nh_group, fib_node->fib);
+ err_nexthop_group_vr_link:
+ mlxsw_sp_nexthop6_group_put(mlxsw_sp, fib_entry);
+-err_nexthop6_group_get:
+- i = nrt6;
+-err_rt6_create:
+- for (i--; i >= 0; i--) {
++err_rt6_unwind:
++ for (; i > 0; i--) {
+ fib6_entry->nrt6--;
+ mlxsw_sp_rt6 = list_last_entry(&fib6_entry->rt6_list,
+ struct mlxsw_sp_rt6, list);
+--
+2.35.1
+
--- /dev/null
+From 0a782d12ad419f820fbaf1d637a788097014b5b1 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 22 Apr 2022 14:55:36 -0700
+Subject: mptcp: don't send RST for single subflow
+
+From: Geliang Tang <geliang.tang@suse.com>
+
+[ Upstream commit 1761fed2567807f26fbd53032ff622f55978c7a9 ]
+
+When a bad checksum is detected and a single subflow is in use, don't
+send RST + MP_FAIL, send data_ack + MP_FAIL instead.
+
+So invoke tcp_send_active_reset() only when mptcp_has_another_subflow()
+is true.
+
+Signed-off-by: Geliang Tang <geliang.tang@suse.com>
+Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/mptcp/subflow.c | 10 +++++-----
+ 1 file changed, 5 insertions(+), 5 deletions(-)
+
+diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
+index 7919e259175d..ccae50eba664 100644
+--- a/net/mptcp/subflow.c
++++ b/net/mptcp/subflow.c
+@@ -1221,14 +1221,14 @@ static bool subflow_check_data_avail(struct sock *ssk)
+ /* RFC 8684 section 3.7. */
+ if (subflow->send_mp_fail) {
+ if (mptcp_has_another_subflow(ssk)) {
++ ssk->sk_err = EBADMSG;
++ tcp_set_state(ssk, TCP_CLOSE);
++ subflow->reset_transient = 0;
++ subflow->reset_reason = MPTCP_RST_EMIDDLEBOX;
++ tcp_send_active_reset(ssk, GFP_ATOMIC);
+ while ((skb = skb_peek(&ssk->sk_receive_queue)))
+ sk_eat_skb(ssk, skb);
+ }
+- ssk->sk_err = EBADMSG;
+- tcp_set_state(ssk, TCP_CLOSE);
+- subflow->reset_transient = 0;
+- subflow->reset_reason = MPTCP_RST_EMIDDLEBOX;
+- tcp_send_active_reset(ssk, GFP_ATOMIC);
+ WRITE_ONCE(subflow->data_avail, MPTCP_SUBFLOW_NODATA);
+ return true;
+ }
+--
+2.35.1
+
--- /dev/null
+From 43ae80e638fda0d574ade85ebc27ebd893ec0433 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 23 Jul 2022 04:24:11 +0300
+Subject: net: dsa: fix reference counting for LAG FDBs
+
+From: Vladimir Oltean <vladimir.oltean@nxp.com>
+
+[ Upstream commit c7560d1203b7a1ea0b99a5c575547e95d564b2a8 ]
+
+Due to an invalid conflict resolution on my side while working on 2
+different series (LAG FDBs and FDB isolation), dsa_switch_do_lag_fdb_add()
+does not store the database associated with a dsa_mac_addr structure.
+
+So after adding an FDB entry associated with a LAG, dsa_mac_addr_find()
+fails to find it while deleting it, because &a->db is zeroized memory
+for all stored FDB entries of lag->fdbs, and dsa_switch_do_lag_fdb_del()
+returns -ENOENT rather than deleting the entry.
+
+Fixes: c26933639b54 ("net: dsa: request drivers to perform FDB isolation")
+Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
+Link: https://lore.kernel.org/r/20220723012411.1125066-1-vladimir.oltean@nxp.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/dsa/switch.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/net/dsa/switch.c b/net/dsa/switch.c
+index d8a80cf9742c..52f84ea349d2 100644
+--- a/net/dsa/switch.c
++++ b/net/dsa/switch.c
+@@ -363,6 +363,7 @@ static int dsa_switch_do_lag_fdb_add(struct dsa_switch *ds, struct dsa_lag *lag,
+
+ ether_addr_copy(a->addr, addr);
+ a->vid = vid;
++ a->db = db;
+ refcount_set(&a->refcount, 1);
+ list_add_tail(&a->list, &lag->fdbs);
+
+--
+2.35.1
+
--- /dev/null
+From 8591925a436afb2b3927d6f50f39b68eec4dc35d Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 22 Jul 2022 11:22:00 -0700
+Subject: net: Fix data-races around sysctl_[rw]mem(_offset)?.
+
+From: Kuniyuki Iwashima <kuniyu@amazon.com>
+
+[ Upstream commit 02739545951ad4c1215160db7fbf9b7a918d3c0b ]
+
+While reading these sysctl variables, they can be changed concurrently.
+Thus, we need to add READ_ONCE() to their readers.
+
+ - .sysctl_rmem
+ - .sysctl_rwmem
+ - .sysctl_rmem_offset
+ - .sysctl_wmem_offset
+ - sysctl_tcp_rmem[1, 2]
+ - sysctl_tcp_wmem[1, 2]
+ - sysctl_decnet_rmem[1]
+ - sysctl_decnet_wmem[1]
+ - sysctl_tipc_rmem[1]
+
+Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
+Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/net/sock.h | 8 ++++----
+ net/decnet/af_decnet.c | 4 ++--
+ net/ipv4/tcp.c | 6 +++---
+ net/ipv4/tcp_input.c | 13 +++++++------
+ net/ipv4/tcp_output.c | 2 +-
+ net/mptcp/protocol.c | 6 +++---
+ net/tipc/socket.c | 2 +-
+ 7 files changed, 21 insertions(+), 20 deletions(-)
+
+diff --git a/include/net/sock.h b/include/net/sock.h
+index 6bef0ffb1e7b..9563a093fdfc 100644
+--- a/include/net/sock.h
++++ b/include/net/sock.h
+@@ -2834,18 +2834,18 @@ static inline int sk_get_wmem0(const struct sock *sk, const struct proto *proto)
+ {
+ /* Does this proto have per netns sysctl_wmem ? */
+ if (proto->sysctl_wmem_offset)
+- return *(int *)((void *)sock_net(sk) + proto->sysctl_wmem_offset);
++ return READ_ONCE(*(int *)((void *)sock_net(sk) + proto->sysctl_wmem_offset));
+
+- return *proto->sysctl_wmem;
++ return READ_ONCE(*proto->sysctl_wmem);
+ }
+
+ static inline int sk_get_rmem0(const struct sock *sk, const struct proto *proto)
+ {
+ /* Does this proto have per netns sysctl_rmem ? */
+ if (proto->sysctl_rmem_offset)
+- return *(int *)((void *)sock_net(sk) + proto->sysctl_rmem_offset);
++ return READ_ONCE(*(int *)((void *)sock_net(sk) + proto->sysctl_rmem_offset));
+
+- return *proto->sysctl_rmem;
++ return READ_ONCE(*proto->sysctl_rmem);
+ }
+
+ /* Default TCP Small queue budget is ~1 ms of data (1sec >> 10)
+diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c
+index dc92a67baea3..7d542eb46172 100644
+--- a/net/decnet/af_decnet.c
++++ b/net/decnet/af_decnet.c
+@@ -480,8 +480,8 @@ static struct sock *dn_alloc_sock(struct net *net, struct socket *sock, gfp_t gf
+ sk->sk_family = PF_DECnet;
+ sk->sk_protocol = 0;
+ sk->sk_allocation = gfp;
+- sk->sk_sndbuf = sysctl_decnet_wmem[1];
+- sk->sk_rcvbuf = sysctl_decnet_rmem[1];
++ sk->sk_sndbuf = READ_ONCE(sysctl_decnet_wmem[1]);
++ sk->sk_rcvbuf = READ_ONCE(sysctl_decnet_rmem[1]);
+
+ /* Initialization of DECnet Session Control Port */
+ scp = DN_SK(sk);
+diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
+index 60b46f2a6896..91735d631a28 100644
+--- a/net/ipv4/tcp.c
++++ b/net/ipv4/tcp.c
+@@ -452,8 +452,8 @@ void tcp_init_sock(struct sock *sk)
+
+ icsk->icsk_sync_mss = tcp_sync_mss;
+
+- WRITE_ONCE(sk->sk_sndbuf, sock_net(sk)->ipv4.sysctl_tcp_wmem[1]);
+- WRITE_ONCE(sk->sk_rcvbuf, sock_net(sk)->ipv4.sysctl_tcp_rmem[1]);
++ WRITE_ONCE(sk->sk_sndbuf, READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_wmem[1]));
++ WRITE_ONCE(sk->sk_rcvbuf, READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_rmem[1]));
+
+ sk_sockets_allocated_inc(sk);
+ }
+@@ -1743,7 +1743,7 @@ int tcp_set_rcvlowat(struct sock *sk, int val)
+ if (sk->sk_userlocks & SOCK_RCVBUF_LOCK)
+ cap = sk->sk_rcvbuf >> 1;
+ else
+- cap = sock_net(sk)->ipv4.sysctl_tcp_rmem[2] >> 1;
++ cap = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_rmem[2]) >> 1;
+ val = min(val, cap);
+ WRITE_ONCE(sk->sk_rcvlowat, val ? : 1);
+
+diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
+index de066fad7dfe..f09b1321a960 100644
+--- a/net/ipv4/tcp_input.c
++++ b/net/ipv4/tcp_input.c
+@@ -426,7 +426,7 @@ static void tcp_sndbuf_expand(struct sock *sk)
+
+ if (sk->sk_sndbuf < sndmem)
+ WRITE_ONCE(sk->sk_sndbuf,
+- min(sndmem, sock_net(sk)->ipv4.sysctl_tcp_wmem[2]));
++ min(sndmem, READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_wmem[2])));
+ }
+
+ /* 2. Tuning advertised window (window_clamp, rcv_ssthresh)
+@@ -461,7 +461,7 @@ static int __tcp_grow_window(const struct sock *sk, const struct sk_buff *skb,
+ struct tcp_sock *tp = tcp_sk(sk);
+ /* Optimize this! */
+ int truesize = tcp_win_from_space(sk, skbtruesize) >> 1;
+- int window = tcp_win_from_space(sk, sock_net(sk)->ipv4.sysctl_tcp_rmem[2]) >> 1;
++ int window = tcp_win_from_space(sk, READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_rmem[2])) >> 1;
+
+ while (tp->rcv_ssthresh <= window) {
+ if (truesize <= skb->len)
+@@ -574,16 +574,17 @@ static void tcp_clamp_window(struct sock *sk)
+ struct tcp_sock *tp = tcp_sk(sk);
+ struct inet_connection_sock *icsk = inet_csk(sk);
+ struct net *net = sock_net(sk);
++ int rmem2;
+
+ icsk->icsk_ack.quick = 0;
++ rmem2 = READ_ONCE(net->ipv4.sysctl_tcp_rmem[2]);
+
+- if (sk->sk_rcvbuf < net->ipv4.sysctl_tcp_rmem[2] &&
++ if (sk->sk_rcvbuf < rmem2 &&
+ !(sk->sk_userlocks & SOCK_RCVBUF_LOCK) &&
+ !tcp_under_memory_pressure(sk) &&
+ sk_memory_allocated(sk) < sk_prot_mem_limits(sk, 0)) {
+ WRITE_ONCE(sk->sk_rcvbuf,
+- min(atomic_read(&sk->sk_rmem_alloc),
+- net->ipv4.sysctl_tcp_rmem[2]));
++ min(atomic_read(&sk->sk_rmem_alloc), rmem2));
+ }
+ if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf)
+ tp->rcv_ssthresh = min(tp->window_clamp, 2U * tp->advmss);
+@@ -745,7 +746,7 @@ void tcp_rcv_space_adjust(struct sock *sk)
+
+ do_div(rcvwin, tp->advmss);
+ rcvbuf = min_t(u64, rcvwin * rcvmem,
+- sock_net(sk)->ipv4.sysctl_tcp_rmem[2]);
++ READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_rmem[2]));
+ if (rcvbuf > sk->sk_rcvbuf) {
+ WRITE_ONCE(sk->sk_rcvbuf, rcvbuf);
+
+diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
+index 60c9f7f444e0..66836b8bd46f 100644
+--- a/net/ipv4/tcp_output.c
++++ b/net/ipv4/tcp_output.c
+@@ -238,7 +238,7 @@ void tcp_select_initial_window(const struct sock *sk, int __space, __u32 mss,
+ *rcv_wscale = 0;
+ if (wscale_ok) {
+ /* Set window scaling on max possible window */
+- space = max_t(u32, space, sock_net(sk)->ipv4.sysctl_tcp_rmem[2]);
++ space = max_t(u32, space, READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_rmem[2]));
+ space = max_t(u32, space, sysctl_rmem_max);
+ space = min_t(u32, space, *window_clamp);
+ *rcv_wscale = clamp_t(int, ilog2(space) - 15,
+diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
+index e2790a6e90fb..07b5a2044cab 100644
+--- a/net/mptcp/protocol.c
++++ b/net/mptcp/protocol.c
+@@ -1900,7 +1900,7 @@ static void mptcp_rcv_space_adjust(struct mptcp_sock *msk, int copied)
+
+ do_div(rcvwin, advmss);
+ rcvbuf = min_t(u64, rcvwin * rcvmem,
+- sock_net(sk)->ipv4.sysctl_tcp_rmem[2]);
++ READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_rmem[2]));
+
+ if (rcvbuf > sk->sk_rcvbuf) {
+ u32 window_clamp;
+@@ -2597,8 +2597,8 @@ static int mptcp_init_sock(struct sock *sk)
+ mptcp_ca_reset(sk);
+
+ sk_sockets_allocated_inc(sk);
+- sk->sk_rcvbuf = sock_net(sk)->ipv4.sysctl_tcp_rmem[1];
+- sk->sk_sndbuf = sock_net(sk)->ipv4.sysctl_tcp_wmem[1];
++ sk->sk_rcvbuf = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_rmem[1]);
++ sk->sk_sndbuf = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_wmem[1]);
+
+ return 0;
+ }
+diff --git a/net/tipc/socket.c b/net/tipc/socket.c
+index 43509c7e90fc..f1c3b8eb4b3d 100644
+--- a/net/tipc/socket.c
++++ b/net/tipc/socket.c
+@@ -517,7 +517,7 @@ static int tipc_sk_create(struct net *net, struct socket *sock,
+ timer_setup(&sk->sk_timer, tipc_sk_timeout, 0);
+ sk->sk_shutdown = 0;
+ sk->sk_backlog_rcv = tipc_sk_backlog_rcv;
+- sk->sk_rcvbuf = sysctl_tipc_rmem[1];
++ sk->sk_rcvbuf = READ_ONCE(sysctl_tipc_rmem[1]);
+ sk->sk_data_ready = tipc_data_ready;
+ sk->sk_write_space = tipc_write_space;
+ sk->sk_destruct = tipc_sock_destruct;
+--
+2.35.1
+
--- /dev/null
+From a1ea53d946a4be3b0122dd566b0e08163dfb61f7 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 26 Jul 2022 14:59:23 -0700
+Subject: net/funeth: Fix fun_xdp_tx() and XDP packet reclaim
+
+From: Dimitris Michailidis <d.michailidis@fungible.com>
+
+[ Upstream commit 51a83391d77bb0f7ff0aef06ca4c7f5aa9e80b4c ]
+
+The current implementation of fun_xdp_tx(), used for XDP_TX, is
+incorrect in that it takes an address/length pair and later releases it
+with page_frag_free(). It is OK for XDP_TX but the same code is used by
+ndo_xdp_xmit. In that case it loses the XDP memory type and releases the
+packet incorrectly for some of the types. Assorted breakage follows.
+
+Change fun_xdp_tx() to take xdp_frame and rely on xdp_return_frame() in
+reclaim.
+
+Fixes: db37bc177dae ("net/funeth: add the data path")
+Signed-off-by: Dimitris Michailidis <dmichail@fungible.com>
+Link: https://lore.kernel.org/r/20220726215923.7887-1-dmichail@fungible.com
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ .../net/ethernet/fungible/funeth/funeth_rx.c | 5 ++++-
+ .../net/ethernet/fungible/funeth/funeth_tx.c | 20 +++++++++----------
+ .../ethernet/fungible/funeth/funeth_txrx.h | 6 +++---
+ 3 files changed, 16 insertions(+), 15 deletions(-)
+
+diff --git a/drivers/net/ethernet/fungible/funeth/funeth_rx.c b/drivers/net/ethernet/fungible/funeth/funeth_rx.c
+index 0f6a549b9f67..29a6c2ede43a 100644
+--- a/drivers/net/ethernet/fungible/funeth/funeth_rx.c
++++ b/drivers/net/ethernet/fungible/funeth/funeth_rx.c
+@@ -142,6 +142,7 @@ static void *fun_run_xdp(struct funeth_rxq *q, skb_frag_t *frags, void *buf_va,
+ int ref_ok, struct funeth_txq *xdp_q)
+ {
+ struct bpf_prog *xdp_prog;
++ struct xdp_frame *xdpf;
+ struct xdp_buff xdp;
+ u32 act;
+
+@@ -163,7 +164,9 @@ static void *fun_run_xdp(struct funeth_rxq *q, skb_frag_t *frags, void *buf_va,
+ case XDP_TX:
+ if (unlikely(!ref_ok))
+ goto pass;
+- if (!fun_xdp_tx(xdp_q, xdp.data, xdp.data_end - xdp.data))
++
++ xdpf = xdp_convert_buff_to_frame(&xdp);
++ if (!xdpf || !fun_xdp_tx(xdp_q, xdpf))
+ goto xdp_error;
+ FUN_QSTAT_INC(q, xdp_tx);
+ q->xdp_flush |= FUN_XDP_FLUSH_TX;
+diff --git a/drivers/net/ethernet/fungible/funeth/funeth_tx.c b/drivers/net/ethernet/fungible/funeth/funeth_tx.c
+index ff6e29237253..2f6698b98b03 100644
+--- a/drivers/net/ethernet/fungible/funeth/funeth_tx.c
++++ b/drivers/net/ethernet/fungible/funeth/funeth_tx.c
+@@ -466,7 +466,7 @@ static unsigned int fun_xdpq_clean(struct funeth_txq *q, unsigned int budget)
+
+ do {
+ fun_xdp_unmap(q, reclaim_idx);
+- page_frag_free(q->info[reclaim_idx].vaddr);
++ xdp_return_frame(q->info[reclaim_idx].xdpf);
+
+ trace_funeth_tx_free(q, reclaim_idx, 1, head);
+
+@@ -479,11 +479,11 @@ static unsigned int fun_xdpq_clean(struct funeth_txq *q, unsigned int budget)
+ return npkts;
+ }
+
+-bool fun_xdp_tx(struct funeth_txq *q, void *data, unsigned int len)
++bool fun_xdp_tx(struct funeth_txq *q, struct xdp_frame *xdpf)
+ {
+ struct fun_eth_tx_req *req;
+ struct fun_dataop_gl *gle;
+- unsigned int idx;
++ unsigned int idx, len;
+ dma_addr_t dma;
+
+ if (fun_txq_avail(q) < FUN_XDP_CLEAN_THRES)
+@@ -494,7 +494,8 @@ bool fun_xdp_tx(struct funeth_txq *q, void *data, unsigned int len)
+ return false;
+ }
+
+- dma = dma_map_single(q->dma_dev, data, len, DMA_TO_DEVICE);
++ len = xdpf->len;
++ dma = dma_map_single(q->dma_dev, xdpf->data, len, DMA_TO_DEVICE);
+ if (unlikely(dma_mapping_error(q->dma_dev, dma))) {
+ FUN_QSTAT_INC(q, tx_map_err);
+ return false;
+@@ -514,7 +515,7 @@ bool fun_xdp_tx(struct funeth_txq *q, void *data, unsigned int len)
+ gle = (struct fun_dataop_gl *)req->dataop.imm;
+ fun_dataop_gl_init(gle, 0, 0, len, dma);
+
+- q->info[idx].vaddr = data;
++ q->info[idx].xdpf = xdpf;
+
+ u64_stats_update_begin(&q->syncp);
+ q->stats.tx_bytes += len;
+@@ -545,12 +546,9 @@ int fun_xdp_xmit_frames(struct net_device *dev, int n,
+ if (unlikely(q_idx >= fp->num_xdpqs))
+ return -ENXIO;
+
+- for (q = xdpqs[q_idx], i = 0; i < n; i++) {
+- const struct xdp_frame *xdpf = frames[i];
+-
+- if (!fun_xdp_tx(q, xdpf->data, xdpf->len))
++ for (q = xdpqs[q_idx], i = 0; i < n; i++)
++ if (!fun_xdp_tx(q, frames[i]))
+ break;
+- }
+
+ if (unlikely(flags & XDP_XMIT_FLUSH))
+ fun_txq_wr_db(q);
+@@ -577,7 +575,7 @@ static void fun_xdpq_purge(struct funeth_txq *q)
+ unsigned int idx = q->cons_cnt & q->mask;
+
+ fun_xdp_unmap(q, idx);
+- page_frag_free(q->info[idx].vaddr);
++ xdp_return_frame(q->info[idx].xdpf);
+ q->cons_cnt++;
+ }
+ }
+diff --git a/drivers/net/ethernet/fungible/funeth/funeth_txrx.h b/drivers/net/ethernet/fungible/funeth/funeth_txrx.h
+index 04c9f91b7489..8708e2895946 100644
+--- a/drivers/net/ethernet/fungible/funeth/funeth_txrx.h
++++ b/drivers/net/ethernet/fungible/funeth/funeth_txrx.h
+@@ -95,8 +95,8 @@ struct funeth_txq_stats { /* per Tx queue SW counters */
+
+ struct funeth_tx_info { /* per Tx descriptor state */
+ union {
+- struct sk_buff *skb; /* associated packet */
+- void *vaddr; /* start address for XDP */
++ struct sk_buff *skb; /* associated packet (sk_buff path) */
++ struct xdp_frame *xdpf; /* associated XDP frame (XDP path) */
+ };
+ };
+
+@@ -245,7 +245,7 @@ static inline int fun_irq_node(const struct fun_irq *p)
+ int fun_rxq_napi_poll(struct napi_struct *napi, int budget);
+ int fun_txq_napi_poll(struct napi_struct *napi, int budget);
+ netdev_tx_t fun_start_xmit(struct sk_buff *skb, struct net_device *netdev);
+-bool fun_xdp_tx(struct funeth_txq *q, void *data, unsigned int len);
++bool fun_xdp_tx(struct funeth_txq *q, struct xdp_frame *xdpf);
+ int fun_xdp_xmit_frames(struct net_device *dev, int n,
+ struct xdp_frame **frames, u32 flags);
+
+--
+2.35.1
+
--- /dev/null
+From e0139086e45540d8c295043d262c80845e70a0fa Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 22 Jul 2022 17:29:02 +0800
+Subject: net: macsec: fix potential resource leak in macsec_add_rxsa() and
+ macsec_add_txsa()
+
+From: Jianglei Nie <niejianglei2021@163.com>
+
+[ Upstream commit c7b205fbbf3cffa374721bb7623f7aa8c46074f1 ]
+
+init_rx_sa() allocates relevant resource for rx_sa->stats and rx_sa->
+key.tfm with alloc_percpu() and macsec_alloc_tfm(). When some error
+occurs after init_rx_sa() is called in macsec_add_rxsa(), the function
+released rx_sa with kfree() without releasing rx_sa->stats and rx_sa->
+key.tfm, which will lead to a resource leak.
+
+We should call macsec_rxsa_put() instead of kfree() to decrease the ref
+count of rx_sa and release the relevant resource if the refcount is 0.
+The same bug exists in macsec_add_txsa() for tx_sa as well. This patch
+fixes the above two bugs.
+
+Fixes: 3cf3227a21d1 ("net: macsec: hardware offloading infrastructure")
+Signed-off-by: Jianglei Nie <niejianglei2021@163.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/macsec.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c
+index 95578f04f212..f354fad05714 100644
+--- a/drivers/net/macsec.c
++++ b/drivers/net/macsec.c
+@@ -1844,7 +1844,7 @@ static int macsec_add_rxsa(struct sk_buff *skb, struct genl_info *info)
+ return 0;
+
+ cleanup:
+- kfree(rx_sa);
++ macsec_rxsa_put(rx_sa);
+ rtnl_unlock();
+ return err;
+ }
+@@ -2087,7 +2087,7 @@ static int macsec_add_txsa(struct sk_buff *skb, struct genl_info *info)
+
+ cleanup:
+ secy->operational = was_operational;
+- kfree(tx_sa);
++ macsec_txsa_put(tx_sa);
+ rtnl_unlock();
+ return err;
+ }
+--
+2.35.1
+
--- /dev/null
+From 832b41ea646d7b3cbb1120ae329394a2803c0a37 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 22 Jul 2022 17:06:35 +0000
+Subject: net: mld: fix reference count leak in mld_{query | report}_work()
+
+From: Taehee Yoo <ap420073@gmail.com>
+
+[ Upstream commit 3e7d18b9dca388940a19cae30bfc1f76dccd8c28 ]
+
+mld_{query | report}_work() processes queued events.
+If there are too many events in the queue, it re-queue a work.
+And then, it returns without in6_dev_put().
+But if queuing is failed, it should call in6_dev_put(), but it doesn't.
+So, a reference count leak would occur.
+
+THREAD0 THREAD1
+mld_report_work()
+ spin_lock_bh()
+ if (!mod_delayed_work())
+ in6_dev_hold();
+ spin_unlock_bh()
+ spin_lock_bh()
+ schedule_delayed_work()
+ spin_unlock_bh()
+
+Script to reproduce(by Hangbin Liu):
+ ip netns add ns1
+ ip netns add ns2
+ ip netns exec ns1 sysctl -w net.ipv6.conf.all.force_mld_version=1
+ ip netns exec ns2 sysctl -w net.ipv6.conf.all.force_mld_version=1
+
+ ip -n ns1 link add veth0 type veth peer name veth0 netns ns2
+ ip -n ns1 link set veth0 up
+ ip -n ns2 link set veth0 up
+
+ for i in `seq 50`; do
+ for j in `seq 100`; do
+ ip -n ns1 addr add 2021:${i}::${j}/64 dev veth0
+ ip -n ns2 addr add 2022:${i}::${j}/64 dev veth0
+ done
+ done
+ modprobe -r veth
+ ip -a netns del
+
+splat looks like:
+ unregister_netdevice: waiting for veth0 to become free. Usage count = 2
+ leaked reference.
+ ipv6_add_dev+0x324/0xec0
+ addrconf_notify+0x481/0xd10
+ raw_notifier_call_chain+0xe3/0x120
+ call_netdevice_notifiers+0x106/0x160
+ register_netdevice+0x114c/0x16b0
+ veth_newlink+0x48b/0xa50 [veth]
+ rtnl_newlink+0x11a2/0x1a40
+ rtnetlink_rcv_msg+0x63f/0xc00
+ netlink_rcv_skb+0x1df/0x3e0
+ netlink_unicast+0x5de/0x850
+ netlink_sendmsg+0x6c9/0xa90
+ ____sys_sendmsg+0x76a/0x780
+ __sys_sendmsg+0x27c/0x340
+ do_syscall_64+0x43/0x90
+ entry_SYSCALL_64_after_hwframe+0x63/0xcd
+
+Tested-by: Hangbin Liu <liuhangbin@gmail.com>
+Fixes: f185de28d9ae ("mld: add new workqueues for process mld events")
+Signed-off-by: Taehee Yoo <ap420073@gmail.com>
+Reviewed-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/ipv6/mcast.c | 14 ++++++++------
+ 1 file changed, 8 insertions(+), 6 deletions(-)
+
+diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
+index 7f695c39d9a8..87c699d57b36 100644
+--- a/net/ipv6/mcast.c
++++ b/net/ipv6/mcast.c
+@@ -1522,7 +1522,6 @@ static void mld_query_work(struct work_struct *work)
+
+ if (++cnt >= MLD_MAX_QUEUE) {
+ rework = true;
+- schedule_delayed_work(&idev->mc_query_work, 0);
+ break;
+ }
+ }
+@@ -1533,8 +1532,10 @@ static void mld_query_work(struct work_struct *work)
+ __mld_query_work(skb);
+ mutex_unlock(&idev->mc_lock);
+
+- if (!rework)
+- in6_dev_put(idev);
++ if (rework && queue_delayed_work(mld_wq, &idev->mc_query_work, 0))
++ return;
++
++ in6_dev_put(idev);
+ }
+
+ /* called with rcu_read_lock() */
+@@ -1624,7 +1625,6 @@ static void mld_report_work(struct work_struct *work)
+
+ if (++cnt >= MLD_MAX_QUEUE) {
+ rework = true;
+- schedule_delayed_work(&idev->mc_report_work, 0);
+ break;
+ }
+ }
+@@ -1635,8 +1635,10 @@ static void mld_report_work(struct work_struct *work)
+ __mld_report_work(skb);
+ mutex_unlock(&idev->mc_lock);
+
+- if (!rework)
+- in6_dev_put(idev);
++ if (rework && queue_delayed_work(mld_wq, &idev->mc_report_work, 0))
++ return;
++
++ in6_dev_put(idev);
+ }
+
+ static bool is_in(struct ifmcaddr6 *pmc, struct ip6_sf_list *psf, int type,
+--
+2.35.1
+
--- /dev/null
+From fb395a0519c1ca508a62c862f77166ef1c5de868 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 20 Jul 2022 14:20:57 +0300
+Subject: net: pcs: xpcs: propagate xpcs_read error to xpcs_get_state_c37_sgmii
+
+From: Vladimir Oltean <vladimir.oltean@nxp.com>
+
+[ Upstream commit 27161db0904ee48e59140aa8d0835939a666c1f1 ]
+
+While phylink_pcs_ops :: pcs_get_state does return void, xpcs_get_state()
+does check for a non-zero return code from xpcs_get_state_c37_sgmii()
+and prints that as a message to the kernel log.
+
+However, a non-zero return code from xpcs_read() is translated into
+"return false" (i.e. zero as int) and the I/O error is therefore not
+printed. Fix that.
+
+Fixes: b97b5331b8ab ("net: pcs: add C37 SGMII AN support for intel mGbE controller")
+Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
+Link: https://lore.kernel.org/r/20220720112057.3504398-1-vladimir.oltean@nxp.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/pcs/pcs-xpcs.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/drivers/net/pcs/pcs-xpcs.c b/drivers/net/pcs/pcs-xpcs.c
+index 61418d4dc0cd..8768f6e34846 100644
+--- a/drivers/net/pcs/pcs-xpcs.c
++++ b/drivers/net/pcs/pcs-xpcs.c
+@@ -898,7 +898,7 @@ static int xpcs_get_state_c37_sgmii(struct dw_xpcs *xpcs,
+ */
+ ret = xpcs_read(xpcs, MDIO_MMD_VEND2, DW_VR_MII_AN_INTR_STS);
+ if (ret < 0)
+- return false;
++ return ret;
+
+ if (ret & DW_VR_MII_C37_ANSGM_SP_LNKSTS) {
+ int speed_value;
+--
+2.35.1
+
--- /dev/null
+From ab53afa87f58275e1dfae6c3ac2fc50e6990f675 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 20 Jul 2022 21:10:03 +0800
+Subject: net: sungem_phy: Add of_node_put() for reference returned by
+ of_get_parent()
+
+From: Liang He <windhl@126.com>
+
+[ Upstream commit ebbbe23fdf6070e31509638df3321688358cc211 ]
+
+In bcm5421_init(), we should call of_node_put() for the reference
+returned by of_get_parent() which has increased the refcount.
+
+Fixes: 3c326fe9cb7a ("[PATCH] ppc64: Add new PHY to sungem")
+Signed-off-by: Liang He <windhl@126.com>
+Link: https://lore.kernel.org/r/20220720131003.1287426-1-windhl@126.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/sungem_phy.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/drivers/net/sungem_phy.c b/drivers/net/sungem_phy.c
+index 4daac5fda073..0d40d265b688 100644
+--- a/drivers/net/sungem_phy.c
++++ b/drivers/net/sungem_phy.c
+@@ -454,6 +454,7 @@ static int bcm5421_init(struct mii_phy* phy)
+ int can_low_power = 1;
+ if (np == NULL || of_get_property(np, "no-autolowpower", NULL))
+ can_low_power = 0;
++ of_node_put(np);
+ if (can_low_power) {
+ /* Enable automatic low-power */
+ sungem_phy_write(phy, 0x1c, 0x9002);
+--
+2.35.1
+
--- /dev/null
+From 56fee236aaf4c707f71eda52291b415d7669e1d6 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 26 Jul 2022 12:42:06 +0200
+Subject: netfilter: nf_queue: do not allow packet truncation below transport
+ header offset
+
+From: Florian Westphal <fw@strlen.de>
+
+[ Upstream commit 99a63d36cb3ed5ca3aa6fcb64cffbeaf3b0fb164 ]
+
+Domingo Dirutigliano and Nicola Guerrera report kernel panic when
+sending nf_queue verdict with 1-byte nfta_payload attribute.
+
+The IP/IPv6 stack pulls the IP(v6) header from the packet after the
+input hook.
+
+If user truncates the packet below the header size, this skb_pull() will
+result in a malformed skb (skb->len < 0).
+
+Fixes: 7af4cc3fa158 ("[NETFILTER]: Add "nfnetlink_queue" netfilter queue handler over nfnetlink")
+Reported-by: Domingo Dirutigliano <pwnzer0tt1@proton.me>
+Signed-off-by: Florian Westphal <fw@strlen.de>
+Reviewed-by: Pablo Neira Ayuso <pablo@netfilter.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/netfilter/nfnetlink_queue.c | 7 ++++++-
+ 1 file changed, 6 insertions(+), 1 deletion(-)
+
+diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c
+index a364f8e5e698..87a9009d5234 100644
+--- a/net/netfilter/nfnetlink_queue.c
++++ b/net/netfilter/nfnetlink_queue.c
+@@ -843,11 +843,16 @@ nfqnl_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum)
+ }
+
+ static int
+-nfqnl_mangle(void *data, int data_len, struct nf_queue_entry *e, int diff)
++nfqnl_mangle(void *data, unsigned int data_len, struct nf_queue_entry *e, int diff)
+ {
+ struct sk_buff *nskb;
+
+ if (diff < 0) {
++ unsigned int min_len = skb_transport_offset(e->skb);
++
++ if (data_len < min_len)
++ return -EINVAL;
++
+ if (pskb_trim(e->skb, data_len))
+ return -ENOMEM;
+ } else if (diff > 0) {
+--
+2.35.1
+
--- /dev/null
+From d399289594df0f50484c56285796f595c4530776 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sun, 24 Jul 2022 13:51:13 +0530
+Subject: octeontx2-pf: cn10k: Fix egress ratelimit configuration
+
+From: Sunil Goutham <sgoutham@marvell.com>
+
+[ Upstream commit b354eaeec8637d87003945439209251d76a2bb95 ]
+
+NIX_AF_TLXX_PIR/CIR register format has changed from OcteonTx2
+to CN10K. CN10K supports larger burst size. Fix burst exponent
+and burst mantissa configuration for CN10K.
+
+Also fixed 'maxrate' from u32 to u64 since 'police.rate_bytes_ps'
+passed by stack is also u64.
+
+Fixes: e638a83f167e ("octeontx2-pf: TC_MATCHALL egress ratelimiting offload")
+Signed-off-by: Sunil Goutham <sgoutham@marvell.com>
+Signed-off-by: Subbaraya Sundeep <sbhatta@marvell.com>
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ .../ethernet/marvell/octeontx2/nic/otx2_tc.c | 76 ++++++++++++++-----
+ 1 file changed, 55 insertions(+), 21 deletions(-)
+
+diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c
+index a3d720b1b32c..e64318c110fd 100644
+--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c
++++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c
+@@ -28,6 +28,9 @@
+ #define MAX_RATE_EXPONENT 0x0FULL
+ #define MAX_RATE_MANTISSA 0xFFULL
+
++#define CN10K_MAX_BURST_MANTISSA 0x7FFFULL
++#define CN10K_MAX_BURST_SIZE 8453888ULL
++
+ /* Bitfields in NIX_TLX_PIR register */
+ #define TLX_RATE_MANTISSA GENMASK_ULL(8, 1)
+ #define TLX_RATE_EXPONENT GENMASK_ULL(12, 9)
+@@ -35,6 +38,9 @@
+ #define TLX_BURST_MANTISSA GENMASK_ULL(36, 29)
+ #define TLX_BURST_EXPONENT GENMASK_ULL(40, 37)
+
++#define CN10K_TLX_BURST_MANTISSA GENMASK_ULL(43, 29)
++#define CN10K_TLX_BURST_EXPONENT GENMASK_ULL(47, 44)
++
+ struct otx2_tc_flow_stats {
+ u64 bytes;
+ u64 pkts;
+@@ -77,33 +83,42 @@ int otx2_tc_alloc_ent_bitmap(struct otx2_nic *nic)
+ }
+ EXPORT_SYMBOL(otx2_tc_alloc_ent_bitmap);
+
+-static void otx2_get_egress_burst_cfg(u32 burst, u32 *burst_exp,
+- u32 *burst_mantissa)
++static void otx2_get_egress_burst_cfg(struct otx2_nic *nic, u32 burst,
++ u32 *burst_exp, u32 *burst_mantissa)
+ {
++ int max_burst, max_mantissa;
+ unsigned int tmp;
+
++ if (is_dev_otx2(nic->pdev)) {
++ max_burst = MAX_BURST_SIZE;
++ max_mantissa = MAX_BURST_MANTISSA;
++ } else {
++ max_burst = CN10K_MAX_BURST_SIZE;
++ max_mantissa = CN10K_MAX_BURST_MANTISSA;
++ }
++
+ /* Burst is calculated as
+ * ((256 + BURST_MANTISSA) << (1 + BURST_EXPONENT)) / 256
+ * Max supported burst size is 130,816 bytes.
+ */
+- burst = min_t(u32, burst, MAX_BURST_SIZE);
++ burst = min_t(u32, burst, max_burst);
+ if (burst) {
+ *burst_exp = ilog2(burst) ? ilog2(burst) - 1 : 0;
+ tmp = burst - rounddown_pow_of_two(burst);
+- if (burst < MAX_BURST_MANTISSA)
++ if (burst < max_mantissa)
+ *burst_mantissa = tmp * 2;
+ else
+ *burst_mantissa = tmp / (1ULL << (*burst_exp - 7));
+ } else {
+ *burst_exp = MAX_BURST_EXPONENT;
+- *burst_mantissa = MAX_BURST_MANTISSA;
++ *burst_mantissa = max_mantissa;
+ }
+ }
+
+-static void otx2_get_egress_rate_cfg(u32 maxrate, u32 *exp,
++static void otx2_get_egress_rate_cfg(u64 maxrate, u32 *exp,
+ u32 *mantissa, u32 *div_exp)
+ {
+- unsigned int tmp;
++ u64 tmp;
+
+ /* Rate calculation by hardware
+ *
+@@ -132,21 +147,44 @@ static void otx2_get_egress_rate_cfg(u32 maxrate, u32 *exp,
+ }
+ }
+
+-static int otx2_set_matchall_egress_rate(struct otx2_nic *nic, u32 burst, u32 maxrate)
++static u64 otx2_get_txschq_rate_regval(struct otx2_nic *nic,
++ u64 maxrate, u32 burst)
+ {
+- struct otx2_hw *hw = &nic->hw;
+- struct nix_txschq_config *req;
+ u32 burst_exp, burst_mantissa;
+ u32 exp, mantissa, div_exp;
++ u64 regval = 0;
++
++ /* Get exponent and mantissa values from the desired rate */
++ otx2_get_egress_burst_cfg(nic, burst, &burst_exp, &burst_mantissa);
++ otx2_get_egress_rate_cfg(maxrate, &exp, &mantissa, &div_exp);
++
++ if (is_dev_otx2(nic->pdev)) {
++ regval = FIELD_PREP(TLX_BURST_EXPONENT, (u64)burst_exp) |
++ FIELD_PREP(TLX_BURST_MANTISSA, (u64)burst_mantissa) |
++ FIELD_PREP(TLX_RATE_DIVIDER_EXPONENT, div_exp) |
++ FIELD_PREP(TLX_RATE_EXPONENT, exp) |
++ FIELD_PREP(TLX_RATE_MANTISSA, mantissa) | BIT_ULL(0);
++ } else {
++ regval = FIELD_PREP(CN10K_TLX_BURST_EXPONENT, (u64)burst_exp) |
++ FIELD_PREP(CN10K_TLX_BURST_MANTISSA, (u64)burst_mantissa) |
++ FIELD_PREP(TLX_RATE_DIVIDER_EXPONENT, div_exp) |
++ FIELD_PREP(TLX_RATE_EXPONENT, exp) |
++ FIELD_PREP(TLX_RATE_MANTISSA, mantissa) | BIT_ULL(0);
++ }
++
++ return regval;
++}
++
++static int otx2_set_matchall_egress_rate(struct otx2_nic *nic,
++ u32 burst, u64 maxrate)
++{
++ struct otx2_hw *hw = &nic->hw;
++ struct nix_txschq_config *req;
+ int txschq, err;
+
+ /* All SQs share the same TL4, so pick the first scheduler */
+ txschq = hw->txschq_list[NIX_TXSCH_LVL_TL4][0];
+
+- /* Get exponent and mantissa values from the desired rate */
+- otx2_get_egress_burst_cfg(burst, &burst_exp, &burst_mantissa);
+- otx2_get_egress_rate_cfg(maxrate, &exp, &mantissa, &div_exp);
+-
+ mutex_lock(&nic->mbox.lock);
+ req = otx2_mbox_alloc_msg_nix_txschq_cfg(&nic->mbox);
+ if (!req) {
+@@ -157,11 +195,7 @@ static int otx2_set_matchall_egress_rate(struct otx2_nic *nic, u32 burst, u32 ma
+ req->lvl = NIX_TXSCH_LVL_TL4;
+ req->num_regs = 1;
+ req->reg[0] = NIX_AF_TL4X_PIR(txschq);
+- req->regval[0] = FIELD_PREP(TLX_BURST_EXPONENT, burst_exp) |
+- FIELD_PREP(TLX_BURST_MANTISSA, burst_mantissa) |
+- FIELD_PREP(TLX_RATE_DIVIDER_EXPONENT, div_exp) |
+- FIELD_PREP(TLX_RATE_EXPONENT, exp) |
+- FIELD_PREP(TLX_RATE_MANTISSA, mantissa) | BIT_ULL(0);
++ req->regval[0] = otx2_get_txschq_rate_regval(nic, maxrate, burst);
+
+ err = otx2_sync_mbox_msg(&nic->mbox);
+ mutex_unlock(&nic->mbox.lock);
+@@ -230,7 +264,7 @@ static int otx2_tc_egress_matchall_install(struct otx2_nic *nic,
+ struct netlink_ext_ack *extack = cls->common.extack;
+ struct flow_action *actions = &cls->rule->action;
+ struct flow_action_entry *entry;
+- u32 rate;
++ u64 rate;
+ int err;
+
+ err = otx2_tc_validate_flow(nic, actions, extack);
+@@ -256,7 +290,7 @@ static int otx2_tc_egress_matchall_install(struct otx2_nic *nic,
+ }
+ /* Convert bytes per second to Mbps */
+ rate = entry->police.rate_bytes_ps * 8;
+- rate = max_t(u32, rate / 1000000, 1);
++ rate = max_t(u64, rate / 1000000, 1);
+ err = otx2_set_matchall_egress_rate(nic, entry->police.burst, rate);
+ if (err)
+ return err;
+--
+2.35.1
+
--- /dev/null
+From 88ab9e31f4c220a0b915becc3ec0e1b3131f3952 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sun, 24 Jul 2022 14:00:12 +0800
+Subject: perf symbol: Correct address for bss symbols
+
+From: Leo Yan <leo.yan@linaro.org>
+
+[ Upstream commit 2d86612aacb7805f72873691a2644d7279ed0630 ]
+
+When using 'perf mem' and 'perf c2c', an issue is observed that tool
+reports the wrong offset for global data symbols. This is a common
+issue on both x86 and Arm64 platforms.
+
+Let's see an example, for a test program, below is the disassembly for
+its .bss section which is dumped with objdump:
+
+ ...
+
+ Disassembly of section .bss:
+
+ 0000000000004040 <completed.0>:
+ ...
+
+ 0000000000004080 <buf1>:
+ ...
+
+ 00000000000040c0 <buf2>:
+ ...
+
+ 0000000000004100 <thread>:
+ ...
+
+First we used 'perf mem record' to run the test program and then used
+'perf --debug verbose=4 mem report' to observe what's the symbol info
+for 'buf1' and 'buf2' structures.
+
+ # ./perf mem record -e ldlat-loads,ldlat-stores -- false_sharing.exe 8
+ # ./perf --debug verbose=4 mem report
+ ...
+ dso__load_sym_internal: adjusting symbol: st_value: 0x40c0 sh_addr: 0x4040 sh_offset: 0x3028
+ symbol__new: buf2 0x30a8-0x30e8
+ ...
+ dso__load_sym_internal: adjusting symbol: st_value: 0x4080 sh_addr: 0x4040 sh_offset: 0x3028
+ symbol__new: buf1 0x3068-0x30a8
+ ...
+
+The perf tool relies on libelf to parse symbols, in executable and
+shared object files, 'st_value' holds a virtual address; 'sh_addr' is
+the address at which section's first byte should reside in memory, and
+'sh_offset' is the byte offset from the beginning of the file to the
+first byte in the section. The perf tool uses below formula to convert
+a symbol's memory address to a file address:
+
+ file_address = st_value - sh_addr + sh_offset
+ ^
+ ` Memory address
+
+We can see the final adjusted address ranges for buf1 and buf2 are
+[0x30a8-0x30e8) and [0x3068-0x30a8) respectively, apparently this is
+incorrect, in the code, the structure for 'buf1' and 'buf2' specifies
+compiler attribute with 64-byte alignment.
+
+The problem happens for 'sh_offset', libelf returns it as 0x3028 which
+is not 64-byte aligned, combining with disassembly, it's likely libelf
+doesn't respect the alignment for .bss section, therefore, it doesn't
+return the aligned value for 'sh_offset'.
+
+Suggested by Fangrui Song, ELF file contains program header which
+contains PT_LOAD segments, the fields p_vaddr and p_offset in PT_LOAD
+segments contain the execution info. A better choice for converting
+memory address to file address is using the formula:
+
+ file_address = st_value - p_vaddr + p_offset
+
+This patch introduces elf_read_program_header() which returns the
+program header based on the passed 'st_value', then it uses the formula
+above to calculate the symbol file address; and the debugging log is
+updated respectively.
+
+After applying the change:
+
+ # ./perf --debug verbose=4 mem report
+ ...
+ dso__load_sym_internal: adjusting symbol: st_value: 0x40c0 p_vaddr: 0x3d28 p_offset: 0x2d28
+ symbol__new: buf2 0x30c0-0x3100
+ ...
+ dso__load_sym_internal: adjusting symbol: st_value: 0x4080 p_vaddr: 0x3d28 p_offset: 0x2d28
+ symbol__new: buf1 0x3080-0x30c0
+ ...
+
+Fixes: f17e04afaff84b5c ("perf report: Fix ELF symbol parsing")
+Reported-by: Chang Rui <changruinj@gmail.com>
+Suggested-by: Fangrui Song <maskray@google.com>
+Signed-off-by: Leo Yan <leo.yan@linaro.org>
+Acked-by: Namhyung Kim <namhyung@kernel.org>
+Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
+Cc: Ian Rogers <irogers@google.com>
+Cc: Ingo Molnar <mingo@redhat.com>
+Cc: Jiri Olsa <jolsa@kernel.org>
+Cc: Mark Rutland <mark.rutland@arm.com>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Link: https://lore.kernel.org/r/20220724060013.171050-2-leo.yan@linaro.org
+Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ tools/perf/util/symbol-elf.c | 45 ++++++++++++++++++++++++++++++++----
+ 1 file changed, 41 insertions(+), 4 deletions(-)
+
+diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c
+index ecd377938eea..ef6ced5c5746 100644
+--- a/tools/perf/util/symbol-elf.c
++++ b/tools/perf/util/symbol-elf.c
+@@ -233,6 +233,33 @@ Elf_Scn *elf_section_by_name(Elf *elf, GElf_Ehdr *ep,
+ return NULL;
+ }
+
++static int elf_read_program_header(Elf *elf, u64 vaddr, GElf_Phdr *phdr)
++{
++ size_t i, phdrnum;
++ u64 sz;
++
++ if (elf_getphdrnum(elf, &phdrnum))
++ return -1;
++
++ for (i = 0; i < phdrnum; i++) {
++ if (gelf_getphdr(elf, i, phdr) == NULL)
++ return -1;
++
++ if (phdr->p_type != PT_LOAD)
++ continue;
++
++ sz = max(phdr->p_memsz, phdr->p_filesz);
++ if (!sz)
++ continue;
++
++ if (vaddr >= phdr->p_vaddr && (vaddr < phdr->p_vaddr + sz))
++ return 0;
++ }
++
++ /* Not found any valid program header */
++ return -1;
++}
++
+ static bool want_demangle(bool is_kernel_sym)
+ {
+ return is_kernel_sym ? symbol_conf.demangle_kernel : symbol_conf.demangle;
+@@ -1209,6 +1236,7 @@ dso__load_sym_internal(struct dso *dso, struct map *map, struct symsrc *syms_ss,
+ sym.st_value);
+ used_opd = true;
+ }
++
+ /*
+ * When loading symbols in a data mapping, ABS symbols (which
+ * has a value of SHN_ABS in its st_shndx) failed at
+@@ -1262,11 +1290,20 @@ dso__load_sym_internal(struct dso *dso, struct map *map, struct symsrc *syms_ss,
+ goto out_elf_end;
+ } else if ((used_opd && runtime_ss->adjust_symbols) ||
+ (!used_opd && syms_ss->adjust_symbols)) {
++ GElf_Phdr phdr;
++
++ if (elf_read_program_header(syms_ss->elf,
++ (u64)sym.st_value, &phdr)) {
++ pr_warning("%s: failed to find program header for "
++ "symbol: %s st_value: %#" PRIx64 "\n",
++ __func__, elf_name, (u64)sym.st_value);
++ continue;
++ }
+ pr_debug4("%s: adjusting symbol: st_value: %#" PRIx64 " "
+- "sh_addr: %#" PRIx64 " sh_offset: %#" PRIx64 "\n", __func__,
+- (u64)sym.st_value, (u64)shdr.sh_addr,
+- (u64)shdr.sh_offset);
+- sym.st_value -= shdr.sh_addr - shdr.sh_offset;
++ "p_vaddr: %#" PRIx64 " p_offset: %#" PRIx64 "\n",
++ __func__, (u64)sym.st_value, (u64)phdr.p_vaddr,
++ (u64)phdr.p_offset);
++ sym.st_value -= phdr.p_vaddr - phdr.p_offset;
+ }
+
+ demangled = demangle_sym(dso, kmodule, elf_name);
+--
+2.35.1
+
--- /dev/null
+From 1a6777f31913eae21ac5845aca0720d4ee8ccbae Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 20 Jul 2022 10:02:23 -0700
+Subject: scsi: ufs: core: Fix a race condition related to device management
+
+From: Bart Van Assche <bvanassche@acm.org>
+
+[ Upstream commit f5c2976e0cb0f6236013bfb479868531b04f61d4 ]
+
+If a device management command completion happens after
+wait_for_completion_timeout() times out and before ufshcd_clear_cmds() is
+called, then the completion code may crash on the complete() call in
+__ufshcd_transfer_req_compl().
+
+Fix the following crash:
+
+ Unable to handle kernel NULL pointer dereference at virtual address 0000000000000008
+ Call trace:
+ complete+0x64/0x178
+ __ufshcd_transfer_req_compl+0x30c/0x9c0
+ ufshcd_poll+0xf0/0x208
+ ufshcd_sl_intr+0xb8/0xf0
+ ufshcd_intr+0x168/0x2f4
+ __handle_irq_event_percpu+0xa0/0x30c
+ handle_irq_event+0x84/0x178
+ handle_fasteoi_irq+0x150/0x2e8
+ __handle_domain_irq+0x114/0x1e4
+ gic_handle_irq.31846+0x58/0x300
+ el1_irq+0xe4/0x1c0
+ efi_header_end+0x110/0x680
+ __irq_exit_rcu+0x108/0x124
+ __handle_domain_irq+0x118/0x1e4
+ gic_handle_irq.31846+0x58/0x300
+ el1_irq+0xe4/0x1c0
+ cpuidle_enter_state+0x3ac/0x8c4
+ do_idle+0x2fc/0x55c
+ cpu_startup_entry+0x84/0x90
+ kernel_init+0x0/0x310
+ start_kernel+0x0/0x608
+ start_kernel+0x4ec/0x608
+
+Link: https://lore.kernel.org/r/20220720170228.1598842-1-bvanassche@acm.org
+Fixes: 5a0b0cb9bee7 ("[SCSI] ufs: Add support for sending NOP OUT UPIU")
+Cc: Adrian Hunter <adrian.hunter@intel.com>
+Cc: Avri Altman <avri.altman@wdc.com>
+Cc: Bean Huo <beanhuo@micron.com>
+Cc: Stanley Chu <stanley.chu@mediatek.com>
+Signed-off-by: Bart Van Assche <bvanassche@acm.org>
+Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/scsi/ufs/ufshcd.c | 58 +++++++++++++++++++++++++++------------
+ 1 file changed, 40 insertions(+), 18 deletions(-)
+
+diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c
+index a34c1fab0246..874490f7f5e7 100644
+--- a/drivers/scsi/ufs/ufshcd.c
++++ b/drivers/scsi/ufs/ufshcd.c
+@@ -2947,37 +2947,59 @@ ufshcd_dev_cmd_completion(struct ufs_hba *hba, struct ufshcd_lrb *lrbp)
+ static int ufshcd_wait_for_dev_cmd(struct ufs_hba *hba,
+ struct ufshcd_lrb *lrbp, int max_timeout)
+ {
+- int err = 0;
+- unsigned long time_left;
++ unsigned long time_left = msecs_to_jiffies(max_timeout);
+ unsigned long flags;
++ bool pending;
++ int err;
+
++retry:
+ time_left = wait_for_completion_timeout(hba->dev_cmd.complete,
+- msecs_to_jiffies(max_timeout));
++ time_left);
+
+- spin_lock_irqsave(hba->host->host_lock, flags);
+- hba->dev_cmd.complete = NULL;
+ if (likely(time_left)) {
++ /*
++ * The completion handler called complete() and the caller of
++ * this function still owns the @lrbp tag so the code below does
++ * not trigger any race conditions.
++ */
++ hba->dev_cmd.complete = NULL;
+ err = ufshcd_get_tr_ocs(lrbp);
+ if (!err)
+ err = ufshcd_dev_cmd_completion(hba, lrbp);
+- }
+- spin_unlock_irqrestore(hba->host->host_lock, flags);
+-
+- if (!time_left) {
++ } else {
+ err = -ETIMEDOUT;
+ dev_dbg(hba->dev, "%s: dev_cmd request timedout, tag %d\n",
+ __func__, lrbp->task_tag);
+- if (!ufshcd_clear_cmds(hba, 1U << lrbp->task_tag))
++ if (ufshcd_clear_cmds(hba, 1U << lrbp->task_tag) == 0) {
+ /* successfully cleared the command, retry if needed */
+ err = -EAGAIN;
+- /*
+- * in case of an error, after clearing the doorbell,
+- * we also need to clear the outstanding_request
+- * field in hba
+- */
+- spin_lock_irqsave(&hba->outstanding_lock, flags);
+- __clear_bit(lrbp->task_tag, &hba->outstanding_reqs);
+- spin_unlock_irqrestore(&hba->outstanding_lock, flags);
++ /*
++ * Since clearing the command succeeded we also need to
++ * clear the task tag bit from the outstanding_reqs
++ * variable.
++ */
++ spin_lock_irqsave(&hba->outstanding_lock, flags);
++ pending = test_bit(lrbp->task_tag,
++ &hba->outstanding_reqs);
++ if (pending) {
++ hba->dev_cmd.complete = NULL;
++ __clear_bit(lrbp->task_tag,
++ &hba->outstanding_reqs);
++ }
++ spin_unlock_irqrestore(&hba->outstanding_lock, flags);
++
++ if (!pending) {
++ /*
++ * The completion handler ran while we tried to
++ * clear the command.
++ */
++ time_left = 1;
++ goto retry;
++ }
++ } else {
++ dev_err(hba->dev, "%s: failed to clear tag %d\n",
++ __func__, lrbp->task_tag);
++ }
+ }
+
+ return err;
+--
+2.35.1
+
--- /dev/null
+From 25704b34acfee56b5b270bb72659201066253eb4 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 13 Jun 2022 14:44:41 -0700
+Subject: scsi: ufs: Support clearing multiple commands at once
+
+From: Bart Van Assche <bvanassche@acm.org>
+
+[ Upstream commit d1a7644648b7cdacaf8d1013a4285001911e9bc8 ]
+
+Modify ufshcd_clear_cmd() such that it supports clearing multiple commands
+at once instead of one command at a time. This change will be used in a
+later patch to reduce the time spent in the reset handler.
+
+Link: https://lore.kernel.org/r/20220613214442.212466-3-bvanassche@acm.org
+Reviewed-by: Stanley Chu <stanley.chu@mediatek.com>
+Reviewed-by: Adrian Hunter <adrian.hunter@intel.com>
+Signed-off-by: Bart Van Assche <bvanassche@acm.org>
+Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/scsi/ufs/ufshcd.c | 42 ++++++++++++++++++++++++++-------------
+ 1 file changed, 28 insertions(+), 14 deletions(-)
+
+diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c
+index 452ad0612067..a34c1fab0246 100644
+--- a/drivers/scsi/ufs/ufshcd.c
++++ b/drivers/scsi/ufs/ufshcd.c
+@@ -734,17 +734,28 @@ static enum utp_ocs ufshcd_get_tr_ocs(struct ufshcd_lrb *lrbp)
+ }
+
+ /**
+- * ufshcd_utrl_clear - Clear a bit in UTRLCLR register
++ * ufshcd_utrl_clear() - Clear requests from the controller request list.
+ * @hba: per adapter instance
+- * @pos: position of the bit to be cleared
++ * @mask: mask with one bit set for each request to be cleared
+ */
+-static inline void ufshcd_utrl_clear(struct ufs_hba *hba, u32 pos)
++static inline void ufshcd_utrl_clear(struct ufs_hba *hba, u32 mask)
+ {
+ if (hba->quirks & UFSHCI_QUIRK_BROKEN_REQ_LIST_CLR)
+- ufshcd_writel(hba, (1 << pos), REG_UTP_TRANSFER_REQ_LIST_CLEAR);
+- else
+- ufshcd_writel(hba, ~(1 << pos),
+- REG_UTP_TRANSFER_REQ_LIST_CLEAR);
++ mask = ~mask;
++ /*
++ * From the UFSHCI specification: "UTP Transfer Request List CLear
++ * Register (UTRLCLR): This field is bit significant. Each bit
++ * corresponds to a slot in the UTP Transfer Request List, where bit 0
++ * corresponds to request slot 0. A bit in this field is set to ‘0’
++ * by host software to indicate to the host controller that a transfer
++ * request slot is cleared. The host controller
++ * shall free up any resources associated to the request slot
++ * immediately, and shall set the associated bit in UTRLDBR to ‘0’. The
++ * host software indicates no change to request slots by setting the
++ * associated bits in this field to ‘1’. Bits in this field shall only
++ * be set ‘1’ or ‘0’ by host software when UTRLRSR is set to ‘1’."
++ */
++ ufshcd_writel(hba, ~mask, REG_UTP_TRANSFER_REQ_LIST_CLEAR);
+ }
+
+ /**
+@@ -2853,16 +2864,19 @@ static int ufshcd_compose_dev_cmd(struct ufs_hba *hba,
+ return ufshcd_compose_devman_upiu(hba, lrbp);
+ }
+
+-static int
+-ufshcd_clear_cmd(struct ufs_hba *hba, int tag)
++/*
++ * Clear all the requests from the controller for which a bit has been set in
++ * @mask and wait until the controller confirms that these requests have been
++ * cleared.
++ */
++static int ufshcd_clear_cmds(struct ufs_hba *hba, u32 mask)
+ {
+ int err = 0;
+ unsigned long flags;
+- u32 mask = 1 << tag;
+
+ /* clear outstanding transaction before retry */
+ spin_lock_irqsave(hba->host->host_lock, flags);
+- ufshcd_utrl_clear(hba, tag);
++ ufshcd_utrl_clear(hba, mask);
+ spin_unlock_irqrestore(hba->host->host_lock, flags);
+
+ /*
+@@ -2953,7 +2967,7 @@ static int ufshcd_wait_for_dev_cmd(struct ufs_hba *hba,
+ err = -ETIMEDOUT;
+ dev_dbg(hba->dev, "%s: dev_cmd request timedout, tag %d\n",
+ __func__, lrbp->task_tag);
+- if (!ufshcd_clear_cmd(hba, lrbp->task_tag))
++ if (!ufshcd_clear_cmds(hba, 1U << lrbp->task_tag))
+ /* successfully cleared the command, retry if needed */
+ err = -EAGAIN;
+ /*
+@@ -6988,7 +7002,7 @@ static int ufshcd_eh_device_reset_handler(struct scsi_cmnd *cmd)
+ /* clear the commands that were pending for corresponding LUN */
+ for_each_set_bit(pos, &hba->outstanding_reqs, hba->nutrs) {
+ if (hba->lrb[pos].lun == lun) {
+- err = ufshcd_clear_cmd(hba, pos);
++ err = ufshcd_clear_cmds(hba, 1U << pos);
+ if (err)
+ break;
+ __ufshcd_transfer_req_compl(hba, 1U << pos);
+@@ -7090,7 +7104,7 @@ static int ufshcd_try_to_abort_task(struct ufs_hba *hba, int tag)
+ goto out;
+ }
+
+- err = ufshcd_clear_cmd(hba, tag);
++ err = ufshcd_clear_cmds(hba, 1U << tag);
+ if (err)
+ dev_err(hba->dev, "%s: Failed clearing cmd at tag %d, err %d\n",
+ __func__, tag, err);
+--
+2.35.1
+
--- /dev/null
+From 8f865e9cd6d0a57debad4c7d4515d949d4fc426a Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 23 Jul 2022 09:58:09 +0800
+Subject: sctp: fix sleep in atomic context bug in timer handlers
+
+From: Duoming Zhou <duoming@zju.edu.cn>
+
+[ Upstream commit b89fc26f741d9f9efb51cba3e9b241cf1380ec5a ]
+
+There are sleep in atomic context bugs in timer handlers of sctp
+such as sctp_generate_t3_rtx_event(), sctp_generate_probe_event(),
+sctp_generate_t1_init_event(), sctp_generate_timeout_event(),
+sctp_generate_t3_rtx_event() and so on.
+
+The root cause is sctp_sched_prio_init_sid() with GFP_KERNEL parameter
+that may sleep could be called by different timer handlers which is in
+interrupt context.
+
+One of the call paths that could trigger bug is shown below:
+
+ (interrupt context)
+sctp_generate_probe_event
+ sctp_do_sm
+ sctp_side_effects
+ sctp_cmd_interpreter
+ sctp_outq_teardown
+ sctp_outq_init
+ sctp_sched_set_sched
+ n->init_sid(..,GFP_KERNEL)
+ sctp_sched_prio_init_sid //may sleep
+
+This patch changes gfp_t parameter of init_sid in sctp_sched_set_sched()
+from GFP_KERNEL to GFP_ATOMIC in order to prevent sleep in atomic
+context bugs.
+
+Fixes: 5bbbbe32a431 ("sctp: introduce stream scheduler foundations")
+Signed-off-by: Duoming Zhou <duoming@zju.edu.cn>
+Acked-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
+Link: https://lore.kernel.org/r/20220723015809.11553-1-duoming@zju.edu.cn
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/sctp/stream_sched.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/net/sctp/stream_sched.c b/net/sctp/stream_sched.c
+index 99e5f69fbb74..a2e1d34f52c5 100644
+--- a/net/sctp/stream_sched.c
++++ b/net/sctp/stream_sched.c
+@@ -163,7 +163,7 @@ int sctp_sched_set_sched(struct sctp_association *asoc,
+ if (!SCTP_SO(&asoc->stream, i)->ext)
+ continue;
+
+- ret = n->init_sid(&asoc->stream, i, GFP_KERNEL);
++ ret = n->init_sid(&asoc->stream, i, GFP_ATOMIC);
+ if (ret)
+ goto err;
+ }
+--
+2.35.1
+
--- /dev/null
+From 52db25ddcd09238cbf3c260287edf1cc11e7926a Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 25 Jul 2022 18:11:06 -0400
+Subject: sctp: leave the err path free in sctp_stream_init to sctp_stream_free
+
+From: Xin Long <lucien.xin@gmail.com>
+
+[ Upstream commit 181d8d2066c000ba0a0e6940a7ad80f1a0e68e9d ]
+
+A NULL pointer dereference was reported by Wei Chen:
+
+ BUG: kernel NULL pointer dereference, address: 0000000000000000
+ RIP: 0010:__list_del_entry_valid+0x26/0x80
+ Call Trace:
+ <TASK>
+ sctp_sched_dequeue_common+0x1c/0x90
+ sctp_sched_prio_dequeue+0x67/0x80
+ __sctp_outq_teardown+0x299/0x380
+ sctp_outq_free+0x15/0x20
+ sctp_association_free+0xc3/0x440
+ sctp_do_sm+0x1ca7/0x2210
+ sctp_assoc_bh_rcv+0x1f6/0x340
+
+This happens when calling sctp_sendmsg without connecting to server first.
+In this case, a data chunk already queues up in send queue of client side
+when processing the INIT_ACK from server in sctp_process_init() where it
+calls sctp_stream_init() to alloc stream_in. If it fails to alloc stream_in
+all stream_out will be freed in sctp_stream_init's err path. Then in the
+asoc freeing it will crash when dequeuing this data chunk as stream_out
+is missing.
+
+As we can't free stream out before dequeuing all data from send queue, and
+this patch is to fix it by moving the err path stream_out/in freeing in
+sctp_stream_init() to sctp_stream_free() which is eventually called when
+freeing the asoc in sctp_association_free(). This fix also makes the code
+in sctp_process_init() more clear.
+
+Note that in sctp_association_init() when it fails in sctp_stream_init(),
+sctp_association_free() will not be called, and in that case it should
+go to 'stream_free' err path to free stream instead of 'fail_init'.
+
+Fixes: 5bbbbe32a431 ("sctp: introduce stream scheduler foundations")
+Reported-by: Wei Chen <harperchen1110@gmail.com>
+Signed-off-by: Xin Long <lucien.xin@gmail.com>
+Link: https://lore.kernel.org/r/831a3dc100c4908ff76e5bcc363be97f2778bc0b.1658787066.git.lucien.xin@gmail.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/sctp/associola.c | 5 ++---
+ net/sctp/stream.c | 19 +++----------------
+ 2 files changed, 5 insertions(+), 19 deletions(-)
+
+diff --git a/net/sctp/associola.c b/net/sctp/associola.c
+index be29da09cc7a..3460abceba44 100644
+--- a/net/sctp/associola.c
++++ b/net/sctp/associola.c
+@@ -229,9 +229,8 @@ static struct sctp_association *sctp_association_init(
+ if (!sctp_ulpq_init(&asoc->ulpq, asoc))
+ goto fail_init;
+
+- if (sctp_stream_init(&asoc->stream, asoc->c.sinit_num_ostreams,
+- 0, gfp))
+- goto fail_init;
++ if (sctp_stream_init(&asoc->stream, asoc->c.sinit_num_ostreams, 0, gfp))
++ goto stream_free;
+
+ /* Initialize default path MTU. */
+ asoc->pathmtu = sp->pathmtu;
+diff --git a/net/sctp/stream.c b/net/sctp/stream.c
+index 6dc95dcc0ff4..ef9fceadef8d 100644
+--- a/net/sctp/stream.c
++++ b/net/sctp/stream.c
+@@ -137,7 +137,7 @@ int sctp_stream_init(struct sctp_stream *stream, __u16 outcnt, __u16 incnt,
+
+ ret = sctp_stream_alloc_out(stream, outcnt, gfp);
+ if (ret)
+- goto out_err;
++ return ret;
+
+ for (i = 0; i < stream->outcnt; i++)
+ SCTP_SO(stream, i)->state = SCTP_STREAM_OPEN;
+@@ -145,22 +145,9 @@ int sctp_stream_init(struct sctp_stream *stream, __u16 outcnt, __u16 incnt,
+ handle_in:
+ sctp_stream_interleave_init(stream);
+ if (!incnt)
+- goto out;
+-
+- ret = sctp_stream_alloc_in(stream, incnt, gfp);
+- if (ret)
+- goto in_err;
+-
+- goto out;
++ return 0;
+
+-in_err:
+- sched->free(stream);
+- genradix_free(&stream->in);
+-out_err:
+- genradix_free(&stream->out);
+- stream->outcnt = 0;
+-out:
+- return ret;
++ return sctp_stream_alloc_in(stream, incnt, gfp);
+ }
+
+ int sctp_stream_init_ext(struct sctp_stream *stream, __u16 sid)
+--
+2.35.1
+
net-ping6-fix-memleak-in-ipv6_renew_options.patch
ipv6-addrconf-fix-a-null-ptr-deref-bug-for-ip6_ptr.patch
net-tls-remove-the-context-from-the-list-in-tls_device_down.patch
+net-pcs-xpcs-propagate-xpcs_read-error-to-xpcs_get_s.patch
+net-sungem_phy-add-of_node_put-for-reference-returne.patch
+mlxsw-spectrum_router-simplify-list-unwinding.patch
+tcp-fix-a-data-race-around-sysctl_tcp_min_tso_segs.patch
+tcp-fix-a-data-race-around-sysctl_tcp_tso_rtt_log.patch
+tcp-fix-a-data-race-around-sysctl_tcp_min_rtt_wlen.patch
+tcp-fix-a-data-race-around-sysctl_tcp_autocorking.patch
+tcp-fix-a-data-race-around-sysctl_tcp_invalid_rateli.patch
+documentation-fix-sctp_wmem-in-ip-sysctl.rst.patch
+macsec-fix-null-deref-in-macsec_add_rxsa.patch
+macsec-fix-error-message-in-macsec_add_rxsa-and-_txs.patch
+macsec-limit-replay-window-size-with-xpn.patch
+macsec-always-read-macsec_sa_attr_pn-as-a-u64.patch
+net-macsec-fix-potential-resource-leak-in-macsec_add.patch
+net-mld-fix-reference-count-leak-in-mld_-query-repor.patch
+tcp-fix-data-races-around-sk_pacing_rate.patch
+net-fix-data-races-around-sysctl_-rw-mem-_offset.patch
+tcp-fix-a-data-race-around-sysctl_tcp_comp_sack_dela.patch
+tcp-fix-a-data-race-around-sysctl_tcp_comp_sack_slac.patch
+tcp-fix-a-data-race-around-sysctl_tcp_comp_sack_nr.patch
+tcp-fix-data-races-around-sysctl_tcp_reflect_tos.patch
+ipv4-fix-data-races-around-sysctl_fib_notify_on_flag.patch
+i40e-fix-interface-init-with-msi-interrupts-no-msi-x.patch
+net-dsa-fix-reference-counting-for-lag-fdbs.patch
+sctp-fix-sleep-in-atomic-context-bug-in-timer-handle.patch
+octeontx2-pf-cn10k-fix-egress-ratelimit-configuratio.patch
+netfilter-nf_queue-do-not-allow-packet-truncation-be.patch
+scsi-ufs-support-clearing-multiple-commands-at-once.patch
+scsi-ufs-core-fix-a-race-condition-related-to-device.patch
+mptcp-don-t-send-rst-for-single-subflow.patch
+virtio-net-fix-the-race-between-refill-work-and-clos.patch
+perf-symbol-correct-address-for-bss-symbols.patch
+sfc-disable-softirqs-for-ptp-tx.patch
+sctp-leave-the-err-path-free-in-sctp_stream_init-to-.patch
+net-funeth-fix-fun_xdp_tx-and-xdp-packet-reclaim.patch
+stmmac-dwmac-mediatek-fix-resource-leak-in-probe.patch
--- /dev/null
+From 8b1ab4a8751f1c86e1857d655def64a74f6328b6 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 26 Jul 2022 08:45:04 +0200
+Subject: sfc: disable softirqs for ptp TX
+
+From: Alejandro Lucero <alejandro.lucero-palau@amd.com>
+
+[ Upstream commit 67c3b611d92fc238c43734878bc3e232ab570c79 ]
+
+Sending a PTP packet can imply to use the normal TX driver datapath but
+invoked from the driver's ptp worker. The kernel generic TX code
+disables softirqs and preemption before calling specific driver TX code,
+but the ptp worker does not. Although current ptp driver functionality
+does not require it, there are several reasons for doing so:
+
+ 1) The invoked code is always executed with softirqs disabled for non
+ PTP packets.
+ 2) Better if a ptp packet transmission is not interrupted by softirq
+ handling which could lead to high latencies.
+ 3) netdev_xmit_more used by the TX code requires preemption to be
+ disabled.
+
+Indeed a solution for dealing with kernel preemption state based on static
+kernel configuration is not possible since the introduction of dynamic
+preemption level configuration at boot time using the static calls
+functionality.
+
+Fixes: f79c957a0b537 ("drivers: net: sfc: use netdev_xmit_more helper")
+Signed-off-by: Alejandro Lucero <alejandro.lucero-palau@amd.com>
+Link: https://lore.kernel.org/r/20220726064504.49613-1-alejandro.lucero-palau@amd.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/sfc/ptp.c | 22 ++++++++++++++++++++++
+ 1 file changed, 22 insertions(+)
+
+diff --git a/drivers/net/ethernet/sfc/ptp.c b/drivers/net/ethernet/sfc/ptp.c
+index 4625f85acab2..10ad0b93d283 100644
+--- a/drivers/net/ethernet/sfc/ptp.c
++++ b/drivers/net/ethernet/sfc/ptp.c
+@@ -1100,7 +1100,29 @@ static void efx_ptp_xmit_skb_queue(struct efx_nic *efx, struct sk_buff *skb)
+
+ tx_queue = efx_channel_get_tx_queue(ptp_data->channel, type);
+ if (tx_queue && tx_queue->timestamping) {
++ /* This code invokes normal driver TX code which is always
++ * protected from softirqs when called from generic TX code,
++ * which in turn disables preemption. Look at __dev_queue_xmit
++ * which uses rcu_read_lock_bh disabling preemption for RCU
++ * plus disabling softirqs. We do not need RCU reader
++ * protection here.
++ *
++ * Although it is theoretically safe for current PTP TX/RX code
++ * running without disabling softirqs, there are three good
++ * reasons for doing so:
++ *
++ * 1) The code invoked is mainly implemented for non-PTP
++ * packets and it is always executed with softirqs
++ * disabled.
++ * 2) This being a single PTP packet, better to not
++ * interrupt its processing by softirqs which can lead
++ * to high latencies.
++ * 3) netdev_xmit_more checks preemption is disabled and
++ * triggers a BUG_ON if not.
++ */
++ local_bh_disable();
+ efx_enqueue_skb(tx_queue, skb);
++ local_bh_enable();
+ } else {
+ WARN_ONCE(1, "PTP channel has no timestamped tx queue\n");
+ dev_kfree_skb_any(skb);
+--
+2.35.1
+
--- /dev/null
+From e00acc7d26f75cab80c8949b32135e2f01805d50 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 28 Jul 2022 14:52:09 +0300
+Subject: stmmac: dwmac-mediatek: fix resource leak in probe
+
+From: Dan Carpenter <dan.carpenter@oracle.com>
+
+[ Upstream commit 4d3d3a1b244fd54629a6b7047f39a7bbc8d11910 ]
+
+If mediatek_dwmac_clks_config() fails, then call stmmac_remove_config_dt()
+before returning. Otherwise it is a resource leak.
+
+Fixes: fa4b3ca60e80 ("stmmac: dwmac-mediatek: fix clock issue")
+Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
+Link: https://lore.kernel.org/r/YuJ4aZyMUlG6yGGa@kili
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/stmicro/stmmac/dwmac-mediatek.c | 9 +++++----
+ 1 file changed, 5 insertions(+), 4 deletions(-)
+
+diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-mediatek.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-mediatek.c
+index ca8ab290013c..d42e1afb6521 100644
+--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-mediatek.c
++++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-mediatek.c
+@@ -688,18 +688,19 @@ static int mediatek_dwmac_probe(struct platform_device *pdev)
+
+ ret = mediatek_dwmac_clks_config(priv_plat, true);
+ if (ret)
+- return ret;
++ goto err_remove_config_dt;
+
+ ret = stmmac_dvr_probe(&pdev->dev, plat_dat, &stmmac_res);
+- if (ret) {
+- stmmac_remove_config_dt(pdev, plat_dat);
++ if (ret)
+ goto err_drv_probe;
+- }
+
+ return 0;
+
+ err_drv_probe:
+ mediatek_dwmac_clks_config(priv_plat, false);
++err_remove_config_dt:
++ stmmac_remove_config_dt(pdev, plat_dat);
++
+ return ret;
+ }
+
+--
+2.35.1
+
--- /dev/null
+From 01a8b323793dea3387afc38484649d5790b4aeed Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 20 Jul 2022 09:50:25 -0700
+Subject: tcp: Fix a data-race around sysctl_tcp_autocorking.
+
+From: Kuniyuki Iwashima <kuniyu@amazon.com>
+
+[ Upstream commit 85225e6f0a76e6745bc841c9f25169c509b573d8 ]
+
+While reading sysctl_tcp_autocorking, it can be changed concurrently.
+Thus, we need to add READ_ONCE() to its reader.
+
+Fixes: f54b311142a9 ("tcp: auto corking")
+Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/ipv4/tcp.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
+index 97fed1217b7f..60b46f2a6896 100644
+--- a/net/ipv4/tcp.c
++++ b/net/ipv4/tcp.c
+@@ -686,7 +686,7 @@ static bool tcp_should_autocork(struct sock *sk, struct sk_buff *skb,
+ int size_goal)
+ {
+ return skb->len < size_goal &&
+- sock_net(sk)->ipv4.sysctl_tcp_autocorking &&
++ READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_autocorking) &&
+ !tcp_rtx_queue_empty(sk) &&
+ refcount_read(&sk->sk_wmem_alloc) > skb->truesize &&
+ tcp_skb_can_collapse_to(skb);
+--
+2.35.1
+
--- /dev/null
+From 02ba4d04348b13331c2da63b04a04a0b950dd91e Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 22 Jul 2022 11:22:01 -0700
+Subject: tcp: Fix a data-race around sysctl_tcp_comp_sack_delay_ns.
+
+From: Kuniyuki Iwashima <kuniyu@amazon.com>
+
+[ Upstream commit 4866b2b0f7672b6d760c4b8ece6fb56f965dcc8a ]
+
+While reading sysctl_tcp_comp_sack_delay_ns, it can be changed
+concurrently. Thus, we need to add READ_ONCE() to its reader.
+
+Fixes: 6d82aa242092 ("tcp: add tcp_comp_sack_delay_ns sysctl")
+Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/ipv4/tcp_input.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
+index f09b1321a960..3591a25a8631 100644
+--- a/net/ipv4/tcp_input.c
++++ b/net/ipv4/tcp_input.c
+@@ -5546,7 +5546,8 @@ static void __tcp_ack_snd_check(struct sock *sk, int ofo_possible)
+ if (tp->srtt_us && tp->srtt_us < rtt)
+ rtt = tp->srtt_us;
+
+- delay = min_t(unsigned long, sock_net(sk)->ipv4.sysctl_tcp_comp_sack_delay_ns,
++ delay = min_t(unsigned long,
++ READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_comp_sack_delay_ns),
+ rtt * (NSEC_PER_USEC >> 3)/20);
+ sock_hold(sk);
+ hrtimer_start_range_ns(&tp->compressed_ack_timer, ns_to_ktime(delay),
+--
+2.35.1
+
--- /dev/null
+From f6fa1599ed2f020baa02e25ceedda7ac6662f41e Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 22 Jul 2022 11:22:03 -0700
+Subject: tcp: Fix a data-race around sysctl_tcp_comp_sack_nr.
+
+From: Kuniyuki Iwashima <kuniyu@amazon.com>
+
+[ Upstream commit 79f55473bfc8ac51bd6572929a679eeb4da22251 ]
+
+While reading sysctl_tcp_comp_sack_nr, it can be changed concurrently.
+Thus, we need to add READ_ONCE() to its reader.
+
+Fixes: 9c21d2fc41c0 ("tcp: add tcp_comp_sack_nr sysctl")
+Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/ipv4/tcp_input.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
+index 5de396075a27..9221c8c7b9a9 100644
+--- a/net/ipv4/tcp_input.c
++++ b/net/ipv4/tcp_input.c
+@@ -5525,7 +5525,7 @@ static void __tcp_ack_snd_check(struct sock *sk, int ofo_possible)
+ }
+
+ if (!tcp_is_sack(tp) ||
+- tp->compressed_ack >= sock_net(sk)->ipv4.sysctl_tcp_comp_sack_nr)
++ tp->compressed_ack >= READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_comp_sack_nr))
+ goto send_now;
+
+ if (tp->compressed_ack_rcv_nxt != tp->rcv_nxt) {
+--
+2.35.1
+
--- /dev/null
+From b4651439481b0ac6716ce52630ecc8edca98b399 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 22 Jul 2022 11:22:02 -0700
+Subject: tcp: Fix a data-race around sysctl_tcp_comp_sack_slack_ns.
+
+From: Kuniyuki Iwashima <kuniyu@amazon.com>
+
+[ Upstream commit 22396941a7f343d704738360f9ef0e6576489d43 ]
+
+While reading sysctl_tcp_comp_sack_slack_ns, it can be changed
+concurrently. Thus, we need to add READ_ONCE() to its reader.
+
+Fixes: a70437cc09a1 ("tcp: add hrtimer slack to sack compression")
+Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/ipv4/tcp_input.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
+index 3591a25a8631..5de396075a27 100644
+--- a/net/ipv4/tcp_input.c
++++ b/net/ipv4/tcp_input.c
+@@ -5551,7 +5551,7 @@ static void __tcp_ack_snd_check(struct sock *sk, int ofo_possible)
+ rtt * (NSEC_PER_USEC >> 3)/20);
+ sock_hold(sk);
+ hrtimer_start_range_ns(&tp->compressed_ack_timer, ns_to_ktime(delay),
+- sock_net(sk)->ipv4.sysctl_tcp_comp_sack_slack_ns,
++ READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_comp_sack_slack_ns),
+ HRTIMER_MODE_REL_PINNED_SOFT);
+ }
+
+--
+2.35.1
+
--- /dev/null
+From 38838e77f2d5f3e773dae060bb45a5bf78bed89e Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 20 Jul 2022 09:50:26 -0700
+Subject: tcp: Fix a data-race around sysctl_tcp_invalid_ratelimit.
+
+From: Kuniyuki Iwashima <kuniyu@amazon.com>
+
+[ Upstream commit 2afdbe7b8de84c28e219073a6661080e1b3ded48 ]
+
+While reading sysctl_tcp_invalid_ratelimit, it can be changed
+concurrently. Thus, we need to add READ_ONCE() to its reader.
+
+Fixes: 032ee4236954 ("tcp: helpers to mitigate ACK loops by rate-limiting out-of-window dupacks")
+Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/ipv4/tcp_input.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
+index f3b658fa3e7b..db78197a44ff 100644
+--- a/net/ipv4/tcp_input.c
++++ b/net/ipv4/tcp_input.c
+@@ -3581,7 +3581,8 @@ static bool __tcp_oow_rate_limited(struct net *net, int mib_idx,
+ if (*last_oow_ack_time) {
+ s32 elapsed = (s32)(tcp_jiffies32 - *last_oow_ack_time);
+
+- if (0 <= elapsed && elapsed < net->ipv4.sysctl_tcp_invalid_ratelimit) {
++ if (0 <= elapsed &&
++ elapsed < READ_ONCE(net->ipv4.sysctl_tcp_invalid_ratelimit)) {
+ NET_INC_STATS(net, mib_idx);
+ return true; /* rate-limited: don't send yet! */
+ }
+--
+2.35.1
+
--- /dev/null
+From f6cd91e480589d9df11b81dae87321d73f5512f6 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 20 Jul 2022 09:50:24 -0700
+Subject: tcp: Fix a data-race around sysctl_tcp_min_rtt_wlen.
+
+From: Kuniyuki Iwashima <kuniyu@amazon.com>
+
+[ Upstream commit 1330ffacd05fc9ac4159d19286ce119e22450ed2 ]
+
+While reading sysctl_tcp_min_rtt_wlen, it can be changed concurrently.
+Thus, we need to add READ_ONCE() to its reader.
+
+Fixes: f672258391b4 ("tcp: track min RTT using windowed min-filter")
+Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/ipv4/tcp_input.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
+index 78e16891f12b..f3b658fa3e7b 100644
+--- a/net/ipv4/tcp_input.c
++++ b/net/ipv4/tcp_input.c
+@@ -3058,7 +3058,7 @@ static void tcp_fastretrans_alert(struct sock *sk, const u32 prior_snd_una,
+
+ static void tcp_update_rtt_min(struct sock *sk, u32 rtt_us, const int flag)
+ {
+- u32 wlen = sock_net(sk)->ipv4.sysctl_tcp_min_rtt_wlen * HZ;
++ u32 wlen = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_min_rtt_wlen) * HZ;
+ struct tcp_sock *tp = tcp_sk(sk);
+
+ if ((flag & FLAG_ACK_MAYBE_DELAYED) && rtt_us > tcp_min_rtt(tp)) {
+--
+2.35.1
+
--- /dev/null
+From c6fc7e6fbd65bc1659e195592c02e193e9bde34b Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 20 Jul 2022 09:50:22 -0700
+Subject: tcp: Fix a data-race around sysctl_tcp_min_tso_segs.
+
+From: Kuniyuki Iwashima <kuniyu@amazon.com>
+
+[ Upstream commit e0bb4ab9dfddd872622239f49fb2bd403b70853b ]
+
+While reading sysctl_tcp_min_tso_segs, it can be changed concurrently.
+Thus, we need to add READ_ONCE() to its reader.
+
+Fixes: 95bd09eb2750 ("tcp: TSO packets automatic sizing")
+Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/ipv4/tcp_output.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
+index 6a3adb0222f4..08466421e7e0 100644
+--- a/net/ipv4/tcp_output.c
++++ b/net/ipv4/tcp_output.c
+@@ -1990,7 +1990,7 @@ static u32 tcp_tso_segs(struct sock *sk, unsigned int mss_now)
+
+ min_tso = ca_ops->min_tso_segs ?
+ ca_ops->min_tso_segs(sk) :
+- sock_net(sk)->ipv4.sysctl_tcp_min_tso_segs;
++ READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_min_tso_segs);
+
+ tso_segs = tcp_tso_autosize(sk, mss_now, min_tso);
+ return min_t(u32, tso_segs, sk->sk_gso_max_segs);
+--
+2.35.1
+
--- /dev/null
+From 32e56d364a747fcbd462c5f42fe5c67a57b23f57 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 20 Jul 2022 09:50:23 -0700
+Subject: tcp: Fix a data-race around sysctl_tcp_tso_rtt_log.
+
+From: Kuniyuki Iwashima <kuniyu@amazon.com>
+
+[ Upstream commit 2455e61b85e9c99af38cd889a7101f1d48b33cb4 ]
+
+While reading sysctl_tcp_tso_rtt_log, it can be changed concurrently.
+Thus, we need to add READ_ONCE() to its reader.
+
+Fixes: 65466904b015 ("tcp: adjust TSO packet sizes based on min_rtt")
+Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/ipv4/tcp_output.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
+index 08466421e7e0..60c9f7f444e0 100644
+--- a/net/ipv4/tcp_output.c
++++ b/net/ipv4/tcp_output.c
+@@ -1971,7 +1971,7 @@ static u32 tcp_tso_autosize(const struct sock *sk, unsigned int mss_now,
+
+ bytes = sk->sk_pacing_rate >> READ_ONCE(sk->sk_pacing_shift);
+
+- r = tcp_min_rtt(tcp_sk(sk)) >> sock_net(sk)->ipv4.sysctl_tcp_tso_rtt_log;
++ r = tcp_min_rtt(tcp_sk(sk)) >> READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_tso_rtt_log);
+ if (r < BITS_PER_TYPE(sk->sk_gso_max_size))
+ bytes += sk->sk_gso_max_size >> r;
+
+--
+2.35.1
+
--- /dev/null
+From 01b5a0cfd3caecff42d228bbde2550dbd28747ef Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 22 Jul 2022 11:21:59 -0700
+Subject: tcp: Fix data-races around sk_pacing_rate.
+
+From: Kuniyuki Iwashima <kuniyu@amazon.com>
+
+[ Upstream commit 59bf6c65a09fff74215517aecffbbdcd67df76e3 ]
+
+While reading sysctl_tcp_pacing_(ss|ca)_ratio, they can be changed
+concurrently. Thus, we need to add READ_ONCE() to their readers.
+
+Fixes: 43e122b014c9 ("tcp: refine pacing rate determination")
+Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/ipv4/tcp_input.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
+index db78197a44ff..de066fad7dfe 100644
+--- a/net/ipv4/tcp_input.c
++++ b/net/ipv4/tcp_input.c
+@@ -910,9 +910,9 @@ static void tcp_update_pacing_rate(struct sock *sk)
+ * end of slow start and should slow down.
+ */
+ if (tcp_snd_cwnd(tp) < tp->snd_ssthresh / 2)
+- rate *= sock_net(sk)->ipv4.sysctl_tcp_pacing_ss_ratio;
++ rate *= READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_pacing_ss_ratio);
+ else
+- rate *= sock_net(sk)->ipv4.sysctl_tcp_pacing_ca_ratio;
++ rate *= READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_pacing_ca_ratio);
+
+ rate *= max(tcp_snd_cwnd(tp), tp->packets_out);
+
+--
+2.35.1
+
--- /dev/null
+From 82f76e8e3169ac0577d81c1deea468e0a54b08ea Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 22 Jul 2022 11:22:04 -0700
+Subject: tcp: Fix data-races around sysctl_tcp_reflect_tos.
+
+From: Kuniyuki Iwashima <kuniyu@amazon.com>
+
+[ Upstream commit 870e3a634b6a6cb1543b359007aca73fe6a03ac5 ]
+
+While reading sysctl_tcp_reflect_tos, it can be changed concurrently.
+Thus, we need to add READ_ONCE() to its readers.
+
+Fixes: ac8f1710c12b ("tcp: reflect tos value received in SYN to the socket")
+Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
+Acked-by: Wei Wang <weiwan@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/ipv4/tcp_ipv4.c | 4 ++--
+ net/ipv6/tcp_ipv6.c | 4 ++--
+ 2 files changed, 4 insertions(+), 4 deletions(-)
+
+diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
+index a57f96b86874..1db9938163c4 100644
+--- a/net/ipv4/tcp_ipv4.c
++++ b/net/ipv4/tcp_ipv4.c
+@@ -1007,7 +1007,7 @@ static int tcp_v4_send_synack(const struct sock *sk, struct dst_entry *dst,
+ if (skb) {
+ __tcp_v4_send_check(skb, ireq->ir_loc_addr, ireq->ir_rmt_addr);
+
+- tos = sock_net(sk)->ipv4.sysctl_tcp_reflect_tos ?
++ tos = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reflect_tos) ?
+ (tcp_rsk(req)->syn_tos & ~INET_ECN_MASK) |
+ (inet_sk(sk)->tos & INET_ECN_MASK) :
+ inet_sk(sk)->tos;
+@@ -1527,7 +1527,7 @@ struct sock *tcp_v4_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
+ /* Set ToS of the new socket based upon the value of incoming SYN.
+ * ECT bits are set later in tcp_init_transfer().
+ */
+- if (sock_net(sk)->ipv4.sysctl_tcp_reflect_tos)
++ if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reflect_tos))
+ newinet->tos = tcp_rsk(req)->syn_tos & ~INET_ECN_MASK;
+
+ if (!dst) {
+diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
+index 5185c11dc444..979e0d7b2119 100644
+--- a/net/ipv6/tcp_ipv6.c
++++ b/net/ipv6/tcp_ipv6.c
+@@ -546,7 +546,7 @@ static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst,
+ if (np->repflow && ireq->pktopts)
+ fl6->flowlabel = ip6_flowlabel(ipv6_hdr(ireq->pktopts));
+
+- tclass = sock_net(sk)->ipv4.sysctl_tcp_reflect_tos ?
++ tclass = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reflect_tos) ?
+ (tcp_rsk(req)->syn_tos & ~INET_ECN_MASK) |
+ (np->tclass & INET_ECN_MASK) :
+ np->tclass;
+@@ -1314,7 +1314,7 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *
+ /* Set ToS of the new socket based upon the value of incoming SYN.
+ * ECT bits are set later in tcp_init_transfer().
+ */
+- if (sock_net(sk)->ipv4.sysctl_tcp_reflect_tos)
++ if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reflect_tos))
+ newnp->tclass = tcp_rsk(req)->syn_tos & ~INET_ECN_MASK;
+
+ /* Clone native IPv6 options from listening socket (if any)
+--
+2.35.1
+
--- /dev/null
+From ca596e791e5e6909c71d2ea7d91a4ecc5db11e57 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 25 Jul 2022 15:21:59 +0800
+Subject: virtio-net: fix the race between refill work and close
+
+From: Jason Wang <jasowang@redhat.com>
+
+[ Upstream commit 5a159128faff151b7fe5f4eb0f310b1e0a2d56bf ]
+
+We try using cancel_delayed_work_sync() to prevent the work from
+enabling NAPI. This is insufficient since we don't disable the source
+of the refill work scheduling. This means an NAPI poll callback after
+cancel_delayed_work_sync() can schedule the refill work then can
+re-enable the NAPI that leads to use-after-free [1].
+
+Since the work can enable NAPI, we can't simply disable NAPI before
+calling cancel_delayed_work_sync(). So fix this by introducing a
+dedicated boolean to control whether or not the work could be
+scheduled from NAPI.
+
+[1]
+==================================================================
+BUG: KASAN: use-after-free in refill_work+0x43/0xd4
+Read of size 2 at addr ffff88810562c92e by task kworker/2:1/42
+
+CPU: 2 PID: 42 Comm: kworker/2:1 Not tainted 5.19.0-rc1+ #480
+Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.16.0-0-gd239552ce722-prebuilt.qemu.org 04/01/2014
+Workqueue: events refill_work
+Call Trace:
+ <TASK>
+ dump_stack_lvl+0x34/0x44
+ print_report.cold+0xbb/0x6ac
+ ? _printk+0xad/0xde
+ ? refill_work+0x43/0xd4
+ kasan_report+0xa8/0x130
+ ? refill_work+0x43/0xd4
+ refill_work+0x43/0xd4
+ process_one_work+0x43d/0x780
+ worker_thread+0x2a0/0x6f0
+ ? process_one_work+0x780/0x780
+ kthread+0x167/0x1a0
+ ? kthread_exit+0x50/0x50
+ ret_from_fork+0x22/0x30
+ </TASK>
+...
+
+Fixes: b2baed69e605c ("virtio_net: set/cancel work on ndo_open/ndo_stop")
+Signed-off-by: Jason Wang <jasowang@redhat.com>
+Acked-by: Michael S. Tsirkin <mst@redhat.com>
+Reviewed-by: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/virtio_net.c | 37 ++++++++++++++++++++++++++++++++++---
+ 1 file changed, 34 insertions(+), 3 deletions(-)
+
+diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
+index c7804fce204c..206904e60784 100644
+--- a/drivers/net/virtio_net.c
++++ b/drivers/net/virtio_net.c
+@@ -242,9 +242,15 @@ struct virtnet_info {
+ /* Packet virtio header size */
+ u8 hdr_len;
+
+- /* Work struct for refilling if we run low on memory. */
++ /* Work struct for delayed refilling if we run low on memory. */
+ struct delayed_work refill;
+
++ /* Is delayed refill enabled? */
++ bool refill_enabled;
++
++ /* The lock to synchronize the access to refill_enabled */
++ spinlock_t refill_lock;
++
+ /* Work struct for config space updates */
+ struct work_struct config_work;
+
+@@ -348,6 +354,20 @@ static struct page *get_a_page(struct receive_queue *rq, gfp_t gfp_mask)
+ return p;
+ }
+
++static void enable_delayed_refill(struct virtnet_info *vi)
++{
++ spin_lock_bh(&vi->refill_lock);
++ vi->refill_enabled = true;
++ spin_unlock_bh(&vi->refill_lock);
++}
++
++static void disable_delayed_refill(struct virtnet_info *vi)
++{
++ spin_lock_bh(&vi->refill_lock);
++ vi->refill_enabled = false;
++ spin_unlock_bh(&vi->refill_lock);
++}
++
+ static void virtqueue_napi_schedule(struct napi_struct *napi,
+ struct virtqueue *vq)
+ {
+@@ -1527,8 +1547,12 @@ static int virtnet_receive(struct receive_queue *rq, int budget,
+ }
+
+ if (rq->vq->num_free > min((unsigned int)budget, virtqueue_get_vring_size(rq->vq)) / 2) {
+- if (!try_fill_recv(vi, rq, GFP_ATOMIC))
+- schedule_delayed_work(&vi->refill, 0);
++ if (!try_fill_recv(vi, rq, GFP_ATOMIC)) {
++ spin_lock(&vi->refill_lock);
++ if (vi->refill_enabled)
++ schedule_delayed_work(&vi->refill, 0);
++ spin_unlock(&vi->refill_lock);
++ }
+ }
+
+ u64_stats_update_begin(&rq->stats.syncp);
+@@ -1651,6 +1675,8 @@ static int virtnet_open(struct net_device *dev)
+ struct virtnet_info *vi = netdev_priv(dev);
+ int i, err;
+
++ enable_delayed_refill(vi);
++
+ for (i = 0; i < vi->max_queue_pairs; i++) {
+ if (i < vi->curr_queue_pairs)
+ /* Make sure we have some buffers: if oom use wq. */
+@@ -2033,6 +2059,8 @@ static int virtnet_close(struct net_device *dev)
+ struct virtnet_info *vi = netdev_priv(dev);
+ int i;
+
++ /* Make sure NAPI doesn't schedule refill work */
++ disable_delayed_refill(vi);
+ /* Make sure refill_work doesn't re-enable napi! */
+ cancel_delayed_work_sync(&vi->refill);
+
+@@ -2792,6 +2820,8 @@ static int virtnet_restore_up(struct virtio_device *vdev)
+
+ virtio_device_ready(vdev);
+
++ enable_delayed_refill(vi);
++
+ if (netif_running(vi->dev)) {
+ err = virtnet_open(vi->dev);
+ if (err)
+@@ -3534,6 +3564,7 @@ static int virtnet_probe(struct virtio_device *vdev)
+ vdev->priv = vi;
+
+ INIT_WORK(&vi->config_work, virtnet_config_changed_work);
++ spin_lock_init(&vi->refill_lock);
+
+ /* If we can receive ANY GSO packets, we must allocate large ones. */
+ if (virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_TSO4) ||
+--
+2.35.1
+