From: Sasha Levin Date: Sun, 31 Jul 2022 02:41:27 +0000 (-0400) Subject: Fixes for 5.18 X-Git-Tag: v5.4.209~40 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=88345d57a48a5fde397d4452c016aa265200924d;p=thirdparty%2Fkernel%2Fstable-queue.git Fixes for 5.18 Signed-off-by: Sasha Levin --- diff --git a/queue-5.18/documentation-fix-sctp_wmem-in-ip-sysctl.rst.patch b/queue-5.18/documentation-fix-sctp_wmem-in-ip-sysctl.rst.patch new file mode 100644 index 00000000000..b02da48b903 --- /dev/null +++ b/queue-5.18/documentation-fix-sctp_wmem-in-ip-sysctl.rst.patch @@ -0,0 +1,49 @@ +From 407d51997d610cc6d9c828fa8881ea99848b284a Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 21 Jul 2022 10:35:46 -0400 +Subject: Documentation: fix sctp_wmem in ip-sysctl.rst + +From: Xin Long + +[ Upstream commit aa709da0e032cee7c202047ecd75f437bb0126ed ] + +Since commit 1033990ac5b2 ("sctp: implement memory accounting on tx path"), +SCTP has supported memory accounting on tx path where 'sctp_wmem' is used +by sk_wmem_schedule(). So we should fix the description for this option in +ip-sysctl.rst accordingly. + +v1->v2: + - Improve the description as Marcelo suggested. + +Fixes: 1033990ac5b2 ("sctp: implement memory accounting on tx path") +Signed-off-by: Xin Long +Acked-by: Marcelo Ricardo Leitner +Signed-off-by: David S. Miller +Signed-off-by: Sasha Levin +--- + Documentation/networking/ip-sysctl.rst | 9 ++++++++- + 1 file changed, 8 insertions(+), 1 deletion(-) + +diff --git a/Documentation/networking/ip-sysctl.rst b/Documentation/networking/ip-sysctl.rst +index 8899b474edbf..e29017d4d7a2 100644 +--- a/Documentation/networking/ip-sysctl.rst ++++ b/Documentation/networking/ip-sysctl.rst +@@ -2848,7 +2848,14 @@ sctp_rmem - vector of 3 INTEGERs: min, default, max + Default: 4K + + sctp_wmem - vector of 3 INTEGERs: min, default, max +- Currently this tunable has no effect. ++ Only the first value ("min") is used, "default" and "max" are ++ ignored. 
++ ++ min: Minimum size of send buffer that can be used by SCTP sockets. ++ It is guaranteed to each SCTP socket (but not association) even ++ under moderate memory pressure. ++ ++ Default: 4K + + addr_scope_policy - INTEGER + Control IPv4 address scoping - draft-stewart-tsvwg-sctp-ipv4-00 +-- +2.35.1 + diff --git a/queue-5.18/i40e-fix-interface-init-with-msi-interrupts-no-msi-x.patch b/queue-5.18/i40e-fix-interface-init-with-msi-interrupts-no-msi-x.patch new file mode 100644 index 00000000000..708e22d76b5 --- /dev/null +++ b/queue-5.18/i40e-fix-interface-init-with-msi-interrupts-no-msi-x.patch @@ -0,0 +1,49 @@ +From 05fa122d8f66e657cb0fc88add89518817dcdee5 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 22 Jul 2022 10:54:01 -0700 +Subject: i40e: Fix interface init with MSI interrupts (no MSI-X) + +From: Michal Maloszewski + +[ Upstream commit 5fcbb711024aac6d4db385623e6f2fdf019f7782 ] + +Fix the inability to bring an interface up on a setup with +only MSI interrupts enabled (no MSI-X). +Solution is to add a default number of QPs = 1. This is enough, +since without MSI-X support driver enables only a basic feature set. 
+ +Fixes: bc6d33c8d93f ("i40e: Fix the number of queues available to be mapped for use") +Signed-off-by: Dawid Lukwinski +Signed-off-by: Michal Maloszewski +Tested-by: Dave Switzer +Signed-off-by: Tony Nguyen +Link: https://lore.kernel.org/r/20220722175401.112572-1-anthony.l.nguyen@intel.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/intel/i40e/i40e_main.c | 4 ++++ + 1 file changed, 4 insertions(+) + +diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c +index 6f01bffd7e5c..9471f47089b2 100644 +--- a/drivers/net/ethernet/intel/i40e/i40e_main.c ++++ b/drivers/net/ethernet/intel/i40e/i40e_main.c +@@ -1920,11 +1920,15 @@ static void i40e_vsi_setup_queue_map(struct i40e_vsi *vsi, + * non-zero req_queue_pairs says that user requested a new + * queue count via ethtool's set_channels, so use this + * value for queues distribution across traffic classes ++ * We need at least one queue pair for the interface ++ * to be usable as we see in else statement. + */ + if (vsi->req_queue_pairs > 0) + vsi->num_queue_pairs = vsi->req_queue_pairs; + else if (pf->flags & I40E_FLAG_MSIX_ENABLED) + vsi->num_queue_pairs = pf->num_lan_msix; ++ else ++ vsi->num_queue_pairs = 1; + } + + /* Number of queues per enabled TC */ +-- +2.35.1 + diff --git a/queue-5.18/ipv4-fix-data-races-around-sysctl_fib_notify_on_flag.patch b/queue-5.18/ipv4-fix-data-races-around-sysctl_fib_notify_on_flag.patch new file mode 100644 index 00000000000..1cf92292b00 --- /dev/null +++ b/queue-5.18/ipv4-fix-data-races-around-sysctl_fib_notify_on_flag.patch @@ -0,0 +1,54 @@ +From 14d9c1b800454ad8885b03a9c8ac5afc9f07e813 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 22 Jul 2022 11:22:05 -0700 +Subject: ipv4: Fix data-races around sysctl_fib_notify_on_flag_change. 
+ +From: Kuniyuki Iwashima + +[ Upstream commit 96b9bd8c6d125490f9adfb57d387ef81a55a103e ] + +While reading sysctl_fib_notify_on_flag_change, it can be changed +concurrently. Thus, we need to add READ_ONCE() to its readers. + +Fixes: 680aea08e78c ("net: ipv4: Emit notification when fib hardware flags are changed") +Signed-off-by: Kuniyuki Iwashima +Signed-off-by: David S. Miller +Signed-off-by: Sasha Levin +--- + net/ipv4/fib_trie.c | 7 +++++-- + 1 file changed, 5 insertions(+), 2 deletions(-) + +diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c +index 43a496272227..c1b53854047b 100644 +--- a/net/ipv4/fib_trie.c ++++ b/net/ipv4/fib_trie.c +@@ -1042,6 +1042,7 @@ fib_find_matching_alias(struct net *net, const struct fib_rt_info *fri) + + void fib_alias_hw_flags_set(struct net *net, const struct fib_rt_info *fri) + { ++ u8 fib_notify_on_flag_change; + struct fib_alias *fa_match; + struct sk_buff *skb; + int err; +@@ -1063,14 +1064,16 @@ void fib_alias_hw_flags_set(struct net *net, const struct fib_rt_info *fri) + WRITE_ONCE(fa_match->offload, fri->offload); + WRITE_ONCE(fa_match->trap, fri->trap); + ++ fib_notify_on_flag_change = READ_ONCE(net->ipv4.sysctl_fib_notify_on_flag_change); ++ + /* 2 means send notifications only if offload_failed was changed. 
*/ +- if (net->ipv4.sysctl_fib_notify_on_flag_change == 2 && ++ if (fib_notify_on_flag_change == 2 && + READ_ONCE(fa_match->offload_failed) == fri->offload_failed) + goto out; + + WRITE_ONCE(fa_match->offload_failed, fri->offload_failed); + +- if (!net->ipv4.sysctl_fib_notify_on_flag_change) ++ if (!fib_notify_on_flag_change) + goto out; + + skb = nlmsg_new(fib_nlmsg_size(fa_match->fa_info), GFP_ATOMIC); +-- +2.35.1 + diff --git a/queue-5.18/macsec-always-read-macsec_sa_attr_pn-as-a-u64.patch b/queue-5.18/macsec-always-read-macsec_sa_attr_pn-as-a-u64.patch new file mode 100644 index 00000000000..1da41afbc5a --- /dev/null +++ b/queue-5.18/macsec-always-read-macsec_sa_attr_pn-as-a-u64.patch @@ -0,0 +1,62 @@ +From de3e71dc13c0475f43550af3feec3f5cd1fff72c Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 22 Jul 2022 11:16:30 +0200 +Subject: macsec: always read MACSEC_SA_ATTR_PN as a u64 + +From: Sabrina Dubroca + +[ Upstream commit c630d1fe6219769049c87d1a6a0e9a6de55328a1 ] + +Currently, MACSEC_SA_ATTR_PN is handled inconsistently, sometimes as a +u32, sometimes forced into a u64 without checking the actual length of +the attribute. Instead, we can use nla_get_u64 everywhere, which will +read up to 64 bits into a u64, capped by the actual length of the +attribute coming from userspace. + +This fixes several issues: + - the check in validate_add_rxsa doesn't work with 32-bit attributes + - the checks in validate_add_txsa and validate_upd_sa incorrectly + reject X << 32 (with X != 0) + +Fixes: 48ef50fa866a ("macsec: Netlink support of XPN cipher suites (IEEE 802.1AEbw)") +Signed-off-by: Sabrina Dubroca +Signed-off-by: David S. 
Miller +Signed-off-by: Sasha Levin +--- + drivers/net/macsec.c | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c +index b3834e353c22..95578f04f212 100644 +--- a/drivers/net/macsec.c ++++ b/drivers/net/macsec.c +@@ -1698,7 +1698,7 @@ static bool validate_add_rxsa(struct nlattr **attrs) + return false; + + if (attrs[MACSEC_SA_ATTR_PN] && +- *(u64 *)nla_data(attrs[MACSEC_SA_ATTR_PN]) == 0) ++ nla_get_u64(attrs[MACSEC_SA_ATTR_PN]) == 0) + return false; + + if (attrs[MACSEC_SA_ATTR_ACTIVE]) { +@@ -1941,7 +1941,7 @@ static bool validate_add_txsa(struct nlattr **attrs) + if (nla_get_u8(attrs[MACSEC_SA_ATTR_AN]) >= MACSEC_NUM_AN) + return false; + +- if (nla_get_u32(attrs[MACSEC_SA_ATTR_PN]) == 0) ++ if (nla_get_u64(attrs[MACSEC_SA_ATTR_PN]) == 0) + return false; + + if (attrs[MACSEC_SA_ATTR_ACTIVE]) { +@@ -2295,7 +2295,7 @@ static bool validate_upd_sa(struct nlattr **attrs) + if (nla_get_u8(attrs[MACSEC_SA_ATTR_AN]) >= MACSEC_NUM_AN) + return false; + +- if (attrs[MACSEC_SA_ATTR_PN] && nla_get_u32(attrs[MACSEC_SA_ATTR_PN]) == 0) ++ if (attrs[MACSEC_SA_ATTR_PN] && nla_get_u64(attrs[MACSEC_SA_ATTR_PN]) == 0) + return false; + + if (attrs[MACSEC_SA_ATTR_ACTIVE]) { +-- +2.35.1 + diff --git a/queue-5.18/macsec-fix-error-message-in-macsec_add_rxsa-and-_txs.patch b/queue-5.18/macsec-fix-error-message-in-macsec_add_rxsa-and-_txs.patch new file mode 100644 index 00000000000..a281cd71040 --- /dev/null +++ b/queue-5.18/macsec-fix-error-message-in-macsec_add_rxsa-and-_txs.patch @@ -0,0 +1,44 @@ +From e53ed1206d6ed5463fbd3e146f28f13fc16358c8 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 22 Jul 2022 11:16:28 +0200 +Subject: macsec: fix error message in macsec_add_rxsa and _txsa + +From: Sabrina Dubroca + +[ Upstream commit 3240eac4ff20e51b87600dbd586ed814daf313db ] + +The expected length is MACSEC_SALT_LEN, not MACSEC_SA_ATTR_SALT. 
+ +Fixes: 48ef50fa866a ("macsec: Netlink support of XPN cipher suites (IEEE 802.1AEbw)") +Signed-off-by: Sabrina Dubroca +Signed-off-by: David S. Miller +Signed-off-by: Sasha Levin +--- + drivers/net/macsec.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c +index 769a1eca6bd8..634452d3ecc5 100644 +--- a/drivers/net/macsec.c ++++ b/drivers/net/macsec.c +@@ -1770,7 +1770,7 @@ static int macsec_add_rxsa(struct sk_buff *skb, struct genl_info *info) + if (nla_len(tb_sa[MACSEC_SA_ATTR_SALT]) != MACSEC_SALT_LEN) { + pr_notice("macsec: nl: add_rxsa: bad salt length: %d != %d\n", + nla_len(tb_sa[MACSEC_SA_ATTR_SALT]), +- MACSEC_SA_ATTR_SALT); ++ MACSEC_SALT_LEN); + rtnl_unlock(); + return -EINVAL; + } +@@ -2012,7 +2012,7 @@ static int macsec_add_txsa(struct sk_buff *skb, struct genl_info *info) + if (nla_len(tb_sa[MACSEC_SA_ATTR_SALT]) != MACSEC_SALT_LEN) { + pr_notice("macsec: nl: add_txsa: bad salt length: %d != %d\n", + nla_len(tb_sa[MACSEC_SA_ATTR_SALT]), +- MACSEC_SA_ATTR_SALT); ++ MACSEC_SALT_LEN); + rtnl_unlock(); + return -EINVAL; + } +-- +2.35.1 + diff --git a/queue-5.18/macsec-fix-null-deref-in-macsec_add_rxsa.patch b/queue-5.18/macsec-fix-null-deref-in-macsec_add_rxsa.patch new file mode 100644 index 00000000000..17892ee2470 --- /dev/null +++ b/queue-5.18/macsec-fix-null-deref-in-macsec_add_rxsa.patch @@ -0,0 +1,45 @@ +From c8087e08cedefd6ad82ce66bcafb7dba58474712 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 22 Jul 2022 11:16:27 +0200 +Subject: macsec: fix NULL deref in macsec_add_rxsa + +From: Sabrina Dubroca + +[ Upstream commit f46040eeaf2e523a4096199fd93a11e794818009 ] + +Commit 48ef50fa866a added a test on tb_sa[MACSEC_SA_ATTR_PN], but +nothing guarantees that it's not NULL at this point. The same code was +added to macsec_add_txsa, but there it's not a problem because +validate_add_txsa checks that the MACSEC_SA_ATTR_PN attribute is +present. 
+ +Note: it's not possible to reproduce with iproute, because iproute +doesn't allow creating an SA without specifying the PN. + +Fixes: 48ef50fa866a ("macsec: Netlink support of XPN cipher suites (IEEE 802.1AEbw)") +Link: https://bugzilla.kernel.org/show_bug.cgi?id=208315 +Reported-by: Frantisek Sumsal +Signed-off-by: Sabrina Dubroca +Signed-off-by: David S. Miller +Signed-off-by: Sasha Levin +--- + drivers/net/macsec.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c +index 817577e713d7..769a1eca6bd8 100644 +--- a/drivers/net/macsec.c ++++ b/drivers/net/macsec.c +@@ -1753,7 +1753,8 @@ static int macsec_add_rxsa(struct sk_buff *skb, struct genl_info *info) + } + + pn_len = secy->xpn ? MACSEC_XPN_PN_LEN : MACSEC_DEFAULT_PN_LEN; +- if (nla_len(tb_sa[MACSEC_SA_ATTR_PN]) != pn_len) { ++ if (tb_sa[MACSEC_SA_ATTR_PN] && ++ nla_len(tb_sa[MACSEC_SA_ATTR_PN]) != pn_len) { + pr_notice("macsec: nl: add_rxsa: bad pn length: %d != %d\n", + nla_len(tb_sa[MACSEC_SA_ATTR_PN]), pn_len); + rtnl_unlock(); +-- +2.35.1 + diff --git a/queue-5.18/macsec-limit-replay-window-size-with-xpn.patch b/queue-5.18/macsec-limit-replay-window-size-with-xpn.patch new file mode 100644 index 00000000000..f2f18baab08 --- /dev/null +++ b/queue-5.18/macsec-limit-replay-window-size-with-xpn.patch @@ -0,0 +1,81 @@ +From 5fb6923084846a2bd14faa0eeff50e07078145b5 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 22 Jul 2022 11:16:29 +0200 +Subject: macsec: limit replay window size with XPN + +From: Sabrina Dubroca + +[ Upstream commit b07a0e2044057f201d694ab474f5c42a02b6465b ] + +IEEE 802.1AEbw-2013 (section 10.7.8) specifies that the maximum value +of the replay window is 2^30-1, to help with recovery of the upper +bits of the PN. + +To avoid leaving the existing macsec device in an inconsistent state +if this test fails during changelink, reuse the cleanup mechanism +introduced for HW offload. 
This wasn't needed until now because +macsec_changelink_common could not fail during changelink, as +modifying the cipher suite was not allowed. + +Finally, this must happen after handling IFLA_MACSEC_CIPHER_SUITE so +that secy->xpn is set. + +Fixes: 48ef50fa866a ("macsec: Netlink support of XPN cipher suites (IEEE 802.1AEbw)") +Signed-off-by: Sabrina Dubroca +Signed-off-by: David S. Miller +Signed-off-by: Sasha Levin +--- + drivers/net/macsec.c | 16 ++++++++++++---- + 1 file changed, 12 insertions(+), 4 deletions(-) + +diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c +index 634452d3ecc5..b3834e353c22 100644 +--- a/drivers/net/macsec.c ++++ b/drivers/net/macsec.c +@@ -243,6 +243,7 @@ static struct macsec_cb *macsec_skb_cb(struct sk_buff *skb) + #define DEFAULT_SEND_SCI true + #define DEFAULT_ENCRYPT false + #define DEFAULT_ENCODING_SA 0 ++#define MACSEC_XPN_MAX_REPLAY_WINDOW (((1 << 30) - 1)) + + static bool send_sci(const struct macsec_secy *secy) + { +@@ -3746,9 +3747,6 @@ static int macsec_changelink_common(struct net_device *dev, + secy->operational = tx_sa && tx_sa->active; + } + +- if (data[IFLA_MACSEC_WINDOW]) +- secy->replay_window = nla_get_u32(data[IFLA_MACSEC_WINDOW]); +- + if (data[IFLA_MACSEC_ENCRYPT]) + tx_sc->encrypt = !!nla_get_u8(data[IFLA_MACSEC_ENCRYPT]); + +@@ -3794,6 +3792,16 @@ static int macsec_changelink_common(struct net_device *dev, + } + } + ++ if (data[IFLA_MACSEC_WINDOW]) { ++ secy->replay_window = nla_get_u32(data[IFLA_MACSEC_WINDOW]); ++ ++ /* IEEE 802.1AEbw-2013 10.7.8 - maximum replay window ++ * for XPN cipher suites */ ++ if (secy->xpn && ++ secy->replay_window > MACSEC_XPN_MAX_REPLAY_WINDOW) ++ return -EINVAL; ++ } ++ + return 0; + } + +@@ -3823,7 +3831,7 @@ static int macsec_changelink(struct net_device *dev, struct nlattr *tb[], + + ret = macsec_changelink_common(dev, data); + if (ret) +- return ret; ++ goto cleanup; + + /* If h/w offloading is available, propagate to the device */ + if (macsec_is_offloaded(macsec)) { 
+-- +2.35.1 + diff --git a/queue-5.18/mlxsw-spectrum_router-simplify-list-unwinding.patch b/queue-5.18/mlxsw-spectrum_router-simplify-list-unwinding.patch new file mode 100644 index 00000000000..6e95f4ed55f --- /dev/null +++ b/queue-5.18/mlxsw-spectrum_router-simplify-list-unwinding.patch @@ -0,0 +1,97 @@ +From 767f53c37c140978535d3e6ac6a78f0232e8b90b Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sat, 2 Apr 2022 08:15:16 -0400 +Subject: mlxsw: spectrum_router: simplify list unwinding + +From: Tom Rix + +[ Upstream commit 6f2f36e5f932c58e370bff79aba7f05963ea1c2a ] + +The setting of i here +err_nexthop6_group_get: + i = nrt6; +Is redundant, i is already nrt6. So remove +this statement. + +The for loop for the unwinding +err_rt6_create: + for (i--; i >= 0; i--) { +Is equivalent to + for (; i > 0; i--) { + +Two consecutive labels can be reduced to one. + +Signed-off-by: Tom Rix +Reviewed-by: Ido Schimmel +Link: https://lore.kernel.org/r/20220402121516.2750284-1-trix@redhat.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + .../ethernet/mellanox/mlxsw/spectrum_router.c | 20 ++++++++----------- + 1 file changed, 8 insertions(+), 12 deletions(-) + +diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +index c00d6c4ed37c..245d36696486 100644 +--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c ++++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +@@ -7022,7 +7022,7 @@ mlxsw_sp_fib6_entry_nexthop_add(struct mlxsw_sp *mlxsw_sp, + mlxsw_sp_rt6 = mlxsw_sp_rt6_create(rt_arr[i]); + if (IS_ERR(mlxsw_sp_rt6)) { + err = PTR_ERR(mlxsw_sp_rt6); +- goto err_rt6_create; ++ goto err_rt6_unwind; + } + + list_add_tail(&mlxsw_sp_rt6->list, &fib6_entry->rt6_list); +@@ -7031,14 +7031,12 @@ mlxsw_sp_fib6_entry_nexthop_add(struct mlxsw_sp *mlxsw_sp, + + err = mlxsw_sp_nexthop6_group_update(mlxsw_sp, op_ctx, fib6_entry); + if (err) +- goto err_nexthop6_group_update; ++ goto 
err_rt6_unwind; + + return 0; + +-err_nexthop6_group_update: +- i = nrt6; +-err_rt6_create: +- for (i--; i >= 0; i--) { ++err_rt6_unwind: ++ for (; i > 0; i--) { + fib6_entry->nrt6--; + mlxsw_sp_rt6 = list_last_entry(&fib6_entry->rt6_list, + struct mlxsw_sp_rt6, list); +@@ -7166,7 +7164,7 @@ mlxsw_sp_fib6_entry_create(struct mlxsw_sp *mlxsw_sp, + mlxsw_sp_rt6 = mlxsw_sp_rt6_create(rt_arr[i]); + if (IS_ERR(mlxsw_sp_rt6)) { + err = PTR_ERR(mlxsw_sp_rt6); +- goto err_rt6_create; ++ goto err_rt6_unwind; + } + list_add_tail(&mlxsw_sp_rt6->list, &fib6_entry->rt6_list); + fib6_entry->nrt6++; +@@ -7174,7 +7172,7 @@ mlxsw_sp_fib6_entry_create(struct mlxsw_sp *mlxsw_sp, + + err = mlxsw_sp_nexthop6_group_get(mlxsw_sp, fib6_entry); + if (err) +- goto err_nexthop6_group_get; ++ goto err_rt6_unwind; + + err = mlxsw_sp_nexthop_group_vr_link(fib_entry->nh_group, + fib_node->fib); +@@ -7193,10 +7191,8 @@ mlxsw_sp_fib6_entry_create(struct mlxsw_sp *mlxsw_sp, + mlxsw_sp_nexthop_group_vr_unlink(fib_entry->nh_group, fib_node->fib); + err_nexthop_group_vr_link: + mlxsw_sp_nexthop6_group_put(mlxsw_sp, fib_entry); +-err_nexthop6_group_get: +- i = nrt6; +-err_rt6_create: +- for (i--; i >= 0; i--) { ++err_rt6_unwind: ++ for (; i > 0; i--) { + fib6_entry->nrt6--; + mlxsw_sp_rt6 = list_last_entry(&fib6_entry->rt6_list, + struct mlxsw_sp_rt6, list); +-- +2.35.1 + diff --git a/queue-5.18/mptcp-don-t-send-rst-for-single-subflow.patch b/queue-5.18/mptcp-don-t-send-rst-for-single-subflow.patch new file mode 100644 index 00000000000..559155dcf56 --- /dev/null +++ b/queue-5.18/mptcp-don-t-send-rst-for-single-subflow.patch @@ -0,0 +1,50 @@ +From 0a782d12ad419f820fbaf1d637a788097014b5b1 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 22 Apr 2022 14:55:36 -0700 +Subject: mptcp: don't send RST for single subflow + +From: Geliang Tang + +[ Upstream commit 1761fed2567807f26fbd53032ff622f55978c7a9 ] + +When a bad checksum is detected and a single subflow is in use, don't +send RST + MP_FAIL, send 
data_ack + MP_FAIL instead. + +So invoke tcp_send_active_reset() only when mptcp_has_another_subflow() +is true. + +Signed-off-by: Geliang Tang +Signed-off-by: Mat Martineau +Signed-off-by: David S. Miller +Signed-off-by: Sasha Levin +--- + net/mptcp/subflow.c | 10 +++++----- + 1 file changed, 5 insertions(+), 5 deletions(-) + +diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c +index 7919e259175d..ccae50eba664 100644 +--- a/net/mptcp/subflow.c ++++ b/net/mptcp/subflow.c +@@ -1221,14 +1221,14 @@ static bool subflow_check_data_avail(struct sock *ssk) + /* RFC 8684 section 3.7. */ + if (subflow->send_mp_fail) { + if (mptcp_has_another_subflow(ssk)) { ++ ssk->sk_err = EBADMSG; ++ tcp_set_state(ssk, TCP_CLOSE); ++ subflow->reset_transient = 0; ++ subflow->reset_reason = MPTCP_RST_EMIDDLEBOX; ++ tcp_send_active_reset(ssk, GFP_ATOMIC); + while ((skb = skb_peek(&ssk->sk_receive_queue))) + sk_eat_skb(ssk, skb); + } +- ssk->sk_err = EBADMSG; +- tcp_set_state(ssk, TCP_CLOSE); +- subflow->reset_transient = 0; +- subflow->reset_reason = MPTCP_RST_EMIDDLEBOX; +- tcp_send_active_reset(ssk, GFP_ATOMIC); + WRITE_ONCE(subflow->data_avail, MPTCP_SUBFLOW_NODATA); + return true; + } +-- +2.35.1 + diff --git a/queue-5.18/net-dsa-fix-reference-counting-for-lag-fdbs.patch b/queue-5.18/net-dsa-fix-reference-counting-for-lag-fdbs.patch new file mode 100644 index 00000000000..6e5c2133b53 --- /dev/null +++ b/queue-5.18/net-dsa-fix-reference-counting-for-lag-fdbs.patch @@ -0,0 +1,42 @@ +From 43ae80e638fda0d574ade85ebc27ebd893ec0433 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sat, 23 Jul 2022 04:24:11 +0300 +Subject: net: dsa: fix reference counting for LAG FDBs + +From: Vladimir Oltean + +[ Upstream commit c7560d1203b7a1ea0b99a5c575547e95d564b2a8 ] + +Due to an invalid conflict resolution on my side while working on 2 +different series (LAG FDBs and FDB isolation), dsa_switch_do_lag_fdb_add() +does not store the database associated with a dsa_mac_addr structure. 
+ +So after adding an FDB entry associated with a LAG, dsa_mac_addr_find() +fails to find it while deleting it, because &a->db is zeroized memory +for all stored FDB entries of lag->fdbs, and dsa_switch_do_lag_fdb_del() +returns -ENOENT rather than deleting the entry. + +Fixes: c26933639b54 ("net: dsa: request drivers to perform FDB isolation") +Signed-off-by: Vladimir Oltean +Link: https://lore.kernel.org/r/20220723012411.1125066-1-vladimir.oltean@nxp.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + net/dsa/switch.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/net/dsa/switch.c b/net/dsa/switch.c +index d8a80cf9742c..52f84ea349d2 100644 +--- a/net/dsa/switch.c ++++ b/net/dsa/switch.c +@@ -363,6 +363,7 @@ static int dsa_switch_do_lag_fdb_add(struct dsa_switch *ds, struct dsa_lag *lag, + + ether_addr_copy(a->addr, addr); + a->vid = vid; ++ a->db = db; + refcount_set(&a->refcount, 1); + list_add_tail(&a->list, &lag->fdbs); + +-- +2.35.1 + diff --git a/queue-5.18/net-fix-data-races-around-sysctl_-rw-mem-_offset.patch b/queue-5.18/net-fix-data-races-around-sysctl_-rw-mem-_offset.patch new file mode 100644 index 00000000000..54c7d94bdbc --- /dev/null +++ b/queue-5.18/net-fix-data-races-around-sysctl_-rw-mem-_offset.patch @@ -0,0 +1,207 @@ +From 8591925a436afb2b3927d6f50f39b68eec4dc35d Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 22 Jul 2022 11:22:00 -0700 +Subject: net: Fix data-races around sysctl_[rw]mem(_offset)?. + +From: Kuniyuki Iwashima + +[ Upstream commit 02739545951ad4c1215160db7fbf9b7a918d3c0b ] + +While reading these sysctl variables, they can be changed concurrently. +Thus, we need to add READ_ONCE() to their readers. 
+ + - .sysctl_rmem + - .sysctl_wmem + - .sysctl_rmem_offset + - .sysctl_wmem_offset + - sysctl_tcp_rmem[1, 2] + - sysctl_tcp_wmem[1, 2] + - sysctl_decnet_rmem[1] + - sysctl_decnet_wmem[1] + - sysctl_tipc_rmem[1] + +Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") +Signed-off-by: Kuniyuki Iwashima +Signed-off-by: David S. Miller +Signed-off-by: Sasha Levin +--- + include/net/sock.h | 8 ++++---- + net/decnet/af_decnet.c | 4 ++-- + net/ipv4/tcp.c | 6 +++--- + net/ipv4/tcp_input.c | 13 +++++++------ + net/ipv4/tcp_output.c | 2 +- + net/mptcp/protocol.c | 6 +++--- + net/tipc/socket.c | 2 +- + 7 files changed, 21 insertions(+), 20 deletions(-) + +diff --git a/include/net/sock.h b/include/net/sock.h +index 6bef0ffb1e7b..9563a093fdfc 100644 +--- a/include/net/sock.h ++++ b/include/net/sock.h +@@ -2834,18 +2834,18 @@ static inline int sk_get_wmem0(const struct sock *sk, const struct proto *proto) + { + /* Does this proto have per netns sysctl_wmem ? */ + if (proto->sysctl_wmem_offset) +- return *(int *)((void *)sock_net(sk) + proto->sysctl_wmem_offset); ++ return READ_ONCE(*(int *)((void *)sock_net(sk) + proto->sysctl_wmem_offset)); + +- return *proto->sysctl_wmem; ++ return READ_ONCE(*proto->sysctl_wmem); + } + + static inline int sk_get_rmem0(const struct sock *sk, const struct proto *proto) + { + /* Does this proto have per netns sysctl_rmem ? 
*/ + if (proto->sysctl_rmem_offset) +- return *(int *)((void *)sock_net(sk) + proto->sysctl_rmem_offset); ++ return READ_ONCE(*(int *)((void *)sock_net(sk) + proto->sysctl_rmem_offset)); + +- return *proto->sysctl_rmem; ++ return READ_ONCE(*proto->sysctl_rmem); + } + + /* Default TCP Small queue budget is ~1 ms of data (1sec >> 10) +diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c +index dc92a67baea3..7d542eb46172 100644 +--- a/net/decnet/af_decnet.c ++++ b/net/decnet/af_decnet.c +@@ -480,8 +480,8 @@ static struct sock *dn_alloc_sock(struct net *net, struct socket *sock, gfp_t gf + sk->sk_family = PF_DECnet; + sk->sk_protocol = 0; + sk->sk_allocation = gfp; +- sk->sk_sndbuf = sysctl_decnet_wmem[1]; +- sk->sk_rcvbuf = sysctl_decnet_rmem[1]; ++ sk->sk_sndbuf = READ_ONCE(sysctl_decnet_wmem[1]); ++ sk->sk_rcvbuf = READ_ONCE(sysctl_decnet_rmem[1]); + + /* Initialization of DECnet Session Control Port */ + scp = DN_SK(sk); +diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c +index 60b46f2a6896..91735d631a28 100644 +--- a/net/ipv4/tcp.c ++++ b/net/ipv4/tcp.c +@@ -452,8 +452,8 @@ void tcp_init_sock(struct sock *sk) + + icsk->icsk_sync_mss = tcp_sync_mss; + +- WRITE_ONCE(sk->sk_sndbuf, sock_net(sk)->ipv4.sysctl_tcp_wmem[1]); +- WRITE_ONCE(sk->sk_rcvbuf, sock_net(sk)->ipv4.sysctl_tcp_rmem[1]); ++ WRITE_ONCE(sk->sk_sndbuf, READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_wmem[1])); ++ WRITE_ONCE(sk->sk_rcvbuf, READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_rmem[1])); + + sk_sockets_allocated_inc(sk); + } +@@ -1743,7 +1743,7 @@ int tcp_set_rcvlowat(struct sock *sk, int val) + if (sk->sk_userlocks & SOCK_RCVBUF_LOCK) + cap = sk->sk_rcvbuf >> 1; + else +- cap = sock_net(sk)->ipv4.sysctl_tcp_rmem[2] >> 1; ++ cap = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_rmem[2]) >> 1; + val = min(val, cap); + WRITE_ONCE(sk->sk_rcvlowat, val ? 
: 1); + +diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c +index de066fad7dfe..f09b1321a960 100644 +--- a/net/ipv4/tcp_input.c ++++ b/net/ipv4/tcp_input.c +@@ -426,7 +426,7 @@ static void tcp_sndbuf_expand(struct sock *sk) + + if (sk->sk_sndbuf < sndmem) + WRITE_ONCE(sk->sk_sndbuf, +- min(sndmem, sock_net(sk)->ipv4.sysctl_tcp_wmem[2])); ++ min(sndmem, READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_wmem[2]))); + } + + /* 2. Tuning advertised window (window_clamp, rcv_ssthresh) +@@ -461,7 +461,7 @@ static int __tcp_grow_window(const struct sock *sk, const struct sk_buff *skb, + struct tcp_sock *tp = tcp_sk(sk); + /* Optimize this! */ + int truesize = tcp_win_from_space(sk, skbtruesize) >> 1; +- int window = tcp_win_from_space(sk, sock_net(sk)->ipv4.sysctl_tcp_rmem[2]) >> 1; ++ int window = tcp_win_from_space(sk, READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_rmem[2])) >> 1; + + while (tp->rcv_ssthresh <= window) { + if (truesize <= skb->len) +@@ -574,16 +574,17 @@ static void tcp_clamp_window(struct sock *sk) + struct tcp_sock *tp = tcp_sk(sk); + struct inet_connection_sock *icsk = inet_csk(sk); + struct net *net = sock_net(sk); ++ int rmem2; + + icsk->icsk_ack.quick = 0; ++ rmem2 = READ_ONCE(net->ipv4.sysctl_tcp_rmem[2]); + +- if (sk->sk_rcvbuf < net->ipv4.sysctl_tcp_rmem[2] && ++ if (sk->sk_rcvbuf < rmem2 && + !(sk->sk_userlocks & SOCK_RCVBUF_LOCK) && + !tcp_under_memory_pressure(sk) && + sk_memory_allocated(sk) < sk_prot_mem_limits(sk, 0)) { + WRITE_ONCE(sk->sk_rcvbuf, +- min(atomic_read(&sk->sk_rmem_alloc), +- net->ipv4.sysctl_tcp_rmem[2])); ++ min(atomic_read(&sk->sk_rmem_alloc), rmem2)); + } + if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf) + tp->rcv_ssthresh = min(tp->window_clamp, 2U * tp->advmss); +@@ -745,7 +746,7 @@ void tcp_rcv_space_adjust(struct sock *sk) + + do_div(rcvwin, tp->advmss); + rcvbuf = min_t(u64, rcvwin * rcvmem, +- sock_net(sk)->ipv4.sysctl_tcp_rmem[2]); ++ READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_rmem[2])); + if (rcvbuf > sk->sk_rcvbuf) { + 
WRITE_ONCE(sk->sk_rcvbuf, rcvbuf); + +diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c +index 60c9f7f444e0..66836b8bd46f 100644 +--- a/net/ipv4/tcp_output.c ++++ b/net/ipv4/tcp_output.c +@@ -238,7 +238,7 @@ void tcp_select_initial_window(const struct sock *sk, int __space, __u32 mss, + *rcv_wscale = 0; + if (wscale_ok) { + /* Set window scaling on max possible window */ +- space = max_t(u32, space, sock_net(sk)->ipv4.sysctl_tcp_rmem[2]); ++ space = max_t(u32, space, READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_rmem[2])); + space = max_t(u32, space, sysctl_rmem_max); + space = min_t(u32, space, *window_clamp); + *rcv_wscale = clamp_t(int, ilog2(space) - 15, +diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c +index e2790a6e90fb..07b5a2044cab 100644 +--- a/net/mptcp/protocol.c ++++ b/net/mptcp/protocol.c +@@ -1900,7 +1900,7 @@ static void mptcp_rcv_space_adjust(struct mptcp_sock *msk, int copied) + + do_div(rcvwin, advmss); + rcvbuf = min_t(u64, rcvwin * rcvmem, +- sock_net(sk)->ipv4.sysctl_tcp_rmem[2]); ++ READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_rmem[2])); + + if (rcvbuf > sk->sk_rcvbuf) { + u32 window_clamp; +@@ -2597,8 +2597,8 @@ static int mptcp_init_sock(struct sock *sk) + mptcp_ca_reset(sk); + + sk_sockets_allocated_inc(sk); +- sk->sk_rcvbuf = sock_net(sk)->ipv4.sysctl_tcp_rmem[1]; +- sk->sk_sndbuf = sock_net(sk)->ipv4.sysctl_tcp_wmem[1]; ++ sk->sk_rcvbuf = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_rmem[1]); ++ sk->sk_sndbuf = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_wmem[1]); + + return 0; + } +diff --git a/net/tipc/socket.c b/net/tipc/socket.c +index 43509c7e90fc..f1c3b8eb4b3d 100644 +--- a/net/tipc/socket.c ++++ b/net/tipc/socket.c +@@ -517,7 +517,7 @@ static int tipc_sk_create(struct net *net, struct socket *sock, + timer_setup(&sk->sk_timer, tipc_sk_timeout, 0); + sk->sk_shutdown = 0; + sk->sk_backlog_rcv = tipc_sk_backlog_rcv; +- sk->sk_rcvbuf = sysctl_tipc_rmem[1]; ++ sk->sk_rcvbuf = READ_ONCE(sysctl_tipc_rmem[1]); + sk->sk_data_ready = 
tipc_data_ready; + sk->sk_write_space = tipc_write_space; + sk->sk_destruct = tipc_sock_destruct; +-- +2.35.1 + diff --git a/queue-5.18/net-funeth-fix-fun_xdp_tx-and-xdp-packet-reclaim.patch b/queue-5.18/net-funeth-fix-fun_xdp_tx-and-xdp-packet-reclaim.patch new file mode 100644 index 00000000000..aab5ca4539e --- /dev/null +++ b/queue-5.18/net-funeth-fix-fun_xdp_tx-and-xdp-packet-reclaim.patch @@ -0,0 +1,149 @@ +From a1ea53d946a4be3b0122dd566b0e08163dfb61f7 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 26 Jul 2022 14:59:23 -0700 +Subject: net/funeth: Fix fun_xdp_tx() and XDP packet reclaim + +From: Dimitris Michailidis + +[ Upstream commit 51a83391d77bb0f7ff0aef06ca4c7f5aa9e80b4c ] + +The current implementation of fun_xdp_tx(), used for XDP_TX, is +incorrect in that it takes an address/length pair and later releases it +with page_frag_free(). It is OK for XDP_TX but the same code is used by +ndo_xdp_xmit. In that case it loses the XDP memory type and releases the +packet incorrectly for some of the types. Assorted breakage follows. + +Change fun_xdp_tx() to take xdp_frame and rely on xdp_return_frame() in +reclaim. 
+ +Fixes: db37bc177dae ("net/funeth: add the data path") +Signed-off-by: Dimitris Michailidis +Link: https://lore.kernel.org/r/20220726215923.7887-1-dmichail@fungible.com +Signed-off-by: Paolo Abeni +Signed-off-by: Sasha Levin +--- + .../net/ethernet/fungible/funeth/funeth_rx.c | 5 ++++- + .../net/ethernet/fungible/funeth/funeth_tx.c | 20 +++++++++---------- + .../ethernet/fungible/funeth/funeth_txrx.h | 6 +++--- + 3 files changed, 16 insertions(+), 15 deletions(-) + +diff --git a/drivers/net/ethernet/fungible/funeth/funeth_rx.c b/drivers/net/ethernet/fungible/funeth/funeth_rx.c +index 0f6a549b9f67..29a6c2ede43a 100644 +--- a/drivers/net/ethernet/fungible/funeth/funeth_rx.c ++++ b/drivers/net/ethernet/fungible/funeth/funeth_rx.c +@@ -142,6 +142,7 @@ static void *fun_run_xdp(struct funeth_rxq *q, skb_frag_t *frags, void *buf_va, + int ref_ok, struct funeth_txq *xdp_q) + { + struct bpf_prog *xdp_prog; ++ struct xdp_frame *xdpf; + struct xdp_buff xdp; + u32 act; + +@@ -163,7 +164,9 @@ static void *fun_run_xdp(struct funeth_rxq *q, skb_frag_t *frags, void *buf_va, + case XDP_TX: + if (unlikely(!ref_ok)) + goto pass; +- if (!fun_xdp_tx(xdp_q, xdp.data, xdp.data_end - xdp.data)) ++ ++ xdpf = xdp_convert_buff_to_frame(&xdp); ++ if (!xdpf || !fun_xdp_tx(xdp_q, xdpf)) + goto xdp_error; + FUN_QSTAT_INC(q, xdp_tx); + q->xdp_flush |= FUN_XDP_FLUSH_TX; +diff --git a/drivers/net/ethernet/fungible/funeth/funeth_tx.c b/drivers/net/ethernet/fungible/funeth/funeth_tx.c +index ff6e29237253..2f6698b98b03 100644 +--- a/drivers/net/ethernet/fungible/funeth/funeth_tx.c ++++ b/drivers/net/ethernet/fungible/funeth/funeth_tx.c +@@ -466,7 +466,7 @@ static unsigned int fun_xdpq_clean(struct funeth_txq *q, unsigned int budget) + + do { + fun_xdp_unmap(q, reclaim_idx); +- page_frag_free(q->info[reclaim_idx].vaddr); ++ xdp_return_frame(q->info[reclaim_idx].xdpf); + + trace_funeth_tx_free(q, reclaim_idx, 1, head); + +@@ -479,11 +479,11 @@ static unsigned int fun_xdpq_clean(struct funeth_txq *q, 
unsigned int budget) + return npkts; + } + +-bool fun_xdp_tx(struct funeth_txq *q, void *data, unsigned int len) ++bool fun_xdp_tx(struct funeth_txq *q, struct xdp_frame *xdpf) + { + struct fun_eth_tx_req *req; + struct fun_dataop_gl *gle; +- unsigned int idx; ++ unsigned int idx, len; + dma_addr_t dma; + + if (fun_txq_avail(q) < FUN_XDP_CLEAN_THRES) +@@ -494,7 +494,8 @@ bool fun_xdp_tx(struct funeth_txq *q, void *data, unsigned int len) + return false; + } + +- dma = dma_map_single(q->dma_dev, data, len, DMA_TO_DEVICE); ++ len = xdpf->len; ++ dma = dma_map_single(q->dma_dev, xdpf->data, len, DMA_TO_DEVICE); + if (unlikely(dma_mapping_error(q->dma_dev, dma))) { + FUN_QSTAT_INC(q, tx_map_err); + return false; +@@ -514,7 +515,7 @@ bool fun_xdp_tx(struct funeth_txq *q, void *data, unsigned int len) + gle = (struct fun_dataop_gl *)req->dataop.imm; + fun_dataop_gl_init(gle, 0, 0, len, dma); + +- q->info[idx].vaddr = data; ++ q->info[idx].xdpf = xdpf; + + u64_stats_update_begin(&q->syncp); + q->stats.tx_bytes += len; +@@ -545,12 +546,9 @@ int fun_xdp_xmit_frames(struct net_device *dev, int n, + if (unlikely(q_idx >= fp->num_xdpqs)) + return -ENXIO; + +- for (q = xdpqs[q_idx], i = 0; i < n; i++) { +- const struct xdp_frame *xdpf = frames[i]; +- +- if (!fun_xdp_tx(q, xdpf->data, xdpf->len)) ++ for (q = xdpqs[q_idx], i = 0; i < n; i++) ++ if (!fun_xdp_tx(q, frames[i])) + break; +- } + + if (unlikely(flags & XDP_XMIT_FLUSH)) + fun_txq_wr_db(q); +@@ -577,7 +575,7 @@ static void fun_xdpq_purge(struct funeth_txq *q) + unsigned int idx = q->cons_cnt & q->mask; + + fun_xdp_unmap(q, idx); +- page_frag_free(q->info[idx].vaddr); ++ xdp_return_frame(q->info[idx].xdpf); + q->cons_cnt++; + } + } +diff --git a/drivers/net/ethernet/fungible/funeth/funeth_txrx.h b/drivers/net/ethernet/fungible/funeth/funeth_txrx.h +index 04c9f91b7489..8708e2895946 100644 +--- a/drivers/net/ethernet/fungible/funeth/funeth_txrx.h ++++ b/drivers/net/ethernet/fungible/funeth/funeth_txrx.h +@@ -95,8 +95,8 @@ 
struct funeth_txq_stats { /* per Tx queue SW counters */ + + struct funeth_tx_info { /* per Tx descriptor state */ + union { +- struct sk_buff *skb; /* associated packet */ +- void *vaddr; /* start address for XDP */ ++ struct sk_buff *skb; /* associated packet (sk_buff path) */ ++ struct xdp_frame *xdpf; /* associated XDP frame (XDP path) */ + }; + }; + +@@ -245,7 +245,7 @@ static inline int fun_irq_node(const struct fun_irq *p) + int fun_rxq_napi_poll(struct napi_struct *napi, int budget); + int fun_txq_napi_poll(struct napi_struct *napi, int budget); + netdev_tx_t fun_start_xmit(struct sk_buff *skb, struct net_device *netdev); +-bool fun_xdp_tx(struct funeth_txq *q, void *data, unsigned int len); ++bool fun_xdp_tx(struct funeth_txq *q, struct xdp_frame *xdpf); + int fun_xdp_xmit_frames(struct net_device *dev, int n, + struct xdp_frame **frames, u32 flags); + +-- +2.35.1 + diff --git a/queue-5.18/net-macsec-fix-potential-resource-leak-in-macsec_add.patch b/queue-5.18/net-macsec-fix-potential-resource-leak-in-macsec_add.patch new file mode 100644 index 00000000000..19f6f41eb9e --- /dev/null +++ b/queue-5.18/net-macsec-fix-potential-resource-leak-in-macsec_add.patch @@ -0,0 +1,54 @@ +From e0139086e45540d8c295043d262c80845e70a0fa Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 22 Jul 2022 17:29:02 +0800 +Subject: net: macsec: fix potential resource leak in macsec_add_rxsa() and + macsec_add_txsa() + +From: Jianglei Nie + +[ Upstream commit c7b205fbbf3cffa374721bb7623f7aa8c46074f1 ] + +init_rx_sa() allocates relevant resource for rx_sa->stats and rx_sa-> +key.tfm with alloc_percpu() and macsec_alloc_tfm(). When some error +occurs after init_rx_sa() is called in macsec_add_rxsa(), the function +released rx_sa with kfree() without releasing rx_sa->stats and rx_sa-> +key.tfm, which will lead to a resource leak. + +We should call macsec_rxsa_put() instead of kfree() to decrease the ref +count of rx_sa and release the relevant resource if the refcount is 0. 
+The same bug exists in macsec_add_txsa() for tx_sa as well. This patch +fixes the above two bugs. + +Fixes: 3cf3227a21d1 ("net: macsec: hardware offloading infrastructure") +Signed-off-by: Jianglei Nie +Signed-off-by: David S. Miller +Signed-off-by: Sasha Levin +--- + drivers/net/macsec.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c +index 95578f04f212..f354fad05714 100644 +--- a/drivers/net/macsec.c ++++ b/drivers/net/macsec.c +@@ -1844,7 +1844,7 @@ static int macsec_add_rxsa(struct sk_buff *skb, struct genl_info *info) + return 0; + + cleanup: +- kfree(rx_sa); ++ macsec_rxsa_put(rx_sa); + rtnl_unlock(); + return err; + } +@@ -2087,7 +2087,7 @@ static int macsec_add_txsa(struct sk_buff *skb, struct genl_info *info) + + cleanup: + secy->operational = was_operational; +- kfree(tx_sa); ++ macsec_txsa_put(tx_sa); + rtnl_unlock(); + return err; + } +-- +2.35.1 + diff --git a/queue-5.18/net-mld-fix-reference-count-leak-in-mld_-query-repor.patch b/queue-5.18/net-mld-fix-reference-count-leak-in-mld_-query-repor.patch new file mode 100644 index 00000000000..bb87a9b1e52 --- /dev/null +++ b/queue-5.18/net-mld-fix-reference-count-leak-in-mld_-query-repor.patch @@ -0,0 +1,122 @@ +From 832b41ea646d7b3cbb1120ae329394a2803c0a37 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 22 Jul 2022 17:06:35 +0000 +Subject: net: mld: fix reference count leak in mld_{query | report}_work() + +From: Taehee Yoo + +[ Upstream commit 3e7d18b9dca388940a19cae30bfc1f76dccd8c28 ] + +mld_{query | report}_work() processes queued events. +If there are too many events in the queue, it re-queues the work. +And then, it returns without in6_dev_put(). +But if queuing fails, it should call in6_dev_put(), but it doesn't. +So, a reference count leak would occur. 
+ +THREAD0 THREAD1 +mld_report_work() + spin_lock_bh() + if (!mod_delayed_work()) + in6_dev_hold(); + spin_unlock_bh() + spin_lock_bh() + schedule_delayed_work() + spin_unlock_bh() + +Script to reproduce(by Hangbin Liu): + ip netns add ns1 + ip netns add ns2 + ip netns exec ns1 sysctl -w net.ipv6.conf.all.force_mld_version=1 + ip netns exec ns2 sysctl -w net.ipv6.conf.all.force_mld_version=1 + + ip -n ns1 link add veth0 type veth peer name veth0 netns ns2 + ip -n ns1 link set veth0 up + ip -n ns2 link set veth0 up + + for i in `seq 50`; do + for j in `seq 100`; do + ip -n ns1 addr add 2021:${i}::${j}/64 dev veth0 + ip -n ns2 addr add 2022:${i}::${j}/64 dev veth0 + done + done + modprobe -r veth + ip -a netns del + +splat looks like: + unregister_netdevice: waiting for veth0 to become free. Usage count = 2 + leaked reference. + ipv6_add_dev+0x324/0xec0 + addrconf_notify+0x481/0xd10 + raw_notifier_call_chain+0xe3/0x120 + call_netdevice_notifiers+0x106/0x160 + register_netdevice+0x114c/0x16b0 + veth_newlink+0x48b/0xa50 [veth] + rtnl_newlink+0x11a2/0x1a40 + rtnetlink_rcv_msg+0x63f/0xc00 + netlink_rcv_skb+0x1df/0x3e0 + netlink_unicast+0x5de/0x850 + netlink_sendmsg+0x6c9/0xa90 + ____sys_sendmsg+0x76a/0x780 + __sys_sendmsg+0x27c/0x340 + do_syscall_64+0x43/0x90 + entry_SYSCALL_64_after_hwframe+0x63/0xcd + +Tested-by: Hangbin Liu +Fixes: f185de28d9ae ("mld: add new workqueues for process mld events") +Signed-off-by: Taehee Yoo +Reviewed-by: Eric Dumazet +Signed-off-by: David S. 
Miller +Signed-off-by: Sasha Levin +--- + net/ipv6/mcast.c | 14 ++++++++------ + 1 file changed, 8 insertions(+), 6 deletions(-) + +diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c +index 7f695c39d9a8..87c699d57b36 100644 +--- a/net/ipv6/mcast.c ++++ b/net/ipv6/mcast.c +@@ -1522,7 +1522,6 @@ static void mld_query_work(struct work_struct *work) + + if (++cnt >= MLD_MAX_QUEUE) { + rework = true; +- schedule_delayed_work(&idev->mc_query_work, 0); + break; + } + } +@@ -1533,8 +1532,10 @@ static void mld_query_work(struct work_struct *work) + __mld_query_work(skb); + mutex_unlock(&idev->mc_lock); + +- if (!rework) +- in6_dev_put(idev); ++ if (rework && queue_delayed_work(mld_wq, &idev->mc_query_work, 0)) ++ return; ++ ++ in6_dev_put(idev); + } + + /* called with rcu_read_lock() */ +@@ -1624,7 +1625,6 @@ static void mld_report_work(struct work_struct *work) + + if (++cnt >= MLD_MAX_QUEUE) { + rework = true; +- schedule_delayed_work(&idev->mc_report_work, 0); + break; + } + } +@@ -1635,8 +1635,10 @@ static void mld_report_work(struct work_struct *work) + __mld_report_work(skb); + mutex_unlock(&idev->mc_lock); + +- if (!rework) +- in6_dev_put(idev); ++ if (rework && queue_delayed_work(mld_wq, &idev->mc_report_work, 0)) ++ return; ++ ++ in6_dev_put(idev); + } + + static bool is_in(struct ifmcaddr6 *pmc, struct ip6_sf_list *psf, int type, +-- +2.35.1 + diff --git a/queue-5.18/net-pcs-xpcs-propagate-xpcs_read-error-to-xpcs_get_s.patch b/queue-5.18/net-pcs-xpcs-propagate-xpcs_read-error-to-xpcs_get_s.patch new file mode 100644 index 00000000000..53cd58ea3ac --- /dev/null +++ b/queue-5.18/net-pcs-xpcs-propagate-xpcs_read-error-to-xpcs_get_s.patch @@ -0,0 +1,42 @@ +From fb395a0519c1ca508a62c862f77166ef1c5de868 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 20 Jul 2022 14:20:57 +0300 +Subject: net: pcs: xpcs: propagate xpcs_read error to xpcs_get_state_c37_sgmii + +From: Vladimir Oltean + +[ Upstream commit 27161db0904ee48e59140aa8d0835939a666c1f1 ] + +While 
phylink_pcs_ops :: pcs_get_state does return void, xpcs_get_state() +does check for a non-zero return code from xpcs_get_state_c37_sgmii() +and prints that as a message to the kernel log. + +However, a non-zero return code from xpcs_read() is translated into +"return false" (i.e. zero as int) and the I/O error is therefore not +printed. Fix that. + +Fixes: b97b5331b8ab ("net: pcs: add C37 SGMII AN support for intel mGbE controller") +Signed-off-by: Vladimir Oltean +Link: https://lore.kernel.org/r/20220720112057.3504398-1-vladimir.oltean@nxp.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + drivers/net/pcs/pcs-xpcs.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/drivers/net/pcs/pcs-xpcs.c b/drivers/net/pcs/pcs-xpcs.c +index 61418d4dc0cd..8768f6e34846 100644 +--- a/drivers/net/pcs/pcs-xpcs.c ++++ b/drivers/net/pcs/pcs-xpcs.c +@@ -898,7 +898,7 @@ static int xpcs_get_state_c37_sgmii(struct dw_xpcs *xpcs, + */ + ret = xpcs_read(xpcs, MDIO_MMD_VEND2, DW_VR_MII_AN_INTR_STS); + if (ret < 0) +- return false; ++ return ret; + + if (ret & DW_VR_MII_C37_ANSGM_SP_LNKSTS) { + int speed_value; +-- +2.35.1 + diff --git a/queue-5.18/net-sungem_phy-add-of_node_put-for-reference-returne.patch b/queue-5.18/net-sungem_phy-add-of_node_put-for-reference-returne.patch new file mode 100644 index 00000000000..bad377faf41 --- /dev/null +++ b/queue-5.18/net-sungem_phy-add-of_node_put-for-reference-returne.patch @@ -0,0 +1,37 @@ +From ab53afa87f58275e1dfae6c3ac2fc50e6990f675 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 20 Jul 2022 21:10:03 +0800 +Subject: net: sungem_phy: Add of_node_put() for reference returned by + of_get_parent() + +From: Liang He + +[ Upstream commit ebbbe23fdf6070e31509638df3321688358cc211 ] + +In bcm5421_init(), we should call of_node_put() for the reference +returned by of_get_parent() which has increased the refcount. 
+ +Fixes: 3c326fe9cb7a ("[PATCH] ppc64: Add new PHY to sungem") +Signed-off-by: Liang He +Link: https://lore.kernel.org/r/20220720131003.1287426-1-windhl@126.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + drivers/net/sungem_phy.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/drivers/net/sungem_phy.c b/drivers/net/sungem_phy.c +index 4daac5fda073..0d40d265b688 100644 +--- a/drivers/net/sungem_phy.c ++++ b/drivers/net/sungem_phy.c +@@ -454,6 +454,7 @@ static int bcm5421_init(struct mii_phy* phy) + int can_low_power = 1; + if (np == NULL || of_get_property(np, "no-autolowpower", NULL)) + can_low_power = 0; ++ of_node_put(np); + if (can_low_power) { + /* Enable automatic low-power */ + sungem_phy_write(phy, 0x1c, 0x9002); +-- +2.35.1 + diff --git a/queue-5.18/netfilter-nf_queue-do-not-allow-packet-truncation-be.patch b/queue-5.18/netfilter-nf_queue-do-not-allow-packet-truncation-be.patch new file mode 100644 index 00000000000..92e7b6aa472 --- /dev/null +++ b/queue-5.18/netfilter-nf_queue-do-not-allow-packet-truncation-be.patch @@ -0,0 +1,53 @@ +From 56fee236aaf4c707f71eda52291b415d7669e1d6 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 26 Jul 2022 12:42:06 +0200 +Subject: netfilter: nf_queue: do not allow packet truncation below transport + header offset + +From: Florian Westphal + +[ Upstream commit 99a63d36cb3ed5ca3aa6fcb64cffbeaf3b0fb164 ] + +Domingo Dirutigliano and Nicola Guerrera report kernel panic when +sending nf_queue verdict with 1-byte nfta_payload attribute. + +The IP/IPv6 stack pulls the IP(v6) header from the packet after the +input hook. + +If user truncates the packet below the header size, this skb_pull() will +result in a malformed skb (skb->len < 0). 
+ +Fixes: 7af4cc3fa158 ("[NETFILTER]: Add "nfnetlink_queue" netfilter queue handler over nfnetlink") +Reported-by: Domingo Dirutigliano +Signed-off-by: Florian Westphal +Reviewed-by: Pablo Neira Ayuso +Signed-off-by: Sasha Levin +--- + net/netfilter/nfnetlink_queue.c | 7 ++++++- + 1 file changed, 6 insertions(+), 1 deletion(-) + +diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c +index a364f8e5e698..87a9009d5234 100644 +--- a/net/netfilter/nfnetlink_queue.c ++++ b/net/netfilter/nfnetlink_queue.c +@@ -843,11 +843,16 @@ nfqnl_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum) + } + + static int +-nfqnl_mangle(void *data, int data_len, struct nf_queue_entry *e, int diff) ++nfqnl_mangle(void *data, unsigned int data_len, struct nf_queue_entry *e, int diff) + { + struct sk_buff *nskb; + + if (diff < 0) { ++ unsigned int min_len = skb_transport_offset(e->skb); ++ ++ if (data_len < min_len) ++ return -EINVAL; ++ + if (pskb_trim(e->skb, data_len)) + return -ENOMEM; + } else if (diff > 0) { +-- +2.35.1 + diff --git a/queue-5.18/octeontx2-pf-cn10k-fix-egress-ratelimit-configuratio.patch b/queue-5.18/octeontx2-pf-cn10k-fix-egress-ratelimit-configuratio.patch new file mode 100644 index 00000000000..f362b68ee55 --- /dev/null +++ b/queue-5.18/octeontx2-pf-cn10k-fix-egress-ratelimit-configuratio.patch @@ -0,0 +1,185 @@ +From d399289594df0f50484c56285796f595c4530776 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sun, 24 Jul 2022 13:51:13 +0530 +Subject: octeontx2-pf: cn10k: Fix egress ratelimit configuration + +From: Sunil Goutham + +[ Upstream commit b354eaeec8637d87003945439209251d76a2bb95 ] + +NIX_AF_TLXX_PIR/CIR register format has changed from OcteonTx2 +to CN10K. CN10K supports larger burst size. Fix burst exponent +and burst mantissa configuration for CN10K. + +Also fixed 'maxrate' from u32 to u64 since 'police.rate_bytes_ps' +passed by stack is also u64. 
+ +Fixes: e638a83f167e ("octeontx2-pf: TC_MATCHALL egress ratelimiting offload") +Signed-off-by: Sunil Goutham +Signed-off-by: Subbaraya Sundeep +Signed-off-by: Paolo Abeni +Signed-off-by: Sasha Levin +--- + .../ethernet/marvell/octeontx2/nic/otx2_tc.c | 76 ++++++++++++++----- + 1 file changed, 55 insertions(+), 21 deletions(-) + +diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c +index a3d720b1b32c..e64318c110fd 100644 +--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c ++++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c +@@ -28,6 +28,9 @@ + #define MAX_RATE_EXPONENT 0x0FULL + #define MAX_RATE_MANTISSA 0xFFULL + ++#define CN10K_MAX_BURST_MANTISSA 0x7FFFULL ++#define CN10K_MAX_BURST_SIZE 8453888ULL ++ + /* Bitfields in NIX_TLX_PIR register */ + #define TLX_RATE_MANTISSA GENMASK_ULL(8, 1) + #define TLX_RATE_EXPONENT GENMASK_ULL(12, 9) +@@ -35,6 +38,9 @@ + #define TLX_BURST_MANTISSA GENMASK_ULL(36, 29) + #define TLX_BURST_EXPONENT GENMASK_ULL(40, 37) + ++#define CN10K_TLX_BURST_MANTISSA GENMASK_ULL(43, 29) ++#define CN10K_TLX_BURST_EXPONENT GENMASK_ULL(47, 44) ++ + struct otx2_tc_flow_stats { + u64 bytes; + u64 pkts; +@@ -77,33 +83,42 @@ int otx2_tc_alloc_ent_bitmap(struct otx2_nic *nic) + } + EXPORT_SYMBOL(otx2_tc_alloc_ent_bitmap); + +-static void otx2_get_egress_burst_cfg(u32 burst, u32 *burst_exp, +- u32 *burst_mantissa) ++static void otx2_get_egress_burst_cfg(struct otx2_nic *nic, u32 burst, ++ u32 *burst_exp, u32 *burst_mantissa) + { ++ int max_burst, max_mantissa; + unsigned int tmp; + ++ if (is_dev_otx2(nic->pdev)) { ++ max_burst = MAX_BURST_SIZE; ++ max_mantissa = MAX_BURST_MANTISSA; ++ } else { ++ max_burst = CN10K_MAX_BURST_SIZE; ++ max_mantissa = CN10K_MAX_BURST_MANTISSA; ++ } ++ + /* Burst is calculated as + * ((256 + BURST_MANTISSA) << (1 + BURST_EXPONENT)) / 256 + * Max supported burst size is 130,816 bytes. 
+ */ +- burst = min_t(u32, burst, MAX_BURST_SIZE); ++ burst = min_t(u32, burst, max_burst); + if (burst) { + *burst_exp = ilog2(burst) ? ilog2(burst) - 1 : 0; + tmp = burst - rounddown_pow_of_two(burst); +- if (burst < MAX_BURST_MANTISSA) ++ if (burst < max_mantissa) + *burst_mantissa = tmp * 2; + else + *burst_mantissa = tmp / (1ULL << (*burst_exp - 7)); + } else { + *burst_exp = MAX_BURST_EXPONENT; +- *burst_mantissa = MAX_BURST_MANTISSA; ++ *burst_mantissa = max_mantissa; + } + } + +-static void otx2_get_egress_rate_cfg(u32 maxrate, u32 *exp, ++static void otx2_get_egress_rate_cfg(u64 maxrate, u32 *exp, + u32 *mantissa, u32 *div_exp) + { +- unsigned int tmp; ++ u64 tmp; + + /* Rate calculation by hardware + * +@@ -132,21 +147,44 @@ static void otx2_get_egress_rate_cfg(u32 maxrate, u32 *exp, + } + } + +-static int otx2_set_matchall_egress_rate(struct otx2_nic *nic, u32 burst, u32 maxrate) ++static u64 otx2_get_txschq_rate_regval(struct otx2_nic *nic, ++ u64 maxrate, u32 burst) + { +- struct otx2_hw *hw = &nic->hw; +- struct nix_txschq_config *req; + u32 burst_exp, burst_mantissa; + u32 exp, mantissa, div_exp; ++ u64 regval = 0; ++ ++ /* Get exponent and mantissa values from the desired rate */ ++ otx2_get_egress_burst_cfg(nic, burst, &burst_exp, &burst_mantissa); ++ otx2_get_egress_rate_cfg(maxrate, &exp, &mantissa, &div_exp); ++ ++ if (is_dev_otx2(nic->pdev)) { ++ regval = FIELD_PREP(TLX_BURST_EXPONENT, (u64)burst_exp) | ++ FIELD_PREP(TLX_BURST_MANTISSA, (u64)burst_mantissa) | ++ FIELD_PREP(TLX_RATE_DIVIDER_EXPONENT, div_exp) | ++ FIELD_PREP(TLX_RATE_EXPONENT, exp) | ++ FIELD_PREP(TLX_RATE_MANTISSA, mantissa) | BIT_ULL(0); ++ } else { ++ regval = FIELD_PREP(CN10K_TLX_BURST_EXPONENT, (u64)burst_exp) | ++ FIELD_PREP(CN10K_TLX_BURST_MANTISSA, (u64)burst_mantissa) | ++ FIELD_PREP(TLX_RATE_DIVIDER_EXPONENT, div_exp) | ++ FIELD_PREP(TLX_RATE_EXPONENT, exp) | ++ FIELD_PREP(TLX_RATE_MANTISSA, mantissa) | BIT_ULL(0); ++ } ++ ++ return regval; ++} ++ ++static int 
otx2_set_matchall_egress_rate(struct otx2_nic *nic, ++ u32 burst, u64 maxrate) ++{ ++ struct otx2_hw *hw = &nic->hw; ++ struct nix_txschq_config *req; + int txschq, err; + + /* All SQs share the same TL4, so pick the first scheduler */ + txschq = hw->txschq_list[NIX_TXSCH_LVL_TL4][0]; + +- /* Get exponent and mantissa values from the desired rate */ +- otx2_get_egress_burst_cfg(burst, &burst_exp, &burst_mantissa); +- otx2_get_egress_rate_cfg(maxrate, &exp, &mantissa, &div_exp); +- + mutex_lock(&nic->mbox.lock); + req = otx2_mbox_alloc_msg_nix_txschq_cfg(&nic->mbox); + if (!req) { +@@ -157,11 +195,7 @@ static int otx2_set_matchall_egress_rate(struct otx2_nic *nic, u32 burst, u32 ma + req->lvl = NIX_TXSCH_LVL_TL4; + req->num_regs = 1; + req->reg[0] = NIX_AF_TL4X_PIR(txschq); +- req->regval[0] = FIELD_PREP(TLX_BURST_EXPONENT, burst_exp) | +- FIELD_PREP(TLX_BURST_MANTISSA, burst_mantissa) | +- FIELD_PREP(TLX_RATE_DIVIDER_EXPONENT, div_exp) | +- FIELD_PREP(TLX_RATE_EXPONENT, exp) | +- FIELD_PREP(TLX_RATE_MANTISSA, mantissa) | BIT_ULL(0); ++ req->regval[0] = otx2_get_txschq_rate_regval(nic, maxrate, burst); + + err = otx2_sync_mbox_msg(&nic->mbox); + mutex_unlock(&nic->mbox.lock); +@@ -230,7 +264,7 @@ static int otx2_tc_egress_matchall_install(struct otx2_nic *nic, + struct netlink_ext_ack *extack = cls->common.extack; + struct flow_action *actions = &cls->rule->action; + struct flow_action_entry *entry; +- u32 rate; ++ u64 rate; + int err; + + err = otx2_tc_validate_flow(nic, actions, extack); +@@ -256,7 +290,7 @@ static int otx2_tc_egress_matchall_install(struct otx2_nic *nic, + } + /* Convert bytes per second to Mbps */ + rate = entry->police.rate_bytes_ps * 8; +- rate = max_t(u32, rate / 1000000, 1); ++ rate = max_t(u64, rate / 1000000, 1); + err = otx2_set_matchall_egress_rate(nic, entry->police.burst, rate); + if (err) + return err; +-- +2.35.1 + diff --git a/queue-5.18/perf-symbol-correct-address-for-bss-symbols.patch 
b/queue-5.18/perf-symbol-correct-address-for-bss-symbols.patch new file mode 100644 index 00000000000..951a32fddef --- /dev/null +++ b/queue-5.18/perf-symbol-correct-address-for-bss-symbols.patch @@ -0,0 +1,182 @@ +From 88ab9e31f4c220a0b915becc3ec0e1b3131f3952 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sun, 24 Jul 2022 14:00:12 +0800 +Subject: perf symbol: Correct address for bss symbols + +From: Leo Yan + +[ Upstream commit 2d86612aacb7805f72873691a2644d7279ed0630 ] + +When using 'perf mem' and 'perf c2c', an issue is observed that tool +reports the wrong offset for global data symbols. This is a common +issue on both x86 and Arm64 platforms. + +Let's see an example, for a test program, below is the disassembly for +its .bss section which is dumped with objdump: + + ... + + Disassembly of section .bss: + + 0000000000004040 : + ... + + 0000000000004080 : + ... + + 00000000000040c0 : + ... + + 0000000000004100 : + ... + +First we used 'perf mem record' to run the test program and then used +'perf --debug verbose=4 mem report' to observe what's the symbol info +for 'buf1' and 'buf2' structures. + + # ./perf mem record -e ldlat-loads,ldlat-stores -- false_sharing.exe 8 + # ./perf --debug verbose=4 mem report + ... + dso__load_sym_internal: adjusting symbol: st_value: 0x40c0 sh_addr: 0x4040 sh_offset: 0x3028 + symbol__new: buf2 0x30a8-0x30e8 + ... + dso__load_sym_internal: adjusting symbol: st_value: 0x4080 sh_addr: 0x4040 sh_offset: 0x3028 + symbol__new: buf1 0x3068-0x30a8 + ... + +The perf tool relies on libelf to parse symbols, in executable and +shared object files, 'st_value' holds a virtual address; 'sh_addr' is +the address at which section's first byte should reside in memory, and +'sh_offset' is the byte offset from the beginning of the file to the +first byte in the section. 
The perf tool uses below formula to convert +a symbol's memory address to a file address: + + file_address = st_value - sh_addr + sh_offset + ^ + ` Memory address + +We can see the final adjusted address ranges for buf1 and buf2 are +[0x30a8-0x30e8) and [0x3068-0x30a8) respectively, apparently this is +incorrect, in the code, the structure for 'buf1' and 'buf2' specifies +compiler attribute with 64-byte alignment. + +The problem happens for 'sh_offset', libelf returns it as 0x3028 which +is not 64-byte aligned, combining with disassembly, it's likely libelf +doesn't respect the alignment for .bss section, therefore, it doesn't +return the aligned value for 'sh_offset'. + +Suggested by Fangrui Song, ELF file contains program header which +contains PT_LOAD segments, the fields p_vaddr and p_offset in PT_LOAD +segments contain the execution info. A better choice for converting +memory address to file address is using the formula: + + file_address = st_value - p_vaddr + p_offset + +This patch introduces elf_read_program_header() which returns the +program header based on the passed 'st_value', then it uses the formula +above to calculate the symbol file address; and the debugging log is +updated respectively. + +After applying the change: + + # ./perf --debug verbose=4 mem report + ... + dso__load_sym_internal: adjusting symbol: st_value: 0x40c0 p_vaddr: 0x3d28 p_offset: 0x2d28 + symbol__new: buf2 0x30c0-0x3100 + ... + dso__load_sym_internal: adjusting symbol: st_value: 0x4080 p_vaddr: 0x3d28 p_offset: 0x2d28 + symbol__new: buf1 0x3080-0x30c0 + ... 
+ +Fixes: f17e04afaff84b5c ("perf report: Fix ELF symbol parsing") +Reported-by: Chang Rui +Suggested-by: Fangrui Song +Signed-off-by: Leo Yan +Acked-by: Namhyung Kim +Cc: Alexander Shishkin +Cc: Ian Rogers +Cc: Ingo Molnar +Cc: Jiri Olsa +Cc: Mark Rutland +Cc: Peter Zijlstra +Link: https://lore.kernel.org/r/20220724060013.171050-2-leo.yan@linaro.org +Signed-off-by: Arnaldo Carvalho de Melo +Signed-off-by: Sasha Levin +--- + tools/perf/util/symbol-elf.c | 45 ++++++++++++++++++++++++++++++++---- + 1 file changed, 41 insertions(+), 4 deletions(-) + +diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c +index ecd377938eea..ef6ced5c5746 100644 +--- a/tools/perf/util/symbol-elf.c ++++ b/tools/perf/util/symbol-elf.c +@@ -233,6 +233,33 @@ Elf_Scn *elf_section_by_name(Elf *elf, GElf_Ehdr *ep, + return NULL; + } + ++static int elf_read_program_header(Elf *elf, u64 vaddr, GElf_Phdr *phdr) ++{ ++ size_t i, phdrnum; ++ u64 sz; ++ ++ if (elf_getphdrnum(elf, &phdrnum)) ++ return -1; ++ ++ for (i = 0; i < phdrnum; i++) { ++ if (gelf_getphdr(elf, i, phdr) == NULL) ++ return -1; ++ ++ if (phdr->p_type != PT_LOAD) ++ continue; ++ ++ sz = max(phdr->p_memsz, phdr->p_filesz); ++ if (!sz) ++ continue; ++ ++ if (vaddr >= phdr->p_vaddr && (vaddr < phdr->p_vaddr + sz)) ++ return 0; ++ } ++ ++ /* Not found any valid program header */ ++ return -1; ++} ++ + static bool want_demangle(bool is_kernel_sym) + { + return is_kernel_sym ? 
symbol_conf.demangle_kernel : symbol_conf.demangle; +@@ -1209,6 +1236,7 @@ dso__load_sym_internal(struct dso *dso, struct map *map, struct symsrc *syms_ss, + sym.st_value); + used_opd = true; + } ++ + /* + * When loading symbols in a data mapping, ABS symbols (which + * has a value of SHN_ABS in its st_shndx) failed at +@@ -1262,11 +1290,20 @@ dso__load_sym_internal(struct dso *dso, struct map *map, struct symsrc *syms_ss, + goto out_elf_end; + } else if ((used_opd && runtime_ss->adjust_symbols) || + (!used_opd && syms_ss->adjust_symbols)) { ++ GElf_Phdr phdr; ++ ++ if (elf_read_program_header(syms_ss->elf, ++ (u64)sym.st_value, &phdr)) { ++ pr_warning("%s: failed to find program header for " ++ "symbol: %s st_value: %#" PRIx64 "\n", ++ __func__, elf_name, (u64)sym.st_value); ++ continue; ++ } + pr_debug4("%s: adjusting symbol: st_value: %#" PRIx64 " " +- "sh_addr: %#" PRIx64 " sh_offset: %#" PRIx64 "\n", __func__, +- (u64)sym.st_value, (u64)shdr.sh_addr, +- (u64)shdr.sh_offset); +- sym.st_value -= shdr.sh_addr - shdr.sh_offset; ++ "p_vaddr: %#" PRIx64 " p_offset: %#" PRIx64 "\n", ++ __func__, (u64)sym.st_value, (u64)phdr.p_vaddr, ++ (u64)phdr.p_offset); ++ sym.st_value -= phdr.p_vaddr - phdr.p_offset; + } + + demangled = demangle_sym(dso, kmodule, elf_name); +-- +2.35.1 + diff --git a/queue-5.18/scsi-ufs-core-fix-a-race-condition-related-to-device.patch b/queue-5.18/scsi-ufs-core-fix-a-race-condition-related-to-device.patch new file mode 100644 index 00000000000..4353fff032e --- /dev/null +++ b/queue-5.18/scsi-ufs-core-fix-a-race-condition-related-to-device.patch @@ -0,0 +1,139 @@ +From 1a6777f31913eae21ac5845aca0720d4ee8ccbae Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 20 Jul 2022 10:02:23 -0700 +Subject: scsi: ufs: core: Fix a race condition related to device management + +From: Bart Van Assche + +[ Upstream commit f5c2976e0cb0f6236013bfb479868531b04f61d4 ] + +If a device management command completion happens after +wait_for_completion_timeout() 
times out and before ufshcd_clear_cmds() is +called, then the completion code may crash on the complete() call in +__ufshcd_transfer_req_compl(). + +Fix the following crash: + + Unable to handle kernel NULL pointer dereference at virtual address 0000000000000008 + Call trace: + complete+0x64/0x178 + __ufshcd_transfer_req_compl+0x30c/0x9c0 + ufshcd_poll+0xf0/0x208 + ufshcd_sl_intr+0xb8/0xf0 + ufshcd_intr+0x168/0x2f4 + __handle_irq_event_percpu+0xa0/0x30c + handle_irq_event+0x84/0x178 + handle_fasteoi_irq+0x150/0x2e8 + __handle_domain_irq+0x114/0x1e4 + gic_handle_irq.31846+0x58/0x300 + el1_irq+0xe4/0x1c0 + efi_header_end+0x110/0x680 + __irq_exit_rcu+0x108/0x124 + __handle_domain_irq+0x118/0x1e4 + gic_handle_irq.31846+0x58/0x300 + el1_irq+0xe4/0x1c0 + cpuidle_enter_state+0x3ac/0x8c4 + do_idle+0x2fc/0x55c + cpu_startup_entry+0x84/0x90 + kernel_init+0x0/0x310 + start_kernel+0x0/0x608 + start_kernel+0x4ec/0x608 + +Link: https://lore.kernel.org/r/20220720170228.1598842-1-bvanassche@acm.org +Fixes: 5a0b0cb9bee7 ("[SCSI] ufs: Add support for sending NOP OUT UPIU") +Cc: Adrian Hunter +Cc: Avri Altman +Cc: Bean Huo +Cc: Stanley Chu +Signed-off-by: Bart Van Assche +Signed-off-by: Martin K. 
Petersen +Signed-off-by: Sasha Levin +--- + drivers/scsi/ufs/ufshcd.c | 58 +++++++++++++++++++++++++++------------ + 1 file changed, 40 insertions(+), 18 deletions(-) + +diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c +index a34c1fab0246..874490f7f5e7 100644 +--- a/drivers/scsi/ufs/ufshcd.c ++++ b/drivers/scsi/ufs/ufshcd.c +@@ -2947,37 +2947,59 @@ ufshcd_dev_cmd_completion(struct ufs_hba *hba, struct ufshcd_lrb *lrbp) + static int ufshcd_wait_for_dev_cmd(struct ufs_hba *hba, + struct ufshcd_lrb *lrbp, int max_timeout) + { +- int err = 0; +- unsigned long time_left; ++ unsigned long time_left = msecs_to_jiffies(max_timeout); + unsigned long flags; ++ bool pending; ++ int err; + ++retry: + time_left = wait_for_completion_timeout(hba->dev_cmd.complete, +- msecs_to_jiffies(max_timeout)); ++ time_left); + +- spin_lock_irqsave(hba->host->host_lock, flags); +- hba->dev_cmd.complete = NULL; + if (likely(time_left)) { ++ /* ++ * The completion handler called complete() and the caller of ++ * this function still owns the @lrbp tag so the code below does ++ * not trigger any race conditions. 
++ */ ++ hba->dev_cmd.complete = NULL; + err = ufshcd_get_tr_ocs(lrbp); + if (!err) + err = ufshcd_dev_cmd_completion(hba, lrbp); +- } +- spin_unlock_irqrestore(hba->host->host_lock, flags); +- +- if (!time_left) { ++ } else { + err = -ETIMEDOUT; + dev_dbg(hba->dev, "%s: dev_cmd request timedout, tag %d\n", + __func__, lrbp->task_tag); +- if (!ufshcd_clear_cmds(hba, 1U << lrbp->task_tag)) ++ if (ufshcd_clear_cmds(hba, 1U << lrbp->task_tag) == 0) { + /* successfully cleared the command, retry if needed */ + err = -EAGAIN; +- /* +- * in case of an error, after clearing the doorbell, +- * we also need to clear the outstanding_request +- * field in hba +- */ +- spin_lock_irqsave(&hba->outstanding_lock, flags); +- __clear_bit(lrbp->task_tag, &hba->outstanding_reqs); +- spin_unlock_irqrestore(&hba->outstanding_lock, flags); ++ /* ++ * Since clearing the command succeeded we also need to ++ * clear the task tag bit from the outstanding_reqs ++ * variable. ++ */ ++ spin_lock_irqsave(&hba->outstanding_lock, flags); ++ pending = test_bit(lrbp->task_tag, ++ &hba->outstanding_reqs); ++ if (pending) { ++ hba->dev_cmd.complete = NULL; ++ __clear_bit(lrbp->task_tag, ++ &hba->outstanding_reqs); ++ } ++ spin_unlock_irqrestore(&hba->outstanding_lock, flags); ++ ++ if (!pending) { ++ /* ++ * The completion handler ran while we tried to ++ * clear the command. 
++ */ ++ time_left = 1; ++ goto retry; ++ } ++ } else { ++ dev_err(hba->dev, "%s: failed to clear tag %d\n", ++ __func__, lrbp->task_tag); ++ } + } + + return err; +-- +2.35.1 + diff --git a/queue-5.18/scsi-ufs-support-clearing-multiple-commands-at-once.patch b/queue-5.18/scsi-ufs-support-clearing-multiple-commands-at-once.patch new file mode 100644 index 00000000000..651299bc376 --- /dev/null +++ b/queue-5.18/scsi-ufs-support-clearing-multiple-commands-at-once.patch @@ -0,0 +1,117 @@ +From 25704b34acfee56b5b270bb72659201066253eb4 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 13 Jun 2022 14:44:41 -0700 +Subject: scsi: ufs: Support clearing multiple commands at once + +From: Bart Van Assche + +[ Upstream commit d1a7644648b7cdacaf8d1013a4285001911e9bc8 ] + +Modify ufshcd_clear_cmd() such that it supports clearing multiple commands +at once instead of one command at a time. This change will be used in a +later patch to reduce the time spent in the reset handler. + +Link: https://lore.kernel.org/r/20220613214442.212466-3-bvanassche@acm.org +Reviewed-by: Stanley Chu +Reviewed-by: Adrian Hunter +Signed-off-by: Bart Van Assche +Signed-off-by: Martin K. Petersen +Signed-off-by: Sasha Levin +--- + drivers/scsi/ufs/ufshcd.c | 42 ++++++++++++++++++++++++++------------- + 1 file changed, 28 insertions(+), 14 deletions(-) + +diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c +index 452ad0612067..a34c1fab0246 100644 +--- a/drivers/scsi/ufs/ufshcd.c ++++ b/drivers/scsi/ufs/ufshcd.c +@@ -734,17 +734,28 @@ static enum utp_ocs ufshcd_get_tr_ocs(struct ufshcd_lrb *lrbp) + } + + /** +- * ufshcd_utrl_clear - Clear a bit in UTRLCLR register ++ * ufshcd_utrl_clear() - Clear requests from the controller request list. 
+ * @hba: per adapter instance +- * @pos: position of the bit to be cleared ++ * @mask: mask with one bit set for each request to be cleared + */ +-static inline void ufshcd_utrl_clear(struct ufs_hba *hba, u32 pos) ++static inline void ufshcd_utrl_clear(struct ufs_hba *hba, u32 mask) + { + if (hba->quirks & UFSHCI_QUIRK_BROKEN_REQ_LIST_CLR) +- ufshcd_writel(hba, (1 << pos), REG_UTP_TRANSFER_REQ_LIST_CLEAR); +- else +- ufshcd_writel(hba, ~(1 << pos), +- REG_UTP_TRANSFER_REQ_LIST_CLEAR); ++ mask = ~mask; ++ /* ++ * From the UFSHCI specification: "UTP Transfer Request List CLear ++ * Register (UTRLCLR): This field is bit significant. Each bit ++ * corresponds to a slot in the UTP Transfer Request List, where bit 0 ++ * corresponds to request slot 0. A bit in this field is set to ‘0’ ++ * by host software to indicate to the host controller that a transfer ++ * request slot is cleared. The host controller ++ * shall free up any resources associated to the request slot ++ * immediately, and shall set the associated bit in UTRLDBR to ‘0’. The ++ * host software indicates no change to request slots by setting the ++ * associated bits in this field to ‘1’. Bits in this field shall only ++ * be set ‘1’ or ‘0’ by host software when UTRLRSR is set to ‘1’." ++ */ ++ ufshcd_writel(hba, ~mask, REG_UTP_TRANSFER_REQ_LIST_CLEAR); + } + + /** +@@ -2853,16 +2864,19 @@ static int ufshcd_compose_dev_cmd(struct ufs_hba *hba, + return ufshcd_compose_devman_upiu(hba, lrbp); + } + +-static int +-ufshcd_clear_cmd(struct ufs_hba *hba, int tag) ++/* ++ * Clear all the requests from the controller for which a bit has been set in ++ * @mask and wait until the controller confirms that these requests have been ++ * cleared. 
++ */ ++static int ufshcd_clear_cmds(struct ufs_hba *hba, u32 mask) + { + int err = 0; + unsigned long flags; +- u32 mask = 1 << tag; + + /* clear outstanding transaction before retry */ + spin_lock_irqsave(hba->host->host_lock, flags); +- ufshcd_utrl_clear(hba, tag); ++ ufshcd_utrl_clear(hba, mask); + spin_unlock_irqrestore(hba->host->host_lock, flags); + + /* +@@ -2953,7 +2967,7 @@ static int ufshcd_wait_for_dev_cmd(struct ufs_hba *hba, + err = -ETIMEDOUT; + dev_dbg(hba->dev, "%s: dev_cmd request timedout, tag %d\n", + __func__, lrbp->task_tag); +- if (!ufshcd_clear_cmd(hba, lrbp->task_tag)) ++ if (!ufshcd_clear_cmds(hba, 1U << lrbp->task_tag)) + /* successfully cleared the command, retry if needed */ + err = -EAGAIN; + /* +@@ -6988,7 +7002,7 @@ static int ufshcd_eh_device_reset_handler(struct scsi_cmnd *cmd) + /* clear the commands that were pending for corresponding LUN */ + for_each_set_bit(pos, &hba->outstanding_reqs, hba->nutrs) { + if (hba->lrb[pos].lun == lun) { +- err = ufshcd_clear_cmd(hba, pos); ++ err = ufshcd_clear_cmds(hba, 1U << pos); + if (err) + break; + __ufshcd_transfer_req_compl(hba, 1U << pos); +@@ -7090,7 +7104,7 @@ static int ufshcd_try_to_abort_task(struct ufs_hba *hba, int tag) + goto out; + } + +- err = ufshcd_clear_cmd(hba, tag); ++ err = ufshcd_clear_cmds(hba, 1U << tag); + if (err) + dev_err(hba->dev, "%s: Failed clearing cmd at tag %d, err %d\n", + __func__, tag, err); +-- +2.35.1 + diff --git a/queue-5.18/sctp-fix-sleep-in-atomic-context-bug-in-timer-handle.patch b/queue-5.18/sctp-fix-sleep-in-atomic-context-bug-in-timer-handle.patch new file mode 100644 index 00000000000..7eca1ecfa8e --- /dev/null +++ b/queue-5.18/sctp-fix-sleep-in-atomic-context-bug-in-timer-handle.patch @@ -0,0 +1,61 @@ +From 8f865e9cd6d0a57debad4c7d4515d949d4fc426a Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sat, 23 Jul 2022 09:58:09 +0800 +Subject: sctp: fix sleep in atomic context bug in timer handlers + +From: Duoming Zhou + +[ Upstream commit 
b89fc26f741d9f9efb51cba3e9b241cf1380ec5a ] + +There are sleep in atomic context bugs in timer handlers of sctp +such as sctp_generate_t3_rtx_event(), sctp_generate_probe_event(), +sctp_generate_t1_init_event(), sctp_generate_timeout_event(), +sctp_generate_t3_rtx_event() and so on. + +The root cause is sctp_sched_prio_init_sid() with GFP_KERNEL parameter +that may sleep could be called by different timer handlers which is in +interrupt context. + +One of the call paths that could trigger bug is shown below: + + (interrupt context) +sctp_generate_probe_event + sctp_do_sm + sctp_side_effects + sctp_cmd_interpreter + sctp_outq_teardown + sctp_outq_init + sctp_sched_set_sched + n->init_sid(..,GFP_KERNEL) + sctp_sched_prio_init_sid //may sleep + +This patch changes gfp_t parameter of init_sid in sctp_sched_set_sched() +from GFP_KERNEL to GFP_ATOMIC in order to prevent sleep in atomic +context bugs. + +Fixes: 5bbbbe32a431 ("sctp: introduce stream scheduler foundations") +Signed-off-by: Duoming Zhou +Acked-by: Marcelo Ricardo Leitner +Link: https://lore.kernel.org/r/20220723015809.11553-1-duoming@zju.edu.cn +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + net/sctp/stream_sched.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/net/sctp/stream_sched.c b/net/sctp/stream_sched.c +index 99e5f69fbb74..a2e1d34f52c5 100644 +--- a/net/sctp/stream_sched.c ++++ b/net/sctp/stream_sched.c +@@ -163,7 +163,7 @@ int sctp_sched_set_sched(struct sctp_association *asoc, + if (!SCTP_SO(&asoc->stream, i)->ext) + continue; + +- ret = n->init_sid(&asoc->stream, i, GFP_KERNEL); ++ ret = n->init_sid(&asoc->stream, i, GFP_ATOMIC); + if (ret) + goto err; + } +-- +2.35.1 + diff --git a/queue-5.18/sctp-leave-the-err-path-free-in-sctp_stream_init-to-.patch b/queue-5.18/sctp-leave-the-err-path-free-in-sctp_stream_init-to-.patch new file mode 100644 index 00000000000..1631dc1a970 --- /dev/null +++ 
b/queue-5.18/sctp-leave-the-err-path-free-in-sctp_stream_init-to-.patch @@ -0,0 +1,109 @@ +From 52db25ddcd09238cbf3c260287edf1cc11e7926a Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 25 Jul 2022 18:11:06 -0400 +Subject: sctp: leave the err path free in sctp_stream_init to sctp_stream_free + +From: Xin Long + +[ Upstream commit 181d8d2066c000ba0a0e6940a7ad80f1a0e68e9d ] + +A NULL pointer dereference was reported by Wei Chen: + + BUG: kernel NULL pointer dereference, address: 0000000000000000 + RIP: 0010:__list_del_entry_valid+0x26/0x80 + Call Trace: + + sctp_sched_dequeue_common+0x1c/0x90 + sctp_sched_prio_dequeue+0x67/0x80 + __sctp_outq_teardown+0x299/0x380 + sctp_outq_free+0x15/0x20 + sctp_association_free+0xc3/0x440 + sctp_do_sm+0x1ca7/0x2210 + sctp_assoc_bh_rcv+0x1f6/0x340 + +This happens when calling sctp_sendmsg without connecting to server first. +In this case, a data chunk already queues up in send queue of client side +when processing the INIT_ACK from server in sctp_process_init() where it +calls sctp_stream_init() to alloc stream_in. If it fails to alloc stream_in +all stream_out will be freed in sctp_stream_init's err path. Then in the +asoc freeing it will crash when dequeuing this data chunk as stream_out +is missing. + +As we can't free stream out before dequeuing all data from send queue, and +this patch is to fix it by moving the err path stream_out/in freeing in +sctp_stream_init() to sctp_stream_free() which is eventually called when +freeing the asoc in sctp_association_free(). This fix also makes the code +in sctp_process_init() more clear. + +Note that in sctp_association_init() when it fails in sctp_stream_init(), +sctp_association_free() will not be called, and in that case it should +go to 'stream_free' err path to free stream instead of 'fail_init'. 
+ +Fixes: 5bbbbe32a431 ("sctp: introduce stream scheduler foundations") +Reported-by: Wei Chen +Signed-off-by: Xin Long +Link: https://lore.kernel.org/r/831a3dc100c4908ff76e5bcc363be97f2778bc0b.1658787066.git.lucien.xin@gmail.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + net/sctp/associola.c | 5 ++--- + net/sctp/stream.c | 19 +++---------------- + 2 files changed, 5 insertions(+), 19 deletions(-) + +diff --git a/net/sctp/associola.c b/net/sctp/associola.c +index be29da09cc7a..3460abceba44 100644 +--- a/net/sctp/associola.c ++++ b/net/sctp/associola.c +@@ -229,9 +229,8 @@ static struct sctp_association *sctp_association_init( + if (!sctp_ulpq_init(&asoc->ulpq, asoc)) + goto fail_init; + +- if (sctp_stream_init(&asoc->stream, asoc->c.sinit_num_ostreams, +- 0, gfp)) +- goto fail_init; ++ if (sctp_stream_init(&asoc->stream, asoc->c.sinit_num_ostreams, 0, gfp)) ++ goto stream_free; + + /* Initialize default path MTU. */ + asoc->pathmtu = sp->pathmtu; +diff --git a/net/sctp/stream.c b/net/sctp/stream.c +index 6dc95dcc0ff4..ef9fceadef8d 100644 +--- a/net/sctp/stream.c ++++ b/net/sctp/stream.c +@@ -137,7 +137,7 @@ int sctp_stream_init(struct sctp_stream *stream, __u16 outcnt, __u16 incnt, + + ret = sctp_stream_alloc_out(stream, outcnt, gfp); + if (ret) +- goto out_err; ++ return ret; + + for (i = 0; i < stream->outcnt; i++) + SCTP_SO(stream, i)->state = SCTP_STREAM_OPEN; +@@ -145,22 +145,9 @@ int sctp_stream_init(struct sctp_stream *stream, __u16 outcnt, __u16 incnt, + handle_in: + sctp_stream_interleave_init(stream); + if (!incnt) +- goto out; +- +- ret = sctp_stream_alloc_in(stream, incnt, gfp); +- if (ret) +- goto in_err; +- +- goto out; ++ return 0; + +-in_err: +- sched->free(stream); +- genradix_free(&stream->in); +-out_err: +- genradix_free(&stream->out); +- stream->outcnt = 0; +-out: +- return ret; ++ return sctp_stream_alloc_in(stream, incnt, gfp); + } + + int sctp_stream_init_ext(struct sctp_stream *stream, __u16 sid) +-- +2.35.1 + diff 
--git a/queue-5.18/series b/queue-5.18/series index 3e5f93496e6..92d09594ae5 100644 --- a/queue-5.18/series +++ b/queue-5.18/series @@ -40,3 +40,39 @@ scsi-mpt3sas-stop-fw-fault-watchdog-work-item-during-system-shutdown.patch net-ping6-fix-memleak-in-ipv6_renew_options.patch ipv6-addrconf-fix-a-null-ptr-deref-bug-for-ip6_ptr.patch net-tls-remove-the-context-from-the-list-in-tls_device_down.patch +net-pcs-xpcs-propagate-xpcs_read-error-to-xpcs_get_s.patch +net-sungem_phy-add-of_node_put-for-reference-returne.patch +mlxsw-spectrum_router-simplify-list-unwinding.patch +tcp-fix-a-data-race-around-sysctl_tcp_min_tso_segs.patch +tcp-fix-a-data-race-around-sysctl_tcp_tso_rtt_log.patch +tcp-fix-a-data-race-around-sysctl_tcp_min_rtt_wlen.patch +tcp-fix-a-data-race-around-sysctl_tcp_autocorking.patch +tcp-fix-a-data-race-around-sysctl_tcp_invalid_rateli.patch +documentation-fix-sctp_wmem-in-ip-sysctl.rst.patch +macsec-fix-null-deref-in-macsec_add_rxsa.patch +macsec-fix-error-message-in-macsec_add_rxsa-and-_txs.patch +macsec-limit-replay-window-size-with-xpn.patch +macsec-always-read-macsec_sa_attr_pn-as-a-u64.patch +net-macsec-fix-potential-resource-leak-in-macsec_add.patch +net-mld-fix-reference-count-leak-in-mld_-query-repor.patch +tcp-fix-data-races-around-sk_pacing_rate.patch +net-fix-data-races-around-sysctl_-rw-mem-_offset.patch +tcp-fix-a-data-race-around-sysctl_tcp_comp_sack_dela.patch +tcp-fix-a-data-race-around-sysctl_tcp_comp_sack_slac.patch +tcp-fix-a-data-race-around-sysctl_tcp_comp_sack_nr.patch +tcp-fix-data-races-around-sysctl_tcp_reflect_tos.patch +ipv4-fix-data-races-around-sysctl_fib_notify_on_flag.patch +i40e-fix-interface-init-with-msi-interrupts-no-msi-x.patch +net-dsa-fix-reference-counting-for-lag-fdbs.patch +sctp-fix-sleep-in-atomic-context-bug-in-timer-handle.patch +octeontx2-pf-cn10k-fix-egress-ratelimit-configuratio.patch +netfilter-nf_queue-do-not-allow-packet-truncation-be.patch +scsi-ufs-support-clearing-multiple-commands-at-once.patch 
+scsi-ufs-core-fix-a-race-condition-related-to-device.patch +mptcp-don-t-send-rst-for-single-subflow.patch +virtio-net-fix-the-race-between-refill-work-and-clos.patch +perf-symbol-correct-address-for-bss-symbols.patch +sfc-disable-softirqs-for-ptp-tx.patch +sctp-leave-the-err-path-free-in-sctp_stream_init-to-.patch +net-funeth-fix-fun_xdp_tx-and-xdp-packet-reclaim.patch +stmmac-dwmac-mediatek-fix-resource-leak-in-probe.patch diff --git a/queue-5.18/sfc-disable-softirqs-for-ptp-tx.patch b/queue-5.18/sfc-disable-softirqs-for-ptp-tx.patch new file mode 100644 index 00000000000..8d99411dfae --- /dev/null +++ b/queue-5.18/sfc-disable-softirqs-for-ptp-tx.patch @@ -0,0 +1,73 @@ +From 8b1ab4a8751f1c86e1857d655def64a74f6328b6 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 26 Jul 2022 08:45:04 +0200 +Subject: sfc: disable softirqs for ptp TX + +From: Alejandro Lucero + +[ Upstream commit 67c3b611d92fc238c43734878bc3e232ab570c79 ] + +Sending a PTP packet can imply to use the normal TX driver datapath but +invoked from the driver's ptp worker. The kernel generic TX code +disables softirqs and preemption before calling specific driver TX code, +but the ptp worker does not. Although current ptp driver functionality +does not require it, there are several reasons for doing so: + + 1) The invoked code is always executed with softirqs disabled for non + PTP packets. + 2) Better if a ptp packet transmission is not interrupted by softirq + handling which could lead to high latencies. + 3) netdev_xmit_more used by the TX code requires preemption to be + disabled. + +Indeed a solution for dealing with kernel preemption state based on static +kernel configuration is not possible since the introduction of dynamic +preemption level configuration at boot time using the static calls +functionality. 
+ +Fixes: f79c957a0b537 ("drivers: net: sfc: use netdev_xmit_more helper") +Signed-off-by: Alejandro Lucero +Link: https://lore.kernel.org/r/20220726064504.49613-1-alejandro.lucero-palau@amd.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/sfc/ptp.c | 22 ++++++++++++++++++++++ + 1 file changed, 22 insertions(+) + +diff --git a/drivers/net/ethernet/sfc/ptp.c b/drivers/net/ethernet/sfc/ptp.c +index 4625f85acab2..10ad0b93d283 100644 +--- a/drivers/net/ethernet/sfc/ptp.c ++++ b/drivers/net/ethernet/sfc/ptp.c +@@ -1100,7 +1100,29 @@ static void efx_ptp_xmit_skb_queue(struct efx_nic *efx, struct sk_buff *skb) + + tx_queue = efx_channel_get_tx_queue(ptp_data->channel, type); + if (tx_queue && tx_queue->timestamping) { ++ /* This code invokes normal driver TX code which is always ++ * protected from softirqs when called from generic TX code, ++ * which in turn disables preemption. Look at __dev_queue_xmit ++ * which uses rcu_read_lock_bh disabling preemption for RCU ++ * plus disabling softirqs. We do not need RCU reader ++ * protection here. ++ * ++ * Although it is theoretically safe for current PTP TX/RX code ++ * running without disabling softirqs, there are three good ++ * reasons for doing so: ++ * ++ * 1) The code invoked is mainly implemented for non-PTP ++ * packets and it is always executed with softirqs ++ * disabled. ++ * 2) This being a single PTP packet, better to not ++ * interrupt its processing by softirqs which can lead ++ * to high latencies. ++ * 3) netdev_xmit_more checks preemption is disabled and ++ * triggers a BUG_ON if not. 
++ */ ++ local_bh_disable(); + efx_enqueue_skb(tx_queue, skb); ++ local_bh_enable(); + } else { + WARN_ONCE(1, "PTP channel has no timestamped tx queue\n"); + dev_kfree_skb_any(skb); +-- +2.35.1 + diff --git a/queue-5.18/stmmac-dwmac-mediatek-fix-resource-leak-in-probe.patch b/queue-5.18/stmmac-dwmac-mediatek-fix-resource-leak-in-probe.patch new file mode 100644 index 00000000000..6a4e7baad80 --- /dev/null +++ b/queue-5.18/stmmac-dwmac-mediatek-fix-resource-leak-in-probe.patch @@ -0,0 +1,52 @@ +From e00acc7d26f75cab80c8949b32135e2f01805d50 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 28 Jul 2022 14:52:09 +0300 +Subject: stmmac: dwmac-mediatek: fix resource leak in probe + +From: Dan Carpenter + +[ Upstream commit 4d3d3a1b244fd54629a6b7047f39a7bbc8d11910 ] + +If mediatek_dwmac_clks_config() fails, then call stmmac_remove_config_dt() +before returning. Otherwise it is a resource leak. + +Fixes: fa4b3ca60e80 ("stmmac: dwmac-mediatek: fix clock issue") +Signed-off-by: Dan Carpenter +Link: https://lore.kernel.org/r/YuJ4aZyMUlG6yGGa@kili +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/stmicro/stmmac/dwmac-mediatek.c | 9 +++++---- + 1 file changed, 5 insertions(+), 4 deletions(-) + +diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-mediatek.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-mediatek.c +index ca8ab290013c..d42e1afb6521 100644 +--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-mediatek.c ++++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-mediatek.c +@@ -688,18 +688,19 @@ static int mediatek_dwmac_probe(struct platform_device *pdev) + + ret = mediatek_dwmac_clks_config(priv_plat, true); + if (ret) +- return ret; ++ goto err_remove_config_dt; + + ret = stmmac_dvr_probe(&pdev->dev, plat_dat, &stmmac_res); +- if (ret) { +- stmmac_remove_config_dt(pdev, plat_dat); ++ if (ret) + goto err_drv_probe; +- } + + return 0; + + err_drv_probe: + mediatek_dwmac_clks_config(priv_plat, false); ++err_remove_config_dt: ++ 
stmmac_remove_config_dt(pdev, plat_dat); ++ + return ret; + } + +-- +2.35.1 + diff --git a/queue-5.18/tcp-fix-a-data-race-around-sysctl_tcp_autocorking.patch b/queue-5.18/tcp-fix-a-data-race-around-sysctl_tcp_autocorking.patch new file mode 100644 index 00000000000..aff0f49d24a --- /dev/null +++ b/queue-5.18/tcp-fix-a-data-race-around-sysctl_tcp_autocorking.patch @@ -0,0 +1,36 @@ +From 01a8b323793dea3387afc38484649d5790b4aeed Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 20 Jul 2022 09:50:25 -0700 +Subject: tcp: Fix a data-race around sysctl_tcp_autocorking. + +From: Kuniyuki Iwashima + +[ Upstream commit 85225e6f0a76e6745bc841c9f25169c509b573d8 ] + +While reading sysctl_tcp_autocorking, it can be changed concurrently. +Thus, we need to add READ_ONCE() to its reader. + +Fixes: f54b311142a9 ("tcp: auto corking") +Signed-off-by: Kuniyuki Iwashima +Signed-off-by: David S. Miller +Signed-off-by: Sasha Levin +--- + net/ipv4/tcp.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c +index 97fed1217b7f..60b46f2a6896 100644 +--- a/net/ipv4/tcp.c ++++ b/net/ipv4/tcp.c +@@ -686,7 +686,7 @@ static bool tcp_should_autocork(struct sock *sk, struct sk_buff *skb, + int size_goal) + { + return skb->len < size_goal && +- sock_net(sk)->ipv4.sysctl_tcp_autocorking && ++ READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_autocorking) && + !tcp_rtx_queue_empty(sk) && + refcount_read(&sk->sk_wmem_alloc) > skb->truesize && + tcp_skb_can_collapse_to(skb); +-- +2.35.1 + diff --git a/queue-5.18/tcp-fix-a-data-race-around-sysctl_tcp_comp_sack_dela.patch b/queue-5.18/tcp-fix-a-data-race-around-sysctl_tcp_comp_sack_dela.patch new file mode 100644 index 00000000000..066d001fc7d --- /dev/null +++ b/queue-5.18/tcp-fix-a-data-race-around-sysctl_tcp_comp_sack_dela.patch @@ -0,0 +1,37 @@ +From 02ba4d04348b13331c2da63b04a04a0b950dd91e Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 22 Jul 2022 11:22:01 -0700 +Subject: tcp: Fix a data-race 
around sysctl_tcp_comp_sack_delay_ns. + +From: Kuniyuki Iwashima + +[ Upstream commit 4866b2b0f7672b6d760c4b8ece6fb56f965dcc8a ] + +While reading sysctl_tcp_comp_sack_delay_ns, it can be changed +concurrently. Thus, we need to add READ_ONCE() to its reader. + +Fixes: 6d82aa242092 ("tcp: add tcp_comp_sack_delay_ns sysctl") +Signed-off-by: Kuniyuki Iwashima +Signed-off-by: David S. Miller +Signed-off-by: Sasha Levin +--- + net/ipv4/tcp_input.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c +index f09b1321a960..3591a25a8631 100644 +--- a/net/ipv4/tcp_input.c ++++ b/net/ipv4/tcp_input.c +@@ -5546,7 +5546,8 @@ static void __tcp_ack_snd_check(struct sock *sk, int ofo_possible) + if (tp->srtt_us && tp->srtt_us < rtt) + rtt = tp->srtt_us; + +- delay = min_t(unsigned long, sock_net(sk)->ipv4.sysctl_tcp_comp_sack_delay_ns, ++ delay = min_t(unsigned long, ++ READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_comp_sack_delay_ns), + rtt * (NSEC_PER_USEC >> 3)/20); + sock_hold(sk); + hrtimer_start_range_ns(&tp->compressed_ack_timer, ns_to_ktime(delay), +-- +2.35.1 + diff --git a/queue-5.18/tcp-fix-a-data-race-around-sysctl_tcp_comp_sack_nr.patch b/queue-5.18/tcp-fix-a-data-race-around-sysctl_tcp_comp_sack_nr.patch new file mode 100644 index 00000000000..c81e0cf1f03 --- /dev/null +++ b/queue-5.18/tcp-fix-a-data-race-around-sysctl_tcp_comp_sack_nr.patch @@ -0,0 +1,36 @@ +From f6fa1599ed2f020baa02e25ceedda7ac6662f41e Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 22 Jul 2022 11:22:03 -0700 +Subject: tcp: Fix a data-race around sysctl_tcp_comp_sack_nr. + +From: Kuniyuki Iwashima + +[ Upstream commit 79f55473bfc8ac51bd6572929a679eeb4da22251 ] + +While reading sysctl_tcp_comp_sack_nr, it can be changed concurrently. +Thus, we need to add READ_ONCE() to its reader. + +Fixes: 9c21d2fc41c0 ("tcp: add tcp_comp_sack_nr sysctl") +Signed-off-by: Kuniyuki Iwashima +Signed-off-by: David S. 
Miller +Signed-off-by: Sasha Levin +--- + net/ipv4/tcp_input.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c +index 5de396075a27..9221c8c7b9a9 100644 +--- a/net/ipv4/tcp_input.c ++++ b/net/ipv4/tcp_input.c +@@ -5525,7 +5525,7 @@ static void __tcp_ack_snd_check(struct sock *sk, int ofo_possible) + } + + if (!tcp_is_sack(tp) || +- tp->compressed_ack >= sock_net(sk)->ipv4.sysctl_tcp_comp_sack_nr) ++ tp->compressed_ack >= READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_comp_sack_nr)) + goto send_now; + + if (tp->compressed_ack_rcv_nxt != tp->rcv_nxt) { +-- +2.35.1 + diff --git a/queue-5.18/tcp-fix-a-data-race-around-sysctl_tcp_comp_sack_slac.patch b/queue-5.18/tcp-fix-a-data-race-around-sysctl_tcp_comp_sack_slac.patch new file mode 100644 index 00000000000..27b96a9079a --- /dev/null +++ b/queue-5.18/tcp-fix-a-data-race-around-sysctl_tcp_comp_sack_slac.patch @@ -0,0 +1,36 @@ +From b4651439481b0ac6716ce52630ecc8edca98b399 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 22 Jul 2022 11:22:02 -0700 +Subject: tcp: Fix a data-race around sysctl_tcp_comp_sack_slack_ns. + +From: Kuniyuki Iwashima + +[ Upstream commit 22396941a7f343d704738360f9ef0e6576489d43 ] + +While reading sysctl_tcp_comp_sack_slack_ns, it can be changed +concurrently. Thus, we need to add READ_ONCE() to its reader. + +Fixes: a70437cc09a1 ("tcp: add hrtimer slack to sack compression") +Signed-off-by: Kuniyuki Iwashima +Signed-off-by: David S. 
Miller +Signed-off-by: Sasha Levin +--- + net/ipv4/tcp_input.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c +index 3591a25a8631..5de396075a27 100644 +--- a/net/ipv4/tcp_input.c ++++ b/net/ipv4/tcp_input.c +@@ -5551,7 +5551,7 @@ static void __tcp_ack_snd_check(struct sock *sk, int ofo_possible) + rtt * (NSEC_PER_USEC >> 3)/20); + sock_hold(sk); + hrtimer_start_range_ns(&tp->compressed_ack_timer, ns_to_ktime(delay), +- sock_net(sk)->ipv4.sysctl_tcp_comp_sack_slack_ns, ++ READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_comp_sack_slack_ns), + HRTIMER_MODE_REL_PINNED_SOFT); + } + +-- +2.35.1 + diff --git a/queue-5.18/tcp-fix-a-data-race-around-sysctl_tcp_invalid_rateli.patch b/queue-5.18/tcp-fix-a-data-race-around-sysctl_tcp_invalid_rateli.patch new file mode 100644 index 00000000000..70730a007cb --- /dev/null +++ b/queue-5.18/tcp-fix-a-data-race-around-sysctl_tcp_invalid_rateli.patch @@ -0,0 +1,37 @@ +From 38838e77f2d5f3e773dae060bb45a5bf78bed89e Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 20 Jul 2022 09:50:26 -0700 +Subject: tcp: Fix a data-race around sysctl_tcp_invalid_ratelimit. + +From: Kuniyuki Iwashima + +[ Upstream commit 2afdbe7b8de84c28e219073a6661080e1b3ded48 ] + +While reading sysctl_tcp_invalid_ratelimit, it can be changed +concurrently. Thus, we need to add READ_ONCE() to its reader. + +Fixes: 032ee4236954 ("tcp: helpers to mitigate ACK loops by rate-limiting out-of-window dupacks") +Signed-off-by: Kuniyuki Iwashima +Signed-off-by: David S. 
Miller +Signed-off-by: Sasha Levin +--- + net/ipv4/tcp_input.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c +index f3b658fa3e7b..db78197a44ff 100644 +--- a/net/ipv4/tcp_input.c ++++ b/net/ipv4/tcp_input.c +@@ -3581,7 +3581,8 @@ static bool __tcp_oow_rate_limited(struct net *net, int mib_idx, + if (*last_oow_ack_time) { + s32 elapsed = (s32)(tcp_jiffies32 - *last_oow_ack_time); + +- if (0 <= elapsed && elapsed < net->ipv4.sysctl_tcp_invalid_ratelimit) { ++ if (0 <= elapsed && ++ elapsed < READ_ONCE(net->ipv4.sysctl_tcp_invalid_ratelimit)) { + NET_INC_STATS(net, mib_idx); + return true; /* rate-limited: don't send yet! */ + } +-- +2.35.1 + diff --git a/queue-5.18/tcp-fix-a-data-race-around-sysctl_tcp_min_rtt_wlen.patch b/queue-5.18/tcp-fix-a-data-race-around-sysctl_tcp_min_rtt_wlen.patch new file mode 100644 index 00000000000..f164939e3df --- /dev/null +++ b/queue-5.18/tcp-fix-a-data-race-around-sysctl_tcp_min_rtt_wlen.patch @@ -0,0 +1,36 @@ +From f6cd91e480589d9df11b81dae87321d73f5512f6 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 20 Jul 2022 09:50:24 -0700 +Subject: tcp: Fix a data-race around sysctl_tcp_min_rtt_wlen. + +From: Kuniyuki Iwashima + +[ Upstream commit 1330ffacd05fc9ac4159d19286ce119e22450ed2 ] + +While reading sysctl_tcp_min_rtt_wlen, it can be changed concurrently. +Thus, we need to add READ_ONCE() to its reader. + +Fixes: f672258391b4 ("tcp: track min RTT using windowed min-filter") +Signed-off-by: Kuniyuki Iwashima +Signed-off-by: David S. 
Miller +Signed-off-by: Sasha Levin +--- + net/ipv4/tcp_input.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c +index 78e16891f12b..f3b658fa3e7b 100644 +--- a/net/ipv4/tcp_input.c ++++ b/net/ipv4/tcp_input.c +@@ -3058,7 +3058,7 @@ static void tcp_fastretrans_alert(struct sock *sk, const u32 prior_snd_una, + + static void tcp_update_rtt_min(struct sock *sk, u32 rtt_us, const int flag) + { +- u32 wlen = sock_net(sk)->ipv4.sysctl_tcp_min_rtt_wlen * HZ; ++ u32 wlen = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_min_rtt_wlen) * HZ; + struct tcp_sock *tp = tcp_sk(sk); + + if ((flag & FLAG_ACK_MAYBE_DELAYED) && rtt_us > tcp_min_rtt(tp)) { +-- +2.35.1 + diff --git a/queue-5.18/tcp-fix-a-data-race-around-sysctl_tcp_min_tso_segs.patch b/queue-5.18/tcp-fix-a-data-race-around-sysctl_tcp_min_tso_segs.patch new file mode 100644 index 00000000000..04c5a758da5 --- /dev/null +++ b/queue-5.18/tcp-fix-a-data-race-around-sysctl_tcp_min_tso_segs.patch @@ -0,0 +1,36 @@ +From c6fc7e6fbd65bc1659e195592c02e193e9bde34b Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 20 Jul 2022 09:50:22 -0700 +Subject: tcp: Fix a data-race around sysctl_tcp_min_tso_segs. + +From: Kuniyuki Iwashima + +[ Upstream commit e0bb4ab9dfddd872622239f49fb2bd403b70853b ] + +While reading sysctl_tcp_min_tso_segs, it can be changed concurrently. +Thus, we need to add READ_ONCE() to its reader. + +Fixes: 95bd09eb2750 ("tcp: TSO packets automatic sizing") +Signed-off-by: Kuniyuki Iwashima +Signed-off-by: David S. Miller +Signed-off-by: Sasha Levin +--- + net/ipv4/tcp_output.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c +index 6a3adb0222f4..08466421e7e0 100644 +--- a/net/ipv4/tcp_output.c ++++ b/net/ipv4/tcp_output.c +@@ -1990,7 +1990,7 @@ static u32 tcp_tso_segs(struct sock *sk, unsigned int mss_now) + + min_tso = ca_ops->min_tso_segs ? 
+ ca_ops->min_tso_segs(sk) : +- sock_net(sk)->ipv4.sysctl_tcp_min_tso_segs; ++ READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_min_tso_segs); + + tso_segs = tcp_tso_autosize(sk, mss_now, min_tso); + return min_t(u32, tso_segs, sk->sk_gso_max_segs); +-- +2.35.1 + diff --git a/queue-5.18/tcp-fix-a-data-race-around-sysctl_tcp_tso_rtt_log.patch b/queue-5.18/tcp-fix-a-data-race-around-sysctl_tcp_tso_rtt_log.patch new file mode 100644 index 00000000000..c85e8d11e41 --- /dev/null +++ b/queue-5.18/tcp-fix-a-data-race-around-sysctl_tcp_tso_rtt_log.patch @@ -0,0 +1,36 @@ +From 32e56d364a747fcbd462c5f42fe5c67a57b23f57 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 20 Jul 2022 09:50:23 -0700 +Subject: tcp: Fix a data-race around sysctl_tcp_tso_rtt_log. + +From: Kuniyuki Iwashima + +[ Upstream commit 2455e61b85e9c99af38cd889a7101f1d48b33cb4 ] + +While reading sysctl_tcp_tso_rtt_log, it can be changed concurrently. +Thus, we need to add READ_ONCE() to its reader. + +Fixes: 65466904b015 ("tcp: adjust TSO packet sizes based on min_rtt") +Signed-off-by: Kuniyuki Iwashima +Signed-off-by: David S. 
Miller +Signed-off-by: Sasha Levin +--- + net/ipv4/tcp_output.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c +index 08466421e7e0..60c9f7f444e0 100644 +--- a/net/ipv4/tcp_output.c ++++ b/net/ipv4/tcp_output.c +@@ -1971,7 +1971,7 @@ static u32 tcp_tso_autosize(const struct sock *sk, unsigned int mss_now, + + bytes = sk->sk_pacing_rate >> READ_ONCE(sk->sk_pacing_shift); + +- r = tcp_min_rtt(tcp_sk(sk)) >> sock_net(sk)->ipv4.sysctl_tcp_tso_rtt_log; ++ r = tcp_min_rtt(tcp_sk(sk)) >> READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_tso_rtt_log); + if (r < BITS_PER_TYPE(sk->sk_gso_max_size)) + bytes += sk->sk_gso_max_size >> r; + +-- +2.35.1 + diff --git a/queue-5.18/tcp-fix-data-races-around-sk_pacing_rate.patch b/queue-5.18/tcp-fix-data-races-around-sk_pacing_rate.patch new file mode 100644 index 00000000000..1efb21a4389 --- /dev/null +++ b/queue-5.18/tcp-fix-data-races-around-sk_pacing_rate.patch @@ -0,0 +1,39 @@ +From 01b5a0cfd3caecff42d228bbde2550dbd28747ef Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 22 Jul 2022 11:21:59 -0700 +Subject: tcp: Fix data-races around sk_pacing_rate. + +From: Kuniyuki Iwashima + +[ Upstream commit 59bf6c65a09fff74215517aecffbbdcd67df76e3 ] + +While reading sysctl_tcp_pacing_(ss|ca)_ratio, they can be changed +concurrently. Thus, we need to add READ_ONCE() to their readers. + +Fixes: 43e122b014c9 ("tcp: refine pacing rate determination") +Signed-off-by: Kuniyuki Iwashima +Signed-off-by: David S. Miller +Signed-off-by: Sasha Levin +--- + net/ipv4/tcp_input.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c +index db78197a44ff..de066fad7dfe 100644 +--- a/net/ipv4/tcp_input.c ++++ b/net/ipv4/tcp_input.c +@@ -910,9 +910,9 @@ static void tcp_update_pacing_rate(struct sock *sk) + * end of slow start and should slow down. 
+ */ + if (tcp_snd_cwnd(tp) < tp->snd_ssthresh / 2) +- rate *= sock_net(sk)->ipv4.sysctl_tcp_pacing_ss_ratio; ++ rate *= READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_pacing_ss_ratio); + else +- rate *= sock_net(sk)->ipv4.sysctl_tcp_pacing_ca_ratio; ++ rate *= READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_pacing_ca_ratio); + + rate *= max(tcp_snd_cwnd(tp), tp->packets_out); + +-- +2.35.1 + diff --git a/queue-5.18/tcp-fix-data-races-around-sysctl_tcp_reflect_tos.patch b/queue-5.18/tcp-fix-data-races-around-sysctl_tcp_reflect_tos.patch new file mode 100644 index 00000000000..58ffedc0393 --- /dev/null +++ b/queue-5.18/tcp-fix-data-races-around-sysctl_tcp_reflect_tos.patch @@ -0,0 +1,69 @@ +From 82f76e8e3169ac0577d81c1deea468e0a54b08ea Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 22 Jul 2022 11:22:04 -0700 +Subject: tcp: Fix data-races around sysctl_tcp_reflect_tos. + +From: Kuniyuki Iwashima + +[ Upstream commit 870e3a634b6a6cb1543b359007aca73fe6a03ac5 ] + +While reading sysctl_tcp_reflect_tos, it can be changed concurrently. +Thus, we need to add READ_ONCE() to its readers. + +Fixes: ac8f1710c12b ("tcp: reflect tos value received in SYN to the socket") +Signed-off-by: Kuniyuki Iwashima +Acked-by: Wei Wang +Signed-off-by: David S. Miller +Signed-off-by: Sasha Levin +--- + net/ipv4/tcp_ipv4.c | 4 ++-- + net/ipv6/tcp_ipv6.c | 4 ++-- + 2 files changed, 4 insertions(+), 4 deletions(-) + +diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c +index a57f96b86874..1db9938163c4 100644 +--- a/net/ipv4/tcp_ipv4.c ++++ b/net/ipv4/tcp_ipv4.c +@@ -1007,7 +1007,7 @@ static int tcp_v4_send_synack(const struct sock *sk, struct dst_entry *dst, + if (skb) { + __tcp_v4_send_check(skb, ireq->ir_loc_addr, ireq->ir_rmt_addr); + +- tos = sock_net(sk)->ipv4.sysctl_tcp_reflect_tos ? ++ tos = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reflect_tos) ? 
+ (tcp_rsk(req)->syn_tos & ~INET_ECN_MASK) | + (inet_sk(sk)->tos & INET_ECN_MASK) : + inet_sk(sk)->tos; +@@ -1527,7 +1527,7 @@ struct sock *tcp_v4_syn_recv_sock(const struct sock *sk, struct sk_buff *skb, + /* Set ToS of the new socket based upon the value of incoming SYN. + * ECT bits are set later in tcp_init_transfer(). + */ +- if (sock_net(sk)->ipv4.sysctl_tcp_reflect_tos) ++ if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reflect_tos)) + newinet->tos = tcp_rsk(req)->syn_tos & ~INET_ECN_MASK; + + if (!dst) { +diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c +index 5185c11dc444..979e0d7b2119 100644 +--- a/net/ipv6/tcp_ipv6.c ++++ b/net/ipv6/tcp_ipv6.c +@@ -546,7 +546,7 @@ static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst, + if (np->repflow && ireq->pktopts) + fl6->flowlabel = ip6_flowlabel(ipv6_hdr(ireq->pktopts)); + +- tclass = sock_net(sk)->ipv4.sysctl_tcp_reflect_tos ? ++ tclass = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reflect_tos) ? + (tcp_rsk(req)->syn_tos & ~INET_ECN_MASK) | + (np->tclass & INET_ECN_MASK) : + np->tclass; +@@ -1314,7 +1314,7 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff * + /* Set ToS of the new socket based upon the value of incoming SYN. + * ECT bits are set later in tcp_init_transfer(). 
+ */ +- if (sock_net(sk)->ipv4.sysctl_tcp_reflect_tos) ++ if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reflect_tos)) + newnp->tclass = tcp_rsk(req)->syn_tos & ~INET_ECN_MASK; + + /* Clone native IPv6 options from listening socket (if any) +-- +2.35.1 + diff --git a/queue-5.18/virtio-net-fix-the-race-between-refill-work-and-clos.patch b/queue-5.18/virtio-net-fix-the-race-between-refill-work-and-clos.patch new file mode 100644 index 00000000000..595f15a3001 --- /dev/null +++ b/queue-5.18/virtio-net-fix-the-race-between-refill-work-and-clos.patch @@ -0,0 +1,151 @@ +From ca596e791e5e6909c71d2ea7d91a4ecc5db11e57 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 25 Jul 2022 15:21:59 +0800 +Subject: virtio-net: fix the race between refill work and close + +From: Jason Wang + +[ Upstream commit 5a159128faff151b7fe5f4eb0f310b1e0a2d56bf ] + +We try using cancel_delayed_work_sync() to prevent the work from +enabling NAPI. This is insufficient since we don't disable the source +of the refill work scheduling. This means an NAPI poll callback after +cancel_delayed_work_sync() can schedule the refill work then can +re-enable the NAPI that leads to use-after-free [1]. + +Since the work can enable NAPI, we can't simply disable NAPI before +calling cancel_delayed_work_sync(). So fix this by introducing a +dedicated boolean to control whether or not the work could be +scheduled from NAPI. + +[1] +================================================================== +BUG: KASAN: use-after-free in refill_work+0x43/0xd4 +Read of size 2 at addr ffff88810562c92e by task kworker/2:1/42 + +CPU: 2 PID: 42 Comm: kworker/2:1 Not tainted 5.19.0-rc1+ #480 +Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.16.0-0-gd239552ce722-prebuilt.qemu.org 04/01/2014 +Workqueue: events refill_work +Call Trace: + + dump_stack_lvl+0x34/0x44 + print_report.cold+0xbb/0x6ac + ? _printk+0xad/0xde + ? refill_work+0x43/0xd4 + kasan_report+0xa8/0x130 + ? 
refill_work+0x43/0xd4 + refill_work+0x43/0xd4 + process_one_work+0x43d/0x780 + worker_thread+0x2a0/0x6f0 + ? process_one_work+0x780/0x780 + kthread+0x167/0x1a0 + ? kthread_exit+0x50/0x50 + ret_from_fork+0x22/0x30 + +... + +Fixes: b2baed69e605c ("virtio_net: set/cancel work on ndo_open/ndo_stop") +Signed-off-by: Jason Wang +Acked-by: Michael S. Tsirkin +Reviewed-by: Xuan Zhuo +Signed-off-by: David S. Miller +Signed-off-by: Sasha Levin +--- + drivers/net/virtio_net.c | 37 ++++++++++++++++++++++++++++++++++--- + 1 file changed, 34 insertions(+), 3 deletions(-) + +diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c +index c7804fce204c..206904e60784 100644 +--- a/drivers/net/virtio_net.c ++++ b/drivers/net/virtio_net.c +@@ -242,9 +242,15 @@ struct virtnet_info { + /* Packet virtio header size */ + u8 hdr_len; + +- /* Work struct for refilling if we run low on memory. */ ++ /* Work struct for delayed refilling if we run low on memory. */ + struct delayed_work refill; + ++ /* Is delayed refill enabled? 
*/ ++ bool refill_enabled; ++ ++ /* The lock to synchronize the access to refill_enabled */ ++ spinlock_t refill_lock; ++ + /* Work struct for config space updates */ + struct work_struct config_work; + +@@ -348,6 +354,20 @@ static struct page *get_a_page(struct receive_queue *rq, gfp_t gfp_mask) + return p; + } + ++static void enable_delayed_refill(struct virtnet_info *vi) ++{ ++ spin_lock_bh(&vi->refill_lock); ++ vi->refill_enabled = true; ++ spin_unlock_bh(&vi->refill_lock); ++} ++ ++static void disable_delayed_refill(struct virtnet_info *vi) ++{ ++ spin_lock_bh(&vi->refill_lock); ++ vi->refill_enabled = false; ++ spin_unlock_bh(&vi->refill_lock); ++} ++ + static void virtqueue_napi_schedule(struct napi_struct *napi, + struct virtqueue *vq) + { +@@ -1527,8 +1547,12 @@ static int virtnet_receive(struct receive_queue *rq, int budget, + } + + if (rq->vq->num_free > min((unsigned int)budget, virtqueue_get_vring_size(rq->vq)) / 2) { +- if (!try_fill_recv(vi, rq, GFP_ATOMIC)) +- schedule_delayed_work(&vi->refill, 0); ++ if (!try_fill_recv(vi, rq, GFP_ATOMIC)) { ++ spin_lock(&vi->refill_lock); ++ if (vi->refill_enabled) ++ schedule_delayed_work(&vi->refill, 0); ++ spin_unlock(&vi->refill_lock); ++ } + } + + u64_stats_update_begin(&rq->stats.syncp); +@@ -1651,6 +1675,8 @@ static int virtnet_open(struct net_device *dev) + struct virtnet_info *vi = netdev_priv(dev); + int i, err; + ++ enable_delayed_refill(vi); ++ + for (i = 0; i < vi->max_queue_pairs; i++) { + if (i < vi->curr_queue_pairs) + /* Make sure we have some buffers: if oom use wq. */ +@@ -2033,6 +2059,8 @@ static int virtnet_close(struct net_device *dev) + struct virtnet_info *vi = netdev_priv(dev); + int i; + ++ /* Make sure NAPI doesn't schedule refill work */ ++ disable_delayed_refill(vi); + /* Make sure refill_work doesn't re-enable napi! 
*/ + cancel_delayed_work_sync(&vi->refill); + +@@ -2792,6 +2820,8 @@ static int virtnet_restore_up(struct virtio_device *vdev) + + virtio_device_ready(vdev); + ++ enable_delayed_refill(vi); ++ + if (netif_running(vi->dev)) { + err = virtnet_open(vi->dev); + if (err) +@@ -3534,6 +3564,7 @@ static int virtnet_probe(struct virtio_device *vdev) + vdev->priv = vi; + + INIT_WORK(&vi->config_work, virtnet_config_changed_work); ++ spin_lock_init(&vi->refill_lock); + + /* If we can receive ANY GSO packets, we must allocate large ones. */ + if (virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_TSO4) || +-- +2.35.1 +