From: Sasha Levin Date: Sun, 31 Jul 2022 02:41:29 +0000 (-0400) Subject: Fixes for 5.10 X-Git-Tag: v5.4.209~38 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=ad4b717451700d2dad0f05268896899e08e0f6f8;p=thirdparty%2Fkernel%2Fstable-queue.git Fixes for 5.10 Signed-off-by: Sasha Levin --- diff --git a/queue-5.10/documentation-fix-sctp_wmem-in-ip-sysctl.rst.patch b/queue-5.10/documentation-fix-sctp_wmem-in-ip-sysctl.rst.patch new file mode 100644 index 00000000000..feab6b8b246 --- /dev/null +++ b/queue-5.10/documentation-fix-sctp_wmem-in-ip-sysctl.rst.patch @@ -0,0 +1,49 @@ +From 49db61cab0ef7631e6a7d2c91641a48dae668186 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 21 Jul 2022 10:35:46 -0400 +Subject: Documentation: fix sctp_wmem in ip-sysctl.rst + +From: Xin Long + +[ Upstream commit aa709da0e032cee7c202047ecd75f437bb0126ed ] + +Since commit 1033990ac5b2 ("sctp: implement memory accounting on tx path"), +SCTP has supported memory accounting on tx path where 'sctp_wmem' is used +by sk_wmem_schedule(). So we should fix the description for this option in +ip-sysctl.rst accordingly. + +v1->v2: + - Improve the description as Marcelo suggested. + +Fixes: 1033990ac5b2 ("sctp: implement memory accounting on tx path") +Signed-off-by: Xin Long +Acked-by: Marcelo Ricardo Leitner +Signed-off-by: David S. Miller +Signed-off-by: Sasha Levin +--- + Documentation/networking/ip-sysctl.rst | 9 ++++++++- + 1 file changed, 8 insertions(+), 1 deletion(-) + +diff --git a/Documentation/networking/ip-sysctl.rst b/Documentation/networking/ip-sysctl.rst +index 0b1f3235aa77..0158dff63887 100644 +--- a/Documentation/networking/ip-sysctl.rst ++++ b/Documentation/networking/ip-sysctl.rst +@@ -2629,7 +2629,14 @@ sctp_rmem - vector of 3 INTEGERs: min, default, max + Default: 4K + + sctp_wmem - vector of 3 INTEGERs: min, default, max +- Currently this tunable has no effect. ++ Only the first value ("min") is used, "default" and "max" are ++ ignored. 
++ ++ min: Minimum size of send buffer that can be used by SCTP sockets. ++ It is guaranteed to each SCTP socket (but not association) even ++ under moderate memory pressure. ++ ++ Default: 4K + + addr_scope_policy - INTEGER + Control IPv4 address scoping - draft-stewart-tsvwg-sctp-ipv4-00 +-- +2.35.1 + diff --git a/queue-5.10/i40e-fix-interface-init-with-msi-interrupts-no-msi-x.patch b/queue-5.10/i40e-fix-interface-init-with-msi-interrupts-no-msi-x.patch new file mode 100644 index 00000000000..412ff0d73e7 --- /dev/null +++ b/queue-5.10/i40e-fix-interface-init-with-msi-interrupts-no-msi-x.patch @@ -0,0 +1,49 @@ +From ba2e983f0f8bfa687acdcfb40c23e8405809e427 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 22 Jul 2022 10:54:01 -0700 +Subject: i40e: Fix interface init with MSI interrupts (no MSI-X) + +From: Michal Maloszewski + +[ Upstream commit 5fcbb711024aac6d4db385623e6f2fdf019f7782 ] + +Fix the inability to bring an interface up on a setup with +only MSI interrupts enabled (no MSI-X). +Solution is to add a default number of QPs = 1. This is enough, +since without MSI-X support driver enables only a basic feature set. 
+ +Fixes: bc6d33c8d93f ("i40e: Fix the number of queues available to be mapped for use") +Signed-off-by: Dawid Lukwinski +Signed-off-by: Michal Maloszewski +Tested-by: Dave Switzer +Signed-off-by: Tony Nguyen +Link: https://lore.kernel.org/r/20220722175401.112572-1-anthony.l.nguyen@intel.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/intel/i40e/i40e_main.c | 4 ++++ + 1 file changed, 4 insertions(+) + +diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c +index 11d4e3ba9af4..1dad62ecb8a3 100644 +--- a/drivers/net/ethernet/intel/i40e/i40e_main.c ++++ b/drivers/net/ethernet/intel/i40e/i40e_main.c +@@ -1907,11 +1907,15 @@ static void i40e_vsi_setup_queue_map(struct i40e_vsi *vsi, + * non-zero req_queue_pairs says that user requested a new + * queue count via ethtool's set_channels, so use this + * value for queues distribution across traffic classes ++ * We need at least one queue pair for the interface ++ * to be usable as we see in else statement. + */ + if (vsi->req_queue_pairs > 0) + vsi->num_queue_pairs = vsi->req_queue_pairs; + else if (pf->flags & I40E_FLAG_MSIX_ENABLED) + vsi->num_queue_pairs = pf->num_lan_msix; ++ else ++ vsi->num_queue_pairs = 1; + } + + /* Number of queues per enabled TC */ +-- +2.35.1 + diff --git a/queue-5.10/igmp-fix-data-races-around-sysctl_igmp_qrv.patch b/queue-5.10/igmp-fix-data-races-around-sysctl_igmp_qrv.patch new file mode 100644 index 00000000000..cbc052ce8c3 --- /dev/null +++ b/queue-5.10/igmp-fix-data-races-around-sysctl_igmp_qrv.patch @@ -0,0 +1,127 @@ +From 52a89fc7c3535f2fbab7e50c16deb28fb75ccfc1 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 15 Jul 2022 10:17:44 -0700 +Subject: igmp: Fix data-races around sysctl_igmp_qrv. + +From: Kuniyuki Iwashima + +[ Upstream commit 8ebcc62c738f68688ee7c6fec2efe5bc6d3d7e60 ] + +While reading sysctl_igmp_qrv, it can be changed concurrently. 
+Thus, we need to add READ_ONCE() to its readers. + +This test can be packed into a helper, so such changes will be in the +follow-up series after net is merged into net-next. + + qrv ?: READ_ONCE(net->ipv4.sysctl_igmp_qrv); + +Fixes: a9fe8e29945d ("ipv4: implement igmp_qrv sysctl to tune igmp robustness variable") +Signed-off-by: Kuniyuki Iwashima +Signed-off-by: David S. Miller +Signed-off-by: Sasha Levin +--- + net/ipv4/igmp.c | 24 +++++++++++++----------- + 1 file changed, 13 insertions(+), 11 deletions(-) + +diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c +index 428cc3a4c36f..c71b863093ac 100644 +--- a/net/ipv4/igmp.c ++++ b/net/ipv4/igmp.c +@@ -827,7 +827,7 @@ static void igmp_ifc_event(struct in_device *in_dev) + struct net *net = dev_net(in_dev->dev); + if (IGMP_V1_SEEN(in_dev) || IGMP_V2_SEEN(in_dev)) + return; +- WRITE_ONCE(in_dev->mr_ifc_count, in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv); ++ WRITE_ONCE(in_dev->mr_ifc_count, in_dev->mr_qrv ?: READ_ONCE(net->ipv4.sysctl_igmp_qrv)); + igmp_ifc_start_timer(in_dev, 1); + } + +@@ -1009,7 +1009,7 @@ static bool igmp_heard_query(struct in_device *in_dev, struct sk_buff *skb, + * received value was zero, use the default or statically + * configured value. + */ +- in_dev->mr_qrv = ih3->qrv ?: net->ipv4.sysctl_igmp_qrv; ++ in_dev->mr_qrv = ih3->qrv ?: READ_ONCE(net->ipv4.sysctl_igmp_qrv); + in_dev->mr_qi = IGMPV3_QQIC(ih3->qqic)*HZ ?: IGMP_QUERY_INTERVAL; + + /* RFC3376, 8.3. 
Query Response Interval: +@@ -1189,7 +1189,7 @@ static void igmpv3_add_delrec(struct in_device *in_dev, struct ip_mc_list *im, + pmc->interface = im->interface; + in_dev_hold(in_dev); + pmc->multiaddr = im->multiaddr; +- pmc->crcount = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv; ++ pmc->crcount = in_dev->mr_qrv ?: READ_ONCE(net->ipv4.sysctl_igmp_qrv); + pmc->sfmode = im->sfmode; + if (pmc->sfmode == MCAST_INCLUDE) { + struct ip_sf_list *psf; +@@ -1240,9 +1240,11 @@ static void igmpv3_del_delrec(struct in_device *in_dev, struct ip_mc_list *im) + swap(im->tomb, pmc->tomb); + swap(im->sources, pmc->sources); + for (psf = im->sources; psf; psf = psf->sf_next) +- psf->sf_crcount = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv; ++ psf->sf_crcount = in_dev->mr_qrv ?: ++ READ_ONCE(net->ipv4.sysctl_igmp_qrv); + } else { +- im->crcount = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv; ++ im->crcount = in_dev->mr_qrv ?: ++ READ_ONCE(net->ipv4.sysctl_igmp_qrv); + } + in_dev_put(pmc->interface); + kfree_pmc(pmc); +@@ -1349,7 +1351,7 @@ static void igmp_group_added(struct ip_mc_list *im) + if (in_dev->dead) + return; + +- im->unsolicit_count = net->ipv4.sysctl_igmp_qrv; ++ im->unsolicit_count = READ_ONCE(net->ipv4.sysctl_igmp_qrv); + if (IGMP_V1_SEEN(in_dev) || IGMP_V2_SEEN(in_dev)) { + spin_lock_bh(&im->lock); + igmp_start_timer(im, IGMP_INITIAL_REPORT_DELAY); +@@ -1363,7 +1365,7 @@ static void igmp_group_added(struct ip_mc_list *im) + * IN() to IN(A). 
+ */ + if (im->sfmode == MCAST_EXCLUDE) +- im->crcount = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv; ++ im->crcount = in_dev->mr_qrv ?: READ_ONCE(net->ipv4.sysctl_igmp_qrv); + + igmp_ifc_event(in_dev); + #endif +@@ -1754,7 +1756,7 @@ static void ip_mc_reset(struct in_device *in_dev) + + in_dev->mr_qi = IGMP_QUERY_INTERVAL; + in_dev->mr_qri = IGMP_QUERY_RESPONSE_INTERVAL; +- in_dev->mr_qrv = net->ipv4.sysctl_igmp_qrv; ++ in_dev->mr_qrv = READ_ONCE(net->ipv4.sysctl_igmp_qrv); + } + #else + static void ip_mc_reset(struct in_device *in_dev) +@@ -1888,7 +1890,7 @@ static int ip_mc_del1_src(struct ip_mc_list *pmc, int sfmode, + #ifdef CONFIG_IP_MULTICAST + if (psf->sf_oldin && + !IGMP_V1_SEEN(in_dev) && !IGMP_V2_SEEN(in_dev)) { +- psf->sf_crcount = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv; ++ psf->sf_crcount = in_dev->mr_qrv ?: READ_ONCE(net->ipv4.sysctl_igmp_qrv); + psf->sf_next = pmc->tomb; + pmc->tomb = psf; + rv = 1; +@@ -1952,7 +1954,7 @@ static int ip_mc_del_src(struct in_device *in_dev, __be32 *pmca, int sfmode, + /* filter mode change */ + pmc->sfmode = MCAST_INCLUDE; + #ifdef CONFIG_IP_MULTICAST +- pmc->crcount = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv; ++ pmc->crcount = in_dev->mr_qrv ?: READ_ONCE(net->ipv4.sysctl_igmp_qrv); + WRITE_ONCE(in_dev->mr_ifc_count, pmc->crcount); + for (psf = pmc->sources; psf; psf = psf->sf_next) + psf->sf_crcount = 0; +@@ -2131,7 +2133,7 @@ static int ip_mc_add_src(struct in_device *in_dev, __be32 *pmca, int sfmode, + #ifdef CONFIG_IP_MULTICAST + /* else no filters; keep old mode for reports */ + +- pmc->crcount = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv; ++ pmc->crcount = in_dev->mr_qrv ?: READ_ONCE(net->ipv4.sysctl_igmp_qrv); + WRITE_ONCE(in_dev->mr_ifc_count, pmc->crcount); + for (psf = pmc->sources; psf; psf = psf->sf_next) + psf->sf_crcount = 0; +-- +2.35.1 + diff --git a/queue-5.10/macsec-always-read-macsec_sa_attr_pn-as-a-u64.patch b/queue-5.10/macsec-always-read-macsec_sa_attr_pn-as-a-u64.patch new file mode 
100644 index 00000000000..31797b0899e --- /dev/null +++ b/queue-5.10/macsec-always-read-macsec_sa_attr_pn-as-a-u64.patch @@ -0,0 +1,62 @@ +From 2ce1c106d330dd7efefd2385429ce9167d0e8e64 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 22 Jul 2022 11:16:30 +0200 +Subject: macsec: always read MACSEC_SA_ATTR_PN as a u64 + +From: Sabrina Dubroca + +[ Upstream commit c630d1fe6219769049c87d1a6a0e9a6de55328a1 ] + +Currently, MACSEC_SA_ATTR_PN is handled inconsistently, sometimes as a +u32, sometimes forced into a u64 without checking the actual length of +the attribute. Instead, we can use nla_get_u64 everywhere, which will +read up to 64 bits into a u64, capped by the actual length of the +attribute coming from userspace. + +This fixes several issues: + - the check in validate_add_rxsa doesn't work with 32-bit attributes + - the checks in validate_add_txsa and validate_upd_sa incorrectly + reject X << 32 (with X != 0) + +Fixes: 48ef50fa866a ("macsec: Netlink support of XPN cipher suites (IEEE 802.1AEbw)") +Signed-off-by: Sabrina Dubroca +Signed-off-by: David S. 
Miller +Signed-off-by: Sasha Levin +--- + drivers/net/macsec.c | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c +index 96dc7bd4813d..8d73b72d6179 100644 +--- a/drivers/net/macsec.c ++++ b/drivers/net/macsec.c +@@ -1695,7 +1695,7 @@ static bool validate_add_rxsa(struct nlattr **attrs) + return false; + + if (attrs[MACSEC_SA_ATTR_PN] && +- *(u64 *)nla_data(attrs[MACSEC_SA_ATTR_PN]) == 0) ++ nla_get_u64(attrs[MACSEC_SA_ATTR_PN]) == 0) + return false; + + if (attrs[MACSEC_SA_ATTR_ACTIVE]) { +@@ -1938,7 +1938,7 @@ static bool validate_add_txsa(struct nlattr **attrs) + if (nla_get_u8(attrs[MACSEC_SA_ATTR_AN]) >= MACSEC_NUM_AN) + return false; + +- if (nla_get_u32(attrs[MACSEC_SA_ATTR_PN]) == 0) ++ if (nla_get_u64(attrs[MACSEC_SA_ATTR_PN]) == 0) + return false; + + if (attrs[MACSEC_SA_ATTR_ACTIVE]) { +@@ -2292,7 +2292,7 @@ static bool validate_upd_sa(struct nlattr **attrs) + if (nla_get_u8(attrs[MACSEC_SA_ATTR_AN]) >= MACSEC_NUM_AN) + return false; + +- if (attrs[MACSEC_SA_ATTR_PN] && nla_get_u32(attrs[MACSEC_SA_ATTR_PN]) == 0) ++ if (attrs[MACSEC_SA_ATTR_PN] && nla_get_u64(attrs[MACSEC_SA_ATTR_PN]) == 0) + return false; + + if (attrs[MACSEC_SA_ATTR_ACTIVE]) { +-- +2.35.1 + diff --git a/queue-5.10/macsec-fix-error-message-in-macsec_add_rxsa-and-_txs.patch b/queue-5.10/macsec-fix-error-message-in-macsec_add_rxsa-and-_txs.patch new file mode 100644 index 00000000000..8427f1929a2 --- /dev/null +++ b/queue-5.10/macsec-fix-error-message-in-macsec_add_rxsa-and-_txs.patch @@ -0,0 +1,44 @@ +From 229023933d6a2a15133e0683e882d056b7edad88 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 22 Jul 2022 11:16:28 +0200 +Subject: macsec: fix error message in macsec_add_rxsa and _txsa + +From: Sabrina Dubroca + +[ Upstream commit 3240eac4ff20e51b87600dbd586ed814daf313db ] + +The expected length is MACSEC_SALT_LEN, not MACSEC_SA_ATTR_SALT. 
+ +Fixes: 48ef50fa866a ("macsec: Netlink support of XPN cipher suites (IEEE 802.1AEbw)") +Signed-off-by: Sabrina Dubroca +Signed-off-by: David S. Miller +Signed-off-by: Sasha Levin +--- + drivers/net/macsec.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c +index 0b53c7cadd87..c2d8bcda2503 100644 +--- a/drivers/net/macsec.c ++++ b/drivers/net/macsec.c +@@ -1767,7 +1767,7 @@ static int macsec_add_rxsa(struct sk_buff *skb, struct genl_info *info) + if (nla_len(tb_sa[MACSEC_SA_ATTR_SALT]) != MACSEC_SALT_LEN) { + pr_notice("macsec: nl: add_rxsa: bad salt length: %d != %d\n", + nla_len(tb_sa[MACSEC_SA_ATTR_SALT]), +- MACSEC_SA_ATTR_SALT); ++ MACSEC_SALT_LEN); + rtnl_unlock(); + return -EINVAL; + } +@@ -2009,7 +2009,7 @@ static int macsec_add_txsa(struct sk_buff *skb, struct genl_info *info) + if (nla_len(tb_sa[MACSEC_SA_ATTR_SALT]) != MACSEC_SALT_LEN) { + pr_notice("macsec: nl: add_txsa: bad salt length: %d != %d\n", + nla_len(tb_sa[MACSEC_SA_ATTR_SALT]), +- MACSEC_SA_ATTR_SALT); ++ MACSEC_SALT_LEN); + rtnl_unlock(); + return -EINVAL; + } +-- +2.35.1 + diff --git a/queue-5.10/macsec-fix-null-deref-in-macsec_add_rxsa.patch b/queue-5.10/macsec-fix-null-deref-in-macsec_add_rxsa.patch new file mode 100644 index 00000000000..6d8c10e59cb --- /dev/null +++ b/queue-5.10/macsec-fix-null-deref-in-macsec_add_rxsa.patch @@ -0,0 +1,45 @@ +From 6b5301e2611221df45c8de07889a609370e4a5de Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 22 Jul 2022 11:16:27 +0200 +Subject: macsec: fix NULL deref in macsec_add_rxsa + +From: Sabrina Dubroca + +[ Upstream commit f46040eeaf2e523a4096199fd93a11e794818009 ] + +Commit 48ef50fa866a added a test on tb_sa[MACSEC_SA_ATTR_PN], but +nothing guarantees that it's not NULL at this point. The same code was +added to macsec_add_txsa, but there it's not a problem because +validate_add_txsa checks that the MACSEC_SA_ATTR_PN attribute is +present. 
+ +Note: it's not possible to reproduce with iproute, because iproute +doesn't allow creating an SA without specifying the PN. + +Fixes: 48ef50fa866a ("macsec: Netlink support of XPN cipher suites (IEEE 802.1AEbw)") +Link: https://bugzilla.kernel.org/show_bug.cgi?id=208315 +Reported-by: Frantisek Sumsal +Signed-off-by: Sabrina Dubroca +Signed-off-by: David S. Miller +Signed-off-by: Sasha Levin +--- + drivers/net/macsec.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c +index 789a124809e3..0b53c7cadd87 100644 +--- a/drivers/net/macsec.c ++++ b/drivers/net/macsec.c +@@ -1750,7 +1750,8 @@ static int macsec_add_rxsa(struct sk_buff *skb, struct genl_info *info) + } + + pn_len = secy->xpn ? MACSEC_XPN_PN_LEN : MACSEC_DEFAULT_PN_LEN; +- if (nla_len(tb_sa[MACSEC_SA_ATTR_PN]) != pn_len) { ++ if (tb_sa[MACSEC_SA_ATTR_PN] && ++ nla_len(tb_sa[MACSEC_SA_ATTR_PN]) != pn_len) { + pr_notice("macsec: nl: add_rxsa: bad pn length: %d != %d\n", + nla_len(tb_sa[MACSEC_SA_ATTR_PN]), pn_len); + rtnl_unlock(); +-- +2.35.1 + diff --git a/queue-5.10/macsec-limit-replay-window-size-with-xpn.patch b/queue-5.10/macsec-limit-replay-window-size-with-xpn.patch new file mode 100644 index 00000000000..80e44c82cb8 --- /dev/null +++ b/queue-5.10/macsec-limit-replay-window-size-with-xpn.patch @@ -0,0 +1,81 @@ +From f0762ed225e24bfb0f3d44420e2723fbcd8efdf3 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 22 Jul 2022 11:16:29 +0200 +Subject: macsec: limit replay window size with XPN + +From: Sabrina Dubroca + +[ Upstream commit b07a0e2044057f201d694ab474f5c42a02b6465b ] + +IEEE 802.1AEbw-2013 (section 10.7.8) specifies that the maximum value +of the replay window is 2^30-1, to help with recovery of the upper +bits of the PN. + +To avoid leaving the existing macsec device in an inconsistent state +if this test fails during changelink, reuse the cleanup mechanism +introduced for HW offload. 
This wasn't needed until now because +macsec_changelink_common could not fail during changelink, as +modifying the cipher suite was not allowed. + +Finally, this must happen after handling IFLA_MACSEC_CIPHER_SUITE so +that secy->xpn is set. + +Fixes: 48ef50fa866a ("macsec: Netlink support of XPN cipher suites (IEEE 802.1AEbw)") +Signed-off-by: Sabrina Dubroca +Signed-off-by: David S. Miller +Signed-off-by: Sasha Levin +--- + drivers/net/macsec.c | 16 ++++++++++++---- + 1 file changed, 12 insertions(+), 4 deletions(-) + +diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c +index c2d8bcda2503..96dc7bd4813d 100644 +--- a/drivers/net/macsec.c ++++ b/drivers/net/macsec.c +@@ -240,6 +240,7 @@ static struct macsec_cb *macsec_skb_cb(struct sk_buff *skb) + #define DEFAULT_SEND_SCI true + #define DEFAULT_ENCRYPT false + #define DEFAULT_ENCODING_SA 0 ++#define MACSEC_XPN_MAX_REPLAY_WINDOW (((1 << 30) - 1)) + + static bool send_sci(const struct macsec_secy *secy) + { +@@ -3738,9 +3739,6 @@ static int macsec_changelink_common(struct net_device *dev, + secy->operational = tx_sa && tx_sa->active; + } + +- if (data[IFLA_MACSEC_WINDOW]) +- secy->replay_window = nla_get_u32(data[IFLA_MACSEC_WINDOW]); +- + if (data[IFLA_MACSEC_ENCRYPT]) + tx_sc->encrypt = !!nla_get_u8(data[IFLA_MACSEC_ENCRYPT]); + +@@ -3786,6 +3784,16 @@ static int macsec_changelink_common(struct net_device *dev, + } + } + ++ if (data[IFLA_MACSEC_WINDOW]) { ++ secy->replay_window = nla_get_u32(data[IFLA_MACSEC_WINDOW]); ++ ++ /* IEEE 802.1AEbw-2013 10.7.8 - maximum replay window ++ * for XPN cipher suites */ ++ if (secy->xpn && ++ secy->replay_window > MACSEC_XPN_MAX_REPLAY_WINDOW) ++ return -EINVAL; ++ } ++ + return 0; + } + +@@ -3815,7 +3823,7 @@ static int macsec_changelink(struct net_device *dev, struct nlattr *tb[], + + ret = macsec_changelink_common(dev, data); + if (ret) +- return ret; ++ goto cleanup; + + /* If h/w offloading is available, propagate to the device */ + if (macsec_is_offloaded(macsec)) { 
+-- +2.35.1 + diff --git a/queue-5.10/net-macsec-fix-potential-resource-leak-in-macsec_add.patch b/queue-5.10/net-macsec-fix-potential-resource-leak-in-macsec_add.patch new file mode 100644 index 00000000000..e2352be29d5 --- /dev/null +++ b/queue-5.10/net-macsec-fix-potential-resource-leak-in-macsec_add.patch @@ -0,0 +1,54 @@ +From 9bce583ccc543e38dea6e3fe415e2f3fb8a372fa Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 22 Jul 2022 17:29:02 +0800 +Subject: net: macsec: fix potential resource leak in macsec_add_rxsa() and + macsec_add_txsa() + +From: Jianglei Nie + +[ Upstream commit c7b205fbbf3cffa374721bb7623f7aa8c46074f1 ] + +init_rx_sa() allocates relevant resource for rx_sa->stats and rx_sa-> +key.tfm with alloc_percpu() and macsec_alloc_tfm(). When some error +occurs after init_rx_sa() is called in macsec_add_rxsa(), the function +released rx_sa with kfree() without releasing rx_sa->stats and rx_sa-> +key.tfm, which will lead to a resource leak. + +We should call macsec_rxsa_put() instead of kfree() to decrease the ref +count of rx_sa and release the relevant resource if the refcount is 0. +The same bug exists in macsec_add_txsa() for tx_sa as well. This patch +fixes the above two bugs. + +Fixes: 3cf3227a21d1 ("net: macsec: hardware offloading infrastructure") +Signed-off-by: Jianglei Nie +Signed-off-by: David S. 
Miller +Signed-off-by: Sasha Levin +--- + drivers/net/macsec.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c +index 8d73b72d6179..70c5905a916b 100644 +--- a/drivers/net/macsec.c ++++ b/drivers/net/macsec.c +@@ -1841,7 +1841,7 @@ static int macsec_add_rxsa(struct sk_buff *skb, struct genl_info *info) + return 0; + + cleanup: +- kfree(rx_sa); ++ macsec_rxsa_put(rx_sa); + rtnl_unlock(); + return err; + } +@@ -2084,7 +2084,7 @@ static int macsec_add_txsa(struct sk_buff *skb, struct genl_info *info) + + cleanup: + secy->operational = was_operational; +- kfree(tx_sa); ++ macsec_txsa_put(tx_sa); + rtnl_unlock(); + return err; + } +-- +2.35.1 + diff --git a/queue-5.10/net-sungem_phy-add-of_node_put-for-reference-returne.patch b/queue-5.10/net-sungem_phy-add-of_node_put-for-reference-returne.patch new file mode 100644 index 00000000000..20ef702a7ec --- /dev/null +++ b/queue-5.10/net-sungem_phy-add-of_node_put-for-reference-returne.patch @@ -0,0 +1,37 @@ +From 0fddffa67ac516a019efaa48491c9c95ce9b454b Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 20 Jul 2022 21:10:03 +0800 +Subject: net: sungem_phy: Add of_node_put() for reference returned by + of_get_parent() + +From: Liang He + +[ Upstream commit ebbbe23fdf6070e31509638df3321688358cc211 ] + +In bcm5421_init(), we should call of_node_put() for the reference +returned by of_get_parent() which has increased the refcount. 
+ +Fixes: 3c326fe9cb7a ("[PATCH] ppc64: Add new PHY to sungem") +Signed-off-by: Liang He +Link: https://lore.kernel.org/r/20220720131003.1287426-1-windhl@126.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + drivers/net/sungem_phy.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/drivers/net/sungem_phy.c b/drivers/net/sungem_phy.c +index 291fa449993f..45f295403cb5 100644 +--- a/drivers/net/sungem_phy.c ++++ b/drivers/net/sungem_phy.c +@@ -454,6 +454,7 @@ static int bcm5421_init(struct mii_phy* phy) + int can_low_power = 1; + if (np == NULL || of_get_property(np, "no-autolowpower", NULL)) + can_low_power = 0; ++ of_node_put(np); + if (can_low_power) { + /* Enable automatic low-power */ + sungem_phy_write(phy, 0x1c, 0x9002); +-- +2.35.1 + diff --git a/queue-5.10/netfilter-nf_queue-do-not-allow-packet-truncation-be.patch b/queue-5.10/netfilter-nf_queue-do-not-allow-packet-truncation-be.patch new file mode 100644 index 00000000000..4b5b9ce3aae --- /dev/null +++ b/queue-5.10/netfilter-nf_queue-do-not-allow-packet-truncation-be.patch @@ -0,0 +1,53 @@ +From 7eaaacddd03d3ed43786b5832c1d0e1d51ac3888 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 26 Jul 2022 12:42:06 +0200 +Subject: netfilter: nf_queue: do not allow packet truncation below transport + header offset + +From: Florian Westphal + +[ Upstream commit 99a63d36cb3ed5ca3aa6fcb64cffbeaf3b0fb164 ] + +Domingo Dirutigliano and Nicola Guerrera report kernel panic when +sending nf_queue verdict with 1-byte nfta_payload attribute. + +The IP/IPv6 stack pulls the IP(v6) header from the packet after the +input hook. + +If user truncates the packet below the header size, this skb_pull() will +result in a malformed skb (skb->len < 0). 
+ +Fixes: 7af4cc3fa158 ("[NETFILTER]: Add "nfnetlink_queue" netfilter queue handler over nfnetlink") +Reported-by: Domingo Dirutigliano +Signed-off-by: Florian Westphal +Reviewed-by: Pablo Neira Ayuso +Signed-off-by: Sasha Levin +--- + net/netfilter/nfnetlink_queue.c | 7 ++++++- + 1 file changed, 6 insertions(+), 1 deletion(-) + +diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c +index 1640da5c5077..72d30922ed29 100644 +--- a/net/netfilter/nfnetlink_queue.c ++++ b/net/netfilter/nfnetlink_queue.c +@@ -838,11 +838,16 @@ nfqnl_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum) + } + + static int +-nfqnl_mangle(void *data, int data_len, struct nf_queue_entry *e, int diff) ++nfqnl_mangle(void *data, unsigned int data_len, struct nf_queue_entry *e, int diff) + { + struct sk_buff *nskb; + + if (diff < 0) { ++ unsigned int min_len = skb_transport_offset(e->skb); ++ ++ if (data_len < min_len) ++ return -EINVAL; ++ + if (pskb_trim(e->skb, data_len)) + return -ENOMEM; + } else if (diff > 0) { +-- +2.35.1 + diff --git a/queue-5.10/perf-symbol-correct-address-for-bss-symbols.patch b/queue-5.10/perf-symbol-correct-address-for-bss-symbols.patch new file mode 100644 index 00000000000..f156db1ba49 --- /dev/null +++ b/queue-5.10/perf-symbol-correct-address-for-bss-symbols.patch @@ -0,0 +1,182 @@ +From c2e589d53ac011181bacf4fc65b7112b7e618776 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sun, 24 Jul 2022 14:00:12 +0800 +Subject: perf symbol: Correct address for bss symbols + +From: Leo Yan + +[ Upstream commit 2d86612aacb7805f72873691a2644d7279ed0630 ] + +When using 'perf mem' and 'perf c2c', an issue is observed that tool +reports the wrong offset for global data symbols. This is a common +issue on both x86 and Arm64 platforms. + +Let's see an example, for a test program, below is the disassembly for +its .bss section which is dumped with objdump: + + ... + + Disassembly of section .bss: + + 0000000000004040 : + ... 
+ + 0000000000004080 : + ... + + 00000000000040c0 : + ... + + 0000000000004100 : + ... + +First we used 'perf mem record' to run the test program and then used +'perf --debug verbose=4 mem report' to observe what's the symbol info +for 'buf1' and 'buf2' structures. + + # ./perf mem record -e ldlat-loads,ldlat-stores -- false_sharing.exe 8 + # ./perf --debug verbose=4 mem report + ... + dso__load_sym_internal: adjusting symbol: st_value: 0x40c0 sh_addr: 0x4040 sh_offset: 0x3028 + symbol__new: buf2 0x30a8-0x30e8 + ... + dso__load_sym_internal: adjusting symbol: st_value: 0x4080 sh_addr: 0x4040 sh_offset: 0x3028 + symbol__new: buf1 0x3068-0x30a8 + ... + +The perf tool relies on libelf to parse symbols, in executable and +shared object files, 'st_value' holds a virtual address; 'sh_addr' is +the address at which section's first byte should reside in memory, and +'sh_offset' is the byte offset from the beginning of the file to the +first byte in the section. The perf tool uses below formula to convert +a symbol's memory address to a file address: + + file_address = st_value - sh_addr + sh_offset + ^ + ` Memory address + +We can see the final adjusted address ranges for buf1 and buf2 are +[0x30a8-0x30e8) and [0x3068-0x30a8) respectively, apparently this is +incorrect, in the code, the structure for 'buf1' and 'buf2' specifies +compiler attribute with 64-byte alignment. + +The problem happens for 'sh_offset', libelf returns it as 0x3028 which +is not 64-byte aligned, combining with disassembly, it's likely libelf +doesn't respect the alignment for .bss section, therefore, it doesn't +return the aligned value for 'sh_offset'. + +Suggested by Fangrui Song, ELF file contains program header which +contains PT_LOAD segments, the fields p_vaddr and p_offset in PT_LOAD +segments contain the execution info. 
A better choice for converting +memory address to file address is using the formula: + + file_address = st_value - p_vaddr + p_offset + +This patch introduces elf_read_program_header() which returns the +program header based on the passed 'st_value', then it uses the formula +above to calculate the symbol file address; and the debugging log is +updated respectively. + +After applying the change: + + # ./perf --debug verbose=4 mem report + ... + dso__load_sym_internal: adjusting symbol: st_value: 0x40c0 p_vaddr: 0x3d28 p_offset: 0x2d28 + symbol__new: buf2 0x30c0-0x3100 + ... + dso__load_sym_internal: adjusting symbol: st_value: 0x4080 p_vaddr: 0x3d28 p_offset: 0x2d28 + symbol__new: buf1 0x3080-0x30c0 + ... + +Fixes: f17e04afaff84b5c ("perf report: Fix ELF symbol parsing") +Reported-by: Chang Rui +Suggested-by: Fangrui Song +Signed-off-by: Leo Yan +Acked-by: Namhyung Kim +Cc: Alexander Shishkin +Cc: Ian Rogers +Cc: Ingo Molnar +Cc: Jiri Olsa +Cc: Mark Rutland +Cc: Peter Zijlstra +Link: https://lore.kernel.org/r/20220724060013.171050-2-leo.yan@linaro.org +Signed-off-by: Arnaldo Carvalho de Melo +Signed-off-by: Sasha Levin +--- + tools/perf/util/symbol-elf.c | 45 ++++++++++++++++++++++++++++++++---- + 1 file changed, 41 insertions(+), 4 deletions(-) + +diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c +index 94809aed8b44..1cab29d45bfb 100644 +--- a/tools/perf/util/symbol-elf.c ++++ b/tools/perf/util/symbol-elf.c +@@ -232,6 +232,33 @@ Elf_Scn *elf_section_by_name(Elf *elf, GElf_Ehdr *ep, + return NULL; + } + ++static int elf_read_program_header(Elf *elf, u64 vaddr, GElf_Phdr *phdr) ++{ ++ size_t i, phdrnum; ++ u64 sz; ++ ++ if (elf_getphdrnum(elf, &phdrnum)) ++ return -1; ++ ++ for (i = 0; i < phdrnum; i++) { ++ if (gelf_getphdr(elf, i, phdr) == NULL) ++ return -1; ++ ++ if (phdr->p_type != PT_LOAD) ++ continue; ++ ++ sz = max(phdr->p_memsz, phdr->p_filesz); ++ if (!sz) ++ continue; ++ ++ if (vaddr >= phdr->p_vaddr && (vaddr < phdr->p_vaddr + sz)) 
++ return 0; ++ } ++ ++ /* Not found any valid program header */ ++ return -1; ++} ++ + static bool want_demangle(bool is_kernel_sym) + { + return is_kernel_sym ? symbol_conf.demangle_kernel : symbol_conf.demangle; +@@ -1181,6 +1208,7 @@ int dso__load_sym(struct dso *dso, struct map *map, struct symsrc *syms_ss, + sym.st_value); + used_opd = true; + } ++ + /* + * When loading symbols in a data mapping, ABS symbols (which + * has a value of SHN_ABS in its st_shndx) failed at +@@ -1217,11 +1245,20 @@ int dso__load_sym(struct dso *dso, struct map *map, struct symsrc *syms_ss, + goto out_elf_end; + } else if ((used_opd && runtime_ss->adjust_symbols) || + (!used_opd && syms_ss->adjust_symbols)) { ++ GElf_Phdr phdr; ++ ++ if (elf_read_program_header(syms_ss->elf, ++ (u64)sym.st_value, &phdr)) { ++ pr_warning("%s: failed to find program header for " ++ "symbol: %s st_value: %#" PRIx64 "\n", ++ __func__, elf_name, (u64)sym.st_value); ++ continue; ++ } + pr_debug4("%s: adjusting symbol: st_value: %#" PRIx64 " " +- "sh_addr: %#" PRIx64 " sh_offset: %#" PRIx64 "\n", __func__, +- (u64)sym.st_value, (u64)shdr.sh_addr, +- (u64)shdr.sh_offset); +- sym.st_value -= shdr.sh_addr - shdr.sh_offset; ++ "p_vaddr: %#" PRIx64 " p_offset: %#" PRIx64 "\n", ++ __func__, (u64)sym.st_value, (u64)phdr.p_vaddr, ++ (u64)phdr.p_offset); ++ sym.st_value -= phdr.p_vaddr - phdr.p_offset; + } + + demangled = demangle_sym(dso, kmodule, elf_name); +-- +2.35.1 + diff --git a/queue-5.10/sctp-fix-sleep-in-atomic-context-bug-in-timer-handle.patch b/queue-5.10/sctp-fix-sleep-in-atomic-context-bug-in-timer-handle.patch new file mode 100644 index 00000000000..aec83742f80 --- /dev/null +++ b/queue-5.10/sctp-fix-sleep-in-atomic-context-bug-in-timer-handle.patch @@ -0,0 +1,61 @@ +From d4e50ac92fae9fe0b48e5348b08b74dcb8e8ee31 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sat, 23 Jul 2022 09:58:09 +0800 +Subject: sctp: fix sleep in atomic context bug in timer handlers + +From: Duoming Zhou + +[ Upstream commit 
b89fc26f741d9f9efb51cba3e9b241cf1380ec5a ] + +There are sleep in atomic context bugs in timer handlers of sctp +such as sctp_generate_t3_rtx_event(), sctp_generate_probe_event(), +sctp_generate_t1_init_event(), sctp_generate_timeout_event(), +sctp_generate_t3_rtx_event() and so on. + +The root cause is sctp_sched_prio_init_sid() with GFP_KERNEL parameter +that may sleep could be called by different timer handlers which is in +interrupt context. + +One of the call paths that could trigger bug is shown below: + + (interrupt context) +sctp_generate_probe_event + sctp_do_sm + sctp_side_effects + sctp_cmd_interpreter + sctp_outq_teardown + sctp_outq_init + sctp_sched_set_sched + n->init_sid(..,GFP_KERNEL) + sctp_sched_prio_init_sid //may sleep + +This patch changes gfp_t parameter of init_sid in sctp_sched_set_sched() +from GFP_KERNEL to GFP_ATOMIC in order to prevent sleep in atomic +context bugs. + +Fixes: 5bbbbe32a431 ("sctp: introduce stream scheduler foundations") +Signed-off-by: Duoming Zhou +Acked-by: Marcelo Ricardo Leitner +Link: https://lore.kernel.org/r/20220723015809.11553-1-duoming@zju.edu.cn +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + net/sctp/stream_sched.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/net/sctp/stream_sched.c b/net/sctp/stream_sched.c +index 99e5f69fbb74..a2e1d34f52c5 100644 +--- a/net/sctp/stream_sched.c ++++ b/net/sctp/stream_sched.c +@@ -163,7 +163,7 @@ int sctp_sched_set_sched(struct sctp_association *asoc, + if (!SCTP_SO(&asoc->stream, i)->ext) + continue; + +- ret = n->init_sid(&asoc->stream, i, GFP_KERNEL); ++ ret = n->init_sid(&asoc->stream, i, GFP_ATOMIC); + if (ret) + goto err; + } +-- +2.35.1 + diff --git a/queue-5.10/sctp-leave-the-err-path-free-in-sctp_stream_init-to-.patch b/queue-5.10/sctp-leave-the-err-path-free-in-sctp_stream_init-to-.patch new file mode 100644 index 00000000000..37c027a3fc1 --- /dev/null +++ 
b/queue-5.10/sctp-leave-the-err-path-free-in-sctp_stream_init-to-.patch @@ -0,0 +1,109 @@ +From 384d89c32641e5b50fb435ca32edb8792c16347d Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 25 Jul 2022 18:11:06 -0400 +Subject: sctp: leave the err path free in sctp_stream_init to sctp_stream_free + +From: Xin Long + +[ Upstream commit 181d8d2066c000ba0a0e6940a7ad80f1a0e68e9d ] + +A NULL pointer dereference was reported by Wei Chen: + + BUG: kernel NULL pointer dereference, address: 0000000000000000 + RIP: 0010:__list_del_entry_valid+0x26/0x80 + Call Trace: + + sctp_sched_dequeue_common+0x1c/0x90 + sctp_sched_prio_dequeue+0x67/0x80 + __sctp_outq_teardown+0x299/0x380 + sctp_outq_free+0x15/0x20 + sctp_association_free+0xc3/0x440 + sctp_do_sm+0x1ca7/0x2210 + sctp_assoc_bh_rcv+0x1f6/0x340 + +This happens when calling sctp_sendmsg without connecting to server first. +In this case, a data chunk already queues up in send queue of client side +when processing the INIT_ACK from server in sctp_process_init() where it +calls sctp_stream_init() to alloc stream_in. If it fails to alloc stream_in +all stream_out will be freed in sctp_stream_init's err path. Then in the +asoc freeing it will crash when dequeuing this data chunk as stream_out +is missing. + +As we can't free stream out before dequeuing all data from send queue, and +this patch is to fix it by moving the err path stream_out/in freeing in +sctp_stream_init() to sctp_stream_free() which is eventually called when +freeing the asoc in sctp_association_free(). This fix also makes the code +in sctp_process_init() more clear. + +Note that in sctp_association_init() when it fails in sctp_stream_init(), +sctp_association_free() will not be called, and in that case it should +go to 'stream_free' err path to free stream instead of 'fail_init'. 
+ +Fixes: 5bbbbe32a431 ("sctp: introduce stream scheduler foundations") +Reported-by: Wei Chen +Signed-off-by: Xin Long +Link: https://lore.kernel.org/r/831a3dc100c4908ff76e5bcc363be97f2778bc0b.1658787066.git.lucien.xin@gmail.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + net/sctp/associola.c | 5 ++--- + net/sctp/stream.c | 19 +++---------------- + 2 files changed, 5 insertions(+), 19 deletions(-) + +diff --git a/net/sctp/associola.c b/net/sctp/associola.c +index fdb69d46276d..2d4ec6187755 100644 +--- a/net/sctp/associola.c ++++ b/net/sctp/associola.c +@@ -226,9 +226,8 @@ static struct sctp_association *sctp_association_init( + if (!sctp_ulpq_init(&asoc->ulpq, asoc)) + goto fail_init; + +- if (sctp_stream_init(&asoc->stream, asoc->c.sinit_num_ostreams, +- 0, gfp)) +- goto fail_init; ++ if (sctp_stream_init(&asoc->stream, asoc->c.sinit_num_ostreams, 0, gfp)) ++ goto stream_free; + + /* Initialize default path MTU. */ + asoc->pathmtu = sp->pathmtu; +diff --git a/net/sctp/stream.c b/net/sctp/stream.c +index 6dc95dcc0ff4..ef9fceadef8d 100644 +--- a/net/sctp/stream.c ++++ b/net/sctp/stream.c +@@ -137,7 +137,7 @@ int sctp_stream_init(struct sctp_stream *stream, __u16 outcnt, __u16 incnt, + + ret = sctp_stream_alloc_out(stream, outcnt, gfp); + if (ret) +- goto out_err; ++ return ret; + + for (i = 0; i < stream->outcnt; i++) + SCTP_SO(stream, i)->state = SCTP_STREAM_OPEN; +@@ -145,22 +145,9 @@ int sctp_stream_init(struct sctp_stream *stream, __u16 outcnt, __u16 incnt, + handle_in: + sctp_stream_interleave_init(stream); + if (!incnt) +- goto out; +- +- ret = sctp_stream_alloc_in(stream, incnt, gfp); +- if (ret) +- goto in_err; +- +- goto out; ++ return 0; + +-in_err: +- sched->free(stream); +- genradix_free(&stream->in); +-out_err: +- genradix_free(&stream->out); +- stream->outcnt = 0; +-out: +- return ret; ++ return sctp_stream_alloc_in(stream, incnt, gfp); + } + + int sctp_stream_init_ext(struct sctp_stream *stream, __u16 sid) +-- +2.35.1 + diff 
--git a/queue-5.10/series b/queue-5.10/series index 87585babae7..221c6069495 100644 --- a/queue-5.10/series +++ b/queue-5.10/series @@ -21,3 +21,26 @@ tcp-fix-a-data-race-around-sysctl_tcp_challenge_ack_limit.patch net-ping6-fix-memleak-in-ipv6_renew_options.patch ipv6-addrconf-fix-a-null-ptr-deref-bug-for-ip6_ptr.patch net-tls-remove-the-context-from-the-list-in-tls_device_down.patch +igmp-fix-data-races-around-sysctl_igmp_qrv.patch +net-sungem_phy-add-of_node_put-for-reference-returne.patch +tcp-fix-a-data-race-around-sysctl_tcp_min_tso_segs.patch +tcp-fix-a-data-race-around-sysctl_tcp_min_rtt_wlen.patch +tcp-fix-a-data-race-around-sysctl_tcp_autocorking.patch +tcp-fix-a-data-race-around-sysctl_tcp_invalid_rateli.patch +documentation-fix-sctp_wmem-in-ip-sysctl.rst.patch +macsec-fix-null-deref-in-macsec_add_rxsa.patch +macsec-fix-error-message-in-macsec_add_rxsa-and-_txs.patch +macsec-limit-replay-window-size-with-xpn.patch +macsec-always-read-macsec_sa_attr_pn-as-a-u64.patch +net-macsec-fix-potential-resource-leak-in-macsec_add.patch +tcp-fix-a-data-race-around-sysctl_tcp_comp_sack_dela.patch +tcp-fix-a-data-race-around-sysctl_tcp_comp_sack_slac.patch +tcp-fix-a-data-race-around-sysctl_tcp_comp_sack_nr.patch +tcp-fix-data-races-around-sysctl_tcp_reflect_tos.patch +i40e-fix-interface-init-with-msi-interrupts-no-msi-x.patch +sctp-fix-sleep-in-atomic-context-bug-in-timer-handle.patch +netfilter-nf_queue-do-not-allow-packet-truncation-be.patch +virtio-net-fix-the-race-between-refill-work-and-clos.patch +perf-symbol-correct-address-for-bss-symbols.patch +sfc-disable-softirqs-for-ptp-tx.patch +sctp-leave-the-err-path-free-in-sctp_stream_init-to-.patch diff --git a/queue-5.10/sfc-disable-softirqs-for-ptp-tx.patch b/queue-5.10/sfc-disable-softirqs-for-ptp-tx.patch new file mode 100644 index 00000000000..8a732c721b3 --- /dev/null +++ b/queue-5.10/sfc-disable-softirqs-for-ptp-tx.patch @@ -0,0 +1,73 @@ +From 1c2de5e19cbbd305d8cfba32a2c95896ca6a461b Mon Sep 17 00:00:00 2001 
+From: Sasha Levin +Date: Tue, 26 Jul 2022 08:45:04 +0200 +Subject: sfc: disable softirqs for ptp TX + +From: Alejandro Lucero + +[ Upstream commit 67c3b611d92fc238c43734878bc3e232ab570c79 ] + +Sending a PTP packet can imply to use the normal TX driver datapath but +invoked from the driver's ptp worker. The kernel generic TX code +disables softirqs and preemption before calling specific driver TX code, +but the ptp worker does not. Although current ptp driver functionality +does not require it, there are several reasons for doing so: + + 1) The invoked code is always executed with softirqs disabled for non + PTP packets. + 2) Better if a ptp packet transmission is not interrupted by softirq + handling which could lead to high latencies. + 3) netdev_xmit_more used by the TX code requires preemption to be + disabled. + +Indeed a solution for dealing with kernel preemption state based on static +kernel configuration is not possible since the introduction of dynamic +preemption level configuration at boot time using the static calls +functionality. + +Fixes: f79c957a0b537 ("drivers: net: sfc: use netdev_xmit_more helper") +Signed-off-by: Alejandro Lucero +Link: https://lore.kernel.org/r/20220726064504.49613-1-alejandro.lucero-palau@amd.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/sfc/ptp.c | 22 ++++++++++++++++++++++ + 1 file changed, 22 insertions(+) + +diff --git a/drivers/net/ethernet/sfc/ptp.c b/drivers/net/ethernet/sfc/ptp.c +index 725b0f38813a..a2b4e3befa59 100644 +--- a/drivers/net/ethernet/sfc/ptp.c ++++ b/drivers/net/ethernet/sfc/ptp.c +@@ -1100,7 +1100,29 @@ static void efx_ptp_xmit_skb_queue(struct efx_nic *efx, struct sk_buff *skb) + + tx_queue = efx_channel_get_tx_queue(ptp_data->channel, type); + if (tx_queue && tx_queue->timestamping) { ++ /* This code invokes normal driver TX code which is always ++ * protected from softirqs when called from generic TX code, ++ * which in turn disables preemption. 
Look at __dev_queue_xmit ++ * which uses rcu_read_lock_bh disabling preemption for RCU ++ * plus disabling softirqs. We do not need RCU reader ++ * protection here. ++ * ++ * Although it is theoretically safe for current PTP TX/RX code ++ * running without disabling softirqs, there are three good ++ * reasons for doing so: ++ * ++ * 1) The code invoked is mainly implemented for non-PTP ++ * packets and it is always executed with softirqs ++ * disabled. ++ * 2) This being a single PTP packet, better to not ++ * interrupt its processing by softirqs which can lead ++ * to high latencies. ++ * 3) netdev_xmit_more checks preemption is disabled and ++ * triggers a BUG_ON if not. ++ */ ++ local_bh_disable(); + efx_enqueue_skb(tx_queue, skb); ++ local_bh_enable(); + } else { + WARN_ONCE(1, "PTP channel has no timestamped tx queue\n"); + dev_kfree_skb_any(skb); +-- +2.35.1 + diff --git a/queue-5.10/tcp-fix-a-data-race-around-sysctl_tcp_autocorking.patch b/queue-5.10/tcp-fix-a-data-race-around-sysctl_tcp_autocorking.patch new file mode 100644 index 00000000000..33b3c5a7141 --- /dev/null +++ b/queue-5.10/tcp-fix-a-data-race-around-sysctl_tcp_autocorking.patch @@ -0,0 +1,36 @@ +From 494ebf8b94014354fb555a17ef614fcee67baaf4 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 20 Jul 2022 09:50:25 -0700 +Subject: tcp: Fix a data-race around sysctl_tcp_autocorking. + +From: Kuniyuki Iwashima + +[ Upstream commit 85225e6f0a76e6745bc841c9f25169c509b573d8 ] + +While reading sysctl_tcp_autocorking, it can be changed concurrently. +Thus, we need to add READ_ONCE() to its reader. + +Fixes: f54b311142a9 ("tcp: auto corking") +Signed-off-by: Kuniyuki Iwashima +Signed-off-by: David S.
Miller +Signed-off-by: Sasha Levin +--- + net/ipv4/tcp.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c +index f1fd26bb199c..78460eb39b3a 100644 +--- a/net/ipv4/tcp.c ++++ b/net/ipv4/tcp.c +@@ -698,7 +698,7 @@ static bool tcp_should_autocork(struct sock *sk, struct sk_buff *skb, + int size_goal) + { + return skb->len < size_goal && +- sock_net(sk)->ipv4.sysctl_tcp_autocorking && ++ READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_autocorking) && + !tcp_rtx_queue_empty(sk) && + refcount_read(&sk->sk_wmem_alloc) > skb->truesize; + } +-- +2.35.1 + diff --git a/queue-5.10/tcp-fix-a-data-race-around-sysctl_tcp_comp_sack_dela.patch b/queue-5.10/tcp-fix-a-data-race-around-sysctl_tcp_comp_sack_dela.patch new file mode 100644 index 00000000000..9bf73f14b1f --- /dev/null +++ b/queue-5.10/tcp-fix-a-data-race-around-sysctl_tcp_comp_sack_dela.patch @@ -0,0 +1,37 @@ +From 501435d3d1705d096bbeba7744f0c7b38573c1ba Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 22 Jul 2022 11:22:01 -0700 +Subject: tcp: Fix a data-race around sysctl_tcp_comp_sack_delay_ns. + +From: Kuniyuki Iwashima + +[ Upstream commit 4866b2b0f7672b6d760c4b8ece6fb56f965dcc8a ] + +While reading sysctl_tcp_comp_sack_delay_ns, it can be changed +concurrently. Thus, we need to add READ_ONCE() to its reader. + +Fixes: 6d82aa242092 ("tcp: add tcp_comp_sack_delay_ns sysctl") +Signed-off-by: Kuniyuki Iwashima +Signed-off-by: David S. 
Miller +Signed-off-by: Sasha Levin +--- + net/ipv4/tcp_input.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c +index 716bc95ebfb0..b86d98c07cdf 100644 +--- a/net/ipv4/tcp_input.c ++++ b/net/ipv4/tcp_input.c +@@ -5461,7 +5461,8 @@ static void __tcp_ack_snd_check(struct sock *sk, int ofo_possible) + if (tp->srtt_us && tp->srtt_us < rtt) + rtt = tp->srtt_us; + +- delay = min_t(unsigned long, sock_net(sk)->ipv4.sysctl_tcp_comp_sack_delay_ns, ++ delay = min_t(unsigned long, ++ READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_comp_sack_delay_ns), + rtt * (NSEC_PER_USEC >> 3)/20); + sock_hold(sk); + hrtimer_start_range_ns(&tp->compressed_ack_timer, ns_to_ktime(delay), +-- +2.35.1 + diff --git a/queue-5.10/tcp-fix-a-data-race-around-sysctl_tcp_comp_sack_nr.patch b/queue-5.10/tcp-fix-a-data-race-around-sysctl_tcp_comp_sack_nr.patch new file mode 100644 index 00000000000..304aa4ef015 --- /dev/null +++ b/queue-5.10/tcp-fix-a-data-race-around-sysctl_tcp_comp_sack_nr.patch @@ -0,0 +1,36 @@ +From 6aa8b9592ffbe7683e56a28cfb88df4338f9e15e Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 22 Jul 2022 11:22:03 -0700 +Subject: tcp: Fix a data-race around sysctl_tcp_comp_sack_nr. + +From: Kuniyuki Iwashima + +[ Upstream commit 79f55473bfc8ac51bd6572929a679eeb4da22251 ] + +While reading sysctl_tcp_comp_sack_nr, it can be changed concurrently. +Thus, we need to add READ_ONCE() to its reader. + +Fixes: 9c21d2fc41c0 ("tcp: add tcp_comp_sack_nr sysctl") +Signed-off-by: Kuniyuki Iwashima +Signed-off-by: David S. 
Miller +Signed-off-by: Sasha Levin +--- + net/ipv4/tcp_input.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c +index 72a339d3f18f..d35e88b5ffcb 100644 +--- a/net/ipv4/tcp_input.c ++++ b/net/ipv4/tcp_input.c +@@ -5440,7 +5440,7 @@ static void __tcp_ack_snd_check(struct sock *sk, int ofo_possible) + } + + if (!tcp_is_sack(tp) || +- tp->compressed_ack >= sock_net(sk)->ipv4.sysctl_tcp_comp_sack_nr) ++ tp->compressed_ack >= READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_comp_sack_nr)) + goto send_now; + + if (tp->compressed_ack_rcv_nxt != tp->rcv_nxt) { +-- +2.35.1 + diff --git a/queue-5.10/tcp-fix-a-data-race-around-sysctl_tcp_comp_sack_slac.patch b/queue-5.10/tcp-fix-a-data-race-around-sysctl_tcp_comp_sack_slac.patch new file mode 100644 index 00000000000..2e826ebd64f --- /dev/null +++ b/queue-5.10/tcp-fix-a-data-race-around-sysctl_tcp_comp_sack_slac.patch @@ -0,0 +1,36 @@ +From 8fa2e9cfa1800b30be22ccd6d3f70f385a8ae78c Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 22 Jul 2022 11:22:02 -0700 +Subject: tcp: Fix a data-race around sysctl_tcp_comp_sack_slack_ns. + +From: Kuniyuki Iwashima + +[ Upstream commit 22396941a7f343d704738360f9ef0e6576489d43 ] + +While reading sysctl_tcp_comp_sack_slack_ns, it can be changed +concurrently. Thus, we need to add READ_ONCE() to its reader. + +Fixes: a70437cc09a1 ("tcp: add hrtimer slack to sack compression") +Signed-off-by: Kuniyuki Iwashima +Signed-off-by: David S. 
Miller +Signed-off-by: Sasha Levin +--- + net/ipv4/tcp_input.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c +index b86d98c07cdf..72a339d3f18f 100644 +--- a/net/ipv4/tcp_input.c ++++ b/net/ipv4/tcp_input.c +@@ -5466,7 +5466,7 @@ static void __tcp_ack_snd_check(struct sock *sk, int ofo_possible) + rtt * (NSEC_PER_USEC >> 3)/20); + sock_hold(sk); + hrtimer_start_range_ns(&tp->compressed_ack_timer, ns_to_ktime(delay), +- sock_net(sk)->ipv4.sysctl_tcp_comp_sack_slack_ns, ++ READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_comp_sack_slack_ns), + HRTIMER_MODE_REL_PINNED_SOFT); + } + +-- +2.35.1 + diff --git a/queue-5.10/tcp-fix-a-data-race-around-sysctl_tcp_invalid_rateli.patch b/queue-5.10/tcp-fix-a-data-race-around-sysctl_tcp_invalid_rateli.patch new file mode 100644 index 00000000000..6812a89863e --- /dev/null +++ b/queue-5.10/tcp-fix-a-data-race-around-sysctl_tcp_invalid_rateli.patch @@ -0,0 +1,37 @@ +From 2ff0aa82f362d9414322a20b4609cb2735bb9306 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 20 Jul 2022 09:50:26 -0700 +Subject: tcp: Fix a data-race around sysctl_tcp_invalid_ratelimit. + +From: Kuniyuki Iwashima + +[ Upstream commit 2afdbe7b8de84c28e219073a6661080e1b3ded48 ] + +While reading sysctl_tcp_invalid_ratelimit, it can be changed +concurrently. Thus, we need to add READ_ONCE() to its reader. + +Fixes: 032ee4236954 ("tcp: helpers to mitigate ACK loops by rate-limiting out-of-window dupacks") +Signed-off-by: Kuniyuki Iwashima +Signed-off-by: David S. 
Miller +Signed-off-by: Sasha Levin +--- + net/ipv4/tcp_input.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c +index 79539ef5eb90..716bc95ebfb0 100644 +--- a/net/ipv4/tcp_input.c ++++ b/net/ipv4/tcp_input.c +@@ -3528,7 +3528,8 @@ static bool __tcp_oow_rate_limited(struct net *net, int mib_idx, + if (*last_oow_ack_time) { + s32 elapsed = (s32)(tcp_jiffies32 - *last_oow_ack_time); + +- if (0 <= elapsed && elapsed < net->ipv4.sysctl_tcp_invalid_ratelimit) { ++ if (0 <= elapsed && ++ elapsed < READ_ONCE(net->ipv4.sysctl_tcp_invalid_ratelimit)) { + NET_INC_STATS(net, mib_idx); + return true; /* rate-limited: don't send yet! */ + } +-- +2.35.1 + diff --git a/queue-5.10/tcp-fix-a-data-race-around-sysctl_tcp_min_rtt_wlen.patch b/queue-5.10/tcp-fix-a-data-race-around-sysctl_tcp_min_rtt_wlen.patch new file mode 100644 index 00000000000..6f99396aa83 --- /dev/null +++ b/queue-5.10/tcp-fix-a-data-race-around-sysctl_tcp_min_rtt_wlen.patch @@ -0,0 +1,36 @@ +From d1c5ee6d185dc3d0e03ae2c511cad5fa075e1c8a Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 20 Jul 2022 09:50:24 -0700 +Subject: tcp: Fix a data-race around sysctl_tcp_min_rtt_wlen. + +From: Kuniyuki Iwashima + +[ Upstream commit 1330ffacd05fc9ac4159d19286ce119e22450ed2 ] + +While reading sysctl_tcp_min_rtt_wlen, it can be changed concurrently. +Thus, we need to add READ_ONCE() to its reader. + +Fixes: f672258391b4 ("tcp: track min RTT using windowed min-filter") +Signed-off-by: Kuniyuki Iwashima +Signed-off-by: David S. 
Miller +Signed-off-by: Sasha Levin +--- + net/ipv4/tcp_input.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c +index c31db58b93a6..79539ef5eb90 100644 +--- a/net/ipv4/tcp_input.c ++++ b/net/ipv4/tcp_input.c +@@ -3004,7 +3004,7 @@ static void tcp_fastretrans_alert(struct sock *sk, const u32 prior_snd_una, + + static void tcp_update_rtt_min(struct sock *sk, u32 rtt_us, const int flag) + { +- u32 wlen = sock_net(sk)->ipv4.sysctl_tcp_min_rtt_wlen * HZ; ++ u32 wlen = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_min_rtt_wlen) * HZ; + struct tcp_sock *tp = tcp_sk(sk); + + if ((flag & FLAG_ACK_MAYBE_DELAYED) && rtt_us > tcp_min_rtt(tp)) { +-- +2.35.1 + diff --git a/queue-5.10/tcp-fix-a-data-race-around-sysctl_tcp_min_tso_segs.patch b/queue-5.10/tcp-fix-a-data-race-around-sysctl_tcp_min_tso_segs.patch new file mode 100644 index 00000000000..45d4c864c92 --- /dev/null +++ b/queue-5.10/tcp-fix-a-data-race-around-sysctl_tcp_min_tso_segs.patch @@ -0,0 +1,36 @@ +From f1b5c1814939520d2fb691068248e2f6b8b2abfb Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 20 Jul 2022 09:50:22 -0700 +Subject: tcp: Fix a data-race around sysctl_tcp_min_tso_segs. + +From: Kuniyuki Iwashima + +[ Upstream commit e0bb4ab9dfddd872622239f49fb2bd403b70853b ] + +While reading sysctl_tcp_min_tso_segs, it can be changed concurrently. +Thus, we need to add READ_ONCE() to its reader. + +Fixes: 95bd09eb2750 ("tcp: TSO packets automatic sizing") +Signed-off-by: Kuniyuki Iwashima +Signed-off-by: David S. Miller +Signed-off-by: Sasha Levin +--- + net/ipv4/tcp_output.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c +index 1a144c38039c..657b0a4d9359 100644 +--- a/net/ipv4/tcp_output.c ++++ b/net/ipv4/tcp_output.c +@@ -1984,7 +1984,7 @@ static u32 tcp_tso_segs(struct sock *sk, unsigned int mss_now) + + min_tso = ca_ops->min_tso_segs ? 
+ ca_ops->min_tso_segs(sk) : +- sock_net(sk)->ipv4.sysctl_tcp_min_tso_segs; ++ READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_min_tso_segs); + + tso_segs = tcp_tso_autosize(sk, mss_now, min_tso); + return min_t(u32, tso_segs, sk->sk_gso_max_segs); +-- +2.35.1 + diff --git a/queue-5.10/tcp-fix-data-races-around-sysctl_tcp_reflect_tos.patch b/queue-5.10/tcp-fix-data-races-around-sysctl_tcp_reflect_tos.patch new file mode 100644 index 00000000000..62bc2b0a629 --- /dev/null +++ b/queue-5.10/tcp-fix-data-races-around-sysctl_tcp_reflect_tos.patch @@ -0,0 +1,69 @@ +From 6e1981d1ddffd5406e061ef57f88c7b0f8ad7673 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 22 Jul 2022 11:22:04 -0700 +Subject: tcp: Fix data-races around sysctl_tcp_reflect_tos. + +From: Kuniyuki Iwashima + +[ Upstream commit 870e3a634b6a6cb1543b359007aca73fe6a03ac5 ] + +While reading sysctl_tcp_reflect_tos, it can be changed concurrently. +Thus, we need to add READ_ONCE() to its readers. + +Fixes: ac8f1710c12b ("tcp: reflect tos value received in SYN to the socket") +Signed-off-by: Kuniyuki Iwashima +Acked-by: Wei Wang +Signed-off-by: David S. Miller +Signed-off-by: Sasha Levin +--- + net/ipv4/tcp_ipv4.c | 4 ++-- + net/ipv6/tcp_ipv6.c | 4 ++-- + 2 files changed, 4 insertions(+), 4 deletions(-) + +diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c +index d5f13ff7d900..0d165ce2d80a 100644 +--- a/net/ipv4/tcp_ipv4.c ++++ b/net/ipv4/tcp_ipv4.c +@@ -983,7 +983,7 @@ static int tcp_v4_send_synack(const struct sock *sk, struct dst_entry *dst, + if (skb) { + __tcp_v4_send_check(skb, ireq->ir_loc_addr, ireq->ir_rmt_addr); + +- tos = sock_net(sk)->ipv4.sysctl_tcp_reflect_tos ? ++ tos = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reflect_tos) ? + (tcp_rsk(req)->syn_tos & ~INET_ECN_MASK) | + (inet_sk(sk)->tos & INET_ECN_MASK) : + inet_sk(sk)->tos; +@@ -1558,7 +1558,7 @@ struct sock *tcp_v4_syn_recv_sock(const struct sock *sk, struct sk_buff *skb, + /* Set ToS of the new socket based upon the value of incoming SYN. 
+ * ECT bits are set later in tcp_init_transfer(). + */ +- if (sock_net(sk)->ipv4.sysctl_tcp_reflect_tos) ++ if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reflect_tos)) + newinet->tos = tcp_rsk(req)->syn_tos & ~INET_ECN_MASK; + + if (!dst) { +diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c +index 303b54414a6c..8d91f36cb11b 100644 +--- a/net/ipv6/tcp_ipv6.c ++++ b/net/ipv6/tcp_ipv6.c +@@ -542,7 +542,7 @@ static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst, + if (np->repflow && ireq->pktopts) + fl6->flowlabel = ip6_flowlabel(ipv6_hdr(ireq->pktopts)); + +- tclass = sock_net(sk)->ipv4.sysctl_tcp_reflect_tos ? ++ tclass = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reflect_tos) ? + (tcp_rsk(req)->syn_tos & ~INET_ECN_MASK) | + (np->tclass & INET_ECN_MASK) : + np->tclass; +@@ -1344,7 +1344,7 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff * + /* Set ToS of the new socket based upon the value of incoming SYN. + * ECT bits are set later in tcp_init_transfer(). + */ +- if (sock_net(sk)->ipv4.sysctl_tcp_reflect_tos) ++ if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reflect_tos)) + newnp->tclass = tcp_rsk(req)->syn_tos & ~INET_ECN_MASK; + + /* Clone native IPv6 options from listening socket (if any) +-- +2.35.1 + diff --git a/queue-5.10/virtio-net-fix-the-race-between-refill-work-and-clos.patch b/queue-5.10/virtio-net-fix-the-race-between-refill-work-and-clos.patch new file mode 100644 index 00000000000..808da84a487 --- /dev/null +++ b/queue-5.10/virtio-net-fix-the-race-between-refill-work-and-clos.patch @@ -0,0 +1,151 @@ +From e27a8c0d76aaff6a7450c66e37fe0130fa84b104 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 25 Jul 2022 15:21:59 +0800 +Subject: virtio-net: fix the race between refill work and close + +From: Jason Wang + +[ Upstream commit 5a159128faff151b7fe5f4eb0f310b1e0a2d56bf ] + +We try using cancel_delayed_work_sync() to prevent the work from +enabling NAPI. 
This is insufficient since we don't disable the source +of the refill work scheduling. This means an NAPI poll callback after +cancel_delayed_work_sync() can schedule the refill work then can +re-enable the NAPI that leads to use-after-free [1]. + +Since the work can enable NAPI, we can't simply disable NAPI before +calling cancel_delayed_work_sync(). So fix this by introducing a +dedicated boolean to control whether or not the work could be +scheduled from NAPI. + +[1] +================================================================== +BUG: KASAN: use-after-free in refill_work+0x43/0xd4 +Read of size 2 at addr ffff88810562c92e by task kworker/2:1/42 + +CPU: 2 PID: 42 Comm: kworker/2:1 Not tainted 5.19.0-rc1+ #480 +Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.16.0-0-gd239552ce722-prebuilt.qemu.org 04/01/2014 +Workqueue: events refill_work +Call Trace: + + dump_stack_lvl+0x34/0x44 + print_report.cold+0xbb/0x6ac + ? _printk+0xad/0xde + ? refill_work+0x43/0xd4 + kasan_report+0xa8/0x130 + ? refill_work+0x43/0xd4 + refill_work+0x43/0xd4 + process_one_work+0x43d/0x780 + worker_thread+0x2a0/0x6f0 + ? process_one_work+0x780/0x780 + kthread+0x167/0x1a0 + ? kthread_exit+0x50/0x50 + ret_from_fork+0x22/0x30 + +... + +Fixes: b2baed69e605c ("virtio_net: set/cancel work on ndo_open/ndo_stop") +Signed-off-by: Jason Wang +Acked-by: Michael S. Tsirkin +Reviewed-by: Xuan Zhuo +Signed-off-by: David S. Miller +Signed-off-by: Sasha Levin +--- + drivers/net/virtio_net.c | 37 ++++++++++++++++++++++++++++++++++--- + 1 file changed, 34 insertions(+), 3 deletions(-) + +diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c +index 37178b078ee3..0a07c05a610d 100644 +--- a/drivers/net/virtio_net.c ++++ b/drivers/net/virtio_net.c +@@ -213,9 +213,15 @@ struct virtnet_info { + /* Packet virtio header size */ + u8 hdr_len; + +- /* Work struct for refilling if we run low on memory. */ ++ /* Work struct for delayed refilling if we run low on memory. 
*/ + struct delayed_work refill; + ++ /* Is delayed refill enabled? */ ++ bool refill_enabled; ++ ++ /* The lock to synchronize the access to refill_enabled */ ++ spinlock_t refill_lock; ++ + /* Work struct for config space updates */ + struct work_struct config_work; + +@@ -319,6 +325,20 @@ static struct page *get_a_page(struct receive_queue *rq, gfp_t gfp_mask) + return p; + } + ++static void enable_delayed_refill(struct virtnet_info *vi) ++{ ++ spin_lock_bh(&vi->refill_lock); ++ vi->refill_enabled = true; ++ spin_unlock_bh(&vi->refill_lock); ++} ++ ++static void disable_delayed_refill(struct virtnet_info *vi) ++{ ++ spin_lock_bh(&vi->refill_lock); ++ vi->refill_enabled = false; ++ spin_unlock_bh(&vi->refill_lock); ++} ++ + static void virtqueue_napi_schedule(struct napi_struct *napi, + struct virtqueue *vq) + { +@@ -1403,8 +1423,12 @@ static int virtnet_receive(struct receive_queue *rq, int budget, + } + + if (rq->vq->num_free > min((unsigned int)budget, virtqueue_get_vring_size(rq->vq)) / 2) { +- if (!try_fill_recv(vi, rq, GFP_ATOMIC)) +- schedule_delayed_work(&vi->refill, 0); ++ if (!try_fill_recv(vi, rq, GFP_ATOMIC)) { ++ spin_lock(&vi->refill_lock); ++ if (vi->refill_enabled) ++ schedule_delayed_work(&vi->refill, 0); ++ spin_unlock(&vi->refill_lock); ++ } + } + + u64_stats_update_begin(&rq->stats.syncp); +@@ -1523,6 +1547,8 @@ static int virtnet_open(struct net_device *dev) + struct virtnet_info *vi = netdev_priv(dev); + int i, err; + ++ enable_delayed_refill(vi); ++ + for (i = 0; i < vi->max_queue_pairs; i++) { + if (i < vi->curr_queue_pairs) + /* Make sure we have some buffers: if oom use wq. */ +@@ -1893,6 +1919,8 @@ static int virtnet_close(struct net_device *dev) + struct virtnet_info *vi = netdev_priv(dev); + int i; + ++ /* Make sure NAPI doesn't schedule refill work */ ++ disable_delayed_refill(vi); + /* Make sure refill_work doesn't re-enable napi! 
*/ + cancel_delayed_work_sync(&vi->refill); + +@@ -2390,6 +2418,8 @@ static int virtnet_restore_up(struct virtio_device *vdev) + + virtio_device_ready(vdev); + ++ enable_delayed_refill(vi); ++ + if (netif_running(vi->dev)) { + err = virtnet_open(vi->dev); + if (err) +@@ -3092,6 +3122,7 @@ static int virtnet_probe(struct virtio_device *vdev) + vdev->priv = vi; + + INIT_WORK(&vi->config_work, virtnet_config_changed_work); ++ spin_lock_init(&vi->refill_lock); + + /* If we can receive ANY GSO packets, we must allocate large ones. */ + if (virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_TSO4) || +-- +2.35.1 +