From fe8b4b6d23036f989867f5191bc8ea7f278f8636 Mon Sep 17 00:00:00 2001 From: Sasha Levin Date: Sat, 30 Jul 2022 22:41:29 -0400 Subject: [PATCH] Fixes for 5.4 Signed-off-by: Sasha Levin --- ...ation-fix-sctp_wmem-in-ip-sysctl.rst.patch | 49 +++++ ...ce-init-with-msi-interrupts-no-msi-x.patch | 49 +++++ ...ix-data-races-around-sysctl_igmp_qrv.patch | 127 ++++++++++++ ...dd-of_node_put-for-reference-returne.patch | 37 ++++ ...ue-do-not-allow-packet-truncation-be.patch | 53 +++++ ...mbol-correct-address-for-bss-symbols.patch | 182 ++++++++++++++++++ ...n-atomic-context-bug-in-timer-handle.patch | 61 ++++++ ...rr-path-free-in-sctp_stream_init-to-.patch | 109 +++++++++++ queue-5.4/series | 16 ++ .../sfc-disable-softirqs-for-ptp-tx.patch | 73 +++++++ ...a-race-around-sysctl_tcp_autocorking.patch | 36 ++++ ...ace-around-sysctl_tcp_comp_sack_dela.patch | 37 ++++ ...-race-around-sysctl_tcp_comp_sack_nr.patch | 36 ++++ ...ace-around-sysctl_tcp_invalid_rateli.patch | 37 ++++ ...-race-around-sysctl_tcp_min_rtt_wlen.patch | 36 ++++ ...-race-around-sysctl_tcp_min_tso_segs.patch | 36 ++++ ...he-race-between-refill-work-and-clos.patch | 151 +++++++++++++++ 17 files changed, 1125 insertions(+) create mode 100644 queue-5.4/documentation-fix-sctp_wmem-in-ip-sysctl.rst.patch create mode 100644 queue-5.4/i40e-fix-interface-init-with-msi-interrupts-no-msi-x.patch create mode 100644 queue-5.4/igmp-fix-data-races-around-sysctl_igmp_qrv.patch create mode 100644 queue-5.4/net-sungem_phy-add-of_node_put-for-reference-returne.patch create mode 100644 queue-5.4/netfilter-nf_queue-do-not-allow-packet-truncation-be.patch create mode 100644 queue-5.4/perf-symbol-correct-address-for-bss-symbols.patch create mode 100644 queue-5.4/sctp-fix-sleep-in-atomic-context-bug-in-timer-handle.patch create mode 100644 queue-5.4/sctp-leave-the-err-path-free-in-sctp_stream_init-to-.patch create mode 100644 queue-5.4/sfc-disable-softirqs-for-ptp-tx.patch create mode 100644 
queue-5.4/tcp-fix-a-data-race-around-sysctl_tcp_autocorking.patch create mode 100644 queue-5.4/tcp-fix-a-data-race-around-sysctl_tcp_comp_sack_dela.patch create mode 100644 queue-5.4/tcp-fix-a-data-race-around-sysctl_tcp_comp_sack_nr.patch create mode 100644 queue-5.4/tcp-fix-a-data-race-around-sysctl_tcp_invalid_rateli.patch create mode 100644 queue-5.4/tcp-fix-a-data-race-around-sysctl_tcp_min_rtt_wlen.patch create mode 100644 queue-5.4/tcp-fix-a-data-race-around-sysctl_tcp_min_tso_segs.patch create mode 100644 queue-5.4/virtio-net-fix-the-race-between-refill-work-and-clos.patch diff --git a/queue-5.4/documentation-fix-sctp_wmem-in-ip-sysctl.rst.patch b/queue-5.4/documentation-fix-sctp_wmem-in-ip-sysctl.rst.patch new file mode 100644 index 00000000000..a50058b3f18 --- /dev/null +++ b/queue-5.4/documentation-fix-sctp_wmem-in-ip-sysctl.rst.patch @@ -0,0 +1,49 @@ +From d886bc9ce8df6004b79ae28ee3e84a553ddac7fb Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 21 Jul 2022 10:35:46 -0400 +Subject: Documentation: fix sctp_wmem in ip-sysctl.rst + +From: Xin Long + +[ Upstream commit aa709da0e032cee7c202047ecd75f437bb0126ed ] + +Since commit 1033990ac5b2 ("sctp: implement memory accounting on tx path"), +SCTP has supported memory accounting on tx path where 'sctp_wmem' is used +by sk_wmem_schedule(). So we should fix the description for this option in +ip-sysctl.rst accordingly. + +v1->v2: + - Improve the description as Marcelo suggested. + +Fixes: 1033990ac5b2 ("sctp: implement memory accounting on tx path") +Signed-off-by: Xin Long +Acked-by: Marcelo Ricardo Leitner +Signed-off-by: David S. 
Miller +Signed-off-by: Sasha Levin +--- + Documentation/networking/ip-sysctl.txt | 9 ++++++++- + 1 file changed, 8 insertions(+), 1 deletion(-) + +diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt +index 787a9c077ef1..5cf601c94e35 100644 +--- a/Documentation/networking/ip-sysctl.txt ++++ b/Documentation/networking/ip-sysctl.txt +@@ -2284,7 +2284,14 @@ sctp_rmem - vector of 3 INTEGERs: min, default, max + Default: 4K + + sctp_wmem - vector of 3 INTEGERs: min, default, max +- Currently this tunable has no effect. ++ Only the first value ("min") is used, "default" and "max" are ++ ignored. ++ ++ min: Minimum size of send buffer that can be used by SCTP sockets. ++ It is guaranteed to each SCTP socket (but not association) even ++ under moderate memory pressure. ++ ++ Default: 4K + + addr_scope_policy - INTEGER + Control IPv4 address scoping - draft-stewart-tsvwg-sctp-ipv4-00 +-- +2.35.1 + diff --git a/queue-5.4/i40e-fix-interface-init-with-msi-interrupts-no-msi-x.patch b/queue-5.4/i40e-fix-interface-init-with-msi-interrupts-no-msi-x.patch new file mode 100644 index 00000000000..bc371e5581e --- /dev/null +++ b/queue-5.4/i40e-fix-interface-init-with-msi-interrupts-no-msi-x.patch @@ -0,0 +1,49 @@ +From 888ff31a2b98c89501504eaf7b2e2fa64c25243e Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 22 Jul 2022 10:54:01 -0700 +Subject: i40e: Fix interface init with MSI interrupts (no MSI-X) + +From: Michal Maloszewski + +[ Upstream commit 5fcbb711024aac6d4db385623e6f2fdf019f7782 ] + +Fix the inability to bring an interface up on a setup with +only MSI interrupts enabled (no MSI-X). +Solution is to add a default number of QPs = 1. This is enough, +since without MSI-X support driver enables only a basic feature set. 
+ +Fixes: bc6d33c8d93f ("i40e: Fix the number of queues available to be mapped for use") +Signed-off-by: Dawid Lukwinski +Signed-off-by: Michal Maloszewski +Tested-by: Dave Switzer +Signed-off-by: Tony Nguyen +Link: https://lore.kernel.org/r/20220722175401.112572-1-anthony.l.nguyen@intel.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/intel/i40e/i40e_main.c | 4 ++++ + 1 file changed, 4 insertions(+) + +diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c +index 0610d344fdbf..637f6ed78b48 100644 +--- a/drivers/net/ethernet/intel/i40e/i40e_main.c ++++ b/drivers/net/ethernet/intel/i40e/i40e_main.c +@@ -1821,11 +1821,15 @@ static void i40e_vsi_setup_queue_map(struct i40e_vsi *vsi, + * non-zero req_queue_pairs says that user requested a new + * queue count via ethtool's set_channels, so use this + * value for queues distribution across traffic classes ++ * We need at least one queue pair for the interface ++ * to be usable as we see in else statement. + */ + if (vsi->req_queue_pairs > 0) + vsi->num_queue_pairs = vsi->req_queue_pairs; + else if (pf->flags & I40E_FLAG_MSIX_ENABLED) + vsi->num_queue_pairs = pf->num_lan_msix; ++ else ++ vsi->num_queue_pairs = 1; + } + + /* Number of queues per enabled TC */ +-- +2.35.1 + diff --git a/queue-5.4/igmp-fix-data-races-around-sysctl_igmp_qrv.patch b/queue-5.4/igmp-fix-data-races-around-sysctl_igmp_qrv.patch new file mode 100644 index 00000000000..5ea20e0eedf --- /dev/null +++ b/queue-5.4/igmp-fix-data-races-around-sysctl_igmp_qrv.patch @@ -0,0 +1,127 @@ +From 8e893ea193073506f79b56296efd030e8518aa77 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 15 Jul 2022 10:17:44 -0700 +Subject: igmp: Fix data-races around sysctl_igmp_qrv. + +From: Kuniyuki Iwashima + +[ Upstream commit 8ebcc62c738f68688ee7c6fec2efe5bc6d3d7e60 ] + +While reading sysctl_igmp_qrv, it can be changed concurrently. +Thus, we need to add READ_ONCE() to its readers. 
+ +This test can be packed into a helper, so such changes will be in the +follow-up series after net is merged into net-next. + + qrv ?: READ_ONCE(net->ipv4.sysctl_igmp_qrv); + +Fixes: a9fe8e29945d ("ipv4: implement igmp_qrv sysctl to tune igmp robustness variable") +Signed-off-by: Kuniyuki Iwashima +Signed-off-by: David S. Miller +Signed-off-by: Sasha Levin +--- + net/ipv4/igmp.c | 24 +++++++++++++----------- + 1 file changed, 13 insertions(+), 11 deletions(-) + +diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c +index 660b41040c77..1023f881091e 100644 +--- a/net/ipv4/igmp.c ++++ b/net/ipv4/igmp.c +@@ -829,7 +829,7 @@ static void igmp_ifc_event(struct in_device *in_dev) + struct net *net = dev_net(in_dev->dev); + if (IGMP_V1_SEEN(in_dev) || IGMP_V2_SEEN(in_dev)) + return; +- WRITE_ONCE(in_dev->mr_ifc_count, in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv); ++ WRITE_ONCE(in_dev->mr_ifc_count, in_dev->mr_qrv ?: READ_ONCE(net->ipv4.sysctl_igmp_qrv)); + igmp_ifc_start_timer(in_dev, 1); + } + +@@ -1011,7 +1011,7 @@ static bool igmp_heard_query(struct in_device *in_dev, struct sk_buff *skb, + * received value was zero, use the default or statically + * configured value. + */ +- in_dev->mr_qrv = ih3->qrv ?: net->ipv4.sysctl_igmp_qrv; ++ in_dev->mr_qrv = ih3->qrv ?: READ_ONCE(net->ipv4.sysctl_igmp_qrv); + in_dev->mr_qi = IGMPV3_QQIC(ih3->qqic)*HZ ?: IGMP_QUERY_INTERVAL; + + /* RFC3376, 8.3. 
Query Response Interval: +@@ -1191,7 +1191,7 @@ static void igmpv3_add_delrec(struct in_device *in_dev, struct ip_mc_list *im, + pmc->interface = im->interface; + in_dev_hold(in_dev); + pmc->multiaddr = im->multiaddr; +- pmc->crcount = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv; ++ pmc->crcount = in_dev->mr_qrv ?: READ_ONCE(net->ipv4.sysctl_igmp_qrv); + pmc->sfmode = im->sfmode; + if (pmc->sfmode == MCAST_INCLUDE) { + struct ip_sf_list *psf; +@@ -1242,9 +1242,11 @@ static void igmpv3_del_delrec(struct in_device *in_dev, struct ip_mc_list *im) + swap(im->tomb, pmc->tomb); + swap(im->sources, pmc->sources); + for (psf = im->sources; psf; psf = psf->sf_next) +- psf->sf_crcount = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv; ++ psf->sf_crcount = in_dev->mr_qrv ?: ++ READ_ONCE(net->ipv4.sysctl_igmp_qrv); + } else { +- im->crcount = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv; ++ im->crcount = in_dev->mr_qrv ?: ++ READ_ONCE(net->ipv4.sysctl_igmp_qrv); + } + in_dev_put(pmc->interface); + kfree_pmc(pmc); +@@ -1351,7 +1353,7 @@ static void igmp_group_added(struct ip_mc_list *im) + if (in_dev->dead) + return; + +- im->unsolicit_count = net->ipv4.sysctl_igmp_qrv; ++ im->unsolicit_count = READ_ONCE(net->ipv4.sysctl_igmp_qrv); + if (IGMP_V1_SEEN(in_dev) || IGMP_V2_SEEN(in_dev)) { + spin_lock_bh(&im->lock); + igmp_start_timer(im, IGMP_INITIAL_REPORT_DELAY); +@@ -1365,7 +1367,7 @@ static void igmp_group_added(struct ip_mc_list *im) + * IN() to IN(A). 
+ */ + if (im->sfmode == MCAST_EXCLUDE) +- im->crcount = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv; ++ im->crcount = in_dev->mr_qrv ?: READ_ONCE(net->ipv4.sysctl_igmp_qrv); + + igmp_ifc_event(in_dev); + #endif +@@ -1756,7 +1758,7 @@ static void ip_mc_reset(struct in_device *in_dev) + + in_dev->mr_qi = IGMP_QUERY_INTERVAL; + in_dev->mr_qri = IGMP_QUERY_RESPONSE_INTERVAL; +- in_dev->mr_qrv = net->ipv4.sysctl_igmp_qrv; ++ in_dev->mr_qrv = READ_ONCE(net->ipv4.sysctl_igmp_qrv); + } + #else + static void ip_mc_reset(struct in_device *in_dev) +@@ -1890,7 +1892,7 @@ static int ip_mc_del1_src(struct ip_mc_list *pmc, int sfmode, + #ifdef CONFIG_IP_MULTICAST + if (psf->sf_oldin && + !IGMP_V1_SEEN(in_dev) && !IGMP_V2_SEEN(in_dev)) { +- psf->sf_crcount = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv; ++ psf->sf_crcount = in_dev->mr_qrv ?: READ_ONCE(net->ipv4.sysctl_igmp_qrv); + psf->sf_next = pmc->tomb; + pmc->tomb = psf; + rv = 1; +@@ -1954,7 +1956,7 @@ static int ip_mc_del_src(struct in_device *in_dev, __be32 *pmca, int sfmode, + /* filter mode change */ + pmc->sfmode = MCAST_INCLUDE; + #ifdef CONFIG_IP_MULTICAST +- pmc->crcount = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv; ++ pmc->crcount = in_dev->mr_qrv ?: READ_ONCE(net->ipv4.sysctl_igmp_qrv); + WRITE_ONCE(in_dev->mr_ifc_count, pmc->crcount); + for (psf = pmc->sources; psf; psf = psf->sf_next) + psf->sf_crcount = 0; +@@ -2133,7 +2135,7 @@ static int ip_mc_add_src(struct in_device *in_dev, __be32 *pmca, int sfmode, + #ifdef CONFIG_IP_MULTICAST + /* else no filters; keep old mode for reports */ + +- pmc->crcount = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv; ++ pmc->crcount = in_dev->mr_qrv ?: READ_ONCE(net->ipv4.sysctl_igmp_qrv); + WRITE_ONCE(in_dev->mr_ifc_count, pmc->crcount); + for (psf = pmc->sources; psf; psf = psf->sf_next) + psf->sf_crcount = 0; +-- +2.35.1 + diff --git a/queue-5.4/net-sungem_phy-add-of_node_put-for-reference-returne.patch b/queue-5.4/net-sungem_phy-add-of_node_put-for-reference-returne.patch new 
file mode 100644 index 00000000000..d51e7a3ab4d --- /dev/null +++ b/queue-5.4/net-sungem_phy-add-of_node_put-for-reference-returne.patch @@ -0,0 +1,37 @@ +From c5e2dec8c589406223337e362cd608bb190db07a Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 20 Jul 2022 21:10:03 +0800 +Subject: net: sungem_phy: Add of_node_put() for reference returned by + of_get_parent() + +From: Liang He + +[ Upstream commit ebbbe23fdf6070e31509638df3321688358cc211 ] + +In bcm5421_init(), we should call of_node_put() for the reference +returned by of_get_parent() which has increased the refcount. + +Fixes: 3c326fe9cb7a ("[PATCH] ppc64: Add new PHY to sungem") +Signed-off-by: Liang He +Link: https://lore.kernel.org/r/20220720131003.1287426-1-windhl@126.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + drivers/net/sungem_phy.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/drivers/net/sungem_phy.c b/drivers/net/sungem_phy.c +index 291fa449993f..45f295403cb5 100644 +--- a/drivers/net/sungem_phy.c ++++ b/drivers/net/sungem_phy.c +@@ -454,6 +454,7 @@ static int bcm5421_init(struct mii_phy* phy) + int can_low_power = 1; + if (np == NULL || of_get_property(np, "no-autolowpower", NULL)) + can_low_power = 0; ++ of_node_put(np); + if (can_low_power) { + /* Enable automatic low-power */ + sungem_phy_write(phy, 0x1c, 0x9002); +-- +2.35.1 + diff --git a/queue-5.4/netfilter-nf_queue-do-not-allow-packet-truncation-be.patch b/queue-5.4/netfilter-nf_queue-do-not-allow-packet-truncation-be.patch new file mode 100644 index 00000000000..c4c83efb5c9 --- /dev/null +++ b/queue-5.4/netfilter-nf_queue-do-not-allow-packet-truncation-be.patch @@ -0,0 +1,53 @@ +From 6effee1825eaaece708a8dcfa2831119db9c8828 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 26 Jul 2022 12:42:06 +0200 +Subject: netfilter: nf_queue: do not allow packet truncation below transport + header offset + +From: Florian Westphal + +[ Upstream commit 99a63d36cb3ed5ca3aa6fcb64cffbeaf3b0fb164 ] + +Domingo 
Dirutigliano and Nicola Guerrera report kernel panic when +sending nf_queue verdict with 1-byte nfta_payload attribute. + +The IP/IPv6 stack pulls the IP(v6) header from the packet after the +input hook. + +If user truncates the packet below the header size, this skb_pull() will +result in a malformed skb (skb->len < 0). + +Fixes: 7af4cc3fa158 ("[NETFILTER]: Add "nfnetlink_queue" netfilter queue handler over nfnetlink") +Reported-by: Domingo Dirutigliano +Signed-off-by: Florian Westphal +Reviewed-by: Pablo Neira Ayuso +Signed-off-by: Sasha Levin +--- + net/netfilter/nfnetlink_queue.c | 7 ++++++- + 1 file changed, 6 insertions(+), 1 deletion(-) + +diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c +index 7d3ab08a5a2d..581bd1353a44 100644 +--- a/net/netfilter/nfnetlink_queue.c ++++ b/net/netfilter/nfnetlink_queue.c +@@ -846,11 +846,16 @@ nfqnl_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum) + } + + static int +-nfqnl_mangle(void *data, int data_len, struct nf_queue_entry *e, int diff) ++nfqnl_mangle(void *data, unsigned int data_len, struct nf_queue_entry *e, int diff) + { + struct sk_buff *nskb; + + if (diff < 0) { ++ unsigned int min_len = skb_transport_offset(e->skb); ++ ++ if (data_len < min_len) ++ return -EINVAL; ++ + if (pskb_trim(e->skb, data_len)) + return -ENOMEM; + } else if (diff > 0) { +-- +2.35.1 + diff --git a/queue-5.4/perf-symbol-correct-address-for-bss-symbols.patch b/queue-5.4/perf-symbol-correct-address-for-bss-symbols.patch new file mode 100644 index 00000000000..21d1a7b1ebb --- /dev/null +++ b/queue-5.4/perf-symbol-correct-address-for-bss-symbols.patch @@ -0,0 +1,182 @@ +From c06e2278d0ba78bfc80b033b1c52b88173fdf47a Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sun, 24 Jul 2022 14:00:12 +0800 +Subject: perf symbol: Correct address for bss symbols + +From: Leo Yan + +[ Upstream commit 2d86612aacb7805f72873691a2644d7279ed0630 ] + +When using 'perf mem' and 'perf c2c', an issue is observed that 
tool +reports the wrong offset for global data symbols. This is a common +issue on both x86 and Arm64 platforms. + +Let's see an example, for a test program, below is the disassembly for +its .bss section which is dumped with objdump: + + ... + + Disassembly of section .bss: + + 0000000000004040 : + ... + + 0000000000004080 : + ... + + 00000000000040c0 : + ... + + 0000000000004100 : + ... + +First we used 'perf mem record' to run the test program and then used +'perf --debug verbose=4 mem report' to observe what's the symbol info +for 'buf1' and 'buf2' structures. + + # ./perf mem record -e ldlat-loads,ldlat-stores -- false_sharing.exe 8 + # ./perf --debug verbose=4 mem report + ... + dso__load_sym_internal: adjusting symbol: st_value: 0x40c0 sh_addr: 0x4040 sh_offset: 0x3028 + symbol__new: buf2 0x30a8-0x30e8 + ... + dso__load_sym_internal: adjusting symbol: st_value: 0x4080 sh_addr: 0x4040 sh_offset: 0x3028 + symbol__new: buf1 0x3068-0x30a8 + ... + +The perf tool relies on libelf to parse symbols, in executable and +shared object files, 'st_value' holds a virtual address; 'sh_addr' is +the address at which section's first byte should reside in memory, and +'sh_offset' is the byte offset from the beginning of the file to the +first byte in the section. The perf tool uses below formula to convert +a symbol's memory address to a file address: + + file_address = st_value - sh_addr + sh_offset + ^ + ` Memory address + +We can see the final adjusted address ranges for buf1 and buf2 are +[0x30a8-0x30e8) and [0x3068-0x30a8) respectively, apparently this is +incorrect, in the code, the structure for 'buf1' and 'buf2' specifies +compiler attribute with 64-byte alignment. + +The problem happens for 'sh_offset', libelf returns it as 0x3028 which +is not 64-byte aligned, combining with disassembly, it's likely libelf +doesn't respect the alignment for .bss section, therefore, it doesn't +return the aligned value for 'sh_offset'. 
+ +Suggested by Fangrui Song, ELF file contains program header which +contains PT_LOAD segments, the fields p_vaddr and p_offset in PT_LOAD +segments contain the execution info. A better choice for converting +memory address to file address is using the formula: + + file_address = st_value - p_vaddr + p_offset + +This patch introduces elf_read_program_header() which returns the +program header based on the passed 'st_value', then it uses the formula +above to calculate the symbol file address; and the debugging log is +updated respectively. + +After applying the change: + + # ./perf --debug verbose=4 mem report + ... + dso__load_sym_internal: adjusting symbol: st_value: 0x40c0 p_vaddr: 0x3d28 p_offset: 0x2d28 + symbol__new: buf2 0x30c0-0x3100 + ... + dso__load_sym_internal: adjusting symbol: st_value: 0x4080 p_vaddr: 0x3d28 p_offset: 0x2d28 + symbol__new: buf1 0x3080-0x30c0 + ... + +Fixes: f17e04afaff84b5c ("perf report: Fix ELF symbol parsing") +Reported-by: Chang Rui +Suggested-by: Fangrui Song +Signed-off-by: Leo Yan +Acked-by: Namhyung Kim +Cc: Alexander Shishkin +Cc: Ian Rogers +Cc: Ingo Molnar +Cc: Jiri Olsa +Cc: Mark Rutland +Cc: Peter Zijlstra +Link: https://lore.kernel.org/r/20220724060013.171050-2-leo.yan@linaro.org +Signed-off-by: Arnaldo Carvalho de Melo +Signed-off-by: Sasha Levin +--- + tools/perf/util/symbol-elf.c | 45 ++++++++++++++++++++++++++++++++---- + 1 file changed, 41 insertions(+), 4 deletions(-) + +diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c +index 2ec0a32da579..0b185b1090ff 100644 +--- a/tools/perf/util/symbol-elf.c ++++ b/tools/perf/util/symbol-elf.c +@@ -230,6 +230,33 @@ Elf_Scn *elf_section_by_name(Elf *elf, GElf_Ehdr *ep, + return NULL; + } + ++static int elf_read_program_header(Elf *elf, u64 vaddr, GElf_Phdr *phdr) ++{ ++ size_t i, phdrnum; ++ u64 sz; ++ ++ if (elf_getphdrnum(elf, &phdrnum)) ++ return -1; ++ ++ for (i = 0; i < phdrnum; i++) { ++ if (gelf_getphdr(elf, i, phdr) == NULL) ++ return -1; ++ ++ 
if (phdr->p_type != PT_LOAD) ++ continue; ++ ++ sz = max(phdr->p_memsz, phdr->p_filesz); ++ if (!sz) ++ continue; ++ ++ if (vaddr >= phdr->p_vaddr && (vaddr < phdr->p_vaddr + sz)) ++ return 0; ++ } ++ ++ /* Not found any valid program header */ ++ return -1; ++} ++ + static bool want_demangle(bool is_kernel_sym) + { + return is_kernel_sym ? symbol_conf.demangle_kernel : symbol_conf.demangle; +@@ -1091,6 +1118,7 @@ int dso__load_sym(struct dso *dso, struct map *map, struct symsrc *syms_ss, + sym.st_value); + used_opd = true; + } ++ + /* + * When loading symbols in a data mapping, ABS symbols (which + * has a value of SHN_ABS in its st_shndx) failed at +@@ -1127,11 +1155,20 @@ int dso__load_sym(struct dso *dso, struct map *map, struct symsrc *syms_ss, + goto out_elf_end; + } else if ((used_opd && runtime_ss->adjust_symbols) || + (!used_opd && syms_ss->adjust_symbols)) { ++ GElf_Phdr phdr; ++ ++ if (elf_read_program_header(syms_ss->elf, ++ (u64)sym.st_value, &phdr)) { ++ pr_warning("%s: failed to find program header for " ++ "symbol: %s st_value: %#" PRIx64 "\n", ++ __func__, elf_name, (u64)sym.st_value); ++ continue; ++ } + pr_debug4("%s: adjusting symbol: st_value: %#" PRIx64 " " +- "sh_addr: %#" PRIx64 " sh_offset: %#" PRIx64 "\n", __func__, +- (u64)sym.st_value, (u64)shdr.sh_addr, +- (u64)shdr.sh_offset); +- sym.st_value -= shdr.sh_addr - shdr.sh_offset; ++ "p_vaddr: %#" PRIx64 " p_offset: %#" PRIx64 "\n", ++ __func__, (u64)sym.st_value, (u64)phdr.p_vaddr, ++ (u64)phdr.p_offset); ++ sym.st_value -= phdr.p_vaddr - phdr.p_offset; + } + + demangled = demangle_sym(dso, kmodule, elf_name); +-- +2.35.1 + diff --git a/queue-5.4/sctp-fix-sleep-in-atomic-context-bug-in-timer-handle.patch b/queue-5.4/sctp-fix-sleep-in-atomic-context-bug-in-timer-handle.patch new file mode 100644 index 00000000000..bd5062102c8 --- /dev/null +++ b/queue-5.4/sctp-fix-sleep-in-atomic-context-bug-in-timer-handle.patch @@ -0,0 +1,61 @@ +From 9ef7bc8b0c4534d5308afe5b2c0cf13bda2aa884 Mon Sep 17 
00:00:00 2001 +From: Sasha Levin +Date: Sat, 23 Jul 2022 09:58:09 +0800 +Subject: sctp: fix sleep in atomic context bug in timer handlers + +From: Duoming Zhou + +[ Upstream commit b89fc26f741d9f9efb51cba3e9b241cf1380ec5a ] + +There are sleep in atomic context bugs in timer handlers of sctp +such as sctp_generate_t3_rtx_event(), sctp_generate_probe_event(), +sctp_generate_t1_init_event(), sctp_generate_timeout_event(), +sctp_generate_t3_rtx_event() and so on. + +The root cause is sctp_sched_prio_init_sid() with GFP_KERNEL parameter +that may sleep could be called by different timer handlers which is in +interrupt context. + +One of the call paths that could trigger bug is shown below: + + (interrupt context) +sctp_generate_probe_event + sctp_do_sm + sctp_side_effects + sctp_cmd_interpreter + sctp_outq_teardown + sctp_outq_init + sctp_sched_set_sched + n->init_sid(..,GFP_KERNEL) + sctp_sched_prio_init_sid //may sleep + +This patch changes gfp_t parameter of init_sid in sctp_sched_set_sched() +from GFP_KERNEL to GFP_ATOMIC in order to prevent sleep in atomic +context bugs. 
+ +Fixes: 5bbbbe32a431 ("sctp: introduce stream scheduler foundations") +Signed-off-by: Duoming Zhou +Acked-by: Marcelo Ricardo Leitner +Link: https://lore.kernel.org/r/20220723015809.11553-1-duoming@zju.edu.cn +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + net/sctp/stream_sched.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/net/sctp/stream_sched.c b/net/sctp/stream_sched.c +index 99e5f69fbb74..a2e1d34f52c5 100644 +--- a/net/sctp/stream_sched.c ++++ b/net/sctp/stream_sched.c +@@ -163,7 +163,7 @@ int sctp_sched_set_sched(struct sctp_association *asoc, + if (!SCTP_SO(&asoc->stream, i)->ext) + continue; + +- ret = n->init_sid(&asoc->stream, i, GFP_KERNEL); ++ ret = n->init_sid(&asoc->stream, i, GFP_ATOMIC); + if (ret) + goto err; + } +-- +2.35.1 + diff --git a/queue-5.4/sctp-leave-the-err-path-free-in-sctp_stream_init-to-.patch b/queue-5.4/sctp-leave-the-err-path-free-in-sctp_stream_init-to-.patch new file mode 100644 index 00000000000..ac91fb3fe25 --- /dev/null +++ b/queue-5.4/sctp-leave-the-err-path-free-in-sctp_stream_init-to-.patch @@ -0,0 +1,109 @@ +From 59e88348aee82011fb00db676d8ad971518e0003 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 25 Jul 2022 18:11:06 -0400 +Subject: sctp: leave the err path free in sctp_stream_init to sctp_stream_free + +From: Xin Long + +[ Upstream commit 181d8d2066c000ba0a0e6940a7ad80f1a0e68e9d ] + +A NULL pointer dereference was reported by Wei Chen: + + BUG: kernel NULL pointer dereference, address: 0000000000000000 + RIP: 0010:__list_del_entry_valid+0x26/0x80 + Call Trace: + + sctp_sched_dequeue_common+0x1c/0x90 + sctp_sched_prio_dequeue+0x67/0x80 + __sctp_outq_teardown+0x299/0x380 + sctp_outq_free+0x15/0x20 + sctp_association_free+0xc3/0x440 + sctp_do_sm+0x1ca7/0x2210 + sctp_assoc_bh_rcv+0x1f6/0x340 + +This happens when calling sctp_sendmsg without connecting to server first. 
+In this case, a data chunk already queues up in send queue of client side +when processing the INIT_ACK from server in sctp_process_init() where it +calls sctp_stream_init() to alloc stream_in. If it fails to alloc stream_in +all stream_out will be freed in sctp_stream_init's err path. Then in the +asoc freeing it will crash when dequeuing this data chunk as stream_out +is missing. + +As we can't free stream out before dequeuing all data from send queue, and +this patch is to fix it by moving the err path stream_out/in freeing in +sctp_stream_init() to sctp_stream_free() which is eventually called when +freeing the asoc in sctp_association_free(). This fix also makes the code +in sctp_process_init() more clear. + +Note that in sctp_association_init() when it fails in sctp_stream_init(), +sctp_association_free() will not be called, and in that case it should +go to 'stream_free' err path to free stream instead of 'fail_init'. + +Fixes: 5bbbbe32a431 ("sctp: introduce stream scheduler foundations") +Reported-by: Wei Chen +Signed-off-by: Xin Long +Link: https://lore.kernel.org/r/831a3dc100c4908ff76e5bcc363be97f2778bc0b.1658787066.git.lucien.xin@gmail.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + net/sctp/associola.c | 5 ++--- + net/sctp/stream.c | 19 +++---------------- + 2 files changed, 5 insertions(+), 19 deletions(-) + +diff --git a/net/sctp/associola.c b/net/sctp/associola.c +index fb6f62264e87..f960b0e1e552 100644 +--- a/net/sctp/associola.c ++++ b/net/sctp/associola.c +@@ -224,9 +224,8 @@ static struct sctp_association *sctp_association_init( + if (!sctp_ulpq_init(&asoc->ulpq, asoc)) + goto fail_init; + +- if (sctp_stream_init(&asoc->stream, asoc->c.sinit_num_ostreams, +- 0, gfp)) +- goto fail_init; ++ if (sctp_stream_init(&asoc->stream, asoc->c.sinit_num_ostreams, 0, gfp)) ++ goto stream_free; + + /* Initialize default path MTU. 
*/ + asoc->pathmtu = sp->pathmtu; +diff --git a/net/sctp/stream.c b/net/sctp/stream.c +index cd20638b6151..56762745d6e4 100644 +--- a/net/sctp/stream.c ++++ b/net/sctp/stream.c +@@ -137,7 +137,7 @@ int sctp_stream_init(struct sctp_stream *stream, __u16 outcnt, __u16 incnt, + + ret = sctp_stream_alloc_out(stream, outcnt, gfp); + if (ret) +- goto out_err; ++ return ret; + + for (i = 0; i < stream->outcnt; i++) + SCTP_SO(stream, i)->state = SCTP_STREAM_OPEN; +@@ -145,22 +145,9 @@ int sctp_stream_init(struct sctp_stream *stream, __u16 outcnt, __u16 incnt, + handle_in: + sctp_stream_interleave_init(stream); + if (!incnt) +- goto out; +- +- ret = sctp_stream_alloc_in(stream, incnt, gfp); +- if (ret) +- goto in_err; +- +- goto out; ++ return 0; + +-in_err: +- sched->free(stream); +- genradix_free(&stream->in); +-out_err: +- genradix_free(&stream->out); +- stream->outcnt = 0; +-out: +- return ret; ++ return sctp_stream_alloc_in(stream, incnt, gfp); + } + + int sctp_stream_init_ext(struct sctp_stream *stream, __u16 sid) +-- +2.35.1 + diff --git a/queue-5.4/series b/queue-5.4/series index e951aa5752b..fd3bdc2db92 100644 --- a/queue-5.4/series +++ b/queue-5.4/series @@ -13,3 +13,19 @@ tcp-fix-a-data-race-around-sysctl_tcp_limit_output_bytes.patch tcp-fix-a-data-race-around-sysctl_tcp_challenge_ack_limit.patch net-ping6-fix-memleak-in-ipv6_renew_options.patch ipv6-addrconf-fix-a-null-ptr-deref-bug-for-ip6_ptr.patch +igmp-fix-data-races-around-sysctl_igmp_qrv.patch +net-sungem_phy-add-of_node_put-for-reference-returne.patch +tcp-fix-a-data-race-around-sysctl_tcp_min_tso_segs.patch +tcp-fix-a-data-race-around-sysctl_tcp_min_rtt_wlen.patch +tcp-fix-a-data-race-around-sysctl_tcp_autocorking.patch +tcp-fix-a-data-race-around-sysctl_tcp_invalid_rateli.patch +documentation-fix-sctp_wmem-in-ip-sysctl.rst.patch +tcp-fix-a-data-race-around-sysctl_tcp_comp_sack_dela.patch +tcp-fix-a-data-race-around-sysctl_tcp_comp_sack_nr.patch +i40e-fix-interface-init-with-msi-interrupts-no-msi-x.patch 
+sctp-fix-sleep-in-atomic-context-bug-in-timer-handle.patch +netfilter-nf_queue-do-not-allow-packet-truncation-be.patch +virtio-net-fix-the-race-between-refill-work-and-clos.patch +perf-symbol-correct-address-for-bss-symbols.patch +sfc-disable-softirqs-for-ptp-tx.patch +sctp-leave-the-err-path-free-in-sctp_stream_init-to-.patch diff --git a/queue-5.4/sfc-disable-softirqs-for-ptp-tx.patch b/queue-5.4/sfc-disable-softirqs-for-ptp-tx.patch new file mode 100644 index 00000000000..37c456b77d4 --- /dev/null +++ b/queue-5.4/sfc-disable-softirqs-for-ptp-tx.patch @@ -0,0 +1,73 @@ +From 0abfc83176102acd7f06f60874a95e7bc5f915fa Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 26 Jul 2022 08:45:04 +0200 +Subject: sfc: disable softirqs for ptp TX + +From: Alejandro Lucero + +[ Upstream commit 67c3b611d92fc238c43734878bc3e232ab570c79 ] + +Sending a PTP packet can imply to use the normal TX driver datapath but +invoked from the driver's ptp worker. The kernel generic TX code +disables softirqs and preemption before calling specific driver TX code, +but the ptp worker does not. Although current ptp driver functionality +does not require it, there are several reasons for doing so: + + 1) The invoked code is always executed with softirqs disabled for non + PTP packets. + 2) Better if a ptp packet transmission is not interrupted by softirq + handling which could lead to high latencies. + 3) netdev_xmit_more used by the TX code requires preemption to be + disabled. + +Indeed a solution for dealing with kernel preemption state based on static +kernel configuration is not possible since the introduction of dynamic +preemption level configuration at boot time using the static calls +functionality. 
+ +Fixes: f79c957a0b537 ("drivers: net: sfc: use netdev_xmit_more helper") +Signed-off-by: Alejandro Lucero +Link: https://lore.kernel.org/r/20220726064504.49613-1-alejandro.lucero-palau@amd.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/sfc/ptp.c | 22 ++++++++++++++++++++++ + 1 file changed, 22 insertions(+) + +diff --git a/drivers/net/ethernet/sfc/ptp.c b/drivers/net/ethernet/sfc/ptp.c +index 1fa1b71dbfa1..ed1140ecca60 100644 +--- a/drivers/net/ethernet/sfc/ptp.c ++++ b/drivers/net/ethernet/sfc/ptp.c +@@ -1093,7 +1093,29 @@ static void efx_ptp_xmit_skb_queue(struct efx_nic *efx, struct sk_buff *skb) + + tx_queue = &ptp_data->channel->tx_queue[type]; + if (tx_queue && tx_queue->timestamping) { ++ /* This code invokes normal driver TX code which is always ++ * protected from softirqs when called from generic TX code, ++ * which in turn disables preemption. Look at __dev_queue_xmit ++ * which uses rcu_read_lock_bh disabling preemption for RCU ++ * plus disabling softirqs. We do not need RCU reader ++ * protection here. ++ * ++ * Although it is theoretically safe for current PTP TX/RX code ++ * running without disabling softirqs, there are three good ++ * reasons for doing so: ++ * ++ * 1) The code invoked is mainly implemented for non-PTP ++ * packets and it is always executed with softirqs ++ * disabled. ++ * 2) This being a single PTP packet, better to not ++ * interrupt its processing by softirqs which can lead ++ * to high latencies. ++ * 3) netdev_xmit_more checks preemption is disabled and ++ * triggers a BUG_ON if not. 
++ */ ++ local_bh_disable(); + efx_enqueue_skb(tx_queue, skb); ++ local_bh_enable(); + } else { + WARN_ONCE(1, "PTP channel has no timestamped tx queue\n"); + dev_kfree_skb_any(skb); +-- +2.35.1 + diff --git a/queue-5.4/tcp-fix-a-data-race-around-sysctl_tcp_autocorking.patch b/queue-5.4/tcp-fix-a-data-race-around-sysctl_tcp_autocorking.patch new file mode 100644 index 00000000000..c7508403df9 --- /dev/null +++ b/queue-5.4/tcp-fix-a-data-race-around-sysctl_tcp_autocorking.patch @@ -0,0 +1,36 @@ +From e316ab213ee484da6d1e41e6c2b1ed0065c844ec Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 20 Jul 2022 09:50:25 -0700 +Subject: tcp: Fix a data-race around sysctl_tcp_autocorking. + +From: Kuniyuki Iwashima + +[ Upstream commit 85225e6f0a76e6745bc841c9f25169c509b573d8 ] + +While reading sysctl_tcp_autocorking, it can be changed concurrently. +Thus, we need to add READ_ONCE() to its reader. + +Fixes: f54b311142a9 ("tcp: auto corking") +Signed-off-by: Kuniyuki Iwashima +Signed-off-by: David S. 
Miller +Signed-off-by: Sasha Levin +--- + net/ipv4/tcp.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c +index 4b31f6e9ec61..0a570d5d0b38 100644 +--- a/net/ipv4/tcp.c ++++ b/net/ipv4/tcp.c +@@ -697,7 +697,7 @@ static bool tcp_should_autocork(struct sock *sk, struct sk_buff *skb, + int size_goal) + { + return skb->len < size_goal && +- sock_net(sk)->ipv4.sysctl_tcp_autocorking && ++ READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_autocorking) && + !tcp_rtx_queue_empty(sk) && + refcount_read(&sk->sk_wmem_alloc) > skb->truesize; + } +-- +2.35.1 + diff --git a/queue-5.4/tcp-fix-a-data-race-around-sysctl_tcp_comp_sack_dela.patch b/queue-5.4/tcp-fix-a-data-race-around-sysctl_tcp_comp_sack_dela.patch new file mode 100644 index 00000000000..32899214644 --- /dev/null +++ b/queue-5.4/tcp-fix-a-data-race-around-sysctl_tcp_comp_sack_dela.patch @@ -0,0 +1,37 @@ +From ebf4075665258aa877a27565be22f64acdf3cb16 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 22 Jul 2022 11:22:01 -0700 +Subject: tcp: Fix a data-race around sysctl_tcp_comp_sack_delay_ns. + +From: Kuniyuki Iwashima + +[ Upstream commit 4866b2b0f7672b6d760c4b8ece6fb56f965dcc8a ] + +While reading sysctl_tcp_comp_sack_delay_ns, it can be changed +concurrently. Thus, we need to add READ_ONCE() to its reader. + +Fixes: 6d82aa242092 ("tcp: add tcp_comp_sack_delay_ns sysctl") +Signed-off-by: Kuniyuki Iwashima +Signed-off-by: David S. 
Miller +Signed-off-by: Sasha Levin +--- + net/ipv4/tcp_input.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c +index 7af5e6acf41a..f8fa036cfae2 100644 +--- a/net/ipv4/tcp_input.c ++++ b/net/ipv4/tcp_input.c +@@ -5326,7 +5326,8 @@ static void __tcp_ack_snd_check(struct sock *sk, int ofo_possible) + if (tp->srtt_us && tp->srtt_us < rtt) + rtt = tp->srtt_us; + +- delay = min_t(unsigned long, sock_net(sk)->ipv4.sysctl_tcp_comp_sack_delay_ns, ++ delay = min_t(unsigned long, ++ READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_comp_sack_delay_ns), + rtt * (NSEC_PER_USEC >> 3)/20); + sock_hold(sk); + hrtimer_start(&tp->compressed_ack_timer, ns_to_ktime(delay), +-- +2.35.1 + diff --git a/queue-5.4/tcp-fix-a-data-race-around-sysctl_tcp_comp_sack_nr.patch b/queue-5.4/tcp-fix-a-data-race-around-sysctl_tcp_comp_sack_nr.patch new file mode 100644 index 00000000000..a55506582be --- /dev/null +++ b/queue-5.4/tcp-fix-a-data-race-around-sysctl_tcp_comp_sack_nr.patch @@ -0,0 +1,36 @@ +From 637627cb3171e89f7c0c48a64671e5a73f885708 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 22 Jul 2022 11:22:03 -0700 +Subject: tcp: Fix a data-race around sysctl_tcp_comp_sack_nr. + +From: Kuniyuki Iwashima + +[ Upstream commit 79f55473bfc8ac51bd6572929a679eeb4da22251 ] + +While reading sysctl_tcp_comp_sack_nr, it can be changed concurrently. +Thus, we need to add READ_ONCE() to its reader. + +Fixes: 9c21d2fc41c0 ("tcp: add tcp_comp_sack_nr sysctl") +Signed-off-by: Kuniyuki Iwashima +Signed-off-by: David S. 
Miller +Signed-off-by: Sasha Levin +--- + net/ipv4/tcp_input.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c +index f8fa036cfae2..f4e00ff909da 100644 +--- a/net/ipv4/tcp_input.c ++++ b/net/ipv4/tcp_input.c +@@ -5303,7 +5303,7 @@ static void __tcp_ack_snd_check(struct sock *sk, int ofo_possible) + } + + if (!tcp_is_sack(tp) || +- tp->compressed_ack >= sock_net(sk)->ipv4.sysctl_tcp_comp_sack_nr) ++ tp->compressed_ack >= READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_comp_sack_nr)) + goto send_now; + + if (tp->compressed_ack_rcv_nxt != tp->rcv_nxt) { +-- +2.35.1 + diff --git a/queue-5.4/tcp-fix-a-data-race-around-sysctl_tcp_invalid_rateli.patch b/queue-5.4/tcp-fix-a-data-race-around-sysctl_tcp_invalid_rateli.patch new file mode 100644 index 00000000000..238829a97db --- /dev/null +++ b/queue-5.4/tcp-fix-a-data-race-around-sysctl_tcp_invalid_rateli.patch @@ -0,0 +1,37 @@ +From c2f2c8c94efeb0a28ef9b8b773cf1e5ea606c1a6 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 20 Jul 2022 09:50:26 -0700 +Subject: tcp: Fix a data-race around sysctl_tcp_invalid_ratelimit. + +From: Kuniyuki Iwashima + +[ Upstream commit 2afdbe7b8de84c28e219073a6661080e1b3ded48 ] + +While reading sysctl_tcp_invalid_ratelimit, it can be changed +concurrently. Thus, we need to add READ_ONCE() to its reader. + +Fixes: 032ee4236954 ("tcp: helpers to mitigate ACK loops by rate-limiting out-of-window dupacks") +Signed-off-by: Kuniyuki Iwashima +Signed-off-by: David S. 
Miller +Signed-off-by: Sasha Levin +--- + net/ipv4/tcp_input.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c +index b760ad0b16d9..7af5e6acf41a 100644 +--- a/net/ipv4/tcp_input.c ++++ b/net/ipv4/tcp_input.c +@@ -3436,7 +3436,8 @@ static bool __tcp_oow_rate_limited(struct net *net, int mib_idx, + if (*last_oow_ack_time) { + s32 elapsed = (s32)(tcp_jiffies32 - *last_oow_ack_time); + +- if (0 <= elapsed && elapsed < net->ipv4.sysctl_tcp_invalid_ratelimit) { ++ if (0 <= elapsed && ++ elapsed < READ_ONCE(net->ipv4.sysctl_tcp_invalid_ratelimit)) { + NET_INC_STATS(net, mib_idx); + return true; /* rate-limited: don't send yet! */ + } +-- +2.35.1 + diff --git a/queue-5.4/tcp-fix-a-data-race-around-sysctl_tcp_min_rtt_wlen.patch b/queue-5.4/tcp-fix-a-data-race-around-sysctl_tcp_min_rtt_wlen.patch new file mode 100644 index 00000000000..afabe75f6c5 --- /dev/null +++ b/queue-5.4/tcp-fix-a-data-race-around-sysctl_tcp_min_rtt_wlen.patch @@ -0,0 +1,36 @@ +From b40186736da09f578da0ae9d2e5a54169420c40c Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 20 Jul 2022 09:50:24 -0700 +Subject: tcp: Fix a data-race around sysctl_tcp_min_rtt_wlen. + +From: Kuniyuki Iwashima + +[ Upstream commit 1330ffacd05fc9ac4159d19286ce119e22450ed2 ] + +While reading sysctl_tcp_min_rtt_wlen, it can be changed concurrently. +Thus, we need to add READ_ONCE() to its reader. + +Fixes: f672258391b4 ("tcp: track min RTT using windowed min-filter") +Signed-off-by: Kuniyuki Iwashima +Signed-off-by: David S. 
Miller +Signed-off-by: Sasha Levin +--- + net/ipv4/tcp_input.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c +index a8d8ff488281..b760ad0b16d9 100644 +--- a/net/ipv4/tcp_input.c ++++ b/net/ipv4/tcp_input.c +@@ -2914,7 +2914,7 @@ static void tcp_fastretrans_alert(struct sock *sk, const u32 prior_snd_una, + + static void tcp_update_rtt_min(struct sock *sk, u32 rtt_us, const int flag) + { +- u32 wlen = sock_net(sk)->ipv4.sysctl_tcp_min_rtt_wlen * HZ; ++ u32 wlen = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_min_rtt_wlen) * HZ; + struct tcp_sock *tp = tcp_sk(sk); + + if ((flag & FLAG_ACK_MAYBE_DELAYED) && rtt_us > tcp_min_rtt(tp)) { +-- +2.35.1 + diff --git a/queue-5.4/tcp-fix-a-data-race-around-sysctl_tcp_min_tso_segs.patch b/queue-5.4/tcp-fix-a-data-race-around-sysctl_tcp_min_tso_segs.patch new file mode 100644 index 00000000000..aaca29178d1 --- /dev/null +++ b/queue-5.4/tcp-fix-a-data-race-around-sysctl_tcp_min_tso_segs.patch @@ -0,0 +1,36 @@ +From fc9ed037a799f945ac3a7c788c0abf1ad3d68df5 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 20 Jul 2022 09:50:22 -0700 +Subject: tcp: Fix a data-race around sysctl_tcp_min_tso_segs. + +From: Kuniyuki Iwashima + +[ Upstream commit e0bb4ab9dfddd872622239f49fb2bd403b70853b ] + +While reading sysctl_tcp_min_tso_segs, it can be changed concurrently. +Thus, we need to add READ_ONCE() to its reader. + +Fixes: 95bd09eb2750 ("tcp: TSO packets automatic sizing") +Signed-off-by: Kuniyuki Iwashima +Signed-off-by: David S. Miller +Signed-off-by: Sasha Levin +--- + net/ipv4/tcp_output.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c +index 99e077422975..ef749a47768a 100644 +--- a/net/ipv4/tcp_output.c ++++ b/net/ipv4/tcp_output.c +@@ -1761,7 +1761,7 @@ static u32 tcp_tso_segs(struct sock *sk, unsigned int mss_now) + + min_tso = ca_ops->min_tso_segs ? 
+ ca_ops->min_tso_segs(sk) : +- sock_net(sk)->ipv4.sysctl_tcp_min_tso_segs; ++ READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_min_tso_segs); + + tso_segs = tcp_tso_autosize(sk, mss_now, min_tso); + return min_t(u32, tso_segs, sk->sk_gso_max_segs); +-- +2.35.1 + diff --git a/queue-5.4/virtio-net-fix-the-race-between-refill-work-and-clos.patch b/queue-5.4/virtio-net-fix-the-race-between-refill-work-and-clos.patch new file mode 100644 index 00000000000..580b1f70baf --- /dev/null +++ b/queue-5.4/virtio-net-fix-the-race-between-refill-work-and-clos.patch @@ -0,0 +1,151 @@ +From 107584d36a876b373e733c893b58dd447c8b9e48 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 25 Jul 2022 15:21:59 +0800 +Subject: virtio-net: fix the race between refill work and close + +From: Jason Wang + +[ Upstream commit 5a159128faff151b7fe5f4eb0f310b1e0a2d56bf ] + +We try using cancel_delayed_work_sync() to prevent the work from +enabling NAPI. This is insufficient since we don't disable the source +of the refill work scheduling. This means an NAPI poll callback after +cancel_delayed_work_sync() can schedule the refill work then can +re-enable the NAPI that leads to use-after-free [1]. + +Since the work can enable NAPI, we can't simply disable NAPI before +calling cancel_delayed_work_sync(). So fix this by introducing a +dedicated boolean to control whether or not the work could be +scheduled from NAPI. + +[1] +================================================================== +BUG: KASAN: use-after-free in refill_work+0x43/0xd4 +Read of size 2 at addr ffff88810562c92e by task kworker/2:1/42 + +CPU: 2 PID: 42 Comm: kworker/2:1 Not tainted 5.19.0-rc1+ #480 +Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.16.0-0-gd239552ce722-prebuilt.qemu.org 04/01/2014 +Workqueue: events refill_work +Call Trace: + + dump_stack_lvl+0x34/0x44 + print_report.cold+0xbb/0x6ac + ? _printk+0xad/0xde + ? refill_work+0x43/0xd4 + kasan_report+0xa8/0x130 + ? 
refill_work+0x43/0xd4 + refill_work+0x43/0xd4 + process_one_work+0x43d/0x780 + worker_thread+0x2a0/0x6f0 + ? process_one_work+0x780/0x780 + kthread+0x167/0x1a0 + ? kthread_exit+0x50/0x50 + ret_from_fork+0x22/0x30 + +... + +Fixes: b2baed69e605c ("virtio_net: set/cancel work on ndo_open/ndo_stop") +Signed-off-by: Jason Wang +Acked-by: Michael S. Tsirkin +Reviewed-by: Xuan Zhuo +Signed-off-by: David S. Miller +Signed-off-by: Sasha Levin +--- + drivers/net/virtio_net.c | 37 ++++++++++++++++++++++++++++++++++--- + 1 file changed, 34 insertions(+), 3 deletions(-) + +diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c +index e14842fbe3d6..579df7c5411d 100644 +--- a/drivers/net/virtio_net.c ++++ b/drivers/net/virtio_net.c +@@ -213,9 +213,15 @@ struct virtnet_info { + /* Packet virtio header size */ + u8 hdr_len; + +- /* Work struct for refilling if we run low on memory. */ ++ /* Work struct for delayed refilling if we run low on memory. */ + struct delayed_work refill; + ++ /* Is delayed refill enabled? 
*/ ++ bool refill_enabled; ++ ++ /* The lock to synchronize the access to refill_enabled */ ++ spinlock_t refill_lock; ++ + /* Work struct for config space updates */ + struct work_struct config_work; + +@@ -319,6 +325,20 @@ static struct page *get_a_page(struct receive_queue *rq, gfp_t gfp_mask) + return p; + } + ++static void enable_delayed_refill(struct virtnet_info *vi) ++{ ++ spin_lock_bh(&vi->refill_lock); ++ vi->refill_enabled = true; ++ spin_unlock_bh(&vi->refill_lock); ++} ++ ++static void disable_delayed_refill(struct virtnet_info *vi) ++{ ++ spin_lock_bh(&vi->refill_lock); ++ vi->refill_enabled = false; ++ spin_unlock_bh(&vi->refill_lock); ++} ++ + static void virtqueue_napi_schedule(struct napi_struct *napi, + struct virtqueue *vq) + { +@@ -1388,8 +1408,12 @@ static int virtnet_receive(struct receive_queue *rq, int budget, + } + + if (rq->vq->num_free > min((unsigned int)budget, virtqueue_get_vring_size(rq->vq)) / 2) { +- if (!try_fill_recv(vi, rq, GFP_ATOMIC)) +- schedule_delayed_work(&vi->refill, 0); ++ if (!try_fill_recv(vi, rq, GFP_ATOMIC)) { ++ spin_lock(&vi->refill_lock); ++ if (vi->refill_enabled) ++ schedule_delayed_work(&vi->refill, 0); ++ spin_unlock(&vi->refill_lock); ++ } + } + + u64_stats_update_begin(&rq->stats.syncp); +@@ -1508,6 +1532,8 @@ static int virtnet_open(struct net_device *dev) + struct virtnet_info *vi = netdev_priv(dev); + int i, err; + ++ enable_delayed_refill(vi); ++ + for (i = 0; i < vi->max_queue_pairs; i++) { + if (i < vi->curr_queue_pairs) + /* Make sure we have some buffers: if oom use wq. */ +@@ -1878,6 +1904,8 @@ static int virtnet_close(struct net_device *dev) + struct virtnet_info *vi = netdev_priv(dev); + int i; + ++ /* Make sure NAPI doesn't schedule refill work */ ++ disable_delayed_refill(vi); + /* Make sure refill_work doesn't re-enable napi! 
*/ + cancel_delayed_work_sync(&vi->refill); + +@@ -2417,6 +2445,8 @@ static int virtnet_restore_up(struct virtio_device *vdev) + + virtio_device_ready(vdev); + ++ enable_delayed_refill(vi); ++ + if (netif_running(vi->dev)) { + err = virtnet_open(vi->dev); + if (err) +@@ -3140,6 +3170,7 @@ static int virtnet_probe(struct virtio_device *vdev) + vdev->priv = vi; + + INIT_WORK(&vi->config_work, virtnet_config_changed_work); ++ spin_lock_init(&vi->refill_lock); + + /* If we can receive ANY GSO packets, we must allocate large ones. */ + if (virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_TSO4) || +-- +2.35.1 + -- 2.47.3