Fixes for 5.4

author Sasha Levin <sashal@kernel.org>

Sun, 31 Jul 2022 02:41:29 +0000 (22:41 -0400)

committer Sasha Levin <sashal@kernel.org>

Sun, 31 Jul 2022 02:41:29 +0000 (22:41 -0400)
author Sasha Levin <sashal@kernel.org>
Sun, 31 Jul 2022 02:41:29 +0000 (22:41 -0400)
committer Sasha Levin <sashal@kernel.org>
Sun, 31 Jul 2022 02:41:29 +0000 (22:41 -0400)
diff --git a/queue-5.4/documentation-fix-sctp_wmem-in-ip-sysctl.rst.patch b/queue-5.4/documentation-fix-sctp_wmem-in-ip-sysctl.rst.patch

new file mode 100644 (file)

index 0000000..a50058b
--- /dev/null
+++ b/queue-5.4/documentation-fix-sctp_wmem-in-ip-sysctl.rst.patch
@@ -0,0 +1,49 @@
+From d886bc9ce8df6004b79ae28ee3e84a553ddac7fb Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 21 Jul 2022 10:35:46 -0400
+Subject: Documentation: fix sctp_wmem in ip-sysctl.rst
+
+From: Xin Long <lucien.xin@gmail.com>
+
+[ Upstream commit aa709da0e032cee7c202047ecd75f437bb0126ed ]
+
+Since commit 1033990ac5b2 ("sctp: implement memory accounting on tx path"),
+SCTP has supported memory accounting on tx path where 'sctp_wmem' is used
+by sk_wmem_schedule(). So we should fix the description for this option in
+ip-sysctl.rst accordingly.
+
+v1->v2:
+  - Improve the description as Marcelo suggested.
+
+Fixes: 1033990ac5b2 ("sctp: implement memory accounting on tx path")
+Signed-off-by: Xin Long <lucien.xin@gmail.com>
+Acked-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ Documentation/networking/ip-sysctl.txt | 9 ++++++++-
+ 1 file changed, 8 insertions(+), 1 deletion(-)
+
+diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt
+index 787a9c077ef1..5cf601c94e35 100644
+--- a/Documentation/networking/ip-sysctl.txt
++++ b/Documentation/networking/ip-sysctl.txt
+@@ -2284,7 +2284,14 @@ sctp_rmem - vector of 3 INTEGERs: min, default, max
+       Default: 4K
+ 
+ sctp_wmem  - vector of 3 INTEGERs: min, default, max
+-      Currently this tunable has no effect.
++      Only the first value ("min") is used, "default" and "max" are
++      ignored.
++
++      min: Minimum size of send buffer that can be used by SCTP sockets.
++      It is guaranteed to each SCTP socket (but not association) even
++      under moderate memory pressure.
++
++      Default: 4K
+ 
+ addr_scope_policy - INTEGER
+       Control IPv4 address scoping - draft-stewart-tsvwg-sctp-ipv4-00
+-- 
+2.35.1
+
diff --git a/queue-5.4/i40e-fix-interface-init-with-msi-interrupts-no-msi-x.patch b/queue-5.4/i40e-fix-interface-init-with-msi-interrupts-no-msi-x.patch

new file mode 100644 (file)

index 0000000..bc371e5
--- /dev/null
+++ b/queue-5.4/i40e-fix-interface-init-with-msi-interrupts-no-msi-x.patch
@@ -0,0 +1,49 @@
+From 888ff31a2b98c89501504eaf7b2e2fa64c25243e Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 22 Jul 2022 10:54:01 -0700
+Subject: i40e: Fix interface init with MSI interrupts (no MSI-X)
+
+From: Michal Maloszewski <michal.maloszewski@intel.com>
+
+[ Upstream commit 5fcbb711024aac6d4db385623e6f2fdf019f7782 ]
+
+Fix the inability to bring an interface up on a setup with
+only MSI interrupts enabled (no MSI-X).
+Solution is to add a default number of QPs = 1. This is enough,
+since without MSI-X support driver enables only a basic feature set.
+
+Fixes: bc6d33c8d93f ("i40e: Fix the number of queues available to be mapped for use")
+Signed-off-by: Dawid Lukwinski <dawid.lukwinski@intel.com>
+Signed-off-by: Michal Maloszewski <michal.maloszewski@intel.com>
+Tested-by: Dave Switzer <david.switzer@intel.com>
+Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
+Link: https://lore.kernel.org/r/20220722175401.112572-1-anthony.l.nguyen@intel.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/intel/i40e/i40e_main.c | 4 ++++
+ 1 file changed, 4 insertions(+)
+
+diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
+index 0610d344fdbf..637f6ed78b48 100644
+--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
++++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
+@@ -1821,11 +1821,15 @@ static void i40e_vsi_setup_queue_map(struct i40e_vsi *vsi,
+                * non-zero req_queue_pairs says that user requested a new
+                * queue count via ethtool's set_channels, so use this
+                * value for queues distribution across traffic classes
++               * We need at least one queue pair for the interface
++               * to be usable as we see in else statement.
+                */
+               if (vsi->req_queue_pairs > 0)
+                       vsi->num_queue_pairs = vsi->req_queue_pairs;
+               else if (pf->flags & I40E_FLAG_MSIX_ENABLED)
+                       vsi->num_queue_pairs = pf->num_lan_msix;
++              else
++                      vsi->num_queue_pairs = 1;
+       }
+ 
+       /* Number of queues per enabled TC */
+-- 
+2.35.1
+
diff --git a/queue-5.4/igmp-fix-data-races-around-sysctl_igmp_qrv.patch b/queue-5.4/igmp-fix-data-races-around-sysctl_igmp_qrv.patch

new file mode 100644 (file)

index 0000000..5ea20e0
--- /dev/null
+++ b/queue-5.4/igmp-fix-data-races-around-sysctl_igmp_qrv.patch
@@ -0,0 +1,127 @@
+From 8e893ea193073506f79b56296efd030e8518aa77 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 15 Jul 2022 10:17:44 -0700
+Subject: igmp: Fix data-races around sysctl_igmp_qrv.
+
+From: Kuniyuki Iwashima <kuniyu@amazon.com>
+
+[ Upstream commit 8ebcc62c738f68688ee7c6fec2efe5bc6d3d7e60 ]
+
+While reading sysctl_igmp_qrv, it can be changed concurrently.
+Thus, we need to add READ_ONCE() to its readers.
+
+This test can be packed into a helper, so such changes will be in the
+follow-up series after net is merged into net-next.
+
+  qrv ?: READ_ONCE(net->ipv4.sysctl_igmp_qrv);
+
+Fixes: a9fe8e29945d ("ipv4: implement igmp_qrv sysctl to tune igmp robustness variable")
+Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/ipv4/igmp.c | 24 +++++++++++++-----------
+ 1 file changed, 13 insertions(+), 11 deletions(-)
+
+diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
+index 660b41040c77..1023f881091e 100644
+--- a/net/ipv4/igmp.c
++++ b/net/ipv4/igmp.c
+@@ -829,7 +829,7 @@ static void igmp_ifc_event(struct in_device *in_dev)
+       struct net *net = dev_net(in_dev->dev);
+       if (IGMP_V1_SEEN(in_dev) || IGMP_V2_SEEN(in_dev))
+               return;
+-      WRITE_ONCE(in_dev->mr_ifc_count, in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv);
++      WRITE_ONCE(in_dev->mr_ifc_count, in_dev->mr_qrv ?: READ_ONCE(net->ipv4.sysctl_igmp_qrv));
+       igmp_ifc_start_timer(in_dev, 1);
+ }
+ 
+@@ -1011,7 +1011,7 @@ static bool igmp_heard_query(struct in_device *in_dev, struct sk_buff *skb,
+                * received value was zero, use the default or statically
+                * configured value.
+                */
+-              in_dev->mr_qrv = ih3->qrv ?: net->ipv4.sysctl_igmp_qrv;
++              in_dev->mr_qrv = ih3->qrv ?: READ_ONCE(net->ipv4.sysctl_igmp_qrv);
+               in_dev->mr_qi = IGMPV3_QQIC(ih3->qqic)*HZ ?: IGMP_QUERY_INTERVAL;
+ 
+               /* RFC3376, 8.3. Query Response Interval:
+@@ -1191,7 +1191,7 @@ static void igmpv3_add_delrec(struct in_device *in_dev, struct ip_mc_list *im,
+       pmc->interface = im->interface;
+       in_dev_hold(in_dev);
+       pmc->multiaddr = im->multiaddr;
+-      pmc->crcount = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv;
++      pmc->crcount = in_dev->mr_qrv ?: READ_ONCE(net->ipv4.sysctl_igmp_qrv);
+       pmc->sfmode = im->sfmode;
+       if (pmc->sfmode == MCAST_INCLUDE) {
+               struct ip_sf_list *psf;
+@@ -1242,9 +1242,11 @@ static void igmpv3_del_delrec(struct in_device *in_dev, struct ip_mc_list *im)
+                       swap(im->tomb, pmc->tomb);
+                       swap(im->sources, pmc->sources);
+                       for (psf = im->sources; psf; psf = psf->sf_next)
+-                              psf->sf_crcount = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv;
++                              psf->sf_crcount = in_dev->mr_qrv ?:
++                                      READ_ONCE(net->ipv4.sysctl_igmp_qrv);
+               } else {
+-                      im->crcount = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv;
++                      im->crcount = in_dev->mr_qrv ?:
++                              READ_ONCE(net->ipv4.sysctl_igmp_qrv);
+               }
+               in_dev_put(pmc->interface);
+               kfree_pmc(pmc);
+@@ -1351,7 +1353,7 @@ static void igmp_group_added(struct ip_mc_list *im)
+       if (in_dev->dead)
+               return;
+ 
+-      im->unsolicit_count = net->ipv4.sysctl_igmp_qrv;
++      im->unsolicit_count = READ_ONCE(net->ipv4.sysctl_igmp_qrv);
+       if (IGMP_V1_SEEN(in_dev) || IGMP_V2_SEEN(in_dev)) {
+               spin_lock_bh(&im->lock);
+               igmp_start_timer(im, IGMP_INITIAL_REPORT_DELAY);
+@@ -1365,7 +1367,7 @@ static void igmp_group_added(struct ip_mc_list *im)
+        * IN() to IN(A).
+        */
+       if (im->sfmode == MCAST_EXCLUDE)
+-              im->crcount = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv;
++              im->crcount = in_dev->mr_qrv ?: READ_ONCE(net->ipv4.sysctl_igmp_qrv);
+ 
+       igmp_ifc_event(in_dev);
+ #endif
+@@ -1756,7 +1758,7 @@ static void ip_mc_reset(struct in_device *in_dev)
+ 
+       in_dev->mr_qi = IGMP_QUERY_INTERVAL;
+       in_dev->mr_qri = IGMP_QUERY_RESPONSE_INTERVAL;
+-      in_dev->mr_qrv = net->ipv4.sysctl_igmp_qrv;
++      in_dev->mr_qrv = READ_ONCE(net->ipv4.sysctl_igmp_qrv);
+ }
+ #else
+ static void ip_mc_reset(struct in_device *in_dev)
+@@ -1890,7 +1892,7 @@ static int ip_mc_del1_src(struct ip_mc_list *pmc, int sfmode,
+ #ifdef CONFIG_IP_MULTICAST
+               if (psf->sf_oldin &&
+                   !IGMP_V1_SEEN(in_dev) && !IGMP_V2_SEEN(in_dev)) {
+-                      psf->sf_crcount = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv;
++                      psf->sf_crcount = in_dev->mr_qrv ?: READ_ONCE(net->ipv4.sysctl_igmp_qrv);
+                       psf->sf_next = pmc->tomb;
+                       pmc->tomb = psf;
+                       rv = 1;
+@@ -1954,7 +1956,7 @@ static int ip_mc_del_src(struct in_device *in_dev, __be32 *pmca, int sfmode,
+               /* filter mode change */
+               pmc->sfmode = MCAST_INCLUDE;
+ #ifdef CONFIG_IP_MULTICAST
+-              pmc->crcount = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv;
++              pmc->crcount = in_dev->mr_qrv ?: READ_ONCE(net->ipv4.sysctl_igmp_qrv);
+               WRITE_ONCE(in_dev->mr_ifc_count, pmc->crcount);
+               for (psf = pmc->sources; psf; psf = psf->sf_next)
+                       psf->sf_crcount = 0;
+@@ -2133,7 +2135,7 @@ static int ip_mc_add_src(struct in_device *in_dev, __be32 *pmca, int sfmode,
+ #ifdef CONFIG_IP_MULTICAST
+               /* else no filters; keep old mode for reports */
+ 
+-              pmc->crcount = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv;
++              pmc->crcount = in_dev->mr_qrv ?: READ_ONCE(net->ipv4.sysctl_igmp_qrv);
+               WRITE_ONCE(in_dev->mr_ifc_count, pmc->crcount);
+               for (psf = pmc->sources; psf; psf = psf->sf_next)
+                       psf->sf_crcount = 0;
+-- 
+2.35.1
+
diff --git a/queue-5.4/net-sungem_phy-add-of_node_put-for-reference-returne.patch b/queue-5.4/net-sungem_phy-add-of_node_put-for-reference-returne.patch

new file mode 100644 (file)

index 0000000..d51e7a3
--- /dev/null
+++ b/queue-5.4/net-sungem_phy-add-of_node_put-for-reference-returne.patch
@@ -0,0 +1,37 @@
+From c5e2dec8c589406223337e362cd608bb190db07a Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 20 Jul 2022 21:10:03 +0800
+Subject: net: sungem_phy: Add of_node_put() for reference returned by
+ of_get_parent()
+
+From: Liang He <windhl@126.com>
+
+[ Upstream commit ebbbe23fdf6070e31509638df3321688358cc211 ]
+
+In bcm5421_init(), we should call of_node_put() for the reference
+returned by of_get_parent() which has increased the refcount.
+
+Fixes: 3c326fe9cb7a ("[PATCH] ppc64: Add new PHY to sungem")
+Signed-off-by: Liang He <windhl@126.com>
+Link: https://lore.kernel.org/r/20220720131003.1287426-1-windhl@126.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/sungem_phy.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/drivers/net/sungem_phy.c b/drivers/net/sungem_phy.c
+index 291fa449993f..45f295403cb5 100644
+--- a/drivers/net/sungem_phy.c
++++ b/drivers/net/sungem_phy.c
+@@ -454,6 +454,7 @@ static int bcm5421_init(struct mii_phy* phy)
+               int can_low_power = 1;
+               if (np == NULL || of_get_property(np, "no-autolowpower", NULL))
+                       can_low_power = 0;
++              of_node_put(np);
+               if (can_low_power) {
+                       /* Enable automatic low-power */
+                       sungem_phy_write(phy, 0x1c, 0x9002);
+-- 
+2.35.1
+
diff --git a/queue-5.4/netfilter-nf_queue-do-not-allow-packet-truncation-be.patch b/queue-5.4/netfilter-nf_queue-do-not-allow-packet-truncation-be.patch

new file mode 100644 (file)

index 0000000..c4c83ef
--- /dev/null
+++ b/queue-5.4/netfilter-nf_queue-do-not-allow-packet-truncation-be.patch
@@ -0,0 +1,53 @@
+From 6effee1825eaaece708a8dcfa2831119db9c8828 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 26 Jul 2022 12:42:06 +0200
+Subject: netfilter: nf_queue: do not allow packet truncation below transport
+ header offset
+
+From: Florian Westphal <fw@strlen.de>
+
+[ Upstream commit 99a63d36cb3ed5ca3aa6fcb64cffbeaf3b0fb164 ]
+
+Domingo Dirutigliano and Nicola Guerrera report kernel panic when
+sending nf_queue verdict with 1-byte nfta_payload attribute.
+
+The IP/IPv6 stack pulls the IP(v6) header from the packet after the
+input hook.
+
+If user truncates the packet below the header size, this skb_pull() will
+result in a malformed skb (skb->len < 0).
+
+Fixes: 7af4cc3fa158 ("[NETFILTER]: Add "nfnetlink_queue" netfilter queue handler over nfnetlink")
+Reported-by: Domingo Dirutigliano <pwnzer0tt1@proton.me>
+Signed-off-by: Florian Westphal <fw@strlen.de>
+Reviewed-by: Pablo Neira Ayuso <pablo@netfilter.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/netfilter/nfnetlink_queue.c | 7 ++++++-
+ 1 file changed, 6 insertions(+), 1 deletion(-)
+
+diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c
+index 7d3ab08a5a2d..581bd1353a44 100644
+--- a/net/netfilter/nfnetlink_queue.c
++++ b/net/netfilter/nfnetlink_queue.c
+@@ -846,11 +846,16 @@ nfqnl_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum)
+ }
+ 
+ static int
+-nfqnl_mangle(void *data, int data_len, struct nf_queue_entry *e, int diff)
++nfqnl_mangle(void *data, unsigned int data_len, struct nf_queue_entry *e, int diff)
+ {
+       struct sk_buff *nskb;
+ 
+       if (diff < 0) {
++              unsigned int min_len = skb_transport_offset(e->skb);
++
++              if (data_len < min_len)
++                      return -EINVAL;
++
+               if (pskb_trim(e->skb, data_len))
+                       return -ENOMEM;
+       } else if (diff > 0) {
+-- 
+2.35.1
+
diff --git a/queue-5.4/perf-symbol-correct-address-for-bss-symbols.patch b/queue-5.4/perf-symbol-correct-address-for-bss-symbols.patch

new file mode 100644 (file)

index 0000000..21d1a7b
--- /dev/null
+++ b/queue-5.4/perf-symbol-correct-address-for-bss-symbols.patch
@@ -0,0 +1,182 @@
+From c06e2278d0ba78bfc80b033b1c52b88173fdf47a Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sun, 24 Jul 2022 14:00:12 +0800
+Subject: perf symbol: Correct address for bss symbols
+
+From: Leo Yan <leo.yan@linaro.org>
+
+[ Upstream commit 2d86612aacb7805f72873691a2644d7279ed0630 ]
+
+When using 'perf mem' and 'perf c2c', an issue is observed that tool
+reports the wrong offset for global data symbols.  This is a common
+issue on both x86 and Arm64 platforms.
+
+Let's see an example, for a test program, below is the disassembly for
+its .bss section which is dumped with objdump:
+
+  ...
+
+  Disassembly of section .bss:
+
+  0000000000004040 <completed.0>:
+       ...
+
+  0000000000004080 <buf1>:
+       ...
+
+  00000000000040c0 <buf2>:
+       ...
+
+  0000000000004100 <thread>:
+       ...
+
+First we used 'perf mem record' to run the test program and then used
+'perf --debug verbose=4 mem report' to observe what's the symbol info
+for 'buf1' and 'buf2' structures.
+
+  # ./perf mem record -e ldlat-loads,ldlat-stores -- false_sharing.exe 8
+  # ./perf --debug verbose=4 mem report
+    ...
+    dso__load_sym_internal: adjusting symbol: st_value: 0x40c0 sh_addr: 0x4040 sh_offset: 0x3028
+    symbol__new: buf2 0x30a8-0x30e8
+    ...
+    dso__load_sym_internal: adjusting symbol: st_value: 0x4080 sh_addr: 0x4040 sh_offset: 0x3028
+    symbol__new: buf1 0x3068-0x30a8
+    ...
+
+The perf tool relies on libelf to parse symbols, in executable and
+shared object files, 'st_value' holds a virtual address; 'sh_addr' is
+the address at which section's first byte should reside in memory, and
+'sh_offset' is the byte offset from the beginning of the file to the
+first byte in the section.  The perf tool uses below formula to convert
+a symbol's memory address to a file address:
+
+  file_address = st_value - sh_addr + sh_offset
+                    ^
+                    ` Memory address
+
+We can see the final adjusted address ranges for buf1 and buf2 are
+[0x30a8-0x30e8) and [0x3068-0x30a8) respectively, apparently this is
+incorrect, in the code, the structure for 'buf1' and 'buf2' specifies
+compiler attribute with 64-byte alignment.
+
+The problem happens for 'sh_offset', libelf returns it as 0x3028 which
+is not 64-byte aligned, combining with disassembly, it's likely libelf
+doesn't respect the alignment for .bss section, therefore, it doesn't
+return the aligned value for 'sh_offset'.
+
+Suggested by Fangrui Song, ELF file contains program header which
+contains PT_LOAD segments, the fields p_vaddr and p_offset in PT_LOAD
+segments contain the execution info.  A better choice for converting
+memory address to file address is using the formula:
+
+  file_address = st_value - p_vaddr + p_offset
+
+This patch introduces elf_read_program_header() which returns the
+program header based on the passed 'st_value', then it uses the formula
+above to calculate the symbol file address; and the debugging log is
+updated respectively.
+
+After applying the change:
+
+  # ./perf --debug verbose=4 mem report
+    ...
+    dso__load_sym_internal: adjusting symbol: st_value: 0x40c0 p_vaddr: 0x3d28 p_offset: 0x2d28
+    symbol__new: buf2 0x30c0-0x3100
+    ...
+    dso__load_sym_internal: adjusting symbol: st_value: 0x4080 p_vaddr: 0x3d28 p_offset: 0x2d28
+    symbol__new: buf1 0x3080-0x30c0
+    ...
+
+Fixes: f17e04afaff84b5c ("perf report: Fix ELF symbol parsing")
+Reported-by: Chang Rui <changruinj@gmail.com>
+Suggested-by: Fangrui Song <maskray@google.com>
+Signed-off-by: Leo Yan <leo.yan@linaro.org>
+Acked-by: Namhyung Kim <namhyung@kernel.org>
+Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
+Cc: Ian Rogers <irogers@google.com>
+Cc: Ingo Molnar <mingo@redhat.com>
+Cc: Jiri Olsa <jolsa@kernel.org>
+Cc: Mark Rutland <mark.rutland@arm.com>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Link: https://lore.kernel.org/r/20220724060013.171050-2-leo.yan@linaro.org
+Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ tools/perf/util/symbol-elf.c | 45 ++++++++++++++++++++++++++++++++----
+ 1 file changed, 41 insertions(+), 4 deletions(-)
+
+diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c
+index 2ec0a32da579..0b185b1090ff 100644
+--- a/tools/perf/util/symbol-elf.c
++++ b/tools/perf/util/symbol-elf.c
+@@ -230,6 +230,33 @@ Elf_Scn *elf_section_by_name(Elf *elf, GElf_Ehdr *ep,
+       return NULL;
+ }
+ 
++static int elf_read_program_header(Elf *elf, u64 vaddr, GElf_Phdr *phdr)
++{
++      size_t i, phdrnum;
++      u64 sz;
++
++      if (elf_getphdrnum(elf, &phdrnum))
++              return -1;
++
++      for (i = 0; i < phdrnum; i++) {
++              if (gelf_getphdr(elf, i, phdr) == NULL)
++                      return -1;
++
++              if (phdr->p_type != PT_LOAD)
++                      continue;
++
++              sz = max(phdr->p_memsz, phdr->p_filesz);
++              if (!sz)
++                      continue;
++
++              if (vaddr >= phdr->p_vaddr && (vaddr < phdr->p_vaddr + sz))
++                      return 0;
++      }
++
++      /* Not found any valid program header */
++      return -1;
++}
++
+ static bool want_demangle(bool is_kernel_sym)
+ {
+       return is_kernel_sym ? symbol_conf.demangle_kernel : symbol_conf.demangle;
+@@ -1091,6 +1118,7 @@ int dso__load_sym(struct dso *dso, struct map *map, struct symsrc *syms_ss,
+                                       sym.st_value);
+                       used_opd = true;
+               }
++
+               /*
+                * When loading symbols in a data mapping, ABS symbols (which
+                * has a value of SHN_ABS in its st_shndx) failed at
+@@ -1127,11 +1155,20 @@ int dso__load_sym(struct dso *dso, struct map *map, struct symsrc *syms_ss,
+                               goto out_elf_end;
+               } else if ((used_opd && runtime_ss->adjust_symbols) ||
+                          (!used_opd && syms_ss->adjust_symbols)) {
++                      GElf_Phdr phdr;
++
++                      if (elf_read_program_header(syms_ss->elf,
++                                                  (u64)sym.st_value, &phdr)) {
++                              pr_warning("%s: failed to find program header for "
++                                         "symbol: %s st_value: %#" PRIx64 "\n",
++                                         __func__, elf_name, (u64)sym.st_value);
++                              continue;
++                      }
+                       pr_debug4("%s: adjusting symbol: st_value: %#" PRIx64 " "
+-                                "sh_addr: %#" PRIx64 " sh_offset: %#" PRIx64 "\n", __func__,
+-                                (u64)sym.st_value, (u64)shdr.sh_addr,
+-                                (u64)shdr.sh_offset);
+-                      sym.st_value -= shdr.sh_addr - shdr.sh_offset;
++                                "p_vaddr: %#" PRIx64 " p_offset: %#" PRIx64 "\n",
++                                __func__, (u64)sym.st_value, (u64)phdr.p_vaddr,
++                                (u64)phdr.p_offset);
++                      sym.st_value -= phdr.p_vaddr - phdr.p_offset;
+               }
+ 
+               demangled = demangle_sym(dso, kmodule, elf_name);
+-- 
+2.35.1
+
diff --git a/queue-5.4/sctp-fix-sleep-in-atomic-context-bug-in-timer-handle.patch b/queue-5.4/sctp-fix-sleep-in-atomic-context-bug-in-timer-handle.patch

new file mode 100644 (file)

index 0000000..bd50621
--- /dev/null
+++ b/queue-5.4/sctp-fix-sleep-in-atomic-context-bug-in-timer-handle.patch
@@ -0,0 +1,61 @@
+From 9ef7bc8b0c4534d5308afe5b2c0cf13bda2aa884 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 23 Jul 2022 09:58:09 +0800
+Subject: sctp: fix sleep in atomic context bug in timer handlers
+
+From: Duoming Zhou <duoming@zju.edu.cn>
+
+[ Upstream commit b89fc26f741d9f9efb51cba3e9b241cf1380ec5a ]
+
+There are sleep in atomic context bugs in timer handlers of sctp
+such as sctp_generate_t3_rtx_event(), sctp_generate_probe_event(),
+sctp_generate_t1_init_event(), sctp_generate_timeout_event(),
+sctp_generate_t3_rtx_event() and so on.
+
+The root cause is sctp_sched_prio_init_sid() with GFP_KERNEL parameter
+that may sleep could be called by different timer handlers which is in
+interrupt context.
+
+One of the call paths that could trigger bug is shown below:
+
+      (interrupt context)
+sctp_generate_probe_event
+  sctp_do_sm
+    sctp_side_effects
+      sctp_cmd_interpreter
+        sctp_outq_teardown
+          sctp_outq_init
+            sctp_sched_set_sched
+              n->init_sid(..,GFP_KERNEL)
+                sctp_sched_prio_init_sid //may sleep
+
+This patch changes gfp_t parameter of init_sid in sctp_sched_set_sched()
+from GFP_KERNEL to GFP_ATOMIC in order to prevent sleep in atomic
+context bugs.
+
+Fixes: 5bbbbe32a431 ("sctp: introduce stream scheduler foundations")
+Signed-off-by: Duoming Zhou <duoming@zju.edu.cn>
+Acked-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
+Link: https://lore.kernel.org/r/20220723015809.11553-1-duoming@zju.edu.cn
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/sctp/stream_sched.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/net/sctp/stream_sched.c b/net/sctp/stream_sched.c
+index 99e5f69fbb74..a2e1d34f52c5 100644
+--- a/net/sctp/stream_sched.c
++++ b/net/sctp/stream_sched.c
+@@ -163,7 +163,7 @@ int sctp_sched_set_sched(struct sctp_association *asoc,
+               if (!SCTP_SO(&asoc->stream, i)->ext)
+                       continue;
+ 
+-              ret = n->init_sid(&asoc->stream, i, GFP_KERNEL);
++              ret = n->init_sid(&asoc->stream, i, GFP_ATOMIC);
+               if (ret)
+                       goto err;
+       }
+-- 
+2.35.1
+
diff --git a/queue-5.4/sctp-leave-the-err-path-free-in-sctp_stream_init-to-.patch b/queue-5.4/sctp-leave-the-err-path-free-in-sctp_stream_init-to-.patch

new file mode 100644 (file)

index 0000000..ac91fb3
--- /dev/null
+++ b/queue-5.4/sctp-leave-the-err-path-free-in-sctp_stream_init-to-.patch
@@ -0,0 +1,109 @@
+From 59e88348aee82011fb00db676d8ad971518e0003 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 25 Jul 2022 18:11:06 -0400
+Subject: sctp: leave the err path free in sctp_stream_init to sctp_stream_free
+
+From: Xin Long <lucien.xin@gmail.com>
+
+[ Upstream commit 181d8d2066c000ba0a0e6940a7ad80f1a0e68e9d ]
+
+A NULL pointer dereference was reported by Wei Chen:
+
+  BUG: kernel NULL pointer dereference, address: 0000000000000000
+  RIP: 0010:__list_del_entry_valid+0x26/0x80
+  Call Trace:
+   <TASK>
+   sctp_sched_dequeue_common+0x1c/0x90
+   sctp_sched_prio_dequeue+0x67/0x80
+   __sctp_outq_teardown+0x299/0x380
+   sctp_outq_free+0x15/0x20
+   sctp_association_free+0xc3/0x440
+   sctp_do_sm+0x1ca7/0x2210
+   sctp_assoc_bh_rcv+0x1f6/0x340
+
+This happens when calling sctp_sendmsg without connecting to server first.
+In this case, a data chunk already queues up in send queue of client side
+when processing the INIT_ACK from server in sctp_process_init() where it
+calls sctp_stream_init() to alloc stream_in. If it fails to alloc stream_in
+all stream_out will be freed in sctp_stream_init's err path. Then in the
+asoc freeing it will crash when dequeuing this data chunk as stream_out
+is missing.
+
+As we can't free stream out before dequeuing all data from send queue, and
+this patch is to fix it by moving the err path stream_out/in freeing in
+sctp_stream_init() to sctp_stream_free() which is eventually called when
+freeing the asoc in sctp_association_free(). This fix also makes the code
+in sctp_process_init() more clear.
+
+Note that in sctp_association_init() when it fails in sctp_stream_init(),
+sctp_association_free() will not be called, and in that case it should
+go to 'stream_free' err path to free stream instead of 'fail_init'.
+
+Fixes: 5bbbbe32a431 ("sctp: introduce stream scheduler foundations")
+Reported-by: Wei Chen <harperchen1110@gmail.com>
+Signed-off-by: Xin Long <lucien.xin@gmail.com>
+Link: https://lore.kernel.org/r/831a3dc100c4908ff76e5bcc363be97f2778bc0b.1658787066.git.lucien.xin@gmail.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/sctp/associola.c |  5 ++---
+ net/sctp/stream.c    | 19 +++----------------
+ 2 files changed, 5 insertions(+), 19 deletions(-)
+
+diff --git a/net/sctp/associola.c b/net/sctp/associola.c
+index fb6f62264e87..f960b0e1e552 100644
+--- a/net/sctp/associola.c
++++ b/net/sctp/associola.c
+@@ -224,9 +224,8 @@ static struct sctp_association *sctp_association_init(
+       if (!sctp_ulpq_init(&asoc->ulpq, asoc))
+               goto fail_init;
+ 
+-      if (sctp_stream_init(&asoc->stream, asoc->c.sinit_num_ostreams,
+-                           0, gfp))
+-              goto fail_init;
++      if (sctp_stream_init(&asoc->stream, asoc->c.sinit_num_ostreams, 0, gfp))
++              goto stream_free;
+ 
+       /* Initialize default path MTU. */
+       asoc->pathmtu = sp->pathmtu;
+diff --git a/net/sctp/stream.c b/net/sctp/stream.c
+index cd20638b6151..56762745d6e4 100644
+--- a/net/sctp/stream.c
++++ b/net/sctp/stream.c
+@@ -137,7 +137,7 @@ int sctp_stream_init(struct sctp_stream *stream, __u16 outcnt, __u16 incnt,
+ 
+       ret = sctp_stream_alloc_out(stream, outcnt, gfp);
+       if (ret)
+-              goto out_err;
++              return ret;
+ 
+       for (i = 0; i < stream->outcnt; i++)
+               SCTP_SO(stream, i)->state = SCTP_STREAM_OPEN;
+@@ -145,22 +145,9 @@ int sctp_stream_init(struct sctp_stream *stream, __u16 outcnt, __u16 incnt,
+ handle_in:
+       sctp_stream_interleave_init(stream);
+       if (!incnt)
+-              goto out;
+-
+-      ret = sctp_stream_alloc_in(stream, incnt, gfp);
+-      if (ret)
+-              goto in_err;
+-
+-      goto out;
++              return 0;
+ 
+-in_err:
+-      sched->free(stream);
+-      genradix_free(&stream->in);
+-out_err:
+-      genradix_free(&stream->out);
+-      stream->outcnt = 0;
+-out:
+-      return ret;
++      return sctp_stream_alloc_in(stream, incnt, gfp);
+ }
+ 
+ int sctp_stream_init_ext(struct sctp_stream *stream, __u16 sid)
+-- 
+2.35.1
+
diff --git a/queue-5.4/series b/queue-5.4/series

index e951aa5752b4364bba2fd9952567c77c859f2ce1..fd3bdc2db9203e14c2a950d7db217c6dc9ec67da 100644 (file)
--- a/queue-5.4/series
+++ b/queue-5.4/series
@@ -13,3 +13,19 @@ tcp-fix-a-data-race-around-sysctl_tcp_limit_output_bytes.patch
  tcp-fix-a-data-race-around-sysctl_tcp_challenge_ack_limit.patch
  net-ping6-fix-memleak-in-ipv6_renew_options.patch
  ipv6-addrconf-fix-a-null-ptr-deref-bug-for-ip6_ptr.patch
+igmp-fix-data-races-around-sysctl_igmp_qrv.patch
+net-sungem_phy-add-of_node_put-for-reference-returne.patch
+tcp-fix-a-data-race-around-sysctl_tcp_min_tso_segs.patch
+tcp-fix-a-data-race-around-sysctl_tcp_min_rtt_wlen.patch
+tcp-fix-a-data-race-around-sysctl_tcp_autocorking.patch
+tcp-fix-a-data-race-around-sysctl_tcp_invalid_rateli.patch
+documentation-fix-sctp_wmem-in-ip-sysctl.rst.patch
+tcp-fix-a-data-race-around-sysctl_tcp_comp_sack_dela.patch
+tcp-fix-a-data-race-around-sysctl_tcp_comp_sack_nr.patch
+i40e-fix-interface-init-with-msi-interrupts-no-msi-x.patch
+sctp-fix-sleep-in-atomic-context-bug-in-timer-handle.patch
+netfilter-nf_queue-do-not-allow-packet-truncation-be.patch
+virtio-net-fix-the-race-between-refill-work-and-clos.patch
+perf-symbol-correct-address-for-bss-symbols.patch
+sfc-disable-softirqs-for-ptp-tx.patch
+sctp-leave-the-err-path-free-in-sctp_stream_init-to-.patch
diff --git a/queue-5.4/sfc-disable-softirqs-for-ptp-tx.patch b/queue-5.4/sfc-disable-softirqs-for-ptp-tx.patch

new file mode 100644 (file)

index 0000000..37c456b
--- /dev/null
+++ b/queue-5.4/sfc-disable-softirqs-for-ptp-tx.patch
@@ -0,0 +1,73 @@
+From 0abfc83176102acd7f06f60874a95e7bc5f915fa Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 26 Jul 2022 08:45:04 +0200
+Subject: sfc: disable softirqs for ptp TX
+
+From: Alejandro Lucero <alejandro.lucero-palau@amd.com>
+
+[ Upstream commit 67c3b611d92fc238c43734878bc3e232ab570c79 ]
+
+Sending a PTP packet can imply to use the normal TX driver datapath but
+invoked from the driver's ptp worker. The kernel generic TX code
+disables softirqs and preemption before calling specific driver TX code,
+but the ptp worker does not. Although current ptp driver functionality
+does not require it, there are several reasons for doing so:
+
+   1) The invoked code is always executed with softirqs disabled for non
+      PTP packets.
+   2) Better if a ptp packet transmission is not interrupted by softirq
+      handling which could lead to high latencies.
+   3) netdev_xmit_more used by the TX code requires preemption to be
+      disabled.
+
+Indeed a solution for dealing with kernel preemption state based on static
+kernel configuration is not possible since the introduction of dynamic
+preemption level configuration at boot time using the static calls
+functionality.
+
+Fixes: f79c957a0b537 ("drivers: net: sfc: use netdev_xmit_more helper")
+Signed-off-by: Alejandro Lucero <alejandro.lucero-palau@amd.com>
+Link: https://lore.kernel.org/r/20220726064504.49613-1-alejandro.lucero-palau@amd.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/sfc/ptp.c | 22 ++++++++++++++++++++++
+ 1 file changed, 22 insertions(+)
+
+diff --git a/drivers/net/ethernet/sfc/ptp.c b/drivers/net/ethernet/sfc/ptp.c
+index 1fa1b71dbfa1..ed1140ecca60 100644
+--- a/drivers/net/ethernet/sfc/ptp.c
++++ b/drivers/net/ethernet/sfc/ptp.c
+@@ -1093,7 +1093,29 @@ static void efx_ptp_xmit_skb_queue(struct efx_nic *efx, struct sk_buff *skb)
+ 
+       tx_queue = &ptp_data->channel->tx_queue[type];
+       if (tx_queue && tx_queue->timestamping) {
++              /* This code invokes normal driver TX code which is always
++               * protected from softirqs when called from generic TX code,
++               * which in turn disables preemption. Look at __dev_queue_xmit
++               * which uses rcu_read_lock_bh disabling preemption for RCU
++               * plus disabling softirqs. We do not need RCU reader
++               * protection here.
++               *
++               * Although it is theoretically safe for current PTP TX/RX code
++               * running without disabling softirqs, there are three good
++               * reasond for doing so:
++               *
++               *      1) The code invoked is mainly implemented for non-PTP
++               *         packets and it is always executed with softirqs
++               *         disabled.
++               *      2) This being a single PTP packet, better to not
++               *         interrupt its processing by softirqs which can lead
++               *         to high latencies.
++               *      3) netdev_xmit_more checks preemption is disabled and
++               *         triggers a BUG_ON if not.
++               */
++              local_bh_disable();
+               efx_enqueue_skb(tx_queue, skb);
++              local_bh_enable();
+       } else {
+               WARN_ONCE(1, "PTP channel has no timestamped tx queue\n");
+               dev_kfree_skb_any(skb);
+-- 
+2.35.1
+
diff --git a/queue-5.4/tcp-fix-a-data-race-around-sysctl_tcp_autocorking.patch b/queue-5.4/tcp-fix-a-data-race-around-sysctl_tcp_autocorking.patch

new file mode 100644 (file)

index 0000000..c750840
--- /dev/null
+++ b/queue-5.4/tcp-fix-a-data-race-around-sysctl_tcp_autocorking.patch
@@ -0,0 +1,36 @@
+From e316ab213ee484da6d1e41e6c2b1ed0065c844ec Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 20 Jul 2022 09:50:25 -0700
+Subject: tcp: Fix a data-race around sysctl_tcp_autocorking.
+
+From: Kuniyuki Iwashima <kuniyu@amazon.com>
+
+[ Upstream commit 85225e6f0a76e6745bc841c9f25169c509b573d8 ]
+
+While reading sysctl_tcp_autocorking, it can be changed concurrently.
+Thus, we need to add READ_ONCE() to its reader.
+
+Fixes: f54b311142a9 ("tcp: auto corking")
+Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/ipv4/tcp.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
+index 4b31f6e9ec61..0a570d5d0b38 100644
+--- a/net/ipv4/tcp.c
++++ b/net/ipv4/tcp.c
+@@ -697,7 +697,7 @@ static bool tcp_should_autocork(struct sock *sk, struct sk_buff *skb,
+                               int size_goal)
+ {
+       return skb->len < size_goal &&
+-             sock_net(sk)->ipv4.sysctl_tcp_autocorking &&
++             READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_autocorking) &&
+              !tcp_rtx_queue_empty(sk) &&
+              refcount_read(&sk->sk_wmem_alloc) > skb->truesize;
+ }
+-- 
+2.35.1
+
diff --git a/queue-5.4/tcp-fix-a-data-race-around-sysctl_tcp_comp_sack_dela.patch b/queue-5.4/tcp-fix-a-data-race-around-sysctl_tcp_comp_sack_dela.patch

new file mode 100644 (file)

index 0000000..3289921
--- /dev/null
+++ b/queue-5.4/tcp-fix-a-data-race-around-sysctl_tcp_comp_sack_dela.patch
@@ -0,0 +1,37 @@
+From ebf4075665258aa877a27565be22f64acdf3cb16 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 22 Jul 2022 11:22:01 -0700
+Subject: tcp: Fix a data-race around sysctl_tcp_comp_sack_delay_ns.
+
+From: Kuniyuki Iwashima <kuniyu@amazon.com>
+
+[ Upstream commit 4866b2b0f7672b6d760c4b8ece6fb56f965dcc8a ]
+
+While reading sysctl_tcp_comp_sack_delay_ns, it can be changed
+concurrently.  Thus, we need to add READ_ONCE() to its reader.
+
+Fixes: 6d82aa242092 ("tcp: add tcp_comp_sack_delay_ns sysctl")
+Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/ipv4/tcp_input.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
+index 7af5e6acf41a..f8fa036cfae2 100644
+--- a/net/ipv4/tcp_input.c
++++ b/net/ipv4/tcp_input.c
+@@ -5326,7 +5326,8 @@ static void __tcp_ack_snd_check(struct sock *sk, int ofo_possible)
+       if (tp->srtt_us && tp->srtt_us < rtt)
+               rtt = tp->srtt_us;
+ 
+-      delay = min_t(unsigned long, sock_net(sk)->ipv4.sysctl_tcp_comp_sack_delay_ns,
++      delay = min_t(unsigned long,
++                    READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_comp_sack_delay_ns),
+                     rtt * (NSEC_PER_USEC >> 3)/20);
+       sock_hold(sk);
+       hrtimer_start(&tp->compressed_ack_timer, ns_to_ktime(delay),
+-- 
+2.35.1
+
diff --git a/queue-5.4/tcp-fix-a-data-race-around-sysctl_tcp_comp_sack_nr.patch b/queue-5.4/tcp-fix-a-data-race-around-sysctl_tcp_comp_sack_nr.patch

new file mode 100644 (file)

index 0000000..a555065
--- /dev/null
+++ b/queue-5.4/tcp-fix-a-data-race-around-sysctl_tcp_comp_sack_nr.patch
@@ -0,0 +1,36 @@
+From 637627cb3171e89f7c0c48a64671e5a73f885708 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 22 Jul 2022 11:22:03 -0700
+Subject: tcp: Fix a data-race around sysctl_tcp_comp_sack_nr.
+
+From: Kuniyuki Iwashima <kuniyu@amazon.com>
+
+[ Upstream commit 79f55473bfc8ac51bd6572929a679eeb4da22251 ]
+
+While reading sysctl_tcp_comp_sack_nr, it can be changed concurrently.
+Thus, we need to add READ_ONCE() to its reader.
+
+Fixes: 9c21d2fc41c0 ("tcp: add tcp_comp_sack_nr sysctl")
+Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/ipv4/tcp_input.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
+index f8fa036cfae2..f4e00ff909da 100644
+--- a/net/ipv4/tcp_input.c
++++ b/net/ipv4/tcp_input.c
+@@ -5303,7 +5303,7 @@ static void __tcp_ack_snd_check(struct sock *sk, int ofo_possible)
+       }
+ 
+       if (!tcp_is_sack(tp) ||
+-          tp->compressed_ack >= sock_net(sk)->ipv4.sysctl_tcp_comp_sack_nr)
++          tp->compressed_ack >= READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_comp_sack_nr))
+               goto send_now;
+ 
+       if (tp->compressed_ack_rcv_nxt != tp->rcv_nxt) {
+-- 
+2.35.1
+
diff --git a/queue-5.4/tcp-fix-a-data-race-around-sysctl_tcp_invalid_rateli.patch b/queue-5.4/tcp-fix-a-data-race-around-sysctl_tcp_invalid_rateli.patch

new file mode 100644 (file)

index 0000000..238829a
--- /dev/null
+++ b/queue-5.4/tcp-fix-a-data-race-around-sysctl_tcp_invalid_rateli.patch
@@ -0,0 +1,37 @@
+From c2f2c8c94efeb0a28ef9b8b773cf1e5ea606c1a6 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 20 Jul 2022 09:50:26 -0700
+Subject: tcp: Fix a data-race around sysctl_tcp_invalid_ratelimit.
+
+From: Kuniyuki Iwashima <kuniyu@amazon.com>
+
+[ Upstream commit 2afdbe7b8de84c28e219073a6661080e1b3ded48 ]
+
+While reading sysctl_tcp_invalid_ratelimit, it can be changed
+concurrently.  Thus, we need to add READ_ONCE() to its reader.
+
+Fixes: 032ee4236954 ("tcp: helpers to mitigate ACK loops by rate-limiting out-of-window dupacks")
+Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/ipv4/tcp_input.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
+index b760ad0b16d9..7af5e6acf41a 100644
+--- a/net/ipv4/tcp_input.c
++++ b/net/ipv4/tcp_input.c
+@@ -3436,7 +3436,8 @@ static bool __tcp_oow_rate_limited(struct net *net, int mib_idx,
+       if (*last_oow_ack_time) {
+               s32 elapsed = (s32)(tcp_jiffies32 - *last_oow_ack_time);
+ 
+-              if (0 <= elapsed && elapsed < net->ipv4.sysctl_tcp_invalid_ratelimit) {
++              if (0 <= elapsed &&
++                  elapsed < READ_ONCE(net->ipv4.sysctl_tcp_invalid_ratelimit)) {
+                       NET_INC_STATS(net, mib_idx);
+                       return true;    /* rate-limited: don't send yet! */
+               }
+-- 
+2.35.1
+
diff --git a/queue-5.4/tcp-fix-a-data-race-around-sysctl_tcp_min_rtt_wlen.patch b/queue-5.4/tcp-fix-a-data-race-around-sysctl_tcp_min_rtt_wlen.patch

new file mode 100644 (file)

index 0000000..afabe75
--- /dev/null
+++ b/queue-5.4/tcp-fix-a-data-race-around-sysctl_tcp_min_rtt_wlen.patch
@@ -0,0 +1,36 @@
+From b40186736da09f578da0ae9d2e5a54169420c40c Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 20 Jul 2022 09:50:24 -0700
+Subject: tcp: Fix a data-race around sysctl_tcp_min_rtt_wlen.
+
+From: Kuniyuki Iwashima <kuniyu@amazon.com>
+
+[ Upstream commit 1330ffacd05fc9ac4159d19286ce119e22450ed2 ]
+
+While reading sysctl_tcp_min_rtt_wlen, it can be changed concurrently.
+Thus, we need to add READ_ONCE() to its reader.
+
+Fixes: f672258391b4 ("tcp: track min RTT using windowed min-filter")
+Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/ipv4/tcp_input.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
+index a8d8ff488281..b760ad0b16d9 100644
+--- a/net/ipv4/tcp_input.c
++++ b/net/ipv4/tcp_input.c
+@@ -2914,7 +2914,7 @@ static void tcp_fastretrans_alert(struct sock *sk, const u32 prior_snd_una,
+ 
+ static void tcp_update_rtt_min(struct sock *sk, u32 rtt_us, const int flag)
+ {
+-      u32 wlen = sock_net(sk)->ipv4.sysctl_tcp_min_rtt_wlen * HZ;
++      u32 wlen = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_min_rtt_wlen) * HZ;
+       struct tcp_sock *tp = tcp_sk(sk);
+ 
+       if ((flag & FLAG_ACK_MAYBE_DELAYED) && rtt_us > tcp_min_rtt(tp)) {
+-- 
+2.35.1
+
diff --git a/queue-5.4/tcp-fix-a-data-race-around-sysctl_tcp_min_tso_segs.patch b/queue-5.4/tcp-fix-a-data-race-around-sysctl_tcp_min_tso_segs.patch

new file mode 100644 (file)

index 0000000..aaca291
--- /dev/null
+++ b/queue-5.4/tcp-fix-a-data-race-around-sysctl_tcp_min_tso_segs.patch
@@ -0,0 +1,36 @@
+From fc9ed037a799f945ac3a7c788c0abf1ad3d68df5 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 20 Jul 2022 09:50:22 -0700
+Subject: tcp: Fix a data-race around sysctl_tcp_min_tso_segs.
+
+From: Kuniyuki Iwashima <kuniyu@amazon.com>
+
+[ Upstream commit e0bb4ab9dfddd872622239f49fb2bd403b70853b ]
+
+While reading sysctl_tcp_min_tso_segs, it can be changed concurrently.
+Thus, we need to add READ_ONCE() to its reader.
+
+Fixes: 95bd09eb2750 ("tcp: TSO packets automatic sizing")
+Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/ipv4/tcp_output.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
+index 99e077422975..ef749a47768a 100644
+--- a/net/ipv4/tcp_output.c
++++ b/net/ipv4/tcp_output.c
+@@ -1761,7 +1761,7 @@ static u32 tcp_tso_segs(struct sock *sk, unsigned int mss_now)
+ 
+       min_tso = ca_ops->min_tso_segs ?
+                       ca_ops->min_tso_segs(sk) :
+-                      sock_net(sk)->ipv4.sysctl_tcp_min_tso_segs;
++                      READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_min_tso_segs);
+ 
+       tso_segs = tcp_tso_autosize(sk, mss_now, min_tso);
+       return min_t(u32, tso_segs, sk->sk_gso_max_segs);
+-- 
+2.35.1
+
diff --git a/queue-5.4/virtio-net-fix-the-race-between-refill-work-and-clos.patch b/queue-5.4/virtio-net-fix-the-race-between-refill-work-and-clos.patch

new file mode 100644 (file)

index 0000000..580b1f7
--- /dev/null
+++ b/queue-5.4/virtio-net-fix-the-race-between-refill-work-and-clos.patch
@@ -0,0 +1,151 @@
+From 107584d36a876b373e733c893b58dd447c8b9e48 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 25 Jul 2022 15:21:59 +0800
+Subject: virtio-net: fix the race between refill work and close
+
+From: Jason Wang <jasowang@redhat.com>
+
+[ Upstream commit 5a159128faff151b7fe5f4eb0f310b1e0a2d56bf ]
+
+We try using cancel_delayed_work_sync() to prevent the work from
+enabling NAPI. This is insufficient since we don't disable the source
+of the refill work scheduling. This means an NAPI poll callback after
+cancel_delayed_work_sync() can schedule the refill work then can
+re-enable the NAPI that leads to use-after-free [1].
+
+Since the work can enable NAPI, we can't simply disable NAPI before
+calling cancel_delayed_work_sync(). So fix this by introducing a
+dedicated boolean to control whether or not the work could be
+scheduled from NAPI.
+
+[1]
+==================================================================
+BUG: KASAN: use-after-free in refill_work+0x43/0xd4
+Read of size 2 at addr ffff88810562c92e by task kworker/2:1/42
+
+CPU: 2 PID: 42 Comm: kworker/2:1 Not tainted 5.19.0-rc1+ #480
+Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.16.0-0-gd239552ce722-prebuilt.qemu.org 04/01/2014
+Workqueue: events refill_work
+Call Trace:
+ <TASK>
+ dump_stack_lvl+0x34/0x44
+ print_report.cold+0xbb/0x6ac
+ ? _printk+0xad/0xde
+ ? refill_work+0x43/0xd4
+ kasan_report+0xa8/0x130
+ ? refill_work+0x43/0xd4
+ refill_work+0x43/0xd4
+ process_one_work+0x43d/0x780
+ worker_thread+0x2a0/0x6f0
+ ? process_one_work+0x780/0x780
+ kthread+0x167/0x1a0
+ ? kthread_exit+0x50/0x50
+ ret_from_fork+0x22/0x30
+ </TASK>
+...
+
+Fixes: b2baed69e605c ("virtio_net: set/cancel work on ndo_open/ndo_stop")
+Signed-off-by: Jason Wang <jasowang@redhat.com>
+Acked-by: Michael S. Tsirkin <mst@redhat.com>
+Reviewed-by: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/virtio_net.c | 37 ++++++++++++++++++++++++++++++++++---
+ 1 file changed, 34 insertions(+), 3 deletions(-)
+
+diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
+index e14842fbe3d6..579df7c5411d 100644
+--- a/drivers/net/virtio_net.c
++++ b/drivers/net/virtio_net.c
+@@ -213,9 +213,15 @@ struct virtnet_info {
+       /* Packet virtio header size */
+       u8 hdr_len;
+ 
+-      /* Work struct for refilling if we run low on memory. */
++      /* Work struct for delayed refilling if we run low on memory. */
+       struct delayed_work refill;
+ 
++      /* Is delayed refill enabled? */
++      bool refill_enabled;
++
++      /* The lock to synchronize the access to refill_enabled */
++      spinlock_t refill_lock;
++
+       /* Work struct for config space updates */
+       struct work_struct config_work;
+ 
+@@ -319,6 +325,20 @@ static struct page *get_a_page(struct receive_queue *rq, gfp_t gfp_mask)
+       return p;
+ }
+ 
++static void enable_delayed_refill(struct virtnet_info *vi)
++{
++      spin_lock_bh(&vi->refill_lock);
++      vi->refill_enabled = true;
++      spin_unlock_bh(&vi->refill_lock);
++}
++
++static void disable_delayed_refill(struct virtnet_info *vi)
++{
++      spin_lock_bh(&vi->refill_lock);
++      vi->refill_enabled = false;
++      spin_unlock_bh(&vi->refill_lock);
++}
++
+ static void virtqueue_napi_schedule(struct napi_struct *napi,
+                                   struct virtqueue *vq)
+ {
+@@ -1388,8 +1408,12 @@ static int virtnet_receive(struct receive_queue *rq, int budget,
+       }
+ 
+       if (rq->vq->num_free > min((unsigned int)budget, virtqueue_get_vring_size(rq->vq)) / 2) {
+-              if (!try_fill_recv(vi, rq, GFP_ATOMIC))
+-                      schedule_delayed_work(&vi->refill, 0);
++              if (!try_fill_recv(vi, rq, GFP_ATOMIC)) {
++                      spin_lock(&vi->refill_lock);
++                      if (vi->refill_enabled)
++                              schedule_delayed_work(&vi->refill, 0);
++                      spin_unlock(&vi->refill_lock);
++              }
+       }
+ 
+       u64_stats_update_begin(&rq->stats.syncp);
+@@ -1508,6 +1532,8 @@ static int virtnet_open(struct net_device *dev)
+       struct virtnet_info *vi = netdev_priv(dev);
+       int i, err;
+ 
++      enable_delayed_refill(vi);
++
+       for (i = 0; i < vi->max_queue_pairs; i++) {
+               if (i < vi->curr_queue_pairs)
+                       /* Make sure we have some buffers: if oom use wq. */
+@@ -1878,6 +1904,8 @@ static int virtnet_close(struct net_device *dev)
+       struct virtnet_info *vi = netdev_priv(dev);
+       int i;
+ 
++      /* Make sure NAPI doesn't schedule refill work */
++      disable_delayed_refill(vi);
+       /* Make sure refill_work doesn't re-enable napi! */
+       cancel_delayed_work_sync(&vi->refill);
+ 
+@@ -2417,6 +2445,8 @@ static int virtnet_restore_up(struct virtio_device *vdev)
+ 
+       virtio_device_ready(vdev);
+ 
++      enable_delayed_refill(vi);
++
+       if (netif_running(vi->dev)) {
+               err = virtnet_open(vi->dev);
+               if (err)
+@@ -3140,6 +3170,7 @@ static int virtnet_probe(struct virtio_device *vdev)
+       vdev->priv = vi;
+ 
+       INIT_WORK(&vi->config_work, virtnet_config_changed_work);
++      spin_lock_init(&vi->refill_lock);
+ 
+       /* If we can receive ANY GSO packets, we must allocate large ones. */
+       if (virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_TSO4) ||
+-- 
+2.35.1
+
author	Sasha Levin <sashal@kernel.org>
	Sun, 31 Jul 2022 02:41:29 +0000 (22:41 -0400)
committer	Sasha Levin <sashal@kernel.org>
	Sun, 31 Jul 2022 02:41:29 +0000 (22:41 -0400)
queue-5.4/documentation-fix-sctp_wmem-in-ip-sysctl.rst.patch	[new file with mode: 0644]	patch \| blob
queue-5.4/i40e-fix-interface-init-with-msi-interrupts-no-msi-x.patch	[new file with mode: 0644]	patch \| blob
queue-5.4/igmp-fix-data-races-around-sysctl_igmp_qrv.patch	[new file with mode: 0644]	patch \| blob
queue-5.4/net-sungem_phy-add-of_node_put-for-reference-returne.patch	[new file with mode: 0644]	patch \| blob
queue-5.4/netfilter-nf_queue-do-not-allow-packet-truncation-be.patch	[new file with mode: 0644]	patch \| blob
queue-5.4/perf-symbol-correct-address-for-bss-symbols.patch	[new file with mode: 0644]	patch \| blob
queue-5.4/sctp-fix-sleep-in-atomic-context-bug-in-timer-handle.patch	[new file with mode: 0644]	patch \| blob
queue-5.4/sctp-leave-the-err-path-free-in-sctp_stream_init-to-.patch	[new file with mode: 0644]	patch \| blob
queue-5.4/series		patch \| blob \| blame \| history
queue-5.4/sfc-disable-softirqs-for-ptp-tx.patch	[new file with mode: 0644]	patch \| blob
queue-5.4/tcp-fix-a-data-race-around-sysctl_tcp_autocorking.patch	[new file with mode: 0644]	patch \| blob
queue-5.4/tcp-fix-a-data-race-around-sysctl_tcp_comp_sack_dela.patch	[new file with mode: 0644]	patch \| blob
queue-5.4/tcp-fix-a-data-race-around-sysctl_tcp_comp_sack_nr.patch	[new file with mode: 0644]	patch \| blob
queue-5.4/tcp-fix-a-data-race-around-sysctl_tcp_invalid_rateli.patch	[new file with mode: 0644]	patch \| blob
queue-5.4/tcp-fix-a-data-race-around-sysctl_tcp_min_rtt_wlen.patch	[new file with mode: 0644]	patch \| blob
queue-5.4/tcp-fix-a-data-race-around-sysctl_tcp_min_tso_segs.patch	[new file with mode: 0644]	patch \| blob
queue-5.4/virtio-net-fix-the-race-between-refill-work-and-clos.patch	[new file with mode: 0644]	patch \| blob