]> git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
Fixes for 5.15
author: Sasha Levin <sashal@kernel.org>
Sun, 31 Jul 2022 02:41:28 +0000 (22:41 -0400)
committer: Sasha Levin <sashal@kernel.org>
Sun, 31 Jul 2022 02:41:28 +0000 (22:41 -0400)
Signed-off-by: Sasha Levin <sashal@kernel.org>
53 files changed:
queue-5.15/asm-generic-remove-a-broken-and-needless-ifdef-condi.patch [new file with mode: 0644]
queue-5.15/documentation-fix-sctp_wmem-in-ip-sysctl.rst.patch [new file with mode: 0644]
queue-5.15/i40e-fix-interface-init-with-msi-interrupts-no-msi-x.patch [new file with mode: 0644]
queue-5.15/ice-check-dd-eof-bits-on-rx-descriptor-rather-than-e.patch [new file with mode: 0644]
queue-5.15/ice-do-not-setup-vlan-for-loopback-vsi.patch-1510 [new file with mode: 0644]
queue-5.15/igmp-fix-data-races-around-sysctl_igmp_qrv.patch [new file with mode: 0644]
queue-5.15/ipv4-fix-data-races-around-sysctl_fib_notify_on_flag.patch [new file with mode: 0644]
queue-5.15/ipv6-addrconf-fix-a-null-ptr-deref-bug-for-ip6_ptr.patch-17245 [new file with mode: 0644]
queue-5.15/macsec-always-read-macsec_sa_attr_pn-as-a-u64.patch [new file with mode: 0644]
queue-5.15/macsec-fix-error-message-in-macsec_add_rxsa-and-_txs.patch [new file with mode: 0644]
queue-5.15/macsec-fix-null-deref-in-macsec_add_rxsa.patch [new file with mode: 0644]
queue-5.15/macsec-limit-replay-window-size-with-xpn.patch [new file with mode: 0644]
queue-5.15/net-fix-data-races-around-sysctl_-rw-mem-_offset.patch [new file with mode: 0644]
queue-5.15/net-macsec-fix-potential-resource-leak-in-macsec_add.patch [new file with mode: 0644]
queue-5.15/net-mld-fix-reference-count-leak-in-mld_-query-repor.patch [new file with mode: 0644]
queue-5.15/net-pcs-xpcs-propagate-xpcs_read-error-to-xpcs_get_s.patch [new file with mode: 0644]
queue-5.15/net-ping6-fix-memleak-in-ipv6_renew_options.patch-12523 [new file with mode: 0644]
queue-5.15/net-sungem_phy-add-of_node_put-for-reference-returne.patch [new file with mode: 0644]
queue-5.15/net-tls-remove-the-context-from-the-list-in-tls_devi.patch [new file with mode: 0644]
queue-5.15/netfilter-nf_queue-do-not-allow-packet-truncation-be.patch [new file with mode: 0644]
queue-5.15/octeontx2-pf-cn10k-fix-egress-ratelimit-configuratio.patch [new file with mode: 0644]
queue-5.15/octeontx2-pf-fix-udp-tcp-src-and-dst-port-tc-filters.patch-781 [new file with mode: 0644]
queue-5.15/perf-symbol-correct-address-for-bss-symbols.patch [new file with mode: 0644]
queue-5.15/revert-tcp-change-pingpong-threshold-to-3.patch-30941 [new file with mode: 0644]
queue-5.15/s390-archrandom-prevent-cpacf-trng-invocations-in-in.patch [new file with mode: 0644]
queue-5.15/scsi-core-fix-warning-in-scsi_alloc_sgtables.patch-8274 [new file with mode: 0644]
queue-5.15/scsi-mpt3sas-stop-fw-fault-watchdog-work-item-during.patch [new file with mode: 0644]
queue-5.15/scsi-ufs-host-hold-reference-returned-by-of_parse_ph.patch [new file with mode: 0644]
queue-5.15/sctp-fix-sleep-in-atomic-context-bug-in-timer-handle.patch [new file with mode: 0644]
queue-5.15/sctp-leave-the-err-path-free-in-sctp_stream_init-to-.patch [new file with mode: 0644]
queue-5.15/series
queue-5.15/sfc-disable-softirqs-for-ptp-tx.patch [new file with mode: 0644]
queue-5.15/tcp-fix-a-data-race-around-sysctl_tcp_adv_win_scale.patch-19790 [new file with mode: 0644]
queue-5.15/tcp-fix-a-data-race-around-sysctl_tcp_app_win.patch-22294 [new file with mode: 0644]
queue-5.15/tcp-fix-a-data-race-around-sysctl_tcp_autocorking.patch [new file with mode: 0644]
queue-5.15/tcp-fix-a-data-race-around-sysctl_tcp_challenge_ack_.patch [new file with mode: 0644]
queue-5.15/tcp-fix-a-data-race-around-sysctl_tcp_comp_sack_dela.patch [new file with mode: 0644]
queue-5.15/tcp-fix-a-data-race-around-sysctl_tcp_comp_sack_nr.patch [new file with mode: 0644]
queue-5.15/tcp-fix-a-data-race-around-sysctl_tcp_comp_sack_slac.patch [new file with mode: 0644]
queue-5.15/tcp-fix-a-data-race-around-sysctl_tcp_frto.patch-3670 [new file with mode: 0644]
queue-5.15/tcp-fix-a-data-race-around-sysctl_tcp_invalid_rateli.patch [new file with mode: 0644]
queue-5.15/tcp-fix-a-data-race-around-sysctl_tcp_limit_output_b.patch [new file with mode: 0644]
queue-5.15/tcp-fix-a-data-race-around-sysctl_tcp_min_rtt_wlen.patch [new file with mode: 0644]
queue-5.15/tcp-fix-a-data-race-around-sysctl_tcp_min_tso_segs.patch [new file with mode: 0644]
queue-5.15/tcp-fix-a-data-race-around-sysctl_tcp_nometrics_save.patch-5497 [new file with mode: 0644]
queue-5.15/tcp-fix-data-races-around-sk_pacing_rate.patch [new file with mode: 0644]
queue-5.15/tcp-fix-data-races-around-sysctl_tcp_dsack.patch-17026 [new file with mode: 0644]
queue-5.15/tcp-fix-data-races-around-sysctl_tcp_moderate_rcvbuf.patch-32656 [new file with mode: 0644]
queue-5.15/tcp-fix-data-races-around-sysctl_tcp_no_ssthresh_met.patch [new file with mode: 0644]
queue-5.15/tcp-fix-data-races-around-sysctl_tcp_reflect_tos.patch [new file with mode: 0644]
queue-5.15/virtio-net-fix-the-race-between-refill-work-and-clos.patch [new file with mode: 0644]
queue-5.15/watch_queue-fix-missing-locking-in-add_watch_to_obje.patch [new file with mode: 0644]
queue-5.15/watch_queue-fix-missing-rcu-annotation.patch-18505 [new file with mode: 0644]

diff --git a/queue-5.15/asm-generic-remove-a-broken-and-needless-ifdef-condi.patch b/queue-5.15/asm-generic-remove-a-broken-and-needless-ifdef-condi.patch
new file mode 100644 (file)
index 0000000..ad69d7f
--- /dev/null
@@ -0,0 +1,56 @@
+From ba9e2ef98d7b71254487dcca7051b49ea764e93b Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 22 Jul 2022 13:07:11 +0200
+Subject: asm-generic: remove a broken and needless ifdef conditional
+
+From: Lukas Bulwahn <lukas.bulwahn@gmail.com>
+
+[ Upstream commit e2a619ca0b38f2114347b7078b8a67d72d457a3d ]
+
+Commit 527701eda5f1 ("lib: Add a generic version of devmem_is_allowed()")
+introduces the config symbol GENERIC_LIB_DEVMEM_IS_ALLOWED, but then
+falsely refers to CONFIG_GENERIC_DEVMEM_IS_ALLOWED (note the missing LIB
+in the reference) in ./include/asm-generic/io.h.
+
+Luckily, ./scripts/checkkconfigsymbols.py warns on non-existing configs:
+
+GENERIC_DEVMEM_IS_ALLOWED
+Referencing files: include/asm-generic/io.h
+
+The actual fix, though, is simply to not make this function declaration
+dependent on any kernel config. For architectures that intend to use
+the generic version, the arch's 'select GENERIC_LIB_DEVMEM_IS_ALLOWED' will
+lead to picking the function definition, and for other architectures, this
+function is simply defined elsewhere.
+
+The wrong '#ifndef' on a non-existing config symbol also always had the
+same effect (although more by mistake than by intent). So, there is no
+functional change.
+
+Remove this broken and needless ifdef conditional.
+
+Fixes: 527701eda5f1 ("lib: Add a generic version of devmem_is_allowed()")
+Signed-off-by: Lukas Bulwahn <lukas.bulwahn@gmail.com>
+Signed-off-by: Arnd Bergmann <arnd@arndb.de>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/asm-generic/io.h | 2 --
+ 1 file changed, 2 deletions(-)
+
+diff --git a/include/asm-generic/io.h b/include/asm-generic/io.h
+index 7ce93aaf69f8..98954dda5734 100644
+--- a/include/asm-generic/io.h
++++ b/include/asm-generic/io.h
+@@ -1125,9 +1125,7 @@ static inline void memcpy_toio(volatile void __iomem *addr, const void *buffer,
+ }
+ #endif
+-#ifndef CONFIG_GENERIC_DEVMEM_IS_ALLOWED
+ extern int devmem_is_allowed(unsigned long pfn);
+-#endif
+ #endif /* __KERNEL__ */
+-- 
+2.35.1
+
diff --git a/queue-5.15/documentation-fix-sctp_wmem-in-ip-sysctl.rst.patch b/queue-5.15/documentation-fix-sctp_wmem-in-ip-sysctl.rst.patch
new file mode 100644 (file)
index 0000000..4b976c7
--- /dev/null
@@ -0,0 +1,49 @@
+From 2dc82bcc5bb40f4cf6076f892ace8fdfa3041183 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 21 Jul 2022 10:35:46 -0400
+Subject: Documentation: fix sctp_wmem in ip-sysctl.rst
+
+From: Xin Long <lucien.xin@gmail.com>
+
+[ Upstream commit aa709da0e032cee7c202047ecd75f437bb0126ed ]
+
+Since commit 1033990ac5b2 ("sctp: implement memory accounting on tx path"),
+SCTP has supported memory accounting on tx path where 'sctp_wmem' is used
+by sk_wmem_schedule(). So we should fix the description for this option in
+ip-sysctl.rst accordingly.
+
+v1->v2:
+  - Improve the description as Marcelo suggested.
+
+Fixes: 1033990ac5b2 ("sctp: implement memory accounting on tx path")
+Signed-off-by: Xin Long <lucien.xin@gmail.com>
+Acked-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ Documentation/networking/ip-sysctl.rst | 9 ++++++++-
+ 1 file changed, 8 insertions(+), 1 deletion(-)
+
+diff --git a/Documentation/networking/ip-sysctl.rst b/Documentation/networking/ip-sysctl.rst
+index b8b67041f955..ba0e8e6337c0 100644
+--- a/Documentation/networking/ip-sysctl.rst
++++ b/Documentation/networking/ip-sysctl.rst
+@@ -2808,7 +2808,14 @@ sctp_rmem - vector of 3 INTEGERs: min, default, max
+       Default: 4K
+ sctp_wmem  - vector of 3 INTEGERs: min, default, max
+-      Currently this tunable has no effect.
++      Only the first value ("min") is used, "default" and "max" are
++      ignored.
++
++      min: Minimum size of send buffer that can be used by SCTP sockets.
++      It is guaranteed to each SCTP socket (but not association) even
++      under moderate memory pressure.
++
++      Default: 4K
+ addr_scope_policy - INTEGER
+       Control IPv4 address scoping - draft-stewart-tsvwg-sctp-ipv4-00
+-- 
+2.35.1
+
diff --git a/queue-5.15/i40e-fix-interface-init-with-msi-interrupts-no-msi-x.patch b/queue-5.15/i40e-fix-interface-init-with-msi-interrupts-no-msi-x.patch
new file mode 100644 (file)
index 0000000..fce91f3
--- /dev/null
@@ -0,0 +1,49 @@
+From abb2ff0414e76999b74ab47eeba4bd461a796d5f Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 22 Jul 2022 10:54:01 -0700
+Subject: i40e: Fix interface init with MSI interrupts (no MSI-X)
+
+From: Michal Maloszewski <michal.maloszewski@intel.com>
+
+[ Upstream commit 5fcbb711024aac6d4db385623e6f2fdf019f7782 ]
+
+Fix the inability to bring an interface up on a setup with
+only MSI interrupts enabled (no MSI-X).
+Solution is to add a default number of QPs = 1. This is enough,
+since without MSI-X support driver enables only a basic feature set.
+
+Fixes: bc6d33c8d93f ("i40e: Fix the number of queues available to be mapped for use")
+Signed-off-by: Dawid Lukwinski <dawid.lukwinski@intel.com>
+Signed-off-by: Michal Maloszewski <michal.maloszewski@intel.com>
+Tested-by: Dave Switzer <david.switzer@intel.com>
+Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
+Link: https://lore.kernel.org/r/20220722175401.112572-1-anthony.l.nguyen@intel.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/intel/i40e/i40e_main.c | 4 ++++
+ 1 file changed, 4 insertions(+)
+
+diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
+index c801b128e5b2..b07d55c99317 100644
+--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
++++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
+@@ -1908,11 +1908,15 @@ static void i40e_vsi_setup_queue_map(struct i40e_vsi *vsi,
+                * non-zero req_queue_pairs says that user requested a new
+                * queue count via ethtool's set_channels, so use this
+                * value for queues distribution across traffic classes
++               * We need at least one queue pair for the interface
++               * to be usable as we see in else statement.
+                */
+               if (vsi->req_queue_pairs > 0)
+                       vsi->num_queue_pairs = vsi->req_queue_pairs;
+               else if (pf->flags & I40E_FLAG_MSIX_ENABLED)
+                       vsi->num_queue_pairs = pf->num_lan_msix;
++              else
++                      vsi->num_queue_pairs = 1;
+       }
+       /* Number of queues per enabled TC */
+-- 
+2.35.1
+
diff --git a/queue-5.15/ice-check-dd-eof-bits-on-rx-descriptor-rather-than-e.patch b/queue-5.15/ice-check-dd-eof-bits-on-rx-descriptor-rather-than-e.patch
new file mode 100644 (file)
index 0000000..cded44b
--- /dev/null
@@ -0,0 +1,46 @@
+From 9df4f4593611ea640dfab3ff20f6ee5a5dd41485 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 7 Jul 2022 12:20:42 +0200
+Subject: ice: check (DD | EOF) bits on Rx descriptor rather than (EOP | RS)
+
+From: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
+
+[ Upstream commit 283d736ff7c7e96ac5b32c6c0de40372f8eb171e ]
+
+Tx side sets EOP and RS bits on descriptors to indicate that a
+particular descriptor is the last one and needs to generate an irq when
+it was sent. These bits should not be checked on completion path
+regardless whether it's the Tx or the Rx. DD bit serves this purpose and
+it indicates that a particular descriptor is either for Rx or was
+successfully Txed. EOF is also set as loopback test does not xmit
+fragmented frames.
+
+Look at (DD | EOF) bits setting in ice_lbtest_receive_frames() instead
+of EOP and RS pair.
+
+Fixes: 0e674aeb0b77 ("ice: Add handler for ethtool selftest")
+Signed-off-by: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
+Tested-by: George Kuruvinakunnel <george.kuruvinakunnel@intel.com>
+Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/intel/ice/ice_ethtool.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool.c b/drivers/net/ethernet/intel/ice/ice_ethtool.c
+index 982db894754f..9b9c2b885486 100644
+--- a/drivers/net/ethernet/intel/ice/ice_ethtool.c
++++ b/drivers/net/ethernet/intel/ice/ice_ethtool.c
+@@ -651,7 +651,8 @@ static int ice_lbtest_receive_frames(struct ice_ring *rx_ring)
+               rx_desc = ICE_RX_DESC(rx_ring, i);
+               if (!(rx_desc->wb.status_error0 &
+-                  cpu_to_le16(ICE_TX_DESC_CMD_EOP | ICE_TX_DESC_CMD_RS)))
++                  (cpu_to_le16(BIT(ICE_RX_FLEX_DESC_STATUS0_DD_S)) |
++                   cpu_to_le16(BIT(ICE_RX_FLEX_DESC_STATUS0_EOF_S)))))
+                       continue;
+               rx_buf = &rx_ring->rx_buf[i];
+-- 
+2.35.1
+
diff --git a/queue-5.15/ice-do-not-setup-vlan-for-loopback-vsi.patch-1510 b/queue-5.15/ice-do-not-setup-vlan-for-loopback-vsi.patch-1510
new file mode 100644 (file)
index 0000000..4dc6c5b
--- /dev/null
@@ -0,0 +1,44 @@
+From ef5016a05107a9ae75d07a6689f3e249bcbf0772 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 7 Jul 2022 12:20:43 +0200
+Subject: ice: do not setup vlan for loopback VSI
+
+From: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
+
+[ Upstream commit cc019545a238518fa9da1e2a889f6e1bb1005a63 ]
+
+Currently loopback test is failing due to the error returned from
+ice_vsi_vlan_setup(). Skip calling it when preparing loopback VSI.
+
+Fixes: 0e674aeb0b77 ("ice: Add handler for ethtool selftest")
+Signed-off-by: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
+Tested-by: George Kuruvinakunnel <george.kuruvinakunnel@intel.com>
+Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/intel/ice/ice_main.c | 8 +++++---
+ 1 file changed, 5 insertions(+), 3 deletions(-)
+
+diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c
+index 188abf36a5b2..b9d45c7dbef1 100644
+--- a/drivers/net/ethernet/intel/ice/ice_main.c
++++ b/drivers/net/ethernet/intel/ice/ice_main.c
+@@ -5481,10 +5481,12 @@ int ice_vsi_cfg(struct ice_vsi *vsi)
+       if (vsi->netdev) {
+               ice_set_rx_mode(vsi->netdev);
+-              err = ice_vsi_vlan_setup(vsi);
++              if (vsi->type != ICE_VSI_LB) {
++                      err = ice_vsi_vlan_setup(vsi);
+-              if (err)
+-                      return err;
++                      if (err)
++                              return err;
++              }
+       }
+       ice_vsi_cfg_dcb_rings(vsi);
+-- 
+2.35.1
+
diff --git a/queue-5.15/igmp-fix-data-races-around-sysctl_igmp_qrv.patch b/queue-5.15/igmp-fix-data-races-around-sysctl_igmp_qrv.patch
new file mode 100644 (file)
index 0000000..2ef1e7a
--- /dev/null
@@ -0,0 +1,127 @@
+From 6876cbac8e50d133f62830c030fc2c5947133c45 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 15 Jul 2022 10:17:44 -0700
+Subject: igmp: Fix data-races around sysctl_igmp_qrv.
+
+From: Kuniyuki Iwashima <kuniyu@amazon.com>
+
+[ Upstream commit 8ebcc62c738f68688ee7c6fec2efe5bc6d3d7e60 ]
+
+While reading sysctl_igmp_qrv, it can be changed concurrently.
+Thus, we need to add READ_ONCE() to its readers.
+
+This test can be packed into a helper, so such changes will be in the
+follow-up series after net is merged into net-next.
+
+  qrv ?: READ_ONCE(net->ipv4.sysctl_igmp_qrv);
+
+Fixes: a9fe8e29945d ("ipv4: implement igmp_qrv sysctl to tune igmp robustness variable")
+Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/ipv4/igmp.c | 24 +++++++++++++-----------
+ 1 file changed, 13 insertions(+), 11 deletions(-)
+
+diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
+index 9f4674244aff..e07d10b2c486 100644
+--- a/net/ipv4/igmp.c
++++ b/net/ipv4/igmp.c
+@@ -827,7 +827,7 @@ static void igmp_ifc_event(struct in_device *in_dev)
+       struct net *net = dev_net(in_dev->dev);
+       if (IGMP_V1_SEEN(in_dev) || IGMP_V2_SEEN(in_dev))
+               return;
+-      WRITE_ONCE(in_dev->mr_ifc_count, in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv);
++      WRITE_ONCE(in_dev->mr_ifc_count, in_dev->mr_qrv ?: READ_ONCE(net->ipv4.sysctl_igmp_qrv));
+       igmp_ifc_start_timer(in_dev, 1);
+ }
+@@ -1009,7 +1009,7 @@ static bool igmp_heard_query(struct in_device *in_dev, struct sk_buff *skb,
+                * received value was zero, use the default or statically
+                * configured value.
+                */
+-              in_dev->mr_qrv = ih3->qrv ?: net->ipv4.sysctl_igmp_qrv;
++              in_dev->mr_qrv = ih3->qrv ?: READ_ONCE(net->ipv4.sysctl_igmp_qrv);
+               in_dev->mr_qi = IGMPV3_QQIC(ih3->qqic)*HZ ?: IGMP_QUERY_INTERVAL;
+               /* RFC3376, 8.3. Query Response Interval:
+@@ -1189,7 +1189,7 @@ static void igmpv3_add_delrec(struct in_device *in_dev, struct ip_mc_list *im,
+       pmc->interface = im->interface;
+       in_dev_hold(in_dev);
+       pmc->multiaddr = im->multiaddr;
+-      pmc->crcount = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv;
++      pmc->crcount = in_dev->mr_qrv ?: READ_ONCE(net->ipv4.sysctl_igmp_qrv);
+       pmc->sfmode = im->sfmode;
+       if (pmc->sfmode == MCAST_INCLUDE) {
+               struct ip_sf_list *psf;
+@@ -1240,9 +1240,11 @@ static void igmpv3_del_delrec(struct in_device *in_dev, struct ip_mc_list *im)
+                       swap(im->tomb, pmc->tomb);
+                       swap(im->sources, pmc->sources);
+                       for (psf = im->sources; psf; psf = psf->sf_next)
+-                              psf->sf_crcount = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv;
++                              psf->sf_crcount = in_dev->mr_qrv ?:
++                                      READ_ONCE(net->ipv4.sysctl_igmp_qrv);
+               } else {
+-                      im->crcount = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv;
++                      im->crcount = in_dev->mr_qrv ?:
++                              READ_ONCE(net->ipv4.sysctl_igmp_qrv);
+               }
+               in_dev_put(pmc->interface);
+               kfree_pmc(pmc);
+@@ -1349,7 +1351,7 @@ static void igmp_group_added(struct ip_mc_list *im)
+       if (in_dev->dead)
+               return;
+-      im->unsolicit_count = net->ipv4.sysctl_igmp_qrv;
++      im->unsolicit_count = READ_ONCE(net->ipv4.sysctl_igmp_qrv);
+       if (IGMP_V1_SEEN(in_dev) || IGMP_V2_SEEN(in_dev)) {
+               spin_lock_bh(&im->lock);
+               igmp_start_timer(im, IGMP_INITIAL_REPORT_DELAY);
+@@ -1363,7 +1365,7 @@ static void igmp_group_added(struct ip_mc_list *im)
+        * IN() to IN(A).
+        */
+       if (im->sfmode == MCAST_EXCLUDE)
+-              im->crcount = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv;
++              im->crcount = in_dev->mr_qrv ?: READ_ONCE(net->ipv4.sysctl_igmp_qrv);
+       igmp_ifc_event(in_dev);
+ #endif
+@@ -1754,7 +1756,7 @@ static void ip_mc_reset(struct in_device *in_dev)
+       in_dev->mr_qi = IGMP_QUERY_INTERVAL;
+       in_dev->mr_qri = IGMP_QUERY_RESPONSE_INTERVAL;
+-      in_dev->mr_qrv = net->ipv4.sysctl_igmp_qrv;
++      in_dev->mr_qrv = READ_ONCE(net->ipv4.sysctl_igmp_qrv);
+ }
+ #else
+ static void ip_mc_reset(struct in_device *in_dev)
+@@ -1888,7 +1890,7 @@ static int ip_mc_del1_src(struct ip_mc_list *pmc, int sfmode,
+ #ifdef CONFIG_IP_MULTICAST
+               if (psf->sf_oldin &&
+                   !IGMP_V1_SEEN(in_dev) && !IGMP_V2_SEEN(in_dev)) {
+-                      psf->sf_crcount = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv;
++                      psf->sf_crcount = in_dev->mr_qrv ?: READ_ONCE(net->ipv4.sysctl_igmp_qrv);
+                       psf->sf_next = pmc->tomb;
+                       pmc->tomb = psf;
+                       rv = 1;
+@@ -1952,7 +1954,7 @@ static int ip_mc_del_src(struct in_device *in_dev, __be32 *pmca, int sfmode,
+               /* filter mode change */
+               pmc->sfmode = MCAST_INCLUDE;
+ #ifdef CONFIG_IP_MULTICAST
+-              pmc->crcount = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv;
++              pmc->crcount = in_dev->mr_qrv ?: READ_ONCE(net->ipv4.sysctl_igmp_qrv);
+               WRITE_ONCE(in_dev->mr_ifc_count, pmc->crcount);
+               for (psf = pmc->sources; psf; psf = psf->sf_next)
+                       psf->sf_crcount = 0;
+@@ -2131,7 +2133,7 @@ static int ip_mc_add_src(struct in_device *in_dev, __be32 *pmca, int sfmode,
+ #ifdef CONFIG_IP_MULTICAST
+               /* else no filters; keep old mode for reports */
+-              pmc->crcount = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv;
++              pmc->crcount = in_dev->mr_qrv ?: READ_ONCE(net->ipv4.sysctl_igmp_qrv);
+               WRITE_ONCE(in_dev->mr_ifc_count, pmc->crcount);
+               for (psf = pmc->sources; psf; psf = psf->sf_next)
+                       psf->sf_crcount = 0;
+-- 
+2.35.1
+
diff --git a/queue-5.15/ipv4-fix-data-races-around-sysctl_fib_notify_on_flag.patch b/queue-5.15/ipv4-fix-data-races-around-sysctl_fib_notify_on_flag.patch
new file mode 100644 (file)
index 0000000..4d8aa4c
--- /dev/null
@@ -0,0 +1,54 @@
+From 9aa6f4cced2c928ce2f202bef7c6a419240f5e60 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 22 Jul 2022 11:22:05 -0700
+Subject: ipv4: Fix data-races around sysctl_fib_notify_on_flag_change.
+
+From: Kuniyuki Iwashima <kuniyu@amazon.com>
+
+[ Upstream commit 96b9bd8c6d125490f9adfb57d387ef81a55a103e ]
+
+While reading sysctl_fib_notify_on_flag_change, it can be changed
+concurrently.  Thus, we need to add READ_ONCE() to its readers.
+
+Fixes: 680aea08e78c ("net: ipv4: Emit notification when fib hardware flags are changed")
+Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/ipv4/fib_trie.c | 7 +++++--
+ 1 file changed, 5 insertions(+), 2 deletions(-)
+
+diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
+index a9cd9c2bd84e..19c6e7b93d3d 100644
+--- a/net/ipv4/fib_trie.c
++++ b/net/ipv4/fib_trie.c
+@@ -1037,6 +1037,7 @@ fib_find_matching_alias(struct net *net, const struct fib_rt_info *fri)
+ void fib_alias_hw_flags_set(struct net *net, const struct fib_rt_info *fri)
+ {
++      u8 fib_notify_on_flag_change;
+       struct fib_alias *fa_match;
+       struct sk_buff *skb;
+       int err;
+@@ -1058,14 +1059,16 @@ void fib_alias_hw_flags_set(struct net *net, const struct fib_rt_info *fri)
+       WRITE_ONCE(fa_match->offload, fri->offload);
+       WRITE_ONCE(fa_match->trap, fri->trap);
++      fib_notify_on_flag_change = READ_ONCE(net->ipv4.sysctl_fib_notify_on_flag_change);
++
+       /* 2 means send notifications only if offload_failed was changed. */
+-      if (net->ipv4.sysctl_fib_notify_on_flag_change == 2 &&
++      if (fib_notify_on_flag_change == 2 &&
+           READ_ONCE(fa_match->offload_failed) == fri->offload_failed)
+               goto out;
+       WRITE_ONCE(fa_match->offload_failed, fri->offload_failed);
+-      if (!net->ipv4.sysctl_fib_notify_on_flag_change)
++      if (!fib_notify_on_flag_change)
+               goto out;
+       skb = nlmsg_new(fib_nlmsg_size(fa_match->fa_info), GFP_ATOMIC);
+-- 
+2.35.1
+
diff --git a/queue-5.15/ipv6-addrconf-fix-a-null-ptr-deref-bug-for-ip6_ptr.patch-17245 b/queue-5.15/ipv6-addrconf-fix-a-null-ptr-deref-bug-for-ip6_ptr.patch-17245
new file mode 100644 (file)
index 0000000..095ff72
--- /dev/null
@@ -0,0 +1,102 @@
+From 71714997e0c6d4b2bbdcaae339b2498d17d1c689 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 28 Jul 2022 09:33:07 +0800
+Subject: ipv6/addrconf: fix a null-ptr-deref bug for ip6_ptr
+
+From: Ziyang Xuan <william.xuanziyang@huawei.com>
+
+[ Upstream commit 85f0173df35e5462d89947135a6a5599c6c3ef6f ]
+
+Changing a net device's MTU to a value smaller than IPV6_MIN_MTU, or
+unregistering the device, while a route is being matched may trigger
+a null-ptr-deref bug on ip6_ptr, as shown below.
+
+=========================================================
+BUG: KASAN: null-ptr-deref in find_match.part.0+0x70/0x134
+Read of size 4 at addr 0000000000000308 by task ping6/263
+
+CPU: 2 PID: 263 Comm: ping6 Not tainted 5.19.0-rc7+ #14
+Call trace:
+ dump_backtrace+0x1a8/0x230
+ show_stack+0x20/0x70
+ dump_stack_lvl+0x68/0x84
+ print_report+0xc4/0x120
+ kasan_report+0x84/0x120
+ __asan_load4+0x94/0xd0
+ find_match.part.0+0x70/0x134
+ __find_rr_leaf+0x408/0x470
+ fib6_table_lookup+0x264/0x540
+ ip6_pol_route+0xf4/0x260
+ ip6_pol_route_output+0x58/0x70
+ fib6_rule_lookup+0x1a8/0x330
+ ip6_route_output_flags_noref+0xd8/0x1a0
+ ip6_route_output_flags+0x58/0x160
+ ip6_dst_lookup_tail+0x5b4/0x85c
+ ip6_dst_lookup_flow+0x98/0x120
+ rawv6_sendmsg+0x49c/0xc70
+ inet_sendmsg+0x68/0x94
+
+Reproducer as following:
+Firstly, prepare conditions:
+$ip netns add ns1
+$ip netns add ns2
+$ip link add veth1 type veth peer name veth2
+$ip link set veth1 netns ns1
+$ip link set veth2 netns ns2
+$ip netns exec ns1 ip -6 addr add 2001:0db8:0:f101::1/64 dev veth1
+$ip netns exec ns2 ip -6 addr add 2001:0db8:0:f101::2/64 dev veth2
+$ip netns exec ns1 ifconfig veth1 up
+$ip netns exec ns2 ifconfig veth2 up
+$ip netns exec ns1 ip -6 route add 2000::/64 dev veth1 metric 1
+$ip netns exec ns2 ip -6 route add 2001::/64 dev veth2 metric 1
+
+Secondly, execute the following two commands in two ssh windows
+respectively:
+$ip netns exec ns1 sh
+$while true; do ip -6 addr add 2001:0db8:0:f101::1/64 dev veth1; ip -6 route add 2000::/64 dev veth1 metric 1; ping6 2000::2; done
+
+$ip netns exec ns1 sh
+$while true; do ip link set veth1 mtu 1000; ip link set veth1 mtu 1500; sleep 5; done
+
+This happens because addrconf_ifdown() first sets ip6_ptr to NULL, and
+ip6_ignore_linkdown() then accesses ip6_ptr directly without a NULL check.
+
+       cpu0                    cpu1
+fib6_table_lookup
+__find_rr_leaf
+                       addrconf_notify [ NETDEV_CHANGEMTU ]
+                       addrconf_ifdown
+                       RCU_INIT_POINTER(dev->ip6_ptr, NULL)
+find_match
+ip6_ignore_linkdown
+
+So we can add NULL check for ip6_ptr before using in ip6_ignore_linkdown() to
+fix the null-ptr-deref bug.
+
+Fixes: dcd1f572954f ("net/ipv6: Remove fib6_idev")
+Signed-off-by: Ziyang Xuan <william.xuanziyang@huawei.com>
+Reviewed-by: David Ahern <dsahern@kernel.org>
+Link: https://lore.kernel.org/r/20220728013307.656257-1-william.xuanziyang@huawei.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/net/addrconf.h | 3 +++
+ 1 file changed, 3 insertions(+)
+
+diff --git a/include/net/addrconf.h b/include/net/addrconf.h
+index 59940e230b78..53627afab104 100644
+--- a/include/net/addrconf.h
++++ b/include/net/addrconf.h
+@@ -403,6 +403,9 @@ static inline bool ip6_ignore_linkdown(const struct net_device *dev)
+ {
+       const struct inet6_dev *idev = __in6_dev_get(dev);
++      if (unlikely(!idev))
++              return true;
++
+       return !!idev->cnf.ignore_routes_with_linkdown;
+ }
+-- 
+2.35.1
+
diff --git a/queue-5.15/macsec-always-read-macsec_sa_attr_pn-as-a-u64.patch b/queue-5.15/macsec-always-read-macsec_sa_attr_pn-as-a-u64.patch
new file mode 100644 (file)
index 0000000..d2805d8
--- /dev/null
@@ -0,0 +1,62 @@
+From 081e5abb4ae9e32d3d465ab28d17d9ae7a168959 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 22 Jul 2022 11:16:30 +0200
+Subject: macsec: always read MACSEC_SA_ATTR_PN as a u64
+
+From: Sabrina Dubroca <sd@queasysnail.net>
+
+[ Upstream commit c630d1fe6219769049c87d1a6a0e9a6de55328a1 ]
+
+Currently, MACSEC_SA_ATTR_PN is handled inconsistently, sometimes as a
+u32, sometimes forced into a u64 without checking the actual length of
+the attribute. Instead, we can use nla_get_u64 everywhere, which will
+read up to 64 bits into a u64, capped by the actual length of the
+attribute coming from userspace.
+
+This fixes several issues:
+ - the check in validate_add_rxsa doesn't work with 32-bit attributes
+ - the checks in validate_add_txsa and validate_upd_sa incorrectly
+   reject X << 32 (with X != 0)
+
+Fixes: 48ef50fa866a ("macsec: Netlink support of XPN cipher suites (IEEE 802.1AEbw)")
+Signed-off-by: Sabrina Dubroca <sd@queasysnail.net>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/macsec.c | 6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c
+index 1f2eb576533c..3e74dcc1f875 100644
+--- a/drivers/net/macsec.c
++++ b/drivers/net/macsec.c
+@@ -1696,7 +1696,7 @@ static bool validate_add_rxsa(struct nlattr **attrs)
+               return false;
+       if (attrs[MACSEC_SA_ATTR_PN] &&
+-          *(u64 *)nla_data(attrs[MACSEC_SA_ATTR_PN]) == 0)
++          nla_get_u64(attrs[MACSEC_SA_ATTR_PN]) == 0)
+               return false;
+       if (attrs[MACSEC_SA_ATTR_ACTIVE]) {
+@@ -1939,7 +1939,7 @@ static bool validate_add_txsa(struct nlattr **attrs)
+       if (nla_get_u8(attrs[MACSEC_SA_ATTR_AN]) >= MACSEC_NUM_AN)
+               return false;
+-      if (nla_get_u32(attrs[MACSEC_SA_ATTR_PN]) == 0)
++      if (nla_get_u64(attrs[MACSEC_SA_ATTR_PN]) == 0)
+               return false;
+       if (attrs[MACSEC_SA_ATTR_ACTIVE]) {
+@@ -2293,7 +2293,7 @@ static bool validate_upd_sa(struct nlattr **attrs)
+       if (nla_get_u8(attrs[MACSEC_SA_ATTR_AN]) >= MACSEC_NUM_AN)
+               return false;
+-      if (attrs[MACSEC_SA_ATTR_PN] && nla_get_u32(attrs[MACSEC_SA_ATTR_PN]) == 0)
++      if (attrs[MACSEC_SA_ATTR_PN] && nla_get_u64(attrs[MACSEC_SA_ATTR_PN]) == 0)
+               return false;
+       if (attrs[MACSEC_SA_ATTR_ACTIVE]) {
+-- 
+2.35.1
+
diff --git a/queue-5.15/macsec-fix-error-message-in-macsec_add_rxsa-and-_txs.patch b/queue-5.15/macsec-fix-error-message-in-macsec_add_rxsa-and-_txs.patch
new file mode 100644 (file)
index 0000000..b28d5c8
--- /dev/null
@@ -0,0 +1,44 @@
+From 8539634486e6ae2111a44ae0f8fc36b4a1f84782 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 22 Jul 2022 11:16:28 +0200
+Subject: macsec: fix error message in macsec_add_rxsa and _txsa
+
+From: Sabrina Dubroca <sd@queasysnail.net>
+
+[ Upstream commit 3240eac4ff20e51b87600dbd586ed814daf313db ]
+
+The expected length is MACSEC_SALT_LEN, not MACSEC_SA_ATTR_SALT.
+
+Fixes: 48ef50fa866a ("macsec: Netlink support of XPN cipher suites (IEEE 802.1AEbw)")
+Signed-off-by: Sabrina Dubroca <sd@queasysnail.net>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/macsec.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c
+index f72d4380374d..9ede0d7cd0b5 100644
+--- a/drivers/net/macsec.c
++++ b/drivers/net/macsec.c
+@@ -1768,7 +1768,7 @@ static int macsec_add_rxsa(struct sk_buff *skb, struct genl_info *info)
+               if (nla_len(tb_sa[MACSEC_SA_ATTR_SALT]) != MACSEC_SALT_LEN) {
+                       pr_notice("macsec: nl: add_rxsa: bad salt length: %d != %d\n",
+                                 nla_len(tb_sa[MACSEC_SA_ATTR_SALT]),
+-                                MACSEC_SA_ATTR_SALT);
++                                MACSEC_SALT_LEN);
+                       rtnl_unlock();
+                       return -EINVAL;
+               }
+@@ -2010,7 +2010,7 @@ static int macsec_add_txsa(struct sk_buff *skb, struct genl_info *info)
+               if (nla_len(tb_sa[MACSEC_SA_ATTR_SALT]) != MACSEC_SALT_LEN) {
+                       pr_notice("macsec: nl: add_txsa: bad salt length: %d != %d\n",
+                                 nla_len(tb_sa[MACSEC_SA_ATTR_SALT]),
+-                                MACSEC_SA_ATTR_SALT);
++                                MACSEC_SALT_LEN);
+                       rtnl_unlock();
+                       return -EINVAL;
+               }
+-- 
+2.35.1
+
diff --git a/queue-5.15/macsec-fix-null-deref-in-macsec_add_rxsa.patch b/queue-5.15/macsec-fix-null-deref-in-macsec_add_rxsa.patch
new file mode 100644 (file)
index 0000000..4d30b84
--- /dev/null
@@ -0,0 +1,45 @@
+From 414439ad3b6120d214261de3ba8fdcd99b3ee897 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 22 Jul 2022 11:16:27 +0200
+Subject: macsec: fix NULL deref in macsec_add_rxsa
+
+From: Sabrina Dubroca <sd@queasysnail.net>
+
+[ Upstream commit f46040eeaf2e523a4096199fd93a11e794818009 ]
+
+Commit 48ef50fa866a added a test on tb_sa[MACSEC_SA_ATTR_PN], but
+nothing guarantees that it's not NULL at this point. The same code was
+added to macsec_add_txsa, but there it's not a problem because
+validate_add_txsa checks that the MACSEC_SA_ATTR_PN attribute is
+present.
+
+Note: it's not possible to reproduce with iproute, because iproute
+doesn't allow creating an SA without specifying the PN.
+
+Fixes: 48ef50fa866a ("macsec: Netlink support of XPN cipher suites (IEEE 802.1AEbw)")
+Link: https://bugzilla.kernel.org/show_bug.cgi?id=208315
+Reported-by: Frantisek Sumsal <fsumsal@redhat.com>
+Signed-off-by: Sabrina Dubroca <sd@queasysnail.net>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/macsec.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c
+index e53b40359fd1..f72d4380374d 100644
+--- a/drivers/net/macsec.c
++++ b/drivers/net/macsec.c
+@@ -1751,7 +1751,8 @@ static int macsec_add_rxsa(struct sk_buff *skb, struct genl_info *info)
+       }
+       pn_len = secy->xpn ? MACSEC_XPN_PN_LEN : MACSEC_DEFAULT_PN_LEN;
+-      if (nla_len(tb_sa[MACSEC_SA_ATTR_PN]) != pn_len) {
++      if (tb_sa[MACSEC_SA_ATTR_PN] &&
++          nla_len(tb_sa[MACSEC_SA_ATTR_PN]) != pn_len) {
+               pr_notice("macsec: nl: add_rxsa: bad pn length: %d != %d\n",
+                         nla_len(tb_sa[MACSEC_SA_ATTR_PN]), pn_len);
+               rtnl_unlock();
+-- 
+2.35.1
+
diff --git a/queue-5.15/macsec-limit-replay-window-size-with-xpn.patch b/queue-5.15/macsec-limit-replay-window-size-with-xpn.patch
new file mode 100644 (file)
index 0000000..f758caf
--- /dev/null
@@ -0,0 +1,81 @@
+From 25efaa6a023c1491338eb330272d4839419e2058 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 22 Jul 2022 11:16:29 +0200
+Subject: macsec: limit replay window size with XPN
+
+From: Sabrina Dubroca <sd@queasysnail.net>
+
+[ Upstream commit b07a0e2044057f201d694ab474f5c42a02b6465b ]
+
+IEEE 802.1AEbw-2013 (section 10.7.8) specifies that the maximum value
+of the replay window is 2^30-1, to help with recovery of the upper
+bits of the PN.
+
+To avoid leaving the existing macsec device in an inconsistent state
+if this test fails during changelink, reuse the cleanup mechanism
+introduced for HW offload. This wasn't needed until now because
+macsec_changelink_common could not fail during changelink, as
+modifying the cipher suite was not allowed.
+
+Finally, this must happen after handling IFLA_MACSEC_CIPHER_SUITE so
+that secy->xpn is set.
+
+Fixes: 48ef50fa866a ("macsec: Netlink support of XPN cipher suites (IEEE 802.1AEbw)")
+Signed-off-by: Sabrina Dubroca <sd@queasysnail.net>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/macsec.c | 16 ++++++++++++----
+ 1 file changed, 12 insertions(+), 4 deletions(-)
+
+diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c
+index 9ede0d7cd0b5..1f2eb576533c 100644
+--- a/drivers/net/macsec.c
++++ b/drivers/net/macsec.c
+@@ -241,6 +241,7 @@ static struct macsec_cb *macsec_skb_cb(struct sk_buff *skb)
+ #define DEFAULT_SEND_SCI true
+ #define DEFAULT_ENCRYPT false
+ #define DEFAULT_ENCODING_SA 0
++#define MACSEC_XPN_MAX_REPLAY_WINDOW (((1 << 30) - 1))
+ static bool send_sci(const struct macsec_secy *secy)
+ {
+@@ -3739,9 +3740,6 @@ static int macsec_changelink_common(struct net_device *dev,
+               secy->operational = tx_sa && tx_sa->active;
+       }
+-      if (data[IFLA_MACSEC_WINDOW])
+-              secy->replay_window = nla_get_u32(data[IFLA_MACSEC_WINDOW]);
+-
+       if (data[IFLA_MACSEC_ENCRYPT])
+               tx_sc->encrypt = !!nla_get_u8(data[IFLA_MACSEC_ENCRYPT]);
+@@ -3787,6 +3785,16 @@ static int macsec_changelink_common(struct net_device *dev,
+               }
+       }
++      if (data[IFLA_MACSEC_WINDOW]) {
++              secy->replay_window = nla_get_u32(data[IFLA_MACSEC_WINDOW]);
++
++              /* IEEE 802.1AEbw-2013 10.7.8 - maximum replay window
++               * for XPN cipher suites */
++              if (secy->xpn &&
++                  secy->replay_window > MACSEC_XPN_MAX_REPLAY_WINDOW)
++                      return -EINVAL;
++      }
++
+       return 0;
+ }
+@@ -3816,7 +3824,7 @@ static int macsec_changelink(struct net_device *dev, struct nlattr *tb[],
+       ret = macsec_changelink_common(dev, data);
+       if (ret)
+-              return ret;
++              goto cleanup;
+       /* If h/w offloading is available, propagate to the device */
+       if (macsec_is_offloaded(macsec)) {
+-- 
+2.35.1
+
diff --git a/queue-5.15/net-fix-data-races-around-sysctl_-rw-mem-_offset.patch b/queue-5.15/net-fix-data-races-around-sysctl_-rw-mem-_offset.patch
new file mode 100644 (file)
index 0000000..a724d08
--- /dev/null
@@ -0,0 +1,207 @@
+From b5fa6dd358db1cb266dc248d241e66bc021864e0 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 22 Jul 2022 11:22:00 -0700
+Subject: net: Fix data-races around sysctl_[rw]mem(_offset)?.
+
+From: Kuniyuki Iwashima <kuniyu@amazon.com>
+
+[ Upstream commit 02739545951ad4c1215160db7fbf9b7a918d3c0b ]
+
+While reading these sysctl variables, they can be changed concurrently.
+Thus, we need to add READ_ONCE() to their readers.
+
+  - .sysctl_rmem
+  - .sysctl_wmem
+  - .sysctl_rmem_offset
+  - .sysctl_wmem_offset
+  - sysctl_tcp_rmem[1, 2]
+  - sysctl_tcp_wmem[1, 2]
+  - sysctl_decnet_rmem[1]
+  - sysctl_decnet_wmem[1]
+  - sysctl_tipc_rmem[1]
+
+Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
+Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/net/sock.h     |  8 ++++----
+ net/decnet/af_decnet.c |  4 ++--
+ net/ipv4/tcp.c         |  6 +++---
+ net/ipv4/tcp_input.c   | 13 +++++++------
+ net/ipv4/tcp_output.c  |  2 +-
+ net/mptcp/protocol.c   |  6 +++---
+ net/tipc/socket.c      |  2 +-
+ 7 files changed, 21 insertions(+), 20 deletions(-)
+
+diff --git a/include/net/sock.h b/include/net/sock.h
+index 96f51d4b1649..819c53965ef3 100644
+--- a/include/net/sock.h
++++ b/include/net/sock.h
+@@ -2765,18 +2765,18 @@ static inline int sk_get_wmem0(const struct sock *sk, const struct proto *proto)
+ {
+       /* Does this proto have per netns sysctl_wmem ? */
+       if (proto->sysctl_wmem_offset)
+-              return *(int *)((void *)sock_net(sk) + proto->sysctl_wmem_offset);
++              return READ_ONCE(*(int *)((void *)sock_net(sk) + proto->sysctl_wmem_offset));
+-      return *proto->sysctl_wmem;
++      return READ_ONCE(*proto->sysctl_wmem);
+ }
+ static inline int sk_get_rmem0(const struct sock *sk, const struct proto *proto)
+ {
+       /* Does this proto have per netns sysctl_rmem ? */
+       if (proto->sysctl_rmem_offset)
+-              return *(int *)((void *)sock_net(sk) + proto->sysctl_rmem_offset);
++              return READ_ONCE(*(int *)((void *)sock_net(sk) + proto->sysctl_rmem_offset));
+-      return *proto->sysctl_rmem;
++      return READ_ONCE(*proto->sysctl_rmem);
+ }
+ /* Default TCP Small queue budget is ~1 ms of data (1sec >> 10)
+diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c
+index dc92a67baea3..7d542eb46172 100644
+--- a/net/decnet/af_decnet.c
++++ b/net/decnet/af_decnet.c
+@@ -480,8 +480,8 @@ static struct sock *dn_alloc_sock(struct net *net, struct socket *sock, gfp_t gf
+       sk->sk_family      = PF_DECnet;
+       sk->sk_protocol    = 0;
+       sk->sk_allocation  = gfp;
+-      sk->sk_sndbuf      = sysctl_decnet_wmem[1];
+-      sk->sk_rcvbuf      = sysctl_decnet_rmem[1];
++      sk->sk_sndbuf      = READ_ONCE(sysctl_decnet_wmem[1]);
++      sk->sk_rcvbuf      = READ_ONCE(sysctl_decnet_rmem[1]);
+       /* Initialization of DECnet Session Control Port                */
+       scp = DN_SK(sk);
+diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
+index 7ba9059c263a..2097eeaf30a6 100644
+--- a/net/ipv4/tcp.c
++++ b/net/ipv4/tcp.c
+@@ -458,8 +458,8 @@ void tcp_init_sock(struct sock *sk)
+       icsk->icsk_sync_mss = tcp_sync_mss;
+-      WRITE_ONCE(sk->sk_sndbuf, sock_net(sk)->ipv4.sysctl_tcp_wmem[1]);
+-      WRITE_ONCE(sk->sk_rcvbuf, sock_net(sk)->ipv4.sysctl_tcp_rmem[1]);
++      WRITE_ONCE(sk->sk_sndbuf, READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_wmem[1]));
++      WRITE_ONCE(sk->sk_rcvbuf, READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_rmem[1]));
+       sk_sockets_allocated_inc(sk);
+       sk->sk_route_forced_caps = NETIF_F_GSO;
+@@ -1722,7 +1722,7 @@ int tcp_set_rcvlowat(struct sock *sk, int val)
+       if (sk->sk_userlocks & SOCK_RCVBUF_LOCK)
+               cap = sk->sk_rcvbuf >> 1;
+       else
+-              cap = sock_net(sk)->ipv4.sysctl_tcp_rmem[2] >> 1;
++              cap = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_rmem[2]) >> 1;
+       val = min(val, cap);
+       WRITE_ONCE(sk->sk_rcvlowat, val ? : 1);
+diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
+index 566745f527fe..e007bdc20e82 100644
+--- a/net/ipv4/tcp_input.c
++++ b/net/ipv4/tcp_input.c
+@@ -426,7 +426,7 @@ static void tcp_sndbuf_expand(struct sock *sk)
+       if (sk->sk_sndbuf < sndmem)
+               WRITE_ONCE(sk->sk_sndbuf,
+-                         min(sndmem, sock_net(sk)->ipv4.sysctl_tcp_wmem[2]));
++                         min(sndmem, READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_wmem[2])));
+ }
+ /* 2. Tuning advertised window (window_clamp, rcv_ssthresh)
+@@ -461,7 +461,7 @@ static int __tcp_grow_window(const struct sock *sk, const struct sk_buff *skb,
+       struct tcp_sock *tp = tcp_sk(sk);
+       /* Optimize this! */
+       int truesize = tcp_win_from_space(sk, skbtruesize) >> 1;
+-      int window = tcp_win_from_space(sk, sock_net(sk)->ipv4.sysctl_tcp_rmem[2]) >> 1;
++      int window = tcp_win_from_space(sk, READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_rmem[2])) >> 1;
+       while (tp->rcv_ssthresh <= window) {
+               if (truesize <= skb->len)
+@@ -566,16 +566,17 @@ static void tcp_clamp_window(struct sock *sk)
+       struct tcp_sock *tp = tcp_sk(sk);
+       struct inet_connection_sock *icsk = inet_csk(sk);
+       struct net *net = sock_net(sk);
++      int rmem2;
+       icsk->icsk_ack.quick = 0;
++      rmem2 = READ_ONCE(net->ipv4.sysctl_tcp_rmem[2]);
+-      if (sk->sk_rcvbuf < net->ipv4.sysctl_tcp_rmem[2] &&
++      if (sk->sk_rcvbuf < rmem2 &&
+           !(sk->sk_userlocks & SOCK_RCVBUF_LOCK) &&
+           !tcp_under_memory_pressure(sk) &&
+           sk_memory_allocated(sk) < sk_prot_mem_limits(sk, 0)) {
+               WRITE_ONCE(sk->sk_rcvbuf,
+-                         min(atomic_read(&sk->sk_rmem_alloc),
+-                             net->ipv4.sysctl_tcp_rmem[2]));
++                         min(atomic_read(&sk->sk_rmem_alloc), rmem2));
+       }
+       if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf)
+               tp->rcv_ssthresh = min(tp->window_clamp, 2U * tp->advmss);
+@@ -737,7 +738,7 @@ void tcp_rcv_space_adjust(struct sock *sk)
+               do_div(rcvwin, tp->advmss);
+               rcvbuf = min_t(u64, rcvwin * rcvmem,
+-                             sock_net(sk)->ipv4.sysctl_tcp_rmem[2]);
++                             READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_rmem[2]));
+               if (rcvbuf > sk->sk_rcvbuf) {
+                       WRITE_ONCE(sk->sk_rcvbuf, rcvbuf);
+diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
+index 51f31311fdb6..9c9a0f7a3dee 100644
+--- a/net/ipv4/tcp_output.c
++++ b/net/ipv4/tcp_output.c
+@@ -238,7 +238,7 @@ void tcp_select_initial_window(const struct sock *sk, int __space, __u32 mss,
+       *rcv_wscale = 0;
+       if (wscale_ok) {
+               /* Set window scaling on max possible window */
+-              space = max_t(u32, space, sock_net(sk)->ipv4.sysctl_tcp_rmem[2]);
++              space = max_t(u32, space, READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_rmem[2]));
+               space = max_t(u32, space, sysctl_rmem_max);
+               space = min_t(u32, space, *window_clamp);
+               *rcv_wscale = clamp_t(int, ilog2(space) - 15,
+diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
+index 01ede89e3c46..7f96e0c42a09 100644
+--- a/net/mptcp/protocol.c
++++ b/net/mptcp/protocol.c
+@@ -1899,7 +1899,7 @@ static void mptcp_rcv_space_adjust(struct mptcp_sock *msk, int copied)
+               do_div(rcvwin, advmss);
+               rcvbuf = min_t(u64, rcvwin * rcvmem,
+-                             sock_net(sk)->ipv4.sysctl_tcp_rmem[2]);
++                             READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_rmem[2]));
+               if (rcvbuf > sk->sk_rcvbuf) {
+                       u32 window_clamp;
+@@ -2532,8 +2532,8 @@ static int mptcp_init_sock(struct sock *sk)
+       icsk->icsk_ca_ops = NULL;
+       sk_sockets_allocated_inc(sk);
+-      sk->sk_rcvbuf = sock_net(sk)->ipv4.sysctl_tcp_rmem[1];
+-      sk->sk_sndbuf = sock_net(sk)->ipv4.sysctl_tcp_wmem[1];
++      sk->sk_rcvbuf = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_rmem[1]);
++      sk->sk_sndbuf = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_wmem[1]);
+       return 0;
+ }
+diff --git a/net/tipc/socket.c b/net/tipc/socket.c
+index 43509c7e90fc..f1c3b8eb4b3d 100644
+--- a/net/tipc/socket.c
++++ b/net/tipc/socket.c
+@@ -517,7 +517,7 @@ static int tipc_sk_create(struct net *net, struct socket *sock,
+       timer_setup(&sk->sk_timer, tipc_sk_timeout, 0);
+       sk->sk_shutdown = 0;
+       sk->sk_backlog_rcv = tipc_sk_backlog_rcv;
+-      sk->sk_rcvbuf = sysctl_tipc_rmem[1];
++      sk->sk_rcvbuf = READ_ONCE(sysctl_tipc_rmem[1]);
+       sk->sk_data_ready = tipc_data_ready;
+       sk->sk_write_space = tipc_write_space;
+       sk->sk_destruct = tipc_sock_destruct;
+-- 
+2.35.1
+
diff --git a/queue-5.15/net-macsec-fix-potential-resource-leak-in-macsec_add.patch b/queue-5.15/net-macsec-fix-potential-resource-leak-in-macsec_add.patch
new file mode 100644 (file)
index 0000000..5a2cae4
--- /dev/null
@@ -0,0 +1,54 @@
+From f1ecee4a3ddd64f73a30dc33bf38ec8ea0ecbd2a Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 22 Jul 2022 17:29:02 +0800
+Subject: net: macsec: fix potential resource leak in macsec_add_rxsa() and
+ macsec_add_txsa()
+
+From: Jianglei Nie <niejianglei2021@163.com>
+
+[ Upstream commit c7b205fbbf3cffa374721bb7623f7aa8c46074f1 ]
+
+init_rx_sa() allocates resources for rx_sa->stats and
+rx_sa->key.tfm with alloc_percpu() and macsec_alloc_tfm(). When an error
+occurs after init_rx_sa() is called in macsec_add_rxsa(), the function
+releases rx_sa with kfree() without releasing rx_sa->stats and
+rx_sa->key.tfm, which leads to a resource leak.
+
+We should call macsec_rxsa_put() instead of kfree() to decrease the ref
+count of rx_sa and release the relevant resource if the refcount is 0.
+The same bug exists in macsec_add_txsa() for tx_sa as well. This patch
+fixes the above two bugs.
+
+Fixes: 3cf3227a21d1 ("net: macsec: hardware offloading infrastructure")
+Signed-off-by: Jianglei Nie <niejianglei2021@163.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/macsec.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c
+index 3e74dcc1f875..354890948f8a 100644
+--- a/drivers/net/macsec.c
++++ b/drivers/net/macsec.c
+@@ -1842,7 +1842,7 @@ static int macsec_add_rxsa(struct sk_buff *skb, struct genl_info *info)
+       return 0;
+ cleanup:
+-      kfree(rx_sa);
++      macsec_rxsa_put(rx_sa);
+       rtnl_unlock();
+       return err;
+ }
+@@ -2085,7 +2085,7 @@ static int macsec_add_txsa(struct sk_buff *skb, struct genl_info *info)
+ cleanup:
+       secy->operational = was_operational;
+-      kfree(tx_sa);
++      macsec_txsa_put(tx_sa);
+       rtnl_unlock();
+       return err;
+ }
+-- 
+2.35.1
+
diff --git a/queue-5.15/net-mld-fix-reference-count-leak-in-mld_-query-repor.patch b/queue-5.15/net-mld-fix-reference-count-leak-in-mld_-query-repor.patch
new file mode 100644 (file)
index 0000000..69e7475
--- /dev/null
@@ -0,0 +1,122 @@
+From 7a044e3d41151e64b7892601fd0f0c4a7477df81 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 22 Jul 2022 17:06:35 +0000
+Subject: net: mld: fix reference count leak in mld_{query | report}_work()
+
+From: Taehee Yoo <ap420073@gmail.com>
+
+[ Upstream commit 3e7d18b9dca388940a19cae30bfc1f76dccd8c28 ]
+
+mld_{query | report}_work() processes queued events.
+If there are too many events in the queue, it re-queues the work
+and then returns without calling in6_dev_put().
+But if re-queuing fails, it should call in6_dev_put(), and it doesn't.
+So a reference count leak occurs.
+
+THREAD0                                THREAD1
+mld_report_work()
+                               spin_lock_bh()
+                               if (!mod_delayed_work())
+                                       in6_dev_hold();
+                               spin_unlock_bh()
+       spin_lock_bh()
+       schedule_delayed_work()
+       spin_unlock_bh()
+
+Script to reproduce (by Hangbin Liu):
+   ip netns add ns1
+   ip netns add ns2
+   ip netns exec ns1 sysctl -w net.ipv6.conf.all.force_mld_version=1
+   ip netns exec ns2 sysctl -w net.ipv6.conf.all.force_mld_version=1
+
+   ip -n ns1 link add veth0 type veth peer name veth0 netns ns2
+   ip -n ns1 link set veth0 up
+   ip -n ns2 link set veth0 up
+
+   for i in `seq 50`; do
+           for j in `seq 100`; do
+                   ip -n ns1 addr add 2021:${i}::${j}/64 dev veth0
+                   ip -n ns2 addr add 2022:${i}::${j}/64 dev veth0
+           done
+   done
+   modprobe -r veth
+   ip -a netns del
+
+splat looks like:
+ unregister_netdevice: waiting for veth0 to become free. Usage count = 2
+ leaked reference.
+  ipv6_add_dev+0x324/0xec0
+  addrconf_notify+0x481/0xd10
+  raw_notifier_call_chain+0xe3/0x120
+  call_netdevice_notifiers+0x106/0x160
+  register_netdevice+0x114c/0x16b0
+  veth_newlink+0x48b/0xa50 [veth]
+  rtnl_newlink+0x11a2/0x1a40
+  rtnetlink_rcv_msg+0x63f/0xc00
+  netlink_rcv_skb+0x1df/0x3e0
+  netlink_unicast+0x5de/0x850
+  netlink_sendmsg+0x6c9/0xa90
+  ____sys_sendmsg+0x76a/0x780
+  __sys_sendmsg+0x27c/0x340
+  do_syscall_64+0x43/0x90
+  entry_SYSCALL_64_after_hwframe+0x63/0xcd
+
+Tested-by: Hangbin Liu <liuhangbin@gmail.com>
+Fixes: f185de28d9ae ("mld: add new workqueues for process mld events")
+Signed-off-by: Taehee Yoo <ap420073@gmail.com>
+Reviewed-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/ipv6/mcast.c | 14 ++++++++------
+ 1 file changed, 8 insertions(+), 6 deletions(-)
+
+diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
+index 7f695c39d9a8..87c699d57b36 100644
+--- a/net/ipv6/mcast.c
++++ b/net/ipv6/mcast.c
+@@ -1522,7 +1522,6 @@ static void mld_query_work(struct work_struct *work)
+               if (++cnt >= MLD_MAX_QUEUE) {
+                       rework = true;
+-                      schedule_delayed_work(&idev->mc_query_work, 0);
+                       break;
+               }
+       }
+@@ -1533,8 +1532,10 @@ static void mld_query_work(struct work_struct *work)
+               __mld_query_work(skb);
+       mutex_unlock(&idev->mc_lock);
+-      if (!rework)
+-              in6_dev_put(idev);
++      if (rework && queue_delayed_work(mld_wq, &idev->mc_query_work, 0))
++              return;
++
++      in6_dev_put(idev);
+ }
+ /* called with rcu_read_lock() */
+@@ -1624,7 +1625,6 @@ static void mld_report_work(struct work_struct *work)
+               if (++cnt >= MLD_MAX_QUEUE) {
+                       rework = true;
+-                      schedule_delayed_work(&idev->mc_report_work, 0);
+                       break;
+               }
+       }
+@@ -1635,8 +1635,10 @@ static void mld_report_work(struct work_struct *work)
+               __mld_report_work(skb);
+       mutex_unlock(&idev->mc_lock);
+-      if (!rework)
+-              in6_dev_put(idev);
++      if (rework && queue_delayed_work(mld_wq, &idev->mc_report_work, 0))
++              return;
++
++      in6_dev_put(idev);
+ }
+ static bool is_in(struct ifmcaddr6 *pmc, struct ip6_sf_list *psf, int type,
+-- 
+2.35.1
+
diff --git a/queue-5.15/net-pcs-xpcs-propagate-xpcs_read-error-to-xpcs_get_s.patch b/queue-5.15/net-pcs-xpcs-propagate-xpcs_read-error-to-xpcs_get_s.patch
new file mode 100644 (file)
index 0000000..bc26047
--- /dev/null
@@ -0,0 +1,42 @@
+From f5e90e5ec1e647d90d283e99ba9e3a2f84f312d5 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 20 Jul 2022 14:20:57 +0300
+Subject: net: pcs: xpcs: propagate xpcs_read error to xpcs_get_state_c37_sgmii
+
+From: Vladimir Oltean <vladimir.oltean@nxp.com>
+
+[ Upstream commit 27161db0904ee48e59140aa8d0835939a666c1f1 ]
+
+While phylink_pcs_ops :: pcs_get_state does return void, xpcs_get_state()
+does check for a non-zero return code from xpcs_get_state_c37_sgmii()
+and prints that as a message to the kernel log.
+
+However, a non-zero return code from xpcs_read() is translated into
+"return false" (i.e. zero as int) and the I/O error is therefore not
+printed. Fix that.
+
+Fixes: b97b5331b8ab ("net: pcs: add C37 SGMII AN support for intel mGbE controller")
+Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
+Link: https://lore.kernel.org/r/20220720112057.3504398-1-vladimir.oltean@nxp.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/pcs/pcs-xpcs.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/drivers/net/pcs/pcs-xpcs.c b/drivers/net/pcs/pcs-xpcs.c
+index 7de631f5356f..fd4cbf8a55ad 100644
+--- a/drivers/net/pcs/pcs-xpcs.c
++++ b/drivers/net/pcs/pcs-xpcs.c
+@@ -890,7 +890,7 @@ static int xpcs_get_state_c37_sgmii(struct dw_xpcs *xpcs,
+        */
+       ret = xpcs_read(xpcs, MDIO_MMD_VEND2, DW_VR_MII_AN_INTR_STS);
+       if (ret < 0)
+-              return false;
++              return ret;
+       if (ret & DW_VR_MII_C37_ANSGM_SP_LNKSTS) {
+               int speed_value;
+-- 
+2.35.1
+
diff --git a/queue-5.15/net-ping6-fix-memleak-in-ipv6_renew_options.patch-12523 b/queue-5.15/net-ping6-fix-memleak-in-ipv6_renew_options.patch-12523
new file mode 100644 (file)
index 0000000..263df0a
--- /dev/null
@@ -0,0 +1,110 @@
+From 85b30d6c4b4c5891b9c1df8e41f2e0e08f4360f8 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 27 Jul 2022 18:22:20 -0700
+Subject: net: ping6: Fix memleak in ipv6_renew_options().
+
+From: Kuniyuki Iwashima <kuniyu@amazon.com>
+
+[ Upstream commit e27326009a3d247b831eda38878c777f6f4eb3d1 ]
+
+When we close ping6 sockets, some resources are left unfreed because
+pingv6_prot is missing sk->sk_prot->destroy().  As reported by
+syzbot [0], just three syscalls leak 96 bytes and easily cause OOM.
+
+    struct ipv6_sr_hdr *hdr;
+    char data[24] = {0};
+    int fd;
+
+    hdr = (struct ipv6_sr_hdr *)data;
+    hdr->hdrlen = 2;
+    hdr->type = IPV6_SRCRT_TYPE_4;
+
+    fd = socket(AF_INET6, SOCK_DGRAM, NEXTHDR_ICMP);
+    setsockopt(fd, IPPROTO_IPV6, IPV6_RTHDR, data, 24);
+    close(fd);
+
+To fix memory leaks, let's add a destroy function.
+
+Note the socket() syscall checks if the GID is within the range of
+net.ipv4.ping_group_range.  The default value is [1, 0] so that no
+GID meets the condition (1 <= GID <= 0).  Thus, the local DoS does
+not succeed until we change the default value.  However, at least
+Ubuntu/Fedora/RHEL loosen it.
+
+    $ cat /usr/lib/sysctl.d/50-default.conf
+    ...
+    -net.ipv4.ping_group_range = 0 2147483647
+
+Also, the same leak could be reached through other paths using the
+options below, and some of them require CAP_NET_RAW.
+
+  setsockopt
+      IPV6_ADDRFORM (inet6_sk(sk)->pktoptions)
+      IPV6_RECVPATHMTU (inet6_sk(sk)->rxpmtu)
+      IPV6_HOPOPTS (inet6_sk(sk)->opt)
+      IPV6_RTHDRDSTOPTS (inet6_sk(sk)->opt)
+      IPV6_RTHDR (inet6_sk(sk)->opt)
+      IPV6_DSTOPTS (inet6_sk(sk)->opt)
+      IPV6_2292PKTOPTIONS (inet6_sk(sk)->opt)
+
+  getsockopt
+      IPV6_FLOWLABEL_MGR (inet6_sk(sk)->ipv6_fl_list)
+
+For the record, here is a splat that differs from syzbot's one.
+
+  unreferenced object 0xffff888006270c60 (size 96):
+    comm "repro2", pid 231, jiffies 4294696626 (age 13.118s)
+    hex dump (first 32 bytes):
+      01 00 00 00 44 00 00 00 00 00 00 00 00 00 00 00  ....D...........
+      00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
+    backtrace:
+      [<00000000f6bc7ea9>] sock_kmalloc (net/core/sock.c:2564 net/core/sock.c:2554)
+      [<000000006d699550>] do_ipv6_setsockopt.constprop.0 (net/ipv6/ipv6_sockglue.c:715)
+      [<00000000c3c3b1f5>] ipv6_setsockopt (net/ipv6/ipv6_sockglue.c:1024)
+      [<000000007096a025>] __sys_setsockopt (net/socket.c:2254)
+      [<000000003a8ff47b>] __x64_sys_setsockopt (net/socket.c:2265 net/socket.c:2262 net/socket.c:2262)
+      [<000000007c409dcb>] do_syscall_64 (arch/x86/entry/common.c:50 arch/x86/entry/common.c:80)
+      [<00000000e939c4a9>] entry_SYSCALL_64_after_hwframe (arch/x86/entry/entry_64.S:120)
+
+[0]: https://syzkaller.appspot.com/bug?extid=a8430774139ec3ab7176
+
+Fixes: 6d0bfe226116 ("net: ipv6: Add IPv6 support to the ping socket.")
+Reported-by: syzbot+a8430774139ec3ab7176@syzkaller.appspotmail.com
+Reported-by: Ayushman Dutta <ayudutta@amazon.com>
+Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
+Reviewed-by: David Ahern <dsahern@kernel.org>
+Reviewed-by: Eric Dumazet <edumazet@google.com>
+Link: https://lore.kernel.org/r/20220728012220.46918-1-kuniyu@amazon.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/ipv6/ping.c | 6 ++++++
+ 1 file changed, 6 insertions(+)
+
+diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c
+index 6ac88fe24a8e..135e3a060caa 100644
+--- a/net/ipv6/ping.c
++++ b/net/ipv6/ping.c
+@@ -22,6 +22,11 @@
+ #include <linux/proc_fs.h>
+ #include <net/ping.h>
++static void ping_v6_destroy(struct sock *sk)
++{
++      inet6_destroy_sock(sk);
++}
++
+ /* Compatibility glue so we can support IPv6 when it's compiled as a module */
+ static int dummy_ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len,
+                                int *addr_len)
+@@ -166,6 +171,7 @@ struct proto pingv6_prot = {
+       .owner =        THIS_MODULE,
+       .init =         ping_init_sock,
+       .close =        ping_close,
++      .destroy =      ping_v6_destroy,
+       .connect =      ip6_datagram_connect_v6_only,
+       .disconnect =   __udp_disconnect,
+       .setsockopt =   ipv6_setsockopt,
+-- 
+2.35.1
+
diff --git a/queue-5.15/net-sungem_phy-add-of_node_put-for-reference-returne.patch b/queue-5.15/net-sungem_phy-add-of_node_put-for-reference-returne.patch
new file mode 100644 (file)
index 0000000..d7a0c03
--- /dev/null
@@ -0,0 +1,37 @@
+From b986ef8c75eb0129ac75c5196992a37541149183 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 20 Jul 2022 21:10:03 +0800
+Subject: net: sungem_phy: Add of_node_put() for reference returned by
+ of_get_parent()
+
+From: Liang He <windhl@126.com>
+
+[ Upstream commit ebbbe23fdf6070e31509638df3321688358cc211 ]
+
+In bcm5421_init(), we should call of_node_put() for the reference
+returned by of_get_parent() which has increased the refcount.
+
+Fixes: 3c326fe9cb7a ("[PATCH] ppc64: Add new PHY to sungem")
+Signed-off-by: Liang He <windhl@126.com>
+Link: https://lore.kernel.org/r/20220720131003.1287426-1-windhl@126.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/sungem_phy.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/drivers/net/sungem_phy.c b/drivers/net/sungem_phy.c
+index 291fa449993f..45f295403cb5 100644
+--- a/drivers/net/sungem_phy.c
++++ b/drivers/net/sungem_phy.c
+@@ -454,6 +454,7 @@ static int bcm5421_init(struct mii_phy* phy)
+               int can_low_power = 1;
+               if (np == NULL || of_get_property(np, "no-autolowpower", NULL))
+                       can_low_power = 0;
++              of_node_put(np);
+               if (can_low_power) {
+                       /* Enable automatic low-power */
+                       sungem_phy_write(phy, 0x1c, 0x9002);
+-- 
+2.35.1
+
diff --git a/queue-5.15/net-tls-remove-the-context-from-the-list-in-tls_devi.patch b/queue-5.15/net-tls-remove-the-context-from-the-list-in-tls_devi.patch
new file mode 100644 (file)
index 0000000..c2a4811
--- /dev/null
@@ -0,0 +1,51 @@
+From e1bf6422281150d1b6fb11cbaeb4d17c644404c8 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 21 Jul 2022 12:11:27 +0300
+Subject: net/tls: Remove the context from the list in tls_device_down
+
+From: Maxim Mikityanskiy <maximmi@nvidia.com>
+
+[ Upstream commit f6336724a4d4220c89a4ec38bca84b03b178b1a3 ]
+
+tls_device_down takes a reference on all contexts it's going to move to
+the degraded state (software fallback). If sk_destruct runs afterwards,
+it can reduce the reference counter back to 1 and return early without
+destroying the context. Then tls_device_down will release the reference
+it took and call tls_device_free_ctx. However, the context will still
+stay in tls_device_down_list forever. The list will contain an item,
+memory for which is released, making a memory corruption possible.
+
+Fix the above bug by properly removing the context from all lists before
+any call to tls_device_free_ctx.
+
+Fixes: 3740651bf7e2 ("tls: Fix context leak on tls_device_down")
+Signed-off-by: Maxim Mikityanskiy <maximmi@nvidia.com>
+Reviewed-by: Tariq Toukan <tariqt@nvidia.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/tls/tls_device.c | 7 ++++++-
+ 1 file changed, 6 insertions(+), 1 deletion(-)
+
+diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c
+index 4e33150cfb9e..cf75969375cf 100644
+--- a/net/tls/tls_device.c
++++ b/net/tls/tls_device.c
+@@ -1351,8 +1351,13 @@ static int tls_device_down(struct net_device *netdev)
+                * by tls_device_free_ctx. rx_conf and tx_conf stay in TLS_HW.
+                * Now release the ref taken above.
+                */
+-              if (refcount_dec_and_test(&ctx->refcount))
++              if (refcount_dec_and_test(&ctx->refcount)) {
++                      /* sk_destruct ran after tls_device_down took a ref, and
++                       * it returned early. Complete the destruction here.
++                       */
++                      list_del(&ctx->list);
+                       tls_device_free_ctx(ctx);
++              }
+       }
+       up_write(&device_offload_lock);
+-- 
+2.35.1
+
diff --git a/queue-5.15/netfilter-nf_queue-do-not-allow-packet-truncation-be.patch b/queue-5.15/netfilter-nf_queue-do-not-allow-packet-truncation-be.patch
new file mode 100644 (file)
index 0000000..153d7c9
--- /dev/null
@@ -0,0 +1,53 @@
+From 573a6eb0d3b6dc31b9f97275d07a98dcfebf6c8a Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 26 Jul 2022 12:42:06 +0200
+Subject: netfilter: nf_queue: do not allow packet truncation below transport
+ header offset
+
+From: Florian Westphal <fw@strlen.de>
+
+[ Upstream commit 99a63d36cb3ed5ca3aa6fcb64cffbeaf3b0fb164 ]
+
+Domingo Dirutigliano and Nicola Guerrera report kernel panic when
+sending nf_queue verdict with 1-byte nfta_payload attribute.
+
+The IP/IPv6 stack pulls the IP(v6) header from the packet after the
+input hook.
+
+If user truncates the packet below the header size, this skb_pull() will
+result in a malformed skb (skb->len < 0).
+
+Fixes: 7af4cc3fa158 ("[NETFILTER]: Add "nfnetlink_queue" netfilter queue handler over nfnetlink")
+Reported-by: Domingo Dirutigliano <pwnzer0tt1@proton.me>
+Signed-off-by: Florian Westphal <fw@strlen.de>
+Reviewed-by: Pablo Neira Ayuso <pablo@netfilter.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/netfilter/nfnetlink_queue.c | 7 ++++++-
+ 1 file changed, 6 insertions(+), 1 deletion(-)
+
+diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c
+index 8787d0613ad8..5329ebf19a18 100644
+--- a/net/netfilter/nfnetlink_queue.c
++++ b/net/netfilter/nfnetlink_queue.c
+@@ -836,11 +836,16 @@ nfqnl_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum)
+ }
+ static int
+-nfqnl_mangle(void *data, int data_len, struct nf_queue_entry *e, int diff)
++nfqnl_mangle(void *data, unsigned int data_len, struct nf_queue_entry *e, int diff)
+ {
+       struct sk_buff *nskb;
+       if (diff < 0) {
++              unsigned int min_len = skb_transport_offset(e->skb);
++
++              if (data_len < min_len)
++                      return -EINVAL;
++
+               if (pskb_trim(e->skb, data_len))
+                       return -ENOMEM;
+       } else if (diff > 0) {
+-- 
+2.35.1
+
diff --git a/queue-5.15/octeontx2-pf-cn10k-fix-egress-ratelimit-configuratio.patch b/queue-5.15/octeontx2-pf-cn10k-fix-egress-ratelimit-configuratio.patch
new file mode 100644 (file)
index 0000000..35206dd
--- /dev/null
@@ -0,0 +1,185 @@
+From e1fc39bcf4c9b83ef7a2c9c35e442f3c69a74d91 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sun, 24 Jul 2022 13:51:13 +0530
+Subject: octeontx2-pf: cn10k: Fix egress ratelimit configuration
+
+From: Sunil Goutham <sgoutham@marvell.com>
+
+[ Upstream commit b354eaeec8637d87003945439209251d76a2bb95 ]
+
+NIX_AF_TLXX_PIR/CIR register format has changed from OcteonTx2
+to CN10K. CN10K supports larger burst size. Fix burst exponent
+and burst mantissa configuration for CN10K.
+
+Also fixed 'maxrate' from u32 to u64 since 'police.rate_bytes_ps'
+passed by stack is also u64.
+
+Fixes: e638a83f167e ("octeontx2-pf: TC_MATCHALL egress ratelimiting offload")
+Signed-off-by: Sunil Goutham <sgoutham@marvell.com>
+Signed-off-by: Subbaraya Sundeep <sbhatta@marvell.com>
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ .../ethernet/marvell/octeontx2/nic/otx2_tc.c  | 76 ++++++++++++++-----
+ 1 file changed, 55 insertions(+), 21 deletions(-)
+
+diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c
+index 626961a41089..ff569e261be4 100644
+--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c
++++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c
+@@ -28,6 +28,9 @@
+ #define MAX_RATE_EXPONENT             0x0FULL
+ #define MAX_RATE_MANTISSA             0xFFULL
++#define CN10K_MAX_BURST_MANTISSA      0x7FFFULL
++#define CN10K_MAX_BURST_SIZE          8453888ULL
++
+ /* Bitfields in NIX_TLX_PIR register */
+ #define TLX_RATE_MANTISSA             GENMASK_ULL(8, 1)
+ #define TLX_RATE_EXPONENT             GENMASK_ULL(12, 9)
+@@ -35,6 +38,9 @@
+ #define TLX_BURST_MANTISSA            GENMASK_ULL(36, 29)
+ #define TLX_BURST_EXPONENT            GENMASK_ULL(40, 37)
++#define CN10K_TLX_BURST_MANTISSA      GENMASK_ULL(43, 29)
++#define CN10K_TLX_BURST_EXPONENT      GENMASK_ULL(47, 44)
++
+ struct otx2_tc_flow_stats {
+       u64 bytes;
+       u64 pkts;
+@@ -77,33 +83,42 @@ int otx2_tc_alloc_ent_bitmap(struct otx2_nic *nic)
+ }
+ EXPORT_SYMBOL(otx2_tc_alloc_ent_bitmap);
+-static void otx2_get_egress_burst_cfg(u32 burst, u32 *burst_exp,
+-                                    u32 *burst_mantissa)
++static void otx2_get_egress_burst_cfg(struct otx2_nic *nic, u32 burst,
++                                    u32 *burst_exp, u32 *burst_mantissa)
+ {
++      int max_burst, max_mantissa;
+       unsigned int tmp;
++      if (is_dev_otx2(nic->pdev)) {
++              max_burst = MAX_BURST_SIZE;
++              max_mantissa = MAX_BURST_MANTISSA;
++      } else {
++              max_burst = CN10K_MAX_BURST_SIZE;
++              max_mantissa = CN10K_MAX_BURST_MANTISSA;
++      }
++
+       /* Burst is calculated as
+        * ((256 + BURST_MANTISSA) << (1 + BURST_EXPONENT)) / 256
+        * Max supported burst size is 130,816 bytes.
+        */
+-      burst = min_t(u32, burst, MAX_BURST_SIZE);
++      burst = min_t(u32, burst, max_burst);
+       if (burst) {
+               *burst_exp = ilog2(burst) ? ilog2(burst) - 1 : 0;
+               tmp = burst - rounddown_pow_of_two(burst);
+-              if (burst < MAX_BURST_MANTISSA)
++              if (burst < max_mantissa)
+                       *burst_mantissa = tmp * 2;
+               else
+                       *burst_mantissa = tmp / (1ULL << (*burst_exp - 7));
+       } else {
+               *burst_exp = MAX_BURST_EXPONENT;
+-              *burst_mantissa = MAX_BURST_MANTISSA;
++              *burst_mantissa = max_mantissa;
+       }
+ }
+-static void otx2_get_egress_rate_cfg(u32 maxrate, u32 *exp,
++static void otx2_get_egress_rate_cfg(u64 maxrate, u32 *exp,
+                                    u32 *mantissa, u32 *div_exp)
+ {
+-      unsigned int tmp;
++      u64 tmp;
+       /* Rate calculation by hardware
+        *
+@@ -132,21 +147,44 @@ static void otx2_get_egress_rate_cfg(u32 maxrate, u32 *exp,
+       }
+ }
+-static int otx2_set_matchall_egress_rate(struct otx2_nic *nic, u32 burst, u32 maxrate)
++static u64 otx2_get_txschq_rate_regval(struct otx2_nic *nic,
++                                     u64 maxrate, u32 burst)
+ {
+-      struct otx2_hw *hw = &nic->hw;
+-      struct nix_txschq_config *req;
+       u32 burst_exp, burst_mantissa;
+       u32 exp, mantissa, div_exp;
++      u64 regval = 0;
++
++      /* Get exponent and mantissa values from the desired rate */
++      otx2_get_egress_burst_cfg(nic, burst, &burst_exp, &burst_mantissa);
++      otx2_get_egress_rate_cfg(maxrate, &exp, &mantissa, &div_exp);
++
++      if (is_dev_otx2(nic->pdev)) {
++              regval = FIELD_PREP(TLX_BURST_EXPONENT, (u64)burst_exp) |
++                              FIELD_PREP(TLX_BURST_MANTISSA, (u64)burst_mantissa) |
++                              FIELD_PREP(TLX_RATE_DIVIDER_EXPONENT, div_exp) |
++                              FIELD_PREP(TLX_RATE_EXPONENT, exp) |
++                              FIELD_PREP(TLX_RATE_MANTISSA, mantissa) | BIT_ULL(0);
++      } else {
++              regval = FIELD_PREP(CN10K_TLX_BURST_EXPONENT, (u64)burst_exp) |
++                              FIELD_PREP(CN10K_TLX_BURST_MANTISSA, (u64)burst_mantissa) |
++                              FIELD_PREP(TLX_RATE_DIVIDER_EXPONENT, div_exp) |
++                              FIELD_PREP(TLX_RATE_EXPONENT, exp) |
++                              FIELD_PREP(TLX_RATE_MANTISSA, mantissa) | BIT_ULL(0);
++      }
++
++      return regval;
++}
++
++static int otx2_set_matchall_egress_rate(struct otx2_nic *nic,
++                                       u32 burst, u64 maxrate)
++{
++      struct otx2_hw *hw = &nic->hw;
++      struct nix_txschq_config *req;
+       int txschq, err;
+       /* All SQs share the same TL4, so pick the first scheduler */
+       txschq = hw->txschq_list[NIX_TXSCH_LVL_TL4][0];
+-      /* Get exponent and mantissa values from the desired rate */
+-      otx2_get_egress_burst_cfg(burst, &burst_exp, &burst_mantissa);
+-      otx2_get_egress_rate_cfg(maxrate, &exp, &mantissa, &div_exp);
+-
+       mutex_lock(&nic->mbox.lock);
+       req = otx2_mbox_alloc_msg_nix_txschq_cfg(&nic->mbox);
+       if (!req) {
+@@ -157,11 +195,7 @@ static int otx2_set_matchall_egress_rate(struct otx2_nic *nic, u32 burst, u32 ma
+       req->lvl = NIX_TXSCH_LVL_TL4;
+       req->num_regs = 1;
+       req->reg[0] = NIX_AF_TL4X_PIR(txschq);
+-      req->regval[0] = FIELD_PREP(TLX_BURST_EXPONENT, burst_exp) |
+-                       FIELD_PREP(TLX_BURST_MANTISSA, burst_mantissa) |
+-                       FIELD_PREP(TLX_RATE_DIVIDER_EXPONENT, div_exp) |
+-                       FIELD_PREP(TLX_RATE_EXPONENT, exp) |
+-                       FIELD_PREP(TLX_RATE_MANTISSA, mantissa) | BIT_ULL(0);
++      req->regval[0] = otx2_get_txschq_rate_regval(nic, maxrate, burst);
+       err = otx2_sync_mbox_msg(&nic->mbox);
+       mutex_unlock(&nic->mbox.lock);
+@@ -196,7 +230,7 @@ static int otx2_tc_egress_matchall_install(struct otx2_nic *nic,
+       struct netlink_ext_ack *extack = cls->common.extack;
+       struct flow_action *actions = &cls->rule->action;
+       struct flow_action_entry *entry;
+-      u32 rate;
++      u64 rate;
+       int err;
+       err = otx2_tc_validate_flow(nic, actions, extack);
+@@ -218,7 +252,7 @@ static int otx2_tc_egress_matchall_install(struct otx2_nic *nic,
+               }
+               /* Convert bytes per second to Mbps */
+               rate = entry->police.rate_bytes_ps * 8;
+-              rate = max_t(u32, rate / 1000000, 1);
++              rate = max_t(u64, rate / 1000000, 1);
+               err = otx2_set_matchall_egress_rate(nic, entry->police.burst, rate);
+               if (err)
+                       return err;
+-- 
+2.35.1
+
diff --git a/queue-5.15/octeontx2-pf-fix-udp-tcp-src-and-dst-port-tc-filters.patch-781 b/queue-5.15/octeontx2-pf-fix-udp-tcp-src-and-dst-port-tc-filters.patch-781
new file mode 100644 (file)
index 0000000..b21427a
--- /dev/null
@@ -0,0 +1,70 @@
+From c2604438cc7c262202cdf9f88f33229a28bcddb2 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sun, 24 Jul 2022 13:51:14 +0530
+Subject: octeontx2-pf: Fix UDP/TCP src and dst port tc filters
+
+From: Subbaraya Sundeep <sbhatta@marvell.com>
+
+[ Upstream commit 59e1be6f83b928a04189bbf3ab683a1fc6248db3 ]
+
+Check the mask for non-zero value before installing tc filters
+for L4 source and destination ports. Otherwise installing a
+filter for source port installs destination port too and
+vice-versa.
+
+Fixes: 1d4d9e42c240 ("octeontx2-pf: Add tc flower hardware offload on ingress traffic")
+Signed-off-by: Subbaraya Sundeep <sbhatta@marvell.com>
+Signed-off-by: Sunil Goutham <sgoutham@marvell.com>
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ .../ethernet/marvell/octeontx2/nic/otx2_tc.c  | 30 +++++++++++--------
+ 1 file changed, 18 insertions(+), 12 deletions(-)
+
+diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c
+index ff569e261be4..75388a65f349 100644
+--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c
++++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c
+@@ -605,21 +605,27 @@ static int otx2_tc_prepare_flow(struct otx2_nic *nic, struct otx2_tc_flow *node,
+               flow_spec->dport = match.key->dst;
+               flow_mask->dport = match.mask->dst;
+-              if (ip_proto == IPPROTO_UDP)
+-                      req->features |= BIT_ULL(NPC_DPORT_UDP);
+-              else if (ip_proto == IPPROTO_TCP)
+-                      req->features |= BIT_ULL(NPC_DPORT_TCP);
+-              else if (ip_proto == IPPROTO_SCTP)
+-                      req->features |= BIT_ULL(NPC_DPORT_SCTP);
++
++              if (flow_mask->dport) {
++                      if (ip_proto == IPPROTO_UDP)
++                              req->features |= BIT_ULL(NPC_DPORT_UDP);
++                      else if (ip_proto == IPPROTO_TCP)
++                              req->features |= BIT_ULL(NPC_DPORT_TCP);
++                      else if (ip_proto == IPPROTO_SCTP)
++                              req->features |= BIT_ULL(NPC_DPORT_SCTP);
++              }
+               flow_spec->sport = match.key->src;
+               flow_mask->sport = match.mask->src;
+-              if (ip_proto == IPPROTO_UDP)
+-                      req->features |= BIT_ULL(NPC_SPORT_UDP);
+-              else if (ip_proto == IPPROTO_TCP)
+-                      req->features |= BIT_ULL(NPC_SPORT_TCP);
+-              else if (ip_proto == IPPROTO_SCTP)
+-                      req->features |= BIT_ULL(NPC_SPORT_SCTP);
++
++              if (flow_mask->sport) {
++                      if (ip_proto == IPPROTO_UDP)
++                              req->features |= BIT_ULL(NPC_SPORT_UDP);
++                      else if (ip_proto == IPPROTO_TCP)
++                              req->features |= BIT_ULL(NPC_SPORT_TCP);
++                      else if (ip_proto == IPPROTO_SCTP)
++                              req->features |= BIT_ULL(NPC_SPORT_SCTP);
++              }
+       }
+       return otx2_tc_parse_actions(nic, &rule->action, req, f, node);
+-- 
+2.35.1
+
diff --git a/queue-5.15/perf-symbol-correct-address-for-bss-symbols.patch b/queue-5.15/perf-symbol-correct-address-for-bss-symbols.patch
new file mode 100644 (file)
index 0000000..d316043
--- /dev/null
@@ -0,0 +1,182 @@
+From 47347b30c7057567055b6144446ddea16815dacf Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sun, 24 Jul 2022 14:00:12 +0800
+Subject: perf symbol: Correct address for bss symbols
+
+From: Leo Yan <leo.yan@linaro.org>
+
+[ Upstream commit 2d86612aacb7805f72873691a2644d7279ed0630 ]
+
+When using 'perf mem' and 'perf c2c', an issue is observed that tool
+reports the wrong offset for global data symbols.  This is a common
+issue on both x86 and Arm64 platforms.
+
+Let's see an example, for a test program, below is the disassembly for
+its .bss section which is dumped with objdump:
+
+  ...
+
+  Disassembly of section .bss:
+
+  0000000000004040 <completed.0>:
+       ...
+
+  0000000000004080 <buf1>:
+       ...
+
+  00000000000040c0 <buf2>:
+       ...
+
+  0000000000004100 <thread>:
+       ...
+
+First we used 'perf mem record' to run the test program and then used
+'perf --debug verbose=4 mem report' to observe what's the symbol info
+for 'buf1' and 'buf2' structures.
+
+  # ./perf mem record -e ldlat-loads,ldlat-stores -- false_sharing.exe 8
+  # ./perf --debug verbose=4 mem report
+    ...
+    dso__load_sym_internal: adjusting symbol: st_value: 0x40c0 sh_addr: 0x4040 sh_offset: 0x3028
+    symbol__new: buf2 0x30a8-0x30e8
+    ...
+    dso__load_sym_internal: adjusting symbol: st_value: 0x4080 sh_addr: 0x4040 sh_offset: 0x3028
+    symbol__new: buf1 0x3068-0x30a8
+    ...
+
+The perf tool relies on libelf to parse symbols, in executable and
+shared object files, 'st_value' holds a virtual address; 'sh_addr' is
+the address at which section's first byte should reside in memory, and
+'sh_offset' is the byte offset from the beginning of the file to the
+first byte in the section.  The perf tool uses below formula to convert
+a symbol's memory address to a file address:
+
+  file_address = st_value - sh_addr + sh_offset
+                    ^
+                    ` Memory address
+
+We can see the final adjusted address ranges for buf1 and buf2 are
+[0x3068-0x30a8) and [0x30a8-0x30e8) respectively, apparently this is
+incorrect, in the code, the structure for 'buf1' and 'buf2' specifies
+compiler attribute with 64-byte alignment.
+
+The problem happens for 'sh_offset', libelf returns it as 0x3028 which
+is not 64-byte aligned, combining with disassembly, it's likely libelf
+doesn't respect the alignment for .bss section, therefore, it doesn't
+return the aligned value for 'sh_offset'.
+
+Suggested by Fangrui Song, ELF file contains program header which
+contains PT_LOAD segments, the fields p_vaddr and p_offset in PT_LOAD
+segments contain the execution info.  A better choice for converting
+memory address to file address is using the formula:
+
+  file_address = st_value - p_vaddr + p_offset
+
+This patch introduces elf_read_program_header() which returns the
+program header based on the passed 'st_value', then it uses the formula
+above to calculate the symbol file address; and the debugging log is
+updated respectively.
+
+After applying the change:
+
+  # ./perf --debug verbose=4 mem report
+    ...
+    dso__load_sym_internal: adjusting symbol: st_value: 0x40c0 p_vaddr: 0x3d28 p_offset: 0x2d28
+    symbol__new: buf2 0x30c0-0x3100
+    ...
+    dso__load_sym_internal: adjusting symbol: st_value: 0x4080 p_vaddr: 0x3d28 p_offset: 0x2d28
+    symbol__new: buf1 0x3080-0x30c0
+    ...
+
+Fixes: f17e04afaff84b5c ("perf report: Fix ELF symbol parsing")
+Reported-by: Chang Rui <changruinj@gmail.com>
+Suggested-by: Fangrui Song <maskray@google.com>
+Signed-off-by: Leo Yan <leo.yan@linaro.org>
+Acked-by: Namhyung Kim <namhyung@kernel.org>
+Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
+Cc: Ian Rogers <irogers@google.com>
+Cc: Ingo Molnar <mingo@redhat.com>
+Cc: Jiri Olsa <jolsa@kernel.org>
+Cc: Mark Rutland <mark.rutland@arm.com>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Link: https://lore.kernel.org/r/20220724060013.171050-2-leo.yan@linaro.org
+Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ tools/perf/util/symbol-elf.c | 45 ++++++++++++++++++++++++++++++++----
+ 1 file changed, 41 insertions(+), 4 deletions(-)
+
+diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c
+index ecd377938eea..ef6ced5c5746 100644
+--- a/tools/perf/util/symbol-elf.c
++++ b/tools/perf/util/symbol-elf.c
+@@ -233,6 +233,33 @@ Elf_Scn *elf_section_by_name(Elf *elf, GElf_Ehdr *ep,
+       return NULL;
+ }
++static int elf_read_program_header(Elf *elf, u64 vaddr, GElf_Phdr *phdr)
++{
++      size_t i, phdrnum;
++      u64 sz;
++
++      if (elf_getphdrnum(elf, &phdrnum))
++              return -1;
++
++      for (i = 0; i < phdrnum; i++) {
++              if (gelf_getphdr(elf, i, phdr) == NULL)
++                      return -1;
++
++              if (phdr->p_type != PT_LOAD)
++                      continue;
++
++              sz = max(phdr->p_memsz, phdr->p_filesz);
++              if (!sz)
++                      continue;
++
++              if (vaddr >= phdr->p_vaddr && (vaddr < phdr->p_vaddr + sz))
++                      return 0;
++      }
++
++      /* Not found any valid program header */
++      return -1;
++}
++
+ static bool want_demangle(bool is_kernel_sym)
+ {
+       return is_kernel_sym ? symbol_conf.demangle_kernel : symbol_conf.demangle;
+@@ -1209,6 +1236,7 @@ dso__load_sym_internal(struct dso *dso, struct map *map, struct symsrc *syms_ss,
+                                       sym.st_value);
+                       used_opd = true;
+               }
++
+               /*
+                * When loading symbols in a data mapping, ABS symbols (which
+                * has a value of SHN_ABS in its st_shndx) failed at
+@@ -1262,11 +1290,20 @@ dso__load_sym_internal(struct dso *dso, struct map *map, struct symsrc *syms_ss,
+                               goto out_elf_end;
+               } else if ((used_opd && runtime_ss->adjust_symbols) ||
+                          (!used_opd && syms_ss->adjust_symbols)) {
++                      GElf_Phdr phdr;
++
++                      if (elf_read_program_header(syms_ss->elf,
++                                                  (u64)sym.st_value, &phdr)) {
++                              pr_warning("%s: failed to find program header for "
++                                         "symbol: %s st_value: %#" PRIx64 "\n",
++                                         __func__, elf_name, (u64)sym.st_value);
++                              continue;
++                      }
+                       pr_debug4("%s: adjusting symbol: st_value: %#" PRIx64 " "
+-                                "sh_addr: %#" PRIx64 " sh_offset: %#" PRIx64 "\n", __func__,
+-                                (u64)sym.st_value, (u64)shdr.sh_addr,
+-                                (u64)shdr.sh_offset);
+-                      sym.st_value -= shdr.sh_addr - shdr.sh_offset;
++                                "p_vaddr: %#" PRIx64 " p_offset: %#" PRIx64 "\n",
++                                __func__, (u64)sym.st_value, (u64)phdr.p_vaddr,
++                                (u64)phdr.p_offset);
++                      sym.st_value -= phdr.p_vaddr - phdr.p_offset;
+               }
+               demangled = demangle_sym(dso, kmodule, elf_name);
+-- 
+2.35.1
+
diff --git a/queue-5.15/revert-tcp-change-pingpong-threshold-to-3.patch-30941 b/queue-5.15/revert-tcp-change-pingpong-threshold-to-3.patch-30941
new file mode 100644 (file)
index 0000000..3a7a54f
--- /dev/null
@@ -0,0 +1,91 @@
+From 8f85722e7d52d3f218ef18d54b8a84fb170f4f22 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 21 Jul 2022 20:44:04 +0000
+Subject: Revert "tcp: change pingpong threshold to 3"
+
+From: Wei Wang <weiwan@google.com>
+
+[ Upstream commit 4d8f24eeedc58d5f87b650ddda73c16e8ba56559 ]
+
+This reverts commit 4a41f453bedfd5e9cd040bad509d9da49feb3e2c.
+
+This to-be-reverted commit was meant to apply a stricter rule for the
+stack to enter pingpong mode. However, the condition used to check for
+interactive session "before(tp->lsndtime, icsk->icsk_ack.lrcvtime)" is
+jiffy based and might be too coarse, which delays the stack entering
+pingpong mode.
+We revert this patch so that we no longer use the above condition to
+determine interactive session, and also reduce pingpong threshold to 1.
+
+Fixes: 4a41f453bedf ("tcp: change pingpong threshold to 3")
+Reported-by: LemmyHuang <hlm3280@163.com>
+Suggested-by: Neal Cardwell <ncardwell@google.com>
+Signed-off-by: Wei Wang <weiwan@google.com>
+Acked-by: Neal Cardwell <ncardwell@google.com>
+Reviewed-by: Eric Dumazet <edumazet@google.com>
+Link: https://lore.kernel.org/r/20220721204404.388396-1-weiwan@google.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/net/inet_connection_sock.h | 10 +---------
+ net/ipv4/tcp_output.c              | 15 ++++++---------
+ 2 files changed, 7 insertions(+), 18 deletions(-)
+
+diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h
+index fa6a87246a7b..695ed45841f0 100644
+--- a/include/net/inet_connection_sock.h
++++ b/include/net/inet_connection_sock.h
+@@ -315,7 +315,7 @@ void inet_csk_update_fastreuse(struct inet_bind_bucket *tb,
+ struct dst_entry *inet_csk_update_pmtu(struct sock *sk, u32 mtu);
+-#define TCP_PINGPONG_THRESH   3
++#define TCP_PINGPONG_THRESH   1
+ static inline void inet_csk_enter_pingpong_mode(struct sock *sk)
+ {
+@@ -332,14 +332,6 @@ static inline bool inet_csk_in_pingpong_mode(struct sock *sk)
+       return inet_csk(sk)->icsk_ack.pingpong >= TCP_PINGPONG_THRESH;
+ }
+-static inline void inet_csk_inc_pingpong_cnt(struct sock *sk)
+-{
+-      struct inet_connection_sock *icsk = inet_csk(sk);
+-
+-      if (icsk->icsk_ack.pingpong < U8_MAX)
+-              icsk->icsk_ack.pingpong++;
+-}
+-
+ static inline bool inet_csk_has_ulp(struct sock *sk)
+ {
+       return inet_sk(sk)->is_icsk && !!inet_csk(sk)->icsk_ulp_ops;
+diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
+index 3a84553fb4ed..51f31311fdb6 100644
+--- a/net/ipv4/tcp_output.c
++++ b/net/ipv4/tcp_output.c
+@@ -167,16 +167,13 @@ static void tcp_event_data_sent(struct tcp_sock *tp,
+       if (tcp_packets_in_flight(tp) == 0)
+               tcp_ca_event(sk, CA_EVENT_TX_START);
+-      /* If this is the first data packet sent in response to the
+-       * previous received data,
+-       * and it is a reply for ato after last received packet,
+-       * increase pingpong count.
+-       */
+-      if (before(tp->lsndtime, icsk->icsk_ack.lrcvtime) &&
+-          (u32)(now - icsk->icsk_ack.lrcvtime) < icsk->icsk_ack.ato)
+-              inet_csk_inc_pingpong_cnt(sk);
+-
+       tp->lsndtime = now;
++
++      /* If it is a reply for ato after last received
++       * packet, enter pingpong mode.
++       */
++      if ((u32)(now - icsk->icsk_ack.lrcvtime) < icsk->icsk_ack.ato)
++              inet_csk_enter_pingpong_mode(sk);
+ }
+ /* Account for an ACK we sent. */
+-- 
+2.35.1
+
diff --git a/queue-5.15/s390-archrandom-prevent-cpacf-trng-invocations-in-in.patch b/queue-5.15/s390-archrandom-prevent-cpacf-trng-invocations-in-in.patch
new file mode 100644 (file)
index 0000000..df75099
--- /dev/null
@@ -0,0 +1,130 @@
+From 9b15aec0a530a3401f9c95b7cbfa9863e92f1217 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 13 Jul 2022 15:17:21 +0200
+Subject: s390/archrandom: prevent CPACF trng invocations in interrupt context
+
+From: Harald Freudenberger <freude@linux.ibm.com>
+
+[ Upstream commit 918e75f77af7d2e049bb70469ec0a2c12782d96a ]
+
+This patch slightly reworks the s390 arch_get_random_seed_{int,long}
+implementation: Make sure the CPACF trng instruction is never
+called in any interrupt context. This is done by adding an
+additional condition in_task().
+
+Justification:
+
+There are some constraints to satisfy for the invocation of the
+arch_get_random_seed_{int,long}() functions:
+- They should provide good random data during kernel initialization.
+- They should not be called in interrupt context as the TRNG
+  instruction is relatively heavy weight and may for example
+  cause some network loads to time out and buck.
+
+However, it was not clear what kind of interrupt context is exactly
+encountered during kernel init or network traffic eventually calling
+arch_get_random_seed_long().
+
+After some days of investigations it is clear that the s390
+start_kernel function is not running in any interrupt context and
+so the trng is called:
+
+Jul 11 18:33:39 t35lp54 kernel:  [<00000001064e90ca>] arch_get_random_seed_long.part.0+0x32/0x70
+Jul 11 18:33:39 t35lp54 kernel:  [<000000010715f246>] random_init+0xf6/0x238
+Jul 11 18:33:39 t35lp54 kernel:  [<000000010712545c>] start_kernel+0x4a4/0x628
+Jul 11 18:33:39 t35lp54 kernel:  [<000000010590402a>] startup_continue+0x2a/0x40
+
+The condition in_task() is true and the CPACF trng provides random data
+during kernel startup.
+
+The network traffic however, is more difficult. A typical call stack
+looks like this:
+
+Jul 06 17:37:07 t35lp54 kernel:  [<000000008b5600fc>] extract_entropy.constprop.0+0x23c/0x240
+Jul 06 17:37:07 t35lp54 kernel:  [<000000008b560136>] crng_reseed+0x36/0xd8
+Jul 06 17:37:07 t35lp54 kernel:  [<000000008b5604b8>] crng_make_state+0x78/0x340
+Jul 06 17:37:07 t35lp54 kernel:  [<000000008b5607e0>] _get_random_bytes+0x60/0xf8
+Jul 06 17:37:07 t35lp54 kernel:  [<000000008b56108a>] get_random_u32+0xda/0x248
+Jul 06 17:37:07 t35lp54 kernel:  [<000000008aefe7a8>] kfence_guarded_alloc+0x48/0x4b8
+Jul 06 17:37:07 t35lp54 kernel:  [<000000008aeff35e>] __kfence_alloc+0x18e/0x1b8
+Jul 06 17:37:07 t35lp54 kernel:  [<000000008aef7f10>] __kmalloc_node_track_caller+0x368/0x4d8
+Jul 06 17:37:07 t35lp54 kernel:  [<000000008b611eac>] kmalloc_reserve+0x44/0xa0
+Jul 06 17:37:07 t35lp54 kernel:  [<000000008b611f98>] __alloc_skb+0x90/0x178
+Jul 06 17:37:07 t35lp54 kernel:  [<000000008b6120dc>] __napi_alloc_skb+0x5c/0x118
+Jul 06 17:37:07 t35lp54 kernel:  [<000000008b8f06b4>] qeth_extract_skb+0x13c/0x680
+Jul 06 17:37:07 t35lp54 kernel:  [<000000008b8f6526>] qeth_poll+0x256/0x3f8
+Jul 06 17:37:07 t35lp54 kernel:  [<000000008b63d76e>] __napi_poll.constprop.0+0x46/0x2f8
+Jul 06 17:37:07 t35lp54 kernel:  [<000000008b63dbec>] net_rx_action+0x1cc/0x408
+Jul 06 17:37:07 t35lp54 kernel:  [<000000008b937302>] __do_softirq+0x132/0x6b0
+Jul 06 17:37:07 t35lp54 kernel:  [<000000008abf46ce>] __irq_exit_rcu+0x13e/0x170
+Jul 06 17:37:07 t35lp54 kernel:  [<000000008abf531a>] irq_exit_rcu+0x22/0x50
+Jul 06 17:37:07 t35lp54 kernel:  [<000000008b922506>] do_io_irq+0xe6/0x198
+Jul 06 17:37:07 t35lp54 kernel:  [<000000008b935826>] io_int_handler+0xd6/0x110
+Jul 06 17:37:07 t35lp54 kernel:  [<000000008b9358a6>] psw_idle_exit+0x0/0xa
+Jul 06 17:37:07 t35lp54 kernel: ([<000000008ab9c59a>] arch_cpu_idle+0x52/0xe0)
+Jul 06 17:37:07 t35lp54 kernel:  [<000000008b933cfe>] default_idle_call+0x6e/0xd0
+Jul 06 17:37:07 t35lp54 kernel:  [<000000008ac59f4e>] do_idle+0xf6/0x1b0
+Jul 06 17:37:07 t35lp54 kernel:  [<000000008ac5a28e>] cpu_startup_entry+0x36/0x40
+Jul 06 17:37:07 t35lp54 kernel:  [<000000008abb0d90>] smp_start_secondary+0x148/0x158
+Jul 06 17:37:07 t35lp54 kernel:  [<000000008b935b9e>] restart_int_handler+0x6e/0x90
+
+which confirms that the call is in softirq context. So in_task() covers exactly
+the cases where we want to have CPACF trng called: not in nmi, not in hard irq,
+not in soft irq but in normal task context and during kernel init.
+
+Signed-off-by: Harald Freudenberger <freude@linux.ibm.com>
+Acked-by: Jason A. Donenfeld <Jason@zx2c4.com>
+Reviewed-by: Juergen Christ <jchrist@linux.ibm.com>
+Link: https://lore.kernel.org/r/20220713131721.257907-1-freude@linux.ibm.com
+Fixes: e4f74400308c ("s390/archrandom: simplify back to earlier design and initialize earlier")
+[agordeev@linux.ibm.com changed desc, added Fixes and Link, removed -stable]
+Signed-off-by: Alexander Gordeev <agordeev@linux.ibm.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/s390/include/asm/archrandom.h | 9 ++++++---
+ 1 file changed, 6 insertions(+), 3 deletions(-)
+
+diff --git a/arch/s390/include/asm/archrandom.h b/arch/s390/include/asm/archrandom.h
+index 2c6e1c6ecbe7..4120c428dc37 100644
+--- a/arch/s390/include/asm/archrandom.h
++++ b/arch/s390/include/asm/archrandom.h
+@@ -2,7 +2,7 @@
+ /*
+  * Kernel interface for the s390 arch_random_* functions
+  *
+- * Copyright IBM Corp. 2017, 2020
++ * Copyright IBM Corp. 2017, 2022
+  *
+  * Author: Harald Freudenberger <freude@de.ibm.com>
+  *
+@@ -14,6 +14,7 @@
+ #ifdef CONFIG_ARCH_RANDOM
+ #include <linux/static_key.h>
++#include <linux/preempt.h>
+ #include <linux/atomic.h>
+ #include <asm/cpacf.h>
+@@ -32,7 +33,8 @@ static inline bool __must_check arch_get_random_int(unsigned int *v)
+ static inline bool __must_check arch_get_random_seed_long(unsigned long *v)
+ {
+-      if (static_branch_likely(&s390_arch_random_available)) {
++      if (static_branch_likely(&s390_arch_random_available) &&
++          in_task()) {
+               cpacf_trng(NULL, 0, (u8 *)v, sizeof(*v));
+               atomic64_add(sizeof(*v), &s390_arch_random_counter);
+               return true;
+@@ -42,7 +44,8 @@ static inline bool __must_check arch_get_random_seed_long(unsigned long *v)
+ static inline bool __must_check arch_get_random_seed_int(unsigned int *v)
+ {
+-      if (static_branch_likely(&s390_arch_random_available)) {
++      if (static_branch_likely(&s390_arch_random_available) &&
++          in_task()) {
+               cpacf_trng(NULL, 0, (u8 *)v, sizeof(*v));
+               atomic64_add(sizeof(*v), &s390_arch_random_counter);
+               return true;
+-- 
+2.35.1
+
diff --git a/queue-5.15/scsi-core-fix-warning-in-scsi_alloc_sgtables.patch-8274 b/queue-5.15/scsi-core-fix-warning-in-scsi_alloc_sgtables.patch-8274
new file mode 100644 (file)
index 0000000..ea488f3
--- /dev/null
@@ -0,0 +1,106 @@
+From 2852775159d84567f50f76f5f682f72d328646ce Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 20 Jul 2022 10:51:20 +0800
+Subject: scsi: core: Fix warning in scsi_alloc_sgtables()
+
+From: Jason Yan <yanaijie@huawei.com>
+
+[ Upstream commit d9a434fa0c12ed5f7afe1e9dd30003ab5d059b85 ]
+
+As explained in SG_IO howto[1]:
+
+"If iovec_count is non-zero then 'dxfer_len' should be equal to the sum of
+iov_len lengths. If not, the minimum of the two is the transfer length."
+
+When iovec_count is non-zero and dxfer_len is zero, the sg_io() just
+generated a null bio, and finally caused a warning below. To fix it, skip
+generating a bio for this request if dxfer_len is zero.
+
+[1] https://tldp.org/HOWTO/SCSI-Generic-HOWTO/x198.html
+
+WARNING: CPU: 2 PID: 3643 at drivers/scsi/scsi_lib.c:1032 scsi_alloc_sgtables+0xc7d/0xf70 drivers/scsi/scsi_lib.c:1032
+Modules linked in:
+
+CPU: 2 PID: 3643 Comm: syz-executor397 Not tainted
+5.17.0-rc3-syzkaller-00316-gb81b1829e7e3 #0
+Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.14.0-204/01/2014
+RIP: 0010:scsi_alloc_sgtables+0xc7d/0xf70 drivers/scsi/scsi_lib.c:1032
+Code: e7 fc 31 ff 44 89 f6 e8 c1 4e e7 fc 45 85 f6 0f 84 1a f5 ff ff e8
+93 4c e7 fc 83 c5 01 0f b7 ed e9 0f f5 ff ff e8 83 4c e7 fc <0f> 0b 41
+   bc 0a 00 00 00 e9 2b fb ff ff 41 bc 09 00 00 00 e9 20 fb
+RSP: 0018:ffffc90000d07558 EFLAGS: 00010293
+RAX: 0000000000000000 RBX: ffff88801bfc96a0 RCX: 0000000000000000
+RDX: ffff88801c876000 RSI: ffffffff849060bd RDI: 0000000000000003
+RBP: 0000000000000000 R08: 0000000000000000 R09: 0000000000000000
+R10: ffffffff849055b9 R11: 0000000000000000 R12: ffff888012b8c000
+R13: ffff88801bfc9580 R14: 0000000000000000 R15: ffff88801432c000
+FS:  00007effdec8e700(0000) GS:ffff88802cc00000(0000)
+knlGS:0000000000000000
+CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+CR2: 00007effdec6d718 CR3: 00000000206d6000 CR4: 0000000000150ee0
+DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
+DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
+Call Trace:
+ <TASK>
+ scsi_setup_scsi_cmnd drivers/scsi/scsi_lib.c:1219 [inline]
+ scsi_prepare_cmd drivers/scsi/scsi_lib.c:1614 [inline]
+ scsi_queue_rq+0x283e/0x3630 drivers/scsi/scsi_lib.c:1730
+ blk_mq_dispatch_rq_list+0x6ea/0x22e0 block/blk-mq.c:1851
+ __blk_mq_sched_dispatch_requests+0x20b/0x410 block/blk-mq-sched.c:299
+ blk_mq_sched_dispatch_requests+0xfb/0x180 block/blk-mq-sched.c:332
+ __blk_mq_run_hw_queue+0xf9/0x350 block/blk-mq.c:1968
+ __blk_mq_delay_run_hw_queue+0x5b6/0x6c0 block/blk-mq.c:2045
+ blk_mq_run_hw_queue+0x30f/0x480 block/blk-mq.c:2096
+ blk_mq_sched_insert_request+0x340/0x440 block/blk-mq-sched.c:451
+ blk_execute_rq+0xcc/0x340 block/blk-mq.c:1231
+ sg_io+0x67c/0x1210 drivers/scsi/scsi_ioctl.c:485
+ scsi_ioctl_sg_io drivers/scsi/scsi_ioctl.c:866 [inline]
+ scsi_ioctl+0xa66/0x1560 drivers/scsi/scsi_ioctl.c:921
+ sd_ioctl+0x199/0x2a0 drivers/scsi/sd.c:1576
+ blkdev_ioctl+0x37a/0x800 block/ioctl.c:588
+ vfs_ioctl fs/ioctl.c:51 [inline]
+ __do_sys_ioctl fs/ioctl.c:874 [inline]
+ __se_sys_ioctl fs/ioctl.c:860 [inline]
+ __x64_sys_ioctl+0x193/0x200 fs/ioctl.c:860
+ do_syscall_x64 arch/x86/entry/common.c:50 [inline]
+ do_syscall_64+0x35/0xb0 arch/x86/entry/common.c:80
+ entry_SYSCALL_64_after_hwframe+0x44/0xae
+RIP: 0033:0x7effdecdc5d9
+Code: 28 00 00 00 75 05 48 83 c4 28 c3 e8 81 14 00 00 90 48 89 f8 48 89
+f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01
+f0 ff ff 73 01 c3 48 c7 c1 b8 ff ff ff f7 d8 64 89 01 48
+RSP: 002b:00007effdec8e2f8 EFLAGS: 00000246 ORIG_RAX: 0000000000000010
+RAX: ffffffffffffffda RBX: 00007effded664c0 RCX: 00007effdecdc5d9
+RDX: 0000000020002300 RSI: 0000000000002285 RDI: 0000000000000004
+RBP: 00007effded34034 R08: 0000000000000000 R09: 0000000000000000
+R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000003
+R13: 00007effded34054 R14: 2f30656c69662f2e R15: 00007effded664c8
+
+Link: https://lore.kernel.org/r/20220720025120.3226770-1-yanaijie@huawei.com
+Fixes: 25636e282fe9 ("block: fix SG_IO vector request data length handling")
+Reported-by: syzbot+d44b35ecfb807e5af0b5@syzkaller.appspotmail.com
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Reviewed-by: Bart Van Assche <bvanassche@acm.org>
+Signed-off-by: Jason Yan <yanaijie@huawei.com>
+Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/scsi/scsi_ioctl.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/drivers/scsi/scsi_ioctl.c b/drivers/scsi/scsi_ioctl.c
+index a06c61f22742..6e2f82152b4a 100644
+--- a/drivers/scsi/scsi_ioctl.c
++++ b/drivers/scsi/scsi_ioctl.c
+@@ -457,7 +457,7 @@ static int sg_io(struct scsi_device *sdev, struct gendisk *disk,
+               goto out_free_cdb;
+       ret = 0;
+-      if (hdr->iovec_count) {
++      if (hdr->iovec_count && hdr->dxfer_len) {
+               struct iov_iter i;
+               struct iovec *iov = NULL;
+-- 
+2.35.1
+
diff --git a/queue-5.15/scsi-mpt3sas-stop-fw-fault-watchdog-work-item-during.patch b/queue-5.15/scsi-mpt3sas-stop-fw-fault-watchdog-work-item-during.patch
new file mode 100644 (file)
index 0000000..1982a72
--- /dev/null
@@ -0,0 +1,46 @@
+From b4d21334619c4ef06ef1832f7db1b552e104b4d9 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 22 Jul 2022 10:24:48 -0400
+Subject: scsi: mpt3sas: Stop fw fault watchdog work item during system
+ shutdown
+
+From: David Jeffery <djeffery@redhat.com>
+
+[ Upstream commit 0fde22c5420ed258ee538a760291c2f3935f6a01 ]
+
+During system shutdown or reboot, mpt3sas will reset the firmware back to
+ready state. However, the driver leaves running a watchdog work item
+intended to keep the firmware in operational state. This causes a second,
+unneeded reset on shutdown and moves the firmware back to operational
+instead of in ready state as intended. And if the mpt3sas_fwfault_debug
+module parameter is set, this extra reset also panics the system.
+
+mpt3sas's scsih_shutdown needs to stop the watchdog before resetting the
+firmware back to ready state.
+
+Link: https://lore.kernel.org/r/20220722142448.6289-1-djeffery@redhat.com
+Fixes: fae21608c31c ("scsi: mpt3sas: Transition IOC to Ready state during shutdown")
+Tested-by: Laurence Oberman <loberman@redhat.com>
+Acked-by: Sreekanth Reddy <sreekanth.reddy@broadcom.com>
+Signed-off-by: David Jeffery <djeffery@redhat.com>
+Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/scsi/mpt3sas/mpt3sas_scsih.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/drivers/scsi/mpt3sas/mpt3sas_scsih.c b/drivers/scsi/mpt3sas/mpt3sas_scsih.c
+index af275ac42795..5351959fbaba 100644
+--- a/drivers/scsi/mpt3sas/mpt3sas_scsih.c
++++ b/drivers/scsi/mpt3sas/mpt3sas_scsih.c
+@@ -11386,6 +11386,7 @@ scsih_shutdown(struct pci_dev *pdev)
+       _scsih_ir_shutdown(ioc);
+       _scsih_nvme_shutdown(ioc);
+       mpt3sas_base_mask_interrupts(ioc);
++      mpt3sas_base_stop_watchdog(ioc);
+       ioc->shost_recovery = 1;
+       mpt3sas_base_make_ioc_ready(ioc, SOFT_RESET);
+       ioc->shost_recovery = 0;
+-- 
+2.35.1
+
diff --git a/queue-5.15/scsi-ufs-host-hold-reference-returned-by-of_parse_ph.patch b/queue-5.15/scsi-ufs-host-hold-reference-returned-by-of_parse_ph.patch
new file mode 100644 (file)
index 0000000..610596a
--- /dev/null
@@ -0,0 +1,61 @@
+From b30e11a395cf847b4266f0f0971244256bc61cd9 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 19 Jul 2022 15:15:29 +0800
+Subject: scsi: ufs: host: Hold reference returned by of_parse_phandle()
+
+From: Liang He <windhl@126.com>
+
+[ Upstream commit a3435afba87dc6cd83f5595e7607f3c40f93ef01 ]
+
+In ufshcd_populate_vreg(), we should hold the reference returned by
+of_parse_phandle() and then use it to call of_node_put() for refcount
+balance.
+
+Link: https://lore.kernel.org/r/20220719071529.1081166-1-windhl@126.com
+Fixes: aa4976130934 ("ufs: Add regulator enable support")
+Reviewed-by: Bart Van Assche <bvanassche@acm.org>
+Signed-off-by: Liang He <windhl@126.com>
+Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/scsi/ufs/ufshcd-pltfrm.c | 15 +++++++++++++--
+ 1 file changed, 13 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/scsi/ufs/ufshcd-pltfrm.c b/drivers/scsi/ufs/ufshcd-pltfrm.c
+index 87975d1a21c8..adc302b1a57a 100644
+--- a/drivers/scsi/ufs/ufshcd-pltfrm.c
++++ b/drivers/scsi/ufs/ufshcd-pltfrm.c
+@@ -107,9 +107,20 @@ static int ufshcd_parse_clock_info(struct ufs_hba *hba)
+       return ret;
+ }
++static bool phandle_exists(const struct device_node *np,
++                         const char *phandle_name, int index)
++{
++      struct device_node *parse_np = of_parse_phandle(np, phandle_name, index);
++
++      if (parse_np)
++              of_node_put(parse_np);
++
++      return parse_np != NULL;
++}
++
+ #define MAX_PROP_SIZE 32
+ static int ufshcd_populate_vreg(struct device *dev, const char *name,
+-              struct ufs_vreg **out_vreg)
++                              struct ufs_vreg **out_vreg)
+ {
+       char prop_name[MAX_PROP_SIZE];
+       struct ufs_vreg *vreg = NULL;
+@@ -121,7 +132,7 @@ static int ufshcd_populate_vreg(struct device *dev, const char *name,
+       }
+       snprintf(prop_name, MAX_PROP_SIZE, "%s-supply", name);
+-      if (!of_parse_phandle(np, prop_name, 0)) {
++      if (!phandle_exists(np, prop_name, 0)) {
+               dev_info(dev, "%s: Unable to find %s regulator, assuming enabled\n",
+                               __func__, prop_name);
+               goto out;
+-- 
+2.35.1
+
diff --git a/queue-5.15/sctp-fix-sleep-in-atomic-context-bug-in-timer-handle.patch b/queue-5.15/sctp-fix-sleep-in-atomic-context-bug-in-timer-handle.patch
new file mode 100644 (file)
index 0000000..3a6df31
--- /dev/null
@@ -0,0 +1,61 @@
+From 05bc0f8f5e2083a8ed36ff7f1514d05da48dc4a7 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 23 Jul 2022 09:58:09 +0800
+Subject: sctp: fix sleep in atomic context bug in timer handlers
+
+From: Duoming Zhou <duoming@zju.edu.cn>
+
+[ Upstream commit b89fc26f741d9f9efb51cba3e9b241cf1380ec5a ]
+
+There are sleep in atomic context bugs in timer handlers of sctp
+such as sctp_generate_t3_rtx_event(), sctp_generate_probe_event(),
+sctp_generate_t1_init_event(), sctp_generate_timeout_event(),
+sctp_generate_t3_rtx_event() and so on.
+
+The root cause is sctp_sched_prio_init_sid() with GFP_KERNEL parameter
+that may sleep could be called by different timer handlers which is in
+interrupt context.
+
+One of the call paths that could trigger bug is shown below:
+
+      (interrupt context)
+sctp_generate_probe_event
+  sctp_do_sm
+    sctp_side_effects
+      sctp_cmd_interpreter
+        sctp_outq_teardown
+          sctp_outq_init
+            sctp_sched_set_sched
+              n->init_sid(..,GFP_KERNEL)
+                sctp_sched_prio_init_sid //may sleep
+
+This patch changes gfp_t parameter of init_sid in sctp_sched_set_sched()
+from GFP_KERNEL to GFP_ATOMIC in order to prevent sleep in atomic
+context bugs.
+
+Fixes: 5bbbbe32a431 ("sctp: introduce stream scheduler foundations")
+Signed-off-by: Duoming Zhou <duoming@zju.edu.cn>
+Acked-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
+Link: https://lore.kernel.org/r/20220723015809.11553-1-duoming@zju.edu.cn
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/sctp/stream_sched.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/net/sctp/stream_sched.c b/net/sctp/stream_sched.c
+index 99e5f69fbb74..a2e1d34f52c5 100644
+--- a/net/sctp/stream_sched.c
++++ b/net/sctp/stream_sched.c
+@@ -163,7 +163,7 @@ int sctp_sched_set_sched(struct sctp_association *asoc,
+               if (!SCTP_SO(&asoc->stream, i)->ext)
+                       continue;
+-              ret = n->init_sid(&asoc->stream, i, GFP_KERNEL);
++              ret = n->init_sid(&asoc->stream, i, GFP_ATOMIC);
+               if (ret)
+                       goto err;
+       }
+-- 
+2.35.1
+
diff --git a/queue-5.15/sctp-leave-the-err-path-free-in-sctp_stream_init-to-.patch b/queue-5.15/sctp-leave-the-err-path-free-in-sctp_stream_init-to-.patch
new file mode 100644 (file)
index 0000000..287fd04
--- /dev/null
@@ -0,0 +1,109 @@
+From 75448a2fa4330709ebd12f9c467aa925fc030e5e Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 25 Jul 2022 18:11:06 -0400
+Subject: sctp: leave the err path free in sctp_stream_init to sctp_stream_free
+
+From: Xin Long <lucien.xin@gmail.com>
+
+[ Upstream commit 181d8d2066c000ba0a0e6940a7ad80f1a0e68e9d ]
+
+A NULL pointer dereference was reported by Wei Chen:
+
+  BUG: kernel NULL pointer dereference, address: 0000000000000000
+  RIP: 0010:__list_del_entry_valid+0x26/0x80
+  Call Trace:
+   <TASK>
+   sctp_sched_dequeue_common+0x1c/0x90
+   sctp_sched_prio_dequeue+0x67/0x80
+   __sctp_outq_teardown+0x299/0x380
+   sctp_outq_free+0x15/0x20
+   sctp_association_free+0xc3/0x440
+   sctp_do_sm+0x1ca7/0x2210
+   sctp_assoc_bh_rcv+0x1f6/0x340
+
+This happens when calling sctp_sendmsg without connecting to server first.
+In this case, a data chunk already queues up in send queue of client side
+when processing the INIT_ACK from server in sctp_process_init() where it
+calls sctp_stream_init() to alloc stream_in. If it fails to alloc stream_in
+all stream_out will be freed in sctp_stream_init's err path. Then in the
+asoc freeing it will crash when dequeuing this data chunk as stream_out
+is missing.
+
+As we can't free stream out before dequeuing all data from send queue, and
+this patch is to fix it by moving the err path stream_out/in freeing in
+sctp_stream_init() to sctp_stream_free() which is eventually called when
+freeing the asoc in sctp_association_free(). This fix also makes the code
+in sctp_process_init() more clear.
+
+Note that in sctp_association_init() when it fails in sctp_stream_init(),
+sctp_association_free() will not be called, and in that case it should
+go to 'stream_free' err path to free stream instead of 'fail_init'.
+
+Fixes: 5bbbbe32a431 ("sctp: introduce stream scheduler foundations")
+Reported-by: Wei Chen <harperchen1110@gmail.com>
+Signed-off-by: Xin Long <lucien.xin@gmail.com>
+Link: https://lore.kernel.org/r/831a3dc100c4908ff76e5bcc363be97f2778bc0b.1658787066.git.lucien.xin@gmail.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/sctp/associola.c |  5 ++---
+ net/sctp/stream.c    | 19 +++----------------
+ 2 files changed, 5 insertions(+), 19 deletions(-)
+
+diff --git a/net/sctp/associola.c b/net/sctp/associola.c
+index be29da09cc7a..3460abceba44 100644
+--- a/net/sctp/associola.c
++++ b/net/sctp/associola.c
+@@ -229,9 +229,8 @@ static struct sctp_association *sctp_association_init(
+       if (!sctp_ulpq_init(&asoc->ulpq, asoc))
+               goto fail_init;
+-      if (sctp_stream_init(&asoc->stream, asoc->c.sinit_num_ostreams,
+-                           0, gfp))
+-              goto fail_init;
++      if (sctp_stream_init(&asoc->stream, asoc->c.sinit_num_ostreams, 0, gfp))
++              goto stream_free;
+       /* Initialize default path MTU. */
+       asoc->pathmtu = sp->pathmtu;
+diff --git a/net/sctp/stream.c b/net/sctp/stream.c
+index 6dc95dcc0ff4..ef9fceadef8d 100644
+--- a/net/sctp/stream.c
++++ b/net/sctp/stream.c
+@@ -137,7 +137,7 @@ int sctp_stream_init(struct sctp_stream *stream, __u16 outcnt, __u16 incnt,
+       ret = sctp_stream_alloc_out(stream, outcnt, gfp);
+       if (ret)
+-              goto out_err;
++              return ret;
+       for (i = 0; i < stream->outcnt; i++)
+               SCTP_SO(stream, i)->state = SCTP_STREAM_OPEN;
+@@ -145,22 +145,9 @@ int sctp_stream_init(struct sctp_stream *stream, __u16 outcnt, __u16 incnt,
+ handle_in:
+       sctp_stream_interleave_init(stream);
+       if (!incnt)
+-              goto out;
+-
+-      ret = sctp_stream_alloc_in(stream, incnt, gfp);
+-      if (ret)
+-              goto in_err;
+-
+-      goto out;
++              return 0;
+-in_err:
+-      sched->free(stream);
+-      genradix_free(&stream->in);
+-out_err:
+-      genradix_free(&stream->out);
+-      stream->outcnt = 0;
+-out:
+-      return ret;
++      return sctp_stream_alloc_in(stream, incnt, gfp);
+ }
+ int sctp_stream_init_ext(struct sctp_stream *stream, __u16 sid)
+-- 
+2.35.1
+
index 879cde6f8bc3c4e5f9db8b858f9b68f5e0db93fd..7c2be9da69b1f08ce07bc37b01531e215011dc45 100644 (file)
@@ -30,3 +30,55 @@ scsi-mpt3sas-stop-fw-fault-watchdog-work-item-during-system-shutdown.patch
 net-ping6-fix-memleak-in-ipv6_renew_options.patch
 ipv6-addrconf-fix-a-null-ptr-deref-bug-for-ip6_ptr.patch
 net-tls-remove-the-context-from-the-list-in-tls_device_down.patch
+igmp-fix-data-races-around-sysctl_igmp_qrv.patch
+s390-archrandom-prevent-cpacf-trng-invocations-in-in.patch
+net-pcs-xpcs-propagate-xpcs_read-error-to-xpcs_get_s.patch
+net-sungem_phy-add-of_node_put-for-reference-returne.patch
+tcp-fix-data-races-around-sysctl_tcp_dsack.patch-17026
+tcp-fix-a-data-race-around-sysctl_tcp_app_win.patch-22294
+tcp-fix-a-data-race-around-sysctl_tcp_adv_win_scale.patch-19790
+tcp-fix-a-data-race-around-sysctl_tcp_frto.patch-3670
+tcp-fix-a-data-race-around-sysctl_tcp_nometrics_save.patch-5497
+tcp-fix-data-races-around-sysctl_tcp_no_ssthresh_met.patch
+tcp-fix-data-races-around-sysctl_tcp_moderate_rcvbuf.patch-32656
+tcp-fix-a-data-race-around-sysctl_tcp_limit_output_b.patch
+tcp-fix-a-data-race-around-sysctl_tcp_challenge_ack_.patch
+tcp-fix-a-data-race-around-sysctl_tcp_min_tso_segs.patch
+tcp-fix-a-data-race-around-sysctl_tcp_min_rtt_wlen.patch
+tcp-fix-a-data-race-around-sysctl_tcp_autocorking.patch
+tcp-fix-a-data-race-around-sysctl_tcp_invalid_rateli.patch
+asm-generic-remove-a-broken-and-needless-ifdef-condi.patch
+revert-tcp-change-pingpong-threshold-to-3.patch-30941
+net-tls-remove-the-context-from-the-list-in-tls_devi.patch
+documentation-fix-sctp_wmem-in-ip-sysctl.rst.patch
+macsec-fix-null-deref-in-macsec_add_rxsa.patch
+macsec-fix-error-message-in-macsec_add_rxsa-and-_txs.patch
+macsec-limit-replay-window-size-with-xpn.patch
+macsec-always-read-macsec_sa_attr_pn-as-a-u64.patch
+net-macsec-fix-potential-resource-leak-in-macsec_add.patch
+net-mld-fix-reference-count-leak-in-mld_-query-repor.patch
+tcp-fix-data-races-around-sk_pacing_rate.patch
+net-fix-data-races-around-sysctl_-rw-mem-_offset.patch
+tcp-fix-a-data-race-around-sysctl_tcp_comp_sack_dela.patch
+tcp-fix-a-data-race-around-sysctl_tcp_comp_sack_slac.patch
+tcp-fix-a-data-race-around-sysctl_tcp_comp_sack_nr.patch
+tcp-fix-data-races-around-sysctl_tcp_reflect_tos.patch
+ipv4-fix-data-races-around-sysctl_fib_notify_on_flag.patch
+i40e-fix-interface-init-with-msi-interrupts-no-msi-x.patch
+sctp-fix-sleep-in-atomic-context-bug-in-timer-handle.patch
+octeontx2-pf-cn10k-fix-egress-ratelimit-configuratio.patch
+octeontx2-pf-fix-udp-tcp-src-and-dst-port-tc-filters.patch-781
+netfilter-nf_queue-do-not-allow-packet-truncation-be.patch
+ice-check-dd-eof-bits-on-rx-descriptor-rather-than-e.patch
+ice-do-not-setup-vlan-for-loopback-vsi.patch-1510
+scsi-mpt3sas-stop-fw-fault-watchdog-work-item-during.patch
+scsi-ufs-host-hold-reference-returned-by-of_parse_ph.patch
+scsi-core-fix-warning-in-scsi_alloc_sgtables.patch-8274
+virtio-net-fix-the-race-between-refill-work-and-clos.patch
+perf-symbol-correct-address-for-bss-symbols.patch
+sfc-disable-softirqs-for-ptp-tx.patch
+sctp-leave-the-err-path-free-in-sctp_stream_init-to-.patch
+watch_queue-fix-missing-rcu-annotation.patch-18505
+watch_queue-fix-missing-locking-in-add_watch_to_obje.patch
+net-ping6-fix-memleak-in-ipv6_renew_options.patch-12523
+ipv6-addrconf-fix-a-null-ptr-deref-bug-for-ip6_ptr.patch-17245
diff --git a/queue-5.15/sfc-disable-softirqs-for-ptp-tx.patch b/queue-5.15/sfc-disable-softirqs-for-ptp-tx.patch
new file mode 100644 (file)
index 0000000..e1097b4
--- /dev/null
@@ -0,0 +1,73 @@
+From ab14b4d9c053d14bda83bf8b9a4e08617547b767 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 26 Jul 2022 08:45:04 +0200
+Subject: sfc: disable softirqs for ptp TX
+
+From: Alejandro Lucero <alejandro.lucero-palau@amd.com>
+
+[ Upstream commit 67c3b611d92fc238c43734878bc3e232ab570c79 ]
+
+Sending a PTP packet can imply to use the normal TX driver datapath but
+invoked from the driver's ptp worker. The kernel generic TX code
+disables softirqs and preemption before calling specific driver TX code,
+but the ptp worker does not. Although current ptp driver functionality
+does not require it, there are several reasons for doing so:
+
+   1) The invoked code is always executed with softirqs disabled for non
+      PTP packets.
+   2) Better if a ptp packet transmission is not interrupted by softirq
+      handling which could lead to high latencies.
+   3) netdev_xmit_more used by the TX code requires preemption to be
+      disabled.
+
+Indeed a solution for dealing with kernel preemption state based on static
+kernel configuration is not possible since the introduction of dynamic
+preemption level configuration at boot time using the static calls
+functionality.
+
+Fixes: f79c957a0b537 ("drivers: net: sfc: use netdev_xmit_more helper")
+Signed-off-by: Alejandro Lucero <alejandro.lucero-palau@amd.com>
+Link: https://lore.kernel.org/r/20220726064504.49613-1-alejandro.lucero-palau@amd.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/sfc/ptp.c | 22 ++++++++++++++++++++++
+ 1 file changed, 22 insertions(+)
+
+diff --git a/drivers/net/ethernet/sfc/ptp.c b/drivers/net/ethernet/sfc/ptp.c
+index 725b0f38813a..a2b4e3befa59 100644
+--- a/drivers/net/ethernet/sfc/ptp.c
++++ b/drivers/net/ethernet/sfc/ptp.c
+@@ -1100,7 +1100,29 @@ static void efx_ptp_xmit_skb_queue(struct efx_nic *efx, struct sk_buff *skb)
+       tx_queue = efx_channel_get_tx_queue(ptp_data->channel, type);
+       if (tx_queue && tx_queue->timestamping) {
++              /* This code invokes normal driver TX code which is always
++               * protected from softirqs when called from generic TX code,
++               * which in turn disables preemption. Look at __dev_queue_xmit
++               * which uses rcu_read_lock_bh disabling preemption for RCU
++               * plus disabling softirqs. We do not need RCU reader
++               * protection here.
++               *
++               * Although it is theoretically safe for current PTP TX/RX code
++               * running without disabling softirqs, there are three good
+               * reasons for doing so:
++               *
++               *      1) The code invoked is mainly implemented for non-PTP
++               *         packets and it is always executed with softirqs
++               *         disabled.
++               *      2) This being a single PTP packet, better to not
++               *         interrupt its processing by softirqs which can lead
++               *         to high latencies.
++               *      3) netdev_xmit_more checks preemption is disabled and
++               *         triggers a BUG_ON if not.
++               */
++              local_bh_disable();
+               efx_enqueue_skb(tx_queue, skb);
++              local_bh_enable();
+       } else {
+               WARN_ONCE(1, "PTP channel has no timestamped tx queue\n");
+               dev_kfree_skb_any(skb);
+-- 
+2.35.1
+
diff --git a/queue-5.15/tcp-fix-a-data-race-around-sysctl_tcp_adv_win_scale.patch-19790 b/queue-5.15/tcp-fix-a-data-race-around-sysctl_tcp_adv_win_scale.patch-19790
new file mode 100644 (file)
index 0000000..5682d6a
--- /dev/null
@@ -0,0 +1,36 @@
+From 2c2c4964d511d85932c83e2cc5ff64cb8ae5c52e Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 20 Jul 2022 09:50:14 -0700
+Subject: tcp: Fix a data-race around sysctl_tcp_adv_win_scale.
+
+From: Kuniyuki Iwashima <kuniyu@amazon.com>
+
+[ Upstream commit 36eeee75ef0157e42fb6593dcc65daab289b559e ]
+
+While reading sysctl_tcp_adv_win_scale, it can be changed concurrently.
+Thus, we need to add READ_ONCE() to its reader.
+
+Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
+Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/net/tcp.h | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/include/net/tcp.h b/include/net/tcp.h
+index 8ce8aafeef0f..76b0d7f2b967 100644
+--- a/include/net/tcp.h
++++ b/include/net/tcp.h
+@@ -1406,7 +1406,7 @@ void tcp_select_initial_window(const struct sock *sk, int __space,
+ static inline int tcp_win_from_space(const struct sock *sk, int space)
+ {
+-      int tcp_adv_win_scale = sock_net(sk)->ipv4.sysctl_tcp_adv_win_scale;
++      int tcp_adv_win_scale = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_adv_win_scale);
+       return tcp_adv_win_scale <= 0 ?
+               (space>>(-tcp_adv_win_scale)) :
+-- 
+2.35.1
+
diff --git a/queue-5.15/tcp-fix-a-data-race-around-sysctl_tcp_app_win.patch-22294 b/queue-5.15/tcp-fix-a-data-race-around-sysctl_tcp_app_win.patch-22294
new file mode 100644 (file)
index 0000000..826d082
--- /dev/null
@@ -0,0 +1,36 @@
+From 01d0355715b3e8b0718bc8922ecd294148810b21 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 20 Jul 2022 09:50:13 -0700
+Subject: tcp: Fix a data-race around sysctl_tcp_app_win.
+
+From: Kuniyuki Iwashima <kuniyu@amazon.com>
+
+[ Upstream commit 02ca527ac5581cf56749db9fd03d854e842253dd ]
+
+While reading sysctl_tcp_app_win, it can be changed concurrently.
+Thus, we need to add READ_ONCE() to its reader.
+
+Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
+Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/ipv4/tcp_input.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
+index e066c527a723..1c940517f5f5 100644
+--- a/net/ipv4/tcp_input.c
++++ b/net/ipv4/tcp_input.c
+@@ -526,7 +526,7 @@ static void tcp_grow_window(struct sock *sk, const struct sk_buff *skb,
+  */
+ static void tcp_init_buffer_space(struct sock *sk)
+ {
+-      int tcp_app_win = sock_net(sk)->ipv4.sysctl_tcp_app_win;
++      int tcp_app_win = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_app_win);
+       struct tcp_sock *tp = tcp_sk(sk);
+       int maxwin;
+-- 
+2.35.1
+
diff --git a/queue-5.15/tcp-fix-a-data-race-around-sysctl_tcp_autocorking.patch b/queue-5.15/tcp-fix-a-data-race-around-sysctl_tcp_autocorking.patch
new file mode 100644 (file)
index 0000000..8be468d
--- /dev/null
@@ -0,0 +1,36 @@
+From 935b89148da41562bb34b63fc5d5bc9cd8206477 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 20 Jul 2022 09:50:25 -0700
+Subject: tcp: Fix a data-race around sysctl_tcp_autocorking.
+
+From: Kuniyuki Iwashima <kuniyu@amazon.com>
+
+[ Upstream commit 85225e6f0a76e6745bc841c9f25169c509b573d8 ]
+
+While reading sysctl_tcp_autocorking, it can be changed concurrently.
+Thus, we need to add READ_ONCE() to its reader.
+
+Fixes: f54b311142a9 ("tcp: auto corking")
+Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/ipv4/tcp.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
+index 1abdb8712655..7ba9059c263a 100644
+--- a/net/ipv4/tcp.c
++++ b/net/ipv4/tcp.c
+@@ -694,7 +694,7 @@ static bool tcp_should_autocork(struct sock *sk, struct sk_buff *skb,
+                               int size_goal)
+ {
+       return skb->len < size_goal &&
+-             sock_net(sk)->ipv4.sysctl_tcp_autocorking &&
++             READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_autocorking) &&
+              !tcp_rtx_queue_empty(sk) &&
+              refcount_read(&sk->sk_wmem_alloc) > skb->truesize;
+ }
+-- 
+2.35.1
+
diff --git a/queue-5.15/tcp-fix-a-data-race-around-sysctl_tcp_challenge_ack_.patch b/queue-5.15/tcp-fix-a-data-race-around-sysctl_tcp_challenge_ack_.patch
new file mode 100644 (file)
index 0000000..45b3be5
--- /dev/null
@@ -0,0 +1,36 @@
+From 820771349c258fcb893ad8ff9992b0cc1d27de45 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 20 Jul 2022 09:50:21 -0700
+Subject: tcp: Fix a data-race around sysctl_tcp_challenge_ack_limit.
+
+From: Kuniyuki Iwashima <kuniyu@amazon.com>
+
+[ Upstream commit db3815a2fa691da145cfbe834584f31ad75df9ff ]
+
+While reading sysctl_tcp_challenge_ack_limit, it can be changed
+concurrently.  Thus, we need to add READ_ONCE() to its reader.
+
+Fixes: 282f23c6ee34 ("tcp: implement RFC 5961 3.2")
+Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/ipv4/tcp_input.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
+index 426f8fe02850..a5357ebfbcc0 100644
+--- a/net/ipv4/tcp_input.c
++++ b/net/ipv4/tcp_input.c
+@@ -3622,7 +3622,7 @@ static void tcp_send_challenge_ack(struct sock *sk, const struct sk_buff *skb)
+       /* Then check host-wide RFC 5961 rate limit. */
+       now = jiffies / HZ;
+       if (now != challenge_timestamp) {
+-              u32 ack_limit = net->ipv4.sysctl_tcp_challenge_ack_limit;
++              u32 ack_limit = READ_ONCE(net->ipv4.sysctl_tcp_challenge_ack_limit);
+               u32 half = (ack_limit + 1) >> 1;
+               challenge_timestamp = now;
+-- 
+2.35.1
+
diff --git a/queue-5.15/tcp-fix-a-data-race-around-sysctl_tcp_comp_sack_dela.patch b/queue-5.15/tcp-fix-a-data-race-around-sysctl_tcp_comp_sack_dela.patch
new file mode 100644 (file)
index 0000000..748156b
--- /dev/null
@@ -0,0 +1,37 @@
+From bdefd924f38e2c716687fca5a9dde96310d3da23 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 22 Jul 2022 11:22:01 -0700
+Subject: tcp: Fix a data-race around sysctl_tcp_comp_sack_delay_ns.
+
+From: Kuniyuki Iwashima <kuniyu@amazon.com>
+
+[ Upstream commit 4866b2b0f7672b6d760c4b8ece6fb56f965dcc8a ]
+
+While reading sysctl_tcp_comp_sack_delay_ns, it can be changed
+concurrently.  Thus, we need to add READ_ONCE() to its reader.
+
+Fixes: 6d82aa242092 ("tcp: add tcp_comp_sack_delay_ns sysctl")
+Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/ipv4/tcp_input.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
+index e007bdc20e82..486ca1d5b436 100644
+--- a/net/ipv4/tcp_input.c
++++ b/net/ipv4/tcp_input.c
+@@ -5512,7 +5512,8 @@ static void __tcp_ack_snd_check(struct sock *sk, int ofo_possible)
+       if (tp->srtt_us && tp->srtt_us < rtt)
+               rtt = tp->srtt_us;
+-      delay = min_t(unsigned long, sock_net(sk)->ipv4.sysctl_tcp_comp_sack_delay_ns,
++      delay = min_t(unsigned long,
++                    READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_comp_sack_delay_ns),
+                     rtt * (NSEC_PER_USEC >> 3)/20);
+       sock_hold(sk);
+       hrtimer_start_range_ns(&tp->compressed_ack_timer, ns_to_ktime(delay),
+-- 
+2.35.1
+
diff --git a/queue-5.15/tcp-fix-a-data-race-around-sysctl_tcp_comp_sack_nr.patch b/queue-5.15/tcp-fix-a-data-race-around-sysctl_tcp_comp_sack_nr.patch
new file mode 100644 (file)
index 0000000..2caa928
--- /dev/null
@@ -0,0 +1,36 @@
+From e0220c760de403bd66edcf660c26d8142a0ea7b5 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 22 Jul 2022 11:22:03 -0700
+Subject: tcp: Fix a data-race around sysctl_tcp_comp_sack_nr.
+
+From: Kuniyuki Iwashima <kuniyu@amazon.com>
+
+[ Upstream commit 79f55473bfc8ac51bd6572929a679eeb4da22251 ]
+
+While reading sysctl_tcp_comp_sack_nr, it can be changed concurrently.
+Thus, we need to add READ_ONCE() to its reader.
+
+Fixes: 9c21d2fc41c0 ("tcp: add tcp_comp_sack_nr sysctl")
+Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/ipv4/tcp_input.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
+index 7b593865b4ae..a33e6aa42a4c 100644
+--- a/net/ipv4/tcp_input.c
++++ b/net/ipv4/tcp_input.c
+@@ -5491,7 +5491,7 @@ static void __tcp_ack_snd_check(struct sock *sk, int ofo_possible)
+       }
+       if (!tcp_is_sack(tp) ||
+-          tp->compressed_ack >= sock_net(sk)->ipv4.sysctl_tcp_comp_sack_nr)
++          tp->compressed_ack >= READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_comp_sack_nr))
+               goto send_now;
+       if (tp->compressed_ack_rcv_nxt != tp->rcv_nxt) {
+-- 
+2.35.1
+
diff --git a/queue-5.15/tcp-fix-a-data-race-around-sysctl_tcp_comp_sack_slac.patch b/queue-5.15/tcp-fix-a-data-race-around-sysctl_tcp_comp_sack_slac.patch
new file mode 100644 (file)
index 0000000..03b859f
--- /dev/null
@@ -0,0 +1,36 @@
+From d01503cad72cd3674173d67418ac2e164136ede7 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 22 Jul 2022 11:22:02 -0700
+Subject: tcp: Fix a data-race around sysctl_tcp_comp_sack_slack_ns.
+
+From: Kuniyuki Iwashima <kuniyu@amazon.com>
+
+[ Upstream commit 22396941a7f343d704738360f9ef0e6576489d43 ]
+
+While reading sysctl_tcp_comp_sack_slack_ns, it can be changed
+concurrently.  Thus, we need to add READ_ONCE() to its reader.
+
+Fixes: a70437cc09a1 ("tcp: add hrtimer slack to sack compression")
+Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/ipv4/tcp_input.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
+index 486ca1d5b436..7b593865b4ae 100644
+--- a/net/ipv4/tcp_input.c
++++ b/net/ipv4/tcp_input.c
+@@ -5517,7 +5517,7 @@ static void __tcp_ack_snd_check(struct sock *sk, int ofo_possible)
+                     rtt * (NSEC_PER_USEC >> 3)/20);
+       sock_hold(sk);
+       hrtimer_start_range_ns(&tp->compressed_ack_timer, ns_to_ktime(delay),
+-                             sock_net(sk)->ipv4.sysctl_tcp_comp_sack_slack_ns,
++                             READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_comp_sack_slack_ns),
+                              HRTIMER_MODE_REL_PINNED_SOFT);
+ }
+-- 
+2.35.1
+
diff --git a/queue-5.15/tcp-fix-a-data-race-around-sysctl_tcp_frto.patch-3670 b/queue-5.15/tcp-fix-a-data-race-around-sysctl_tcp_frto.patch-3670
new file mode 100644 (file)
index 0000000..425c0d5
--- /dev/null
@@ -0,0 +1,36 @@
+From 64fd28e6e558585b066d00ac8cd29d08a2eb58b1 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 20 Jul 2022 09:50:15 -0700
+Subject: tcp: Fix a data-race around sysctl_tcp_frto.
+
+From: Kuniyuki Iwashima <kuniyu@amazon.com>
+
+[ Upstream commit 706c6202a3589f290e1ef9be0584a8f4a3cc0507 ]
+
+While reading sysctl_tcp_frto, it can be changed concurrently.
+Thus, we need to add READ_ONCE() to its reader.
+
+Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
+Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/ipv4/tcp_input.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
+index 1c940517f5f5..b9fd51826aea 100644
+--- a/net/ipv4/tcp_input.c
++++ b/net/ipv4/tcp_input.c
+@@ -2167,7 +2167,7 @@ void tcp_enter_loss(struct sock *sk)
+        * loss recovery is underway except recurring timeout(s) on
+        * the same SND.UNA (sec 3.2). Disable F-RTO on path MTU probing
+        */
+-      tp->frto = net->ipv4.sysctl_tcp_frto &&
++      tp->frto = READ_ONCE(net->ipv4.sysctl_tcp_frto) &&
+                  (new_recovery || icsk->icsk_retransmits) &&
+                  !inet_csk(sk)->icsk_mtup.probe_size;
+ }
+-- 
+2.35.1
+
diff --git a/queue-5.15/tcp-fix-a-data-race-around-sysctl_tcp_invalid_rateli.patch b/queue-5.15/tcp-fix-a-data-race-around-sysctl_tcp_invalid_rateli.patch
new file mode 100644 (file)
index 0000000..dd05d7d
--- /dev/null
@@ -0,0 +1,37 @@
+From 1d33fed2a2ede95683224e4c14a1d86273544711 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 20 Jul 2022 09:50:26 -0700
+Subject: tcp: Fix a data-race around sysctl_tcp_invalid_ratelimit.
+
+From: Kuniyuki Iwashima <kuniyu@amazon.com>
+
+[ Upstream commit 2afdbe7b8de84c28e219073a6661080e1b3ded48 ]
+
+While reading sysctl_tcp_invalid_ratelimit, it can be changed
+concurrently.  Thus, we need to add READ_ONCE() to its reader.
+
+Fixes: 032ee4236954 ("tcp: helpers to mitigate ACK loops by rate-limiting out-of-window dupacks")
+Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/ipv4/tcp_input.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
+index b925c766f1d2..018be3f346e6 100644
+--- a/net/ipv4/tcp_input.c
++++ b/net/ipv4/tcp_input.c
+@@ -3574,7 +3574,8 @@ static bool __tcp_oow_rate_limited(struct net *net, int mib_idx,
+       if (*last_oow_ack_time) {
+               s32 elapsed = (s32)(tcp_jiffies32 - *last_oow_ack_time);
+-              if (0 <= elapsed && elapsed < net->ipv4.sysctl_tcp_invalid_ratelimit) {
++              if (0 <= elapsed &&
++                  elapsed < READ_ONCE(net->ipv4.sysctl_tcp_invalid_ratelimit)) {
+                       NET_INC_STATS(net, mib_idx);
+                       return true;    /* rate-limited: don't send yet! */
+               }
+-- 
+2.35.1
+
diff --git a/queue-5.15/tcp-fix-a-data-race-around-sysctl_tcp_limit_output_b.patch b/queue-5.15/tcp-fix-a-data-race-around-sysctl_tcp_limit_output_b.patch
new file mode 100644 (file)
index 0000000..7eb9b1a
--- /dev/null
@@ -0,0 +1,36 @@
+From 68b671415407d91c1e4a99211604b999304afc85 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 20 Jul 2022 09:50:20 -0700
+Subject: tcp: Fix a data-race around sysctl_tcp_limit_output_bytes.
+
+From: Kuniyuki Iwashima <kuniyu@amazon.com>
+
+[ Upstream commit 9fb90193fbd66b4c5409ef729fd081861f8b6351 ]
+
+While reading sysctl_tcp_limit_output_bytes, it can be changed
+concurrently.  Thus, we need to add READ_ONCE() to its reader.
+
+Fixes: 46d3ceabd8d9 ("tcp: TCP Small Queues")
+Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/ipv4/tcp_output.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
+index caf9283f9b0f..8b6d89bb2d36 100644
+--- a/net/ipv4/tcp_output.c
++++ b/net/ipv4/tcp_output.c
+@@ -2506,7 +2506,7 @@ static bool tcp_small_queue_check(struct sock *sk, const struct sk_buff *skb,
+                     sk->sk_pacing_rate >> READ_ONCE(sk->sk_pacing_shift));
+       if (sk->sk_pacing_status == SK_PACING_NONE)
+               limit = min_t(unsigned long, limit,
+-                            sock_net(sk)->ipv4.sysctl_tcp_limit_output_bytes);
++                            READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_limit_output_bytes));
+       limit <<= factor;
+       if (static_branch_unlikely(&tcp_tx_delay_enabled) &&
+-- 
+2.35.1
+
diff --git a/queue-5.15/tcp-fix-a-data-race-around-sysctl_tcp_min_rtt_wlen.patch b/queue-5.15/tcp-fix-a-data-race-around-sysctl_tcp_min_rtt_wlen.patch
new file mode 100644 (file)
index 0000000..7556af2
--- /dev/null
@@ -0,0 +1,36 @@
+From 1a6202a794340f35493415e040698c5eef71a1a4 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 20 Jul 2022 09:50:24 -0700
+Subject: tcp: Fix a data-race around sysctl_tcp_min_rtt_wlen.
+
+From: Kuniyuki Iwashima <kuniyu@amazon.com>
+
+[ Upstream commit 1330ffacd05fc9ac4159d19286ce119e22450ed2 ]
+
+While reading sysctl_tcp_min_rtt_wlen, it can be changed concurrently.
+Thus, we need to add READ_ONCE() to its reader.
+
+Fixes: f672258391b4 ("tcp: track min RTT using windowed min-filter")
+Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/ipv4/tcp_input.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
+index a5357ebfbcc0..b925c766f1d2 100644
+--- a/net/ipv4/tcp_input.c
++++ b/net/ipv4/tcp_input.c
+@@ -3050,7 +3050,7 @@ static void tcp_fastretrans_alert(struct sock *sk, const u32 prior_snd_una,
+ static void tcp_update_rtt_min(struct sock *sk, u32 rtt_us, const int flag)
+ {
+-      u32 wlen = sock_net(sk)->ipv4.sysctl_tcp_min_rtt_wlen * HZ;
++      u32 wlen = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_min_rtt_wlen) * HZ;
+       struct tcp_sock *tp = tcp_sk(sk);
+       if ((flag & FLAG_ACK_MAYBE_DELAYED) && rtt_us > tcp_min_rtt(tp)) {
+-- 
+2.35.1
+
diff --git a/queue-5.15/tcp-fix-a-data-race-around-sysctl_tcp_min_tso_segs.patch b/queue-5.15/tcp-fix-a-data-race-around-sysctl_tcp_min_tso_segs.patch
new file mode 100644 (file)
index 0000000..ea295f8
--- /dev/null
@@ -0,0 +1,36 @@
+From 4ffa907eee81733f9847740d879df8380366c7a1 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 20 Jul 2022 09:50:22 -0700
+Subject: tcp: Fix a data-race around sysctl_tcp_min_tso_segs.
+
+From: Kuniyuki Iwashima <kuniyu@amazon.com>
+
+[ Upstream commit e0bb4ab9dfddd872622239f49fb2bd403b70853b ]
+
+While reading sysctl_tcp_min_tso_segs, it can be changed concurrently.
+Thus, we need to add READ_ONCE() to its reader.
+
+Fixes: 95bd09eb2750 ("tcp: TSO packets automatic sizing")
+Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/ipv4/tcp_output.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
+index 8b6d89bb2d36..3a84553fb4ed 100644
+--- a/net/ipv4/tcp_output.c
++++ b/net/ipv4/tcp_output.c
+@@ -1989,7 +1989,7 @@ static u32 tcp_tso_segs(struct sock *sk, unsigned int mss_now)
+       min_tso = ca_ops->min_tso_segs ?
+                       ca_ops->min_tso_segs(sk) :
+-                      sock_net(sk)->ipv4.sysctl_tcp_min_tso_segs;
++                      READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_min_tso_segs);
+       tso_segs = tcp_tso_autosize(sk, mss_now, min_tso);
+       return min_t(u32, tso_segs, sk->sk_gso_max_segs);
+-- 
+2.35.1
+
diff --git a/queue-5.15/tcp-fix-a-data-race-around-sysctl_tcp_nometrics_save.patch-5497 b/queue-5.15/tcp-fix-a-data-race-around-sysctl_tcp_nometrics_save.patch-5497
new file mode 100644 (file)
index 0000000..4afa21d
--- /dev/null
@@ -0,0 +1,36 @@
+From 72a1d5ff658140f598f89c99c368e997e4dd6ae7 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 20 Jul 2022 09:50:16 -0700
+Subject: tcp: Fix a data-race around sysctl_tcp_nometrics_save.
+
+From: Kuniyuki Iwashima <kuniyu@amazon.com>
+
+[ Upstream commit 8499a2454d9e8a55ce616ede9f9580f36fd5b0f3 ]
+
+While reading sysctl_tcp_nometrics_save, it can be changed concurrently.
+Thus, we need to add READ_ONCE() to its reader.
+
+Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
+Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/ipv4/tcp_metrics.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c
+index a501150deaa3..9dcc418a26f2 100644
+--- a/net/ipv4/tcp_metrics.c
++++ b/net/ipv4/tcp_metrics.c
+@@ -329,7 +329,7 @@ void tcp_update_metrics(struct sock *sk)
+       int m;
+       sk_dst_confirm(sk);
+-      if (net->ipv4.sysctl_tcp_nometrics_save || !dst)
++      if (READ_ONCE(net->ipv4.sysctl_tcp_nometrics_save) || !dst)
+               return;
+       rcu_read_lock();
+-- 
+2.35.1
+
diff --git a/queue-5.15/tcp-fix-data-races-around-sk_pacing_rate.patch b/queue-5.15/tcp-fix-data-races-around-sk_pacing_rate.patch
new file mode 100644 (file)
index 0000000..ff17876
--- /dev/null
@@ -0,0 +1,39 @@
+From 4f19f4db66e4df97f9b37509bbe6497cffa150c7 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 22 Jul 2022 11:21:59 -0700
+Subject: tcp: Fix data-races around sk_pacing_rate.
+
+From: Kuniyuki Iwashima <kuniyu@amazon.com>
+
+[ Upstream commit 59bf6c65a09fff74215517aecffbbdcd67df76e3 ]
+
+While reading sysctl_tcp_pacing_(ss|ca)_ratio, they can be changed
+concurrently.  Thus, we need to add READ_ONCE() to their readers.
+
+Fixes: 43e122b014c9 ("tcp: refine pacing rate determination")
+Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/ipv4/tcp_input.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
+index 018be3f346e6..566745f527fe 100644
+--- a/net/ipv4/tcp_input.c
++++ b/net/ipv4/tcp_input.c
+@@ -902,9 +902,9 @@ static void tcp_update_pacing_rate(struct sock *sk)
+        *       end of slow start and should slow down.
+        */
+       if (tcp_snd_cwnd(tp) < tp->snd_ssthresh / 2)
+-              rate *= sock_net(sk)->ipv4.sysctl_tcp_pacing_ss_ratio;
++              rate *= READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_pacing_ss_ratio);
+       else
+-              rate *= sock_net(sk)->ipv4.sysctl_tcp_pacing_ca_ratio;
++              rate *= READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_pacing_ca_ratio);
+       rate *= max(tcp_snd_cwnd(tp), tp->packets_out);
+-- 
+2.35.1
+
diff --git a/queue-5.15/tcp-fix-data-races-around-sysctl_tcp_dsack.patch-17026 b/queue-5.15/tcp-fix-data-races-around-sysctl_tcp_dsack.patch-17026
new file mode 100644 (file)
index 0000000..40a87e6
--- /dev/null
@@ -0,0 +1,45 @@
+From f9e076cbb094b959e6e2304894fff853a915d812 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 20 Jul 2022 09:50:12 -0700
+Subject: tcp: Fix data-races around sysctl_tcp_dsack.
+
+From: Kuniyuki Iwashima <kuniyu@amazon.com>
+
+[ Upstream commit 58ebb1c8b35a8ef38cd6927431e0fa7b173a632d ]
+
+While reading sysctl_tcp_dsack, it can be changed concurrently.
+Thus, we need to add READ_ONCE() to its readers.
+
+Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
+Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/ipv4/tcp_input.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
+index 2d21d8bf3b8c..e066c527a723 100644
+--- a/net/ipv4/tcp_input.c
++++ b/net/ipv4/tcp_input.c
+@@ -4419,7 +4419,7 @@ static void tcp_dsack_set(struct sock *sk, u32 seq, u32 end_seq)
+ {
+       struct tcp_sock *tp = tcp_sk(sk);
+-      if (tcp_is_sack(tp) && sock_net(sk)->ipv4.sysctl_tcp_dsack) {
++      if (tcp_is_sack(tp) && READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_dsack)) {
+               int mib_idx;
+               if (before(seq, tp->rcv_nxt))
+@@ -4466,7 +4466,7 @@ static void tcp_send_dupack(struct sock *sk, const struct sk_buff *skb)
+               NET_INC_STATS(sock_net(sk), LINUX_MIB_DELAYEDACKLOST);
+               tcp_enter_quickack_mode(sk, TCP_MAX_QUICKACKS);
+-              if (tcp_is_sack(tp) && sock_net(sk)->ipv4.sysctl_tcp_dsack) {
++              if (tcp_is_sack(tp) && READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_dsack)) {
+                       u32 end_seq = TCP_SKB_CB(skb)->end_seq;
+                       tcp_rcv_spurious_retrans(sk, skb);
+-- 
+2.35.1
+
diff --git a/queue-5.15/tcp-fix-data-races-around-sysctl_tcp_moderate_rcvbuf.patch-32656 b/queue-5.15/tcp-fix-data-races-around-sysctl_tcp_moderate_rcvbuf.patch-32656
new file mode 100644 (file)
index 0000000..50ac547
--- /dev/null
@@ -0,0 +1,50 @@
+From b3a4a46c8114c2565fd8b6758f66f760b12588da Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 20 Jul 2022 09:50:18 -0700
+Subject: tcp: Fix data-races around sysctl_tcp_moderate_rcvbuf.
+
+From: Kuniyuki Iwashima <kuniyu@amazon.com>
+
+[ Upstream commit 780476488844e070580bfc9e3bc7832ec1cea883 ]
+
+While reading sysctl_tcp_moderate_rcvbuf, it can be changed
+concurrently.  Thus, we need to add READ_ONCE() to its readers.
+
+Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
+Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/ipv4/tcp_input.c | 2 +-
+ net/mptcp/protocol.c | 2 +-
+ 2 files changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
+index b9fd51826aea..426f8fe02850 100644
+--- a/net/ipv4/tcp_input.c
++++ b/net/ipv4/tcp_input.c
+@@ -716,7 +716,7 @@ void tcp_rcv_space_adjust(struct sock *sk)
+        * <prev RTT . ><current RTT .. ><next RTT .... >
+        */
+-      if (sock_net(sk)->ipv4.sysctl_tcp_moderate_rcvbuf &&
++      if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_moderate_rcvbuf) &&
+           !(sk->sk_userlocks & SOCK_RCVBUF_LOCK)) {
+               int rcvmem, rcvbuf;
+               u64 rcvwin, grow;
+diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
+index d6def23b8cba..01ede89e3c46 100644
+--- a/net/mptcp/protocol.c
++++ b/net/mptcp/protocol.c
+@@ -1881,7 +1881,7 @@ static void mptcp_rcv_space_adjust(struct mptcp_sock *msk, int copied)
+       if (msk->rcvq_space.copied <= msk->rcvq_space.space)
+               goto new_measure;
+-      if (sock_net(sk)->ipv4.sysctl_tcp_moderate_rcvbuf &&
++      if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_moderate_rcvbuf) &&
+           !(sk->sk_userlocks & SOCK_RCVBUF_LOCK)) {
+               int rcvmem, rcvbuf;
+               u64 rcvwin, grow;
+-- 
+2.35.1
+
diff --git a/queue-5.15/tcp-fix-data-races-around-sysctl_tcp_no_ssthresh_met.patch b/queue-5.15/tcp-fix-data-races-around-sysctl_tcp_no_ssthresh_met.patch
new file mode 100644 (file)
index 0000000..4a6d236
--- /dev/null
@@ -0,0 +1,63 @@
+From 6b07ef9a9ac13f3e43fb3b713c23345768eb51d6 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 20 Jul 2022 09:50:17 -0700
+Subject: tcp: Fix data-races around sysctl_tcp_no_ssthresh_metrics_save.
+
+From: Kuniyuki Iwashima <kuniyu@amazon.com>
+
+[ Upstream commit ab1ba21b523ab496b1a4a8e396333b24b0a18f9a ]
+
+While reading sysctl_tcp_no_ssthresh_metrics_save, it can be changed
+concurrently.  Thus, we need to add READ_ONCE() to its readers.
+
+Fixes: 65e6d90168f3 ("net-tcp: Disable TCP ssthresh metrics cache by default")
+Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/ipv4/tcp_metrics.c | 8 ++++----
+ 1 file changed, 4 insertions(+), 4 deletions(-)
+
+diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c
+index 9dcc418a26f2..d58e672be31c 100644
+--- a/net/ipv4/tcp_metrics.c
++++ b/net/ipv4/tcp_metrics.c
+@@ -385,7 +385,7 @@ void tcp_update_metrics(struct sock *sk)
+       if (tcp_in_initial_slowstart(tp)) {
+               /* Slow start still did not finish. */
+-              if (!net->ipv4.sysctl_tcp_no_ssthresh_metrics_save &&
++              if (!READ_ONCE(net->ipv4.sysctl_tcp_no_ssthresh_metrics_save) &&
+                   !tcp_metric_locked(tm, TCP_METRIC_SSTHRESH)) {
+                       val = tcp_metric_get(tm, TCP_METRIC_SSTHRESH);
+                       if (val && (tcp_snd_cwnd(tp) >> 1) > val)
+@@ -401,7 +401,7 @@ void tcp_update_metrics(struct sock *sk)
+       } else if (!tcp_in_slow_start(tp) &&
+                  icsk->icsk_ca_state == TCP_CA_Open) {
+               /* Cong. avoidance phase, cwnd is reliable. */
+-              if (!net->ipv4.sysctl_tcp_no_ssthresh_metrics_save &&
++              if (!READ_ONCE(net->ipv4.sysctl_tcp_no_ssthresh_metrics_save) &&
+                   !tcp_metric_locked(tm, TCP_METRIC_SSTHRESH))
+                       tcp_metric_set(tm, TCP_METRIC_SSTHRESH,
+                                      max(tcp_snd_cwnd(tp) >> 1, tp->snd_ssthresh));
+@@ -418,7 +418,7 @@ void tcp_update_metrics(struct sock *sk)
+                       tcp_metric_set(tm, TCP_METRIC_CWND,
+                                      (val + tp->snd_ssthresh) >> 1);
+               }
+-              if (!net->ipv4.sysctl_tcp_no_ssthresh_metrics_save &&
++              if (!READ_ONCE(net->ipv4.sysctl_tcp_no_ssthresh_metrics_save) &&
+                   !tcp_metric_locked(tm, TCP_METRIC_SSTHRESH)) {
+                       val = tcp_metric_get(tm, TCP_METRIC_SSTHRESH);
+                       if (val && tp->snd_ssthresh > val)
+@@ -463,7 +463,7 @@ void tcp_init_metrics(struct sock *sk)
+       if (tcp_metric_locked(tm, TCP_METRIC_CWND))
+               tp->snd_cwnd_clamp = tcp_metric_get(tm, TCP_METRIC_CWND);
+-      val = net->ipv4.sysctl_tcp_no_ssthresh_metrics_save ?
++      val = READ_ONCE(net->ipv4.sysctl_tcp_no_ssthresh_metrics_save) ?
+             0 : tcp_metric_get(tm, TCP_METRIC_SSTHRESH);
+       if (val) {
+               tp->snd_ssthresh = val;
+-- 
+2.35.1
+
diff --git a/queue-5.15/tcp-fix-data-races-around-sysctl_tcp_reflect_tos.patch b/queue-5.15/tcp-fix-data-races-around-sysctl_tcp_reflect_tos.patch
new file mode 100644 (file)
index 0000000..a7f7c27
--- /dev/null
@@ -0,0 +1,69 @@
+From 813c29e1b67ed114af8c31db5698f4506e3f650b Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 22 Jul 2022 11:22:04 -0700
+Subject: tcp: Fix data-races around sysctl_tcp_reflect_tos.
+
+From: Kuniyuki Iwashima <kuniyu@amazon.com>
+
+[ Upstream commit 870e3a634b6a6cb1543b359007aca73fe6a03ac5 ]
+
+While reading sysctl_tcp_reflect_tos, it can be changed concurrently.
+Thus, we need to add READ_ONCE() to its readers.
+
+Fixes: ac8f1710c12b ("tcp: reflect tos value received in SYN to the socket")
+Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
+Acked-by: Wei Wang <weiwan@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/ipv4/tcp_ipv4.c | 4 ++--
+ net/ipv6/tcp_ipv6.c | 4 ++--
+ 2 files changed, 4 insertions(+), 4 deletions(-)
+
+diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
+index fba02cf6b468..dae0776c4948 100644
+--- a/net/ipv4/tcp_ipv4.c
++++ b/net/ipv4/tcp_ipv4.c
+@@ -1004,7 +1004,7 @@ static int tcp_v4_send_synack(const struct sock *sk, struct dst_entry *dst,
+       if (skb) {
+               __tcp_v4_send_check(skb, ireq->ir_loc_addr, ireq->ir_rmt_addr);
+-              tos = sock_net(sk)->ipv4.sysctl_tcp_reflect_tos ?
++              tos = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reflect_tos) ?
+                               (tcp_rsk(req)->syn_tos & ~INET_ECN_MASK) |
+                               (inet_sk(sk)->tos & INET_ECN_MASK) :
+                               inet_sk(sk)->tos;
+@@ -1590,7 +1590,7 @@ struct sock *tcp_v4_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
+       /* Set ToS of the new socket based upon the value of incoming SYN.
+        * ECT bits are set later in tcp_init_transfer().
+        */
+-      if (sock_net(sk)->ipv4.sysctl_tcp_reflect_tos)
++      if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reflect_tos))
+               newinet->tos = tcp_rsk(req)->syn_tos & ~INET_ECN_MASK;
+       if (!dst) {
+diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
+index beaa0c2ada23..8ab39cf57d43 100644
+--- a/net/ipv6/tcp_ipv6.c
++++ b/net/ipv6/tcp_ipv6.c
+@@ -542,7 +542,7 @@ static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst,
+               if (np->repflow && ireq->pktopts)
+                       fl6->flowlabel = ip6_flowlabel(ipv6_hdr(ireq->pktopts));
+-              tclass = sock_net(sk)->ipv4.sysctl_tcp_reflect_tos ?
++              tclass = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reflect_tos) ?
+                               (tcp_rsk(req)->syn_tos & ~INET_ECN_MASK) |
+                               (np->tclass & INET_ECN_MASK) :
+                               np->tclass;
+@@ -1364,7 +1364,7 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *
+       /* Set ToS of the new socket based upon the value of incoming SYN.
+        * ECT bits are set later in tcp_init_transfer().
+        */
+-      if (sock_net(sk)->ipv4.sysctl_tcp_reflect_tos)
++      if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reflect_tos))
+               newnp->tclass = tcp_rsk(req)->syn_tos & ~INET_ECN_MASK;
+       /* Clone native IPv6 options from listening socket (if any)
+-- 
+2.35.1
+
diff --git a/queue-5.15/virtio-net-fix-the-race-between-refill-work-and-clos.patch b/queue-5.15/virtio-net-fix-the-race-between-refill-work-and-clos.patch
new file mode 100644 (file)
index 0000000..c5cbfa0
--- /dev/null
@@ -0,0 +1,151 @@
+From ef017bfd0c1275b4874825a97bff716abdb151ad Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 25 Jul 2022 15:21:59 +0800
+Subject: virtio-net: fix the race between refill work and close
+
+From: Jason Wang <jasowang@redhat.com>
+
+[ Upstream commit 5a159128faff151b7fe5f4eb0f310b1e0a2d56bf ]
+
+We try using cancel_delayed_work_sync() to prevent the work from
+enabling NAPI. This is insufficient since we don't disable the source
+of the refill work scheduling. This means a NAPI poll callback running
+after cancel_delayed_work_sync() can schedule the refill work, which
+can then re-enable NAPI and lead to a use-after-free [1].
+
+Since the work can enable NAPI, we can't simply disable NAPI before
+calling cancel_delayed_work_sync(). So fix this by introducing a
+dedicated boolean to control whether or not the work could be
+scheduled from NAPI.
+
+[1]
+==================================================================
+BUG: KASAN: use-after-free in refill_work+0x43/0xd4
+Read of size 2 at addr ffff88810562c92e by task kworker/2:1/42
+
+CPU: 2 PID: 42 Comm: kworker/2:1 Not tainted 5.19.0-rc1+ #480
+Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.16.0-0-gd239552ce722-prebuilt.qemu.org 04/01/2014
+Workqueue: events refill_work
+Call Trace:
+ <TASK>
+ dump_stack_lvl+0x34/0x44
+ print_report.cold+0xbb/0x6ac
+ ? _printk+0xad/0xde
+ ? refill_work+0x43/0xd4
+ kasan_report+0xa8/0x130
+ ? refill_work+0x43/0xd4
+ refill_work+0x43/0xd4
+ process_one_work+0x43d/0x780
+ worker_thread+0x2a0/0x6f0
+ ? process_one_work+0x780/0x780
+ kthread+0x167/0x1a0
+ ? kthread_exit+0x50/0x50
+ ret_from_fork+0x22/0x30
+ </TASK>
+...
+
+Fixes: b2baed69e605c ("virtio_net: set/cancel work on ndo_open/ndo_stop")
+Signed-off-by: Jason Wang <jasowang@redhat.com>
+Acked-by: Michael S. Tsirkin <mst@redhat.com>
+Reviewed-by: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/virtio_net.c | 37 ++++++++++++++++++++++++++++++++++---
+ 1 file changed, 34 insertions(+), 3 deletions(-)
+
+diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
+index 318c681ad63e..53cefad2a79d 100644
+--- a/drivers/net/virtio_net.c
++++ b/drivers/net/virtio_net.c
+@@ -213,9 +213,15 @@ struct virtnet_info {
+       /* Packet virtio header size */
+       u8 hdr_len;
+-      /* Work struct for refilling if we run low on memory. */
++      /* Work struct for delayed refilling if we run low on memory. */
+       struct delayed_work refill;
++      /* Is delayed refill enabled? */
++      bool refill_enabled;
++
++      /* The lock to synchronize the access to refill_enabled */
++      spinlock_t refill_lock;
++
+       /* Work struct for config space updates */
+       struct work_struct config_work;
+@@ -319,6 +325,20 @@ static struct page *get_a_page(struct receive_queue *rq, gfp_t gfp_mask)
+       return p;
+ }
++static void enable_delayed_refill(struct virtnet_info *vi)
++{
++      spin_lock_bh(&vi->refill_lock);
++      vi->refill_enabled = true;
++      spin_unlock_bh(&vi->refill_lock);
++}
++
++static void disable_delayed_refill(struct virtnet_info *vi)
++{
++      spin_lock_bh(&vi->refill_lock);
++      vi->refill_enabled = false;
++      spin_unlock_bh(&vi->refill_lock);
++}
++
+ static void virtqueue_napi_schedule(struct napi_struct *napi,
+                                   struct virtqueue *vq)
+ {
+@@ -1454,8 +1474,12 @@ static int virtnet_receive(struct receive_queue *rq, int budget,
+       }
+       if (rq->vq->num_free > min((unsigned int)budget, virtqueue_get_vring_size(rq->vq)) / 2) {
+-              if (!try_fill_recv(vi, rq, GFP_ATOMIC))
+-                      schedule_delayed_work(&vi->refill, 0);
++              if (!try_fill_recv(vi, rq, GFP_ATOMIC)) {
++                      spin_lock(&vi->refill_lock);
++                      if (vi->refill_enabled)
++                              schedule_delayed_work(&vi->refill, 0);
++                      spin_unlock(&vi->refill_lock);
++              }
+       }
+       u64_stats_update_begin(&rq->stats.syncp);
+@@ -1578,6 +1602,8 @@ static int virtnet_open(struct net_device *dev)
+       struct virtnet_info *vi = netdev_priv(dev);
+       int i, err;
++      enable_delayed_refill(vi);
++
+       for (i = 0; i < vi->max_queue_pairs; i++) {
+               if (i < vi->curr_queue_pairs)
+                       /* Make sure we have some buffers: if oom use wq. */
+@@ -1958,6 +1984,8 @@ static int virtnet_close(struct net_device *dev)
+       struct virtnet_info *vi = netdev_priv(dev);
+       int i;
++      /* Make sure NAPI doesn't schedule refill work */
++      disable_delayed_refill(vi);
+       /* Make sure refill_work doesn't re-enable napi! */
+       cancel_delayed_work_sync(&vi->refill);
+@@ -2455,6 +2483,8 @@ static int virtnet_restore_up(struct virtio_device *vdev)
+       virtio_device_ready(vdev);
++      enable_delayed_refill(vi);
++
+       if (netif_running(vi->dev)) {
+               err = virtnet_open(vi->dev);
+               if (err)
+@@ -3162,6 +3192,7 @@ static int virtnet_probe(struct virtio_device *vdev)
+       vdev->priv = vi;
+       INIT_WORK(&vi->config_work, virtnet_config_changed_work);
++      spin_lock_init(&vi->refill_lock);
+       /* If we can receive ANY GSO packets, we must allocate large ones. */
+       if (virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_TSO4) ||
+-- 
+2.35.1
+
diff --git a/queue-5.15/watch_queue-fix-missing-locking-in-add_watch_to_obje.patch b/queue-5.15/watch_queue-fix-missing-locking-in-add_watch_to_obje.patch
new file mode 100644 (file)
index 0000000..492ab3c
--- /dev/null
@@ -0,0 +1,120 @@
+From bfe5eaf5aac9d468598ad292627d44632264d877 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 28 Jul 2022 10:31:12 +0100
+Subject: watch_queue: Fix missing locking in add_watch_to_object()
+
+From: Linus Torvalds <torvalds@linux-foundation.org>
+
+[ Upstream commit e64ab2dbd882933b65cd82ff6235d705ad65dbb6 ]
+
+If a watch is being added to a queue, it needs to guard against
+interference from addition of a new watch, manual removal of a watch and
+removal of a watch due to some other queue being destroyed.
+
+KEYCTL_WATCH_KEY guards against this for the same {key,queue} pair by
+holding the key->sem writelocked and by holding refs on both the key and
+the queue - but that doesn't prevent interaction from other {key,queue}
+pairs.
+
+While add_watch_to_object() does take the spinlock on the event queue,
+it doesn't take the lock on the source's watch list.  The assumption was
+that the caller would prevent that (say by taking key->sem) - but that
+doesn't prevent interference from the destruction of another queue.
+
+Fix this by locking the watcher list in add_watch_to_object().
+
+Fixes: c73be61cede5 ("pipe: Add general notification queue support")
+Reported-by: syzbot+03d7b43290037d1f87ca@syzkaller.appspotmail.com
+Signed-off-by: David Howells <dhowells@redhat.com>
+cc: keyrings@vger.kernel.org
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/watch_queue.c | 58 +++++++++++++++++++++++++++-----------------
+ 1 file changed, 36 insertions(+), 22 deletions(-)
+
+diff --git a/kernel/watch_queue.c b/kernel/watch_queue.c
+index 7019d337ce86..1059ef6c3711 100644
+--- a/kernel/watch_queue.c
++++ b/kernel/watch_queue.c
+@@ -457,6 +457,33 @@ void init_watch(struct watch *watch, struct watch_queue *wqueue)
+       rcu_assign_pointer(watch->queue, wqueue);
+ }
++static int add_one_watch(struct watch *watch, struct watch_list *wlist, struct watch_queue *wqueue)
++{
++      const struct cred *cred;
++      struct watch *w;
++
++      hlist_for_each_entry(w, &wlist->watchers, list_node) {
++              struct watch_queue *wq = rcu_access_pointer(w->queue);
++              if (wqueue == wq && watch->id == w->id)
++                      return -EBUSY;
++      }
++
++      cred = current_cred();
++      if (atomic_inc_return(&cred->user->nr_watches) > task_rlimit(current, RLIMIT_NOFILE)) {
++              atomic_dec(&cred->user->nr_watches);
++              return -EAGAIN;
++      }
++
++      watch->cred = get_cred(cred);
++      rcu_assign_pointer(watch->watch_list, wlist);
++
++      kref_get(&wqueue->usage);
++      kref_get(&watch->usage);
++      hlist_add_head(&watch->queue_node, &wqueue->watches);
++      hlist_add_head_rcu(&watch->list_node, &wlist->watchers);
++      return 0;
++}
++
+ /**
+  * add_watch_to_object - Add a watch on an object to a watch list
+  * @watch: The watch to add
+@@ -471,34 +498,21 @@ void init_watch(struct watch *watch, struct watch_queue *wqueue)
+  */
+ int add_watch_to_object(struct watch *watch, struct watch_list *wlist)
+ {
+-      struct watch_queue *wqueue = rcu_access_pointer(watch->queue);
+-      struct watch *w;
+-
+-      hlist_for_each_entry(w, &wlist->watchers, list_node) {
+-              struct watch_queue *wq = rcu_access_pointer(w->queue);
+-              if (wqueue == wq && watch->id == w->id)
+-                      return -EBUSY;
+-      }
+-
+-      watch->cred = get_current_cred();
+-      rcu_assign_pointer(watch->watch_list, wlist);
++      struct watch_queue *wqueue;
++      int ret = -ENOENT;
+-      if (atomic_inc_return(&watch->cred->user->nr_watches) >
+-          task_rlimit(current, RLIMIT_NOFILE)) {
+-              atomic_dec(&watch->cred->user->nr_watches);
+-              put_cred(watch->cred);
+-              return -EAGAIN;
+-      }
++      rcu_read_lock();
++      wqueue = rcu_access_pointer(watch->queue);
+       if (lock_wqueue(wqueue)) {
+-              kref_get(&wqueue->usage);
+-              kref_get(&watch->usage);
+-              hlist_add_head(&watch->queue_node, &wqueue->watches);
++              spin_lock(&wlist->lock);
++              ret = add_one_watch(watch, wlist, wqueue);
++              spin_unlock(&wlist->lock);
+               unlock_wqueue(wqueue);
+       }
+-      hlist_add_head_rcu(&watch->list_node, &wlist->watchers);
+-      return 0;
++      rcu_read_unlock();
++      return ret;
+ }
+ EXPORT_SYMBOL(add_watch_to_object);
+-- 
+2.35.1
+
diff --git a/queue-5.15/watch_queue-fix-missing-rcu-annotation.patch-18505 b/queue-5.15/watch_queue-fix-missing-rcu-annotation.patch-18505
new file mode 100644 (file)
index 0000000..b892960
--- /dev/null
@@ -0,0 +1,40 @@
+From efb849e71a853a285d2b1728572cb644e20913e9 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 28 Jul 2022 10:31:06 +0100
+Subject: watch_queue: Fix missing rcu annotation
+
+From: David Howells <dhowells@redhat.com>
+
+[ Upstream commit e0339f036ef4beb9b20f0b6532a1e0ece7f594c6 ]
+
+Since __post_watch_notification() walks wlist->watchers with only the
+RCU read lock held, we need to use RCU methods to add to the list (we
+already use RCU methods to remove from the list).
+
+Fix add_watch_to_object() to use hlist_add_head_rcu() instead of
+hlist_add_head() for that list.
+
+Fixes: c73be61cede5 ("pipe: Add general notification queue support")
+Signed-off-by: David Howells <dhowells@redhat.com>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/watch_queue.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/kernel/watch_queue.c b/kernel/watch_queue.c
+index debebcd2664e..7019d337ce86 100644
+--- a/kernel/watch_queue.c
++++ b/kernel/watch_queue.c
+@@ -497,7 +497,7 @@ int add_watch_to_object(struct watch *watch, struct watch_list *wlist)
+               unlock_wqueue(wqueue);
+       }
+-      hlist_add_head(&watch->list_node, &wlist->watchers);
++      hlist_add_head_rcu(&watch->list_node, &wlist->watchers);
+       return 0;
+ }
+ EXPORT_SYMBOL(add_watch_to_object);
+-- 
+2.35.1
+