]> git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
Fixes for 5.10
authorSasha Levin <sashal@kernel.org>
Sun, 28 Aug 2022 14:31:02 +0000 (10:31 -0400)
committerSasha Levin <sashal@kernel.org>
Sun, 28 Aug 2022 14:31:02 +0000 (10:31 -0400)
Signed-off-by: Sasha Levin <sashal@kernel.org>
53 files changed:
queue-5.10/af_key-do-not-call-xfrm_probe_algs-in-parallel.patch [new file with mode: 0644]
queue-5.10/bnxt_en-fix-nq-resource-accounting-during-vf-creatio.patch [new file with mode: 0644]
queue-5.10/bonding-802.3ad-fix-no-transmission-of-lacpdus.patch [new file with mode: 0644]
queue-5.10/bpf-folding-omem_charge-into-sk_storage_charge.patch [new file with mode: 0644]
queue-5.10/ice-xsk-force-rings-to-be-sized-to-power-of-2.patch [new file with mode: 0644]
queue-5.10/ice-xsk-prohibit-usage-of-non-balanced-queue-id.patch [new file with mode: 0644]
queue-5.10/ionic-fix-up-issues-with-handling-eagain-on-fw-cmds.patch [new file with mode: 0644]
queue-5.10/ixgbe-stop-resetting-systime-in-ixgbe_ptp_start_cycl.patch [new file with mode: 0644]
queue-5.10/mm-huge_memory.c-use-helper-function-migration_entry.patch [new file with mode: 0644]
queue-5.10/mm-smaps-don-t-access-young-dirty-bit-if-pte-unprese.patch [new file with mode: 0644]
queue-5.10/net-fix-a-data-race-around-netdev_budget.patch [new file with mode: 0644]
queue-5.10/net-fix-a-data-race-around-netdev_budget_usecs.patch [new file with mode: 0644]
queue-5.10/net-fix-a-data-race-around-sysctl_net_busy_poll.patch [new file with mode: 0644]
queue-5.10/net-fix-a-data-race-around-sysctl_net_busy_read.patch [new file with mode: 0644]
queue-5.10/net-fix-a-data-race-around-sysctl_somaxconn.patch [new file with mode: 0644]
queue-5.10/net-fix-a-data-race-around-sysctl_tstamp_allow_data.patch [new file with mode: 0644]
queue-5.10/net-fix-data-races-around-netdev_max_backlog.patch [new file with mode: 0644]
queue-5.10/net-fix-data-races-around-netdev_tstamp_prequeue.patch [new file with mode: 0644]
queue-5.10/net-fix-data-races-around-sysctl_-rw-mem-_offset.patch [new file with mode: 0644]
queue-5.10/net-fix-data-races-around-sysctl_-rw-mem_-max-defaul.patch [new file with mode: 0644]
queue-5.10/net-fix-data-races-around-sysctl_devconf_inherit_ini.patch [new file with mode: 0644]
queue-5.10/net-fix-data-races-around-sysctl_fb_tunnels_only_for.patch [new file with mode: 0644]
queue-5.10/net-fix-data-races-around-sysctl_optmem_max.patch [new file with mode: 0644]
queue-5.10/net-fix-data-races-around-weight_p-and-dev_weight_-r.patch [new file with mode: 0644]
queue-5.10/net-ipa-don-t-assume-smem-is-page-aligned.patch [new file with mode: 0644]
queue-5.10/net-ipvtap-add-__init-__exit-annotations-to-module-i.patch [new file with mode: 0644]
queue-5.10/net-mlx5e-properly-disable-vlan-strip-on-non-ul-reps.patch [new file with mode: 0644]
queue-5.10/net-moxa-get-rid-of-asymmetry-in-dma-mapping-unmappi.patch [new file with mode: 0644]
queue-5.10/netfilter-bitwise-improve-error-goto-labels.patch [new file with mode: 0644]
queue-5.10/netfilter-ebtables-reject-blobs-that-don-t-provide-a.patch [new file with mode: 0644]
queue-5.10/netfilter-nf_tables-consolidate-rule-verdict-trace-c.patch [new file with mode: 0644]
queue-5.10/netfilter-nf_tables-disallow-binding-to-already-boun.patch [new file with mode: 0644]
queue-5.10/netfilter-nf_tables-disallow-jump-to-implicit-chain-.patch [new file with mode: 0644]
queue-5.10/netfilter-nf_tables-do-not-leave-chain-stats-enabled.patch [new file with mode: 0644]
queue-5.10/netfilter-nf_tables-upfront-validation-of-data-via-n.patch [new file with mode: 0644]
queue-5.10/netfilter-nft_cmp-optimize-comparison-for-16-bytes.patch [new file with mode: 0644]
queue-5.10/netfilter-nft_osf-restrict-osf-to-ipv4-ipv6-and-inet.patch [new file with mode: 0644]
queue-5.10/netfilter-nft_payload-do-not-truncate-csum_offset-an.patch [new file with mode: 0644]
queue-5.10/netfilter-nft_payload-report-erange-for-too-long-off.patch [new file with mode: 0644]
queue-5.10/netfilter-nft_tunnel-restrict-it-to-netdev-family.patch [new file with mode: 0644]
queue-5.10/netfilter-nftables-remove-redundant-assignment-of-va.patch [new file with mode: 0644]
queue-5.10/nfc-pn533-fix-use-after-free-bugs-caused-by-pn532_cm.patch [new file with mode: 0644]
queue-5.10/nfs-don-t-allocate-nfs_fattr-on-the-stack-in-__nfs42.patch [new file with mode: 0644]
queue-5.10/nfsv4.2-fix-problems-with-__nfs42_ssc_open.patch [new file with mode: 0644]
queue-5.10/ratelimit-fix-data-races-in-___ratelimit.patch [new file with mode: 0644]
queue-5.10/rose-check-null-rose_loopback_neigh-loopback.patch [new file with mode: 0644]
queue-5.10/rxrpc-fix-locking-in-rxrpc-s-sendmsg.patch [new file with mode: 0644]
queue-5.10/series
queue-5.10/sunrpc-rpc-level-errors-should-set-task-tk_rpc_statu.patch [new file with mode: 0644]
queue-5.10/tcp-tweak-len-truesize-ratio-for-coalesce-candidates.patch [new file with mode: 0644]
queue-5.10/xfrm-clone-missing-x-lastused-in-xfrm_do_migrate.patch [new file with mode: 0644]
queue-5.10/xfrm-fix-refcount-leak-in-__xfrm_policy_check.patch [new file with mode: 0644]
queue-5.10/xfrm-policy-fix-metadata-dst-dev-xmit-null-pointer-d.patch [new file with mode: 0644]

diff --git a/queue-5.10/af_key-do-not-call-xfrm_probe_algs-in-parallel.patch b/queue-5.10/af_key-do-not-call-xfrm_probe_algs-in-parallel.patch
new file mode 100644 (file)
index 0000000..fd57daa
--- /dev/null
@@ -0,0 +1,42 @@
+From 4bf43acee75918ef3a6dcb582e4cdd9ef3621153 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 4 Aug 2022 18:03:46 +0800
+Subject: af_key: Do not call xfrm_probe_algs in parallel
+
+From: Herbert Xu <herbert@gondor.apana.org.au>
+
+[ Upstream commit ba953a9d89a00c078b85f4b190bc1dde66fe16b5 ]
+
+When namespace support was added to xfrm/afkey, it caused the
+previously single-threaded call to xfrm_probe_algs to become
+multi-threaded.  This is buggy and needs to be fixed with a mutex.
+
+Reported-by: Abhishek Shah <abhishek.shah@columbia.edu>
+Fixes: 283bc9f35bbb ("xfrm: Namespacify xfrm state/policy locks")
+Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
+Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/key/af_key.c | 3 +++
+ 1 file changed, 3 insertions(+)
+
+diff --git a/net/key/af_key.c b/net/key/af_key.c
+index 2aa16a171285b..05e2710988883 100644
+--- a/net/key/af_key.c
++++ b/net/key/af_key.c
+@@ -1701,9 +1701,12 @@ static int pfkey_register(struct sock *sk, struct sk_buff *skb, const struct sad
+               pfk->registered |= (1<<hdr->sadb_msg_satype);
+       }
++      mutex_lock(&pfkey_mutex);
+       xfrm_probe_algs();
+       supp_skb = compose_sadb_supported(hdr, GFP_KERNEL | __GFP_ZERO);
++      mutex_unlock(&pfkey_mutex);
++
+       if (!supp_skb) {
+               if (hdr->sadb_msg_satype != SADB_SATYPE_UNSPEC)
+                       pfk->registered &= ~(1<<hdr->sadb_msg_satype);
+-- 
+2.35.1
+
diff --git a/queue-5.10/bnxt_en-fix-nq-resource-accounting-during-vf-creatio.patch b/queue-5.10/bnxt_en-fix-nq-resource-accounting-during-vf-creatio.patch
new file mode 100644 (file)
index 0000000..5676528
--- /dev/null
@@ -0,0 +1,44 @@
+From 646fe8c249aabc9aebcda85f0fcf30e1444c25aa Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 22 Aug 2022 11:06:53 -0400
+Subject: bnxt_en: fix NQ resource accounting during vf creation on 57500 chips
+
+From: Vikas Gupta <vikas.gupta@broadcom.com>
+
+[ Upstream commit 09a89cc59ad67794a11e1d3dd13c5b3172adcc51 ]
+
+There are 2 issues:
+
+1. We should decrement hw_resc->max_nqs instead of hw_resc->max_irqs
+   with the number of NQs assigned to the VFs.  The IRQs are fixed
+   on each function and cannot be re-assigned.  Only the NQs are being
+   assigned to the VFs.
+
+2. vf_msix is the total number of NQs to be assigned to the VFs.  So
+   we should decrement vf_msix from hw_resc->max_nqs.
+
+Fixes: b16b68918674 ("bnxt_en: Add SR-IOV support for 57500 chips.")
+Signed-off-by: Vikas Gupta <vikas.gupta@broadcom.com>
+Signed-off-by: Michael Chan <michael.chan@broadcom.com>
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c
+index 23b80aa171dd0..819f9df9425c6 100644
+--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c
++++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c
+@@ -599,7 +599,7 @@ static int bnxt_hwrm_func_vf_resc_cfg(struct bnxt *bp, int num_vfs, bool reset)
+               hw_resc->max_stat_ctxs -= le16_to_cpu(req.min_stat_ctx) * n;
+               hw_resc->max_vnics -= le16_to_cpu(req.min_vnics) * n;
+               if (bp->flags & BNXT_FLAG_CHIP_P5)
+-                      hw_resc->max_irqs -= vf_msix * n;
++                      hw_resc->max_nqs -= vf_msix;
+               rc = pf->active_vfs;
+       }
+-- 
+2.35.1
+
diff --git a/queue-5.10/bonding-802.3ad-fix-no-transmission-of-lacpdus.patch b/queue-5.10/bonding-802.3ad-fix-no-transmission-of-lacpdus.patch
new file mode 100644 (file)
index 0000000..0cd393b
--- /dev/null
@@ -0,0 +1,137 @@
+From b2a310c546ef41070e63c208b78e7b9fe115b918 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 19 Aug 2022 11:15:13 -0400
+Subject: bonding: 802.3ad: fix no transmission of LACPDUs
+
+From: Jonathan Toppins <jtoppins@redhat.com>
+
+[ Upstream commit d745b5062ad2b5da90a5e728d7ca884fc07315fd ]
+
+This is caused by the global variable ad_ticks_per_sec being zero as
+demonstrated by the reproducer script discussed below. This causes
+all timer values in __ad_timer_to_ticks to be zero, resulting
+in the periodic timer to never fire.
+
+To reproduce:
+Run the script in
+`tools/testing/selftests/drivers/net/bonding/bond-break-lacpdu-tx.sh` which
+puts bonding into a state where it never transmits LACPDUs.
+
+line 44: ip link add fbond type bond mode 4 miimon 200 \
+            xmit_hash_policy 1 ad_actor_sys_prio 65535 lacp_rate fast
+setting bond param: ad_actor_sys_prio
+given:
+    params.ad_actor_system = 0
+call stack:
+    bond_option_ad_actor_sys_prio()
+    -> bond_3ad_update_ad_actor_settings()
+       -> set ad.system.sys_priority = bond->params.ad_actor_sys_prio
+       -> ad.system.sys_mac_addr = bond->dev->dev_addr; because
+            params.ad_actor_system == 0
+results:
+     ad.system.sys_mac_addr = bond->dev->dev_addr
+
+line 48: ip link set fbond address 52:54:00:3B:7C:A6
+setting bond MAC addr
+call stack:
+    bond->dev->dev_addr = new_mac
+
+line 52: ip link set fbond type bond ad_actor_sys_prio 65535
+setting bond param: ad_actor_sys_prio
+given:
+    params.ad_actor_system = 0
+call stack:
+    bond_option_ad_actor_sys_prio()
+    -> bond_3ad_update_ad_actor_settings()
+       -> set ad.system.sys_priority = bond->params.ad_actor_sys_prio
+       -> ad.system.sys_mac_addr = bond->dev->dev_addr; because
+            params.ad_actor_system == 0
+results:
+     ad.system.sys_mac_addr = bond->dev->dev_addr
+
+line 60: ip link set veth1-bond down master fbond
+given:
+    params.ad_actor_system = 0
+    params.mode = BOND_MODE_8023AD
+    ad.system.sys_mac_addr == bond->dev->dev_addr
+call stack:
+    bond_enslave
+    -> bond_3ad_initialize(); because first slave
+       -> if ad.system.sys_mac_addr != bond->dev->dev_addr
+          return
+results:
+     Nothing is run in bond_3ad_initialize() because dev_addr equals
+     sys_mac_addr leaving the global ad_ticks_per_sec zero as it is
+     never initialized anywhere else.
+
+The if check around the contents of bond_3ad_initialize() is no longer
+needed due to commit 5ee14e6d336f ("bonding: 3ad: apply ad_actor settings
+changes immediately") which sets ad.system.sys_mac_addr if any one of
+the bonding parameters whos set function calls
+bond_3ad_update_ad_actor_settings(). This is because if
+ad.system.sys_mac_addr is zero it will be set to the current bond mac
+address, this causes the if check to never be true.
+
+Fixes: 5ee14e6d336f ("bonding: 3ad: apply ad_actor settings changes immediately")
+Signed-off-by: Jonathan Toppins <jtoppins@redhat.com>
+Acked-by: Jay Vosburgh <jay.vosburgh@canonical.com>
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/bonding/bond_3ad.c | 38 ++++++++++++++--------------------
+ 1 file changed, 16 insertions(+), 22 deletions(-)
+
+diff --git a/drivers/net/bonding/bond_3ad.c b/drivers/net/bonding/bond_3ad.c
+index 325b20729d8ba..b0f8d551b61db 100644
+--- a/drivers/net/bonding/bond_3ad.c
++++ b/drivers/net/bonding/bond_3ad.c
+@@ -1988,30 +1988,24 @@ void bond_3ad_initiate_agg_selection(struct bonding *bond, int timeout)
+  */
+ void bond_3ad_initialize(struct bonding *bond, u16 tick_resolution)
+ {
+-      /* check that the bond is not initialized yet */
+-      if (!MAC_ADDRESS_EQUAL(&(BOND_AD_INFO(bond).system.sys_mac_addr),
+-                              bond->dev->dev_addr)) {
+-
+-              BOND_AD_INFO(bond).aggregator_identifier = 0;
+-
+-              BOND_AD_INFO(bond).system.sys_priority =
+-                      bond->params.ad_actor_sys_prio;
+-              if (is_zero_ether_addr(bond->params.ad_actor_system))
+-                      BOND_AD_INFO(bond).system.sys_mac_addr =
+-                          *((struct mac_addr *)bond->dev->dev_addr);
+-              else
+-                      BOND_AD_INFO(bond).system.sys_mac_addr =
+-                          *((struct mac_addr *)bond->params.ad_actor_system);
++      BOND_AD_INFO(bond).aggregator_identifier = 0;
++      BOND_AD_INFO(bond).system.sys_priority =
++              bond->params.ad_actor_sys_prio;
++      if (is_zero_ether_addr(bond->params.ad_actor_system))
++              BOND_AD_INFO(bond).system.sys_mac_addr =
++                  *((struct mac_addr *)bond->dev->dev_addr);
++      else
++              BOND_AD_INFO(bond).system.sys_mac_addr =
++                  *((struct mac_addr *)bond->params.ad_actor_system);
+-              /* initialize how many times this module is called in one
+-               * second (should be about every 100ms)
+-               */
+-              ad_ticks_per_sec = tick_resolution;
++      /* initialize how many times this module is called in one
++       * second (should be about every 100ms)
++       */
++      ad_ticks_per_sec = tick_resolution;
+-              bond_3ad_initiate_agg_selection(bond,
+-                                              AD_AGGREGATOR_SELECTION_TIMER *
+-                                              ad_ticks_per_sec);
+-      }
++      bond_3ad_initiate_agg_selection(bond,
++                                      AD_AGGREGATOR_SELECTION_TIMER *
++                                      ad_ticks_per_sec);
+ }
+ /**
+-- 
+2.35.1
+
diff --git a/queue-5.10/bpf-folding-omem_charge-into-sk_storage_charge.patch b/queue-5.10/bpf-folding-omem_charge-into-sk_storage_charge.patch
new file mode 100644 (file)
index 0000000..21a2e50
--- /dev/null
@@ -0,0 +1,67 @@
+From 8b52d75606ce276f1b8672d4bd54a30fa7df1359 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 12 Nov 2020 13:13:01 -0800
+Subject: bpf: Folding omem_charge() into sk_storage_charge()
+
+From: Martin KaFai Lau <kafai@fb.com>
+
+[ Upstream commit 9e838b02b0bb795793f12049307a354e28b5749c ]
+
+sk_storage_charge() is the only user of omem_charge().
+This patch simplifies it by folding omem_charge() into
+sk_storage_charge().
+
+Signed-off-by: Martin KaFai Lau <kafai@fb.com>
+Signed-off-by: Alexei Starovoitov <ast@kernel.org>
+Acked-by: Song Liu <songliubraving@fb.com>
+Acked-by: KP Singh <kpsingh@google.com>
+Link: https://lore.kernel.org/bpf/20201112211301.2586255-1-kafai@fb.com
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/core/bpf_sk_storage.c | 23 ++++++++++-------------
+ 1 file changed, 10 insertions(+), 13 deletions(-)
+
+diff --git a/net/core/bpf_sk_storage.c b/net/core/bpf_sk_storage.c
+index 5f773624948ff..39c5a059d1c2b 100644
+--- a/net/core/bpf_sk_storage.c
++++ b/net/core/bpf_sk_storage.c
+@@ -15,18 +15,6 @@
+ DEFINE_BPF_STORAGE_CACHE(sk_cache);
+-static int omem_charge(struct sock *sk, unsigned int size)
+-{
+-      /* same check as in sock_kmalloc() */
+-      if (size <= sysctl_optmem_max &&
+-          atomic_read(&sk->sk_omem_alloc) + size < sysctl_optmem_max) {
+-              atomic_add(size, &sk->sk_omem_alloc);
+-              return 0;
+-      }
+-
+-      return -ENOMEM;
+-}
+-
+ static struct bpf_local_storage_data *
+ sk_storage_lookup(struct sock *sk, struct bpf_map *map, bool cacheit_lockit)
+ {
+@@ -316,7 +304,16 @@ BPF_CALL_2(bpf_sk_storage_delete, struct bpf_map *, map, struct sock *, sk)
+ static int sk_storage_charge(struct bpf_local_storage_map *smap,
+                            void *owner, u32 size)
+ {
+-      return omem_charge(owner, size);
++      struct sock *sk = (struct sock *)owner;
++
++      /* same check as in sock_kmalloc() */
++      if (size <= sysctl_optmem_max &&
++          atomic_read(&sk->sk_omem_alloc) + size < sysctl_optmem_max) {
++              atomic_add(size, &sk->sk_omem_alloc);
++              return 0;
++      }
++
++      return -ENOMEM;
+ }
+ static void sk_storage_uncharge(struct bpf_local_storage_map *smap,
+-- 
+2.35.1
+
diff --git a/queue-5.10/ice-xsk-force-rings-to-be-sized-to-power-of-2.patch b/queue-5.10/ice-xsk-force-rings-to-be-sized-to-power-of-2.patch
new file mode 100644 (file)
index 0000000..b8152de
--- /dev/null
@@ -0,0 +1,59 @@
+From ed86295b781f1db5c7d99dcb4da867d62635a73b Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 25 Jan 2022 17:04:40 +0100
+Subject: ice: xsk: Force rings to be sized to power of 2
+
+From: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
+
+[ Upstream commit 296f13ff3854535009a185aaf8e3603266d39d94 ]
+
+With the upcoming introduction of batching to XSK data path,
+performance wise it will be the best to have the ring descriptor count
+to be aligned to power of 2.
+
+Check if ring sizes that user is going to attach the XSK socket fulfill
+the condition above. For Tx side, although check is being done against
+the Tx queue and in the end the socket will be attached to the XDP
+queue, it is fine since XDP queues get the ring->count setting from Tx
+queues.
+
+Suggested-by: Alexander Lobakin <alexandr.lobakin@intel.com>
+Signed-off-by: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
+Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
+Reviewed-by: Alexander Lobakin <alexandr.lobakin@intel.com>
+Acked-by: Magnus Karlsson <magnus.karlsson@intel.com>
+Link: https://lore.kernel.org/bpf/20220125160446.78976-3-maciej.fijalkowski@intel.com
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/intel/ice/ice_xsk.c | 8 ++++++++
+ 1 file changed, 8 insertions(+)
+
+diff --git a/drivers/net/ethernet/intel/ice/ice_xsk.c b/drivers/net/ethernet/intel/ice/ice_xsk.c
+index 5733526fa245c..4bb62950d92de 100644
+--- a/drivers/net/ethernet/intel/ice/ice_xsk.c
++++ b/drivers/net/ethernet/intel/ice/ice_xsk.c
+@@ -371,6 +371,13 @@ int ice_xsk_pool_setup(struct ice_vsi *vsi, struct xsk_buff_pool *pool, u16 qid)
+       bool if_running, pool_present = !!pool;
+       int ret = 0, pool_failure = 0;
++      if (!is_power_of_2(vsi->rx_rings[qid]->count) ||
++          !is_power_of_2(vsi->tx_rings[qid]->count)) {
++              netdev_err(vsi->netdev, "Please align ring sizes to power of 2\n");
++              pool_failure = -EINVAL;
++              goto failure;
++      }
++
+       if_running = netif_running(vsi->netdev) && ice_is_xdp_ena_vsi(vsi);
+       if (if_running) {
+@@ -393,6 +400,7 @@ int ice_xsk_pool_setup(struct ice_vsi *vsi, struct xsk_buff_pool *pool, u16 qid)
+                       netdev_err(vsi->netdev, "ice_qp_ena error = %d\n", ret);
+       }
++failure:
+       if (pool_failure) {
+               netdev_err(vsi->netdev, "Could not %sable buffer pool, error = %d\n",
+                          pool_present ? "en" : "dis", pool_failure);
+-- 
+2.35.1
+
diff --git a/queue-5.10/ice-xsk-prohibit-usage-of-non-balanced-queue-id.patch b/queue-5.10/ice-xsk-prohibit-usage-of-non-balanced-queue-id.patch
new file mode 100644 (file)
index 0000000..c25cd81
--- /dev/null
@@ -0,0 +1,115 @@
+From 14105e09bac624af958401be36397055b204c5e0 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 11 Aug 2022 20:21:48 +0200
+Subject: ice: xsk: prohibit usage of non-balanced queue id
+
+From: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
+
+[ Upstream commit 5a42f112d367bb4700a8a41f5c12724fde6bfbb9 ]
+
+Fix the following scenario:
+1. ethtool -L $IFACE rx 8 tx 96
+2. xdpsock -q 10 -t -z
+
+Above refers to a case where user would like to attach XSK socket in
+txonly mode at a queue id that does not have a corresponding Rx queue.
+At this moment ice's XSK logic is tightly bound to act on a "queue pair",
+e.g. both Tx and Rx queues at a given queue id are disabled/enabled and
+both of them will get XSK pool assigned, which is broken for the presented
+queue configuration. This results in the splat included at the bottom,
+which is basically an OOB access to Rx ring array.
+
+To fix this, allow using the ids only in scope of "combined" queues
+reported by ethtool. However, logic should be rewritten to allow such
+configurations later on, which would end up as a complete rewrite of the
+control path, so let us go with this temporary fix.
+
+[420160.558008] BUG: kernel NULL pointer dereference, address: 0000000000000082
+[420160.566359] #PF: supervisor read access in kernel mode
+[420160.572657] #PF: error_code(0x0000) - not-present page
+[420160.579002] PGD 0 P4D 0
+[420160.582756] Oops: 0000 [#1] PREEMPT SMP NOPTI
+[420160.588396] CPU: 10 PID: 21232 Comm: xdpsock Tainted: G           OE     5.19.0-rc7+ #10
+[420160.597893] Hardware name: Intel Corporation S2600WFT/S2600WFT, BIOS SE5C620.86B.02.01.0008.031920191559 03/19/2019
+[420160.609894] RIP: 0010:ice_xsk_pool_setup+0x44/0x7d0 [ice]
+[420160.616968] Code: f3 48 83 ec 40 48 8b 4f 20 48 8b 3f 65 48 8b 04 25 28 00 00 00 48 89 44 24 38 31 c0 48 8d 04 ed 00 00 00 00 48 01 c1 48 8b 11 <0f> b7 92 82 00 00 00 48 85 d2 0f 84 2d 75 00 00 48 8d 72 ff 48 85
+[420160.639421] RSP: 0018:ffffc9002d2afd48 EFLAGS: 00010282
+[420160.646650] RAX: 0000000000000050 RBX: ffff88811d8bdd00 RCX: ffff888112c14ff8
+[420160.655893] RDX: 0000000000000000 RSI: ffff88811d8bdd00 RDI: ffff888109861000
+[420160.665166] RBP: 000000000000000a R08: 000000000000000a R09: 0000000000000000
+[420160.674493] R10: 000000000000889f R11: 0000000000000000 R12: 000000000000000a
+[420160.683833] R13: 000000000000000a R14: 0000000000000000 R15: ffff888117611828
+[420160.693211] FS:  00007fa869fc1f80(0000) GS:ffff8897e0880000(0000) knlGS:0000000000000000
+[420160.703645] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+[420160.711783] CR2: 0000000000000082 CR3: 00000001d076c001 CR4: 00000000007706e0
+[420160.721399] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
+[420160.731045] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
+[420160.740707] PKRU: 55555554
+[420160.745960] Call Trace:
+[420160.750962]  <TASK>
+[420160.755597]  ? kmalloc_large_node+0x79/0x90
+[420160.762703]  ? __kmalloc_node+0x3f5/0x4b0
+[420160.769341]  xp_assign_dev+0xfd/0x210
+[420160.775661]  ? shmem_file_read_iter+0x29a/0x420
+[420160.782896]  xsk_bind+0x152/0x490
+[420160.788943]  __sys_bind+0xd0/0x100
+[420160.795097]  ? exit_to_user_mode_prepare+0x20/0x120
+[420160.802801]  __x64_sys_bind+0x16/0x20
+[420160.809298]  do_syscall_64+0x38/0x90
+[420160.815741]  entry_SYSCALL_64_after_hwframe+0x63/0xcd
+[420160.823731] RIP: 0033:0x7fa86a0dd2fb
+[420160.830264] Code: c3 66 0f 1f 44 00 00 48 8b 15 69 8b 0c 00 f7 d8 64 89 02 b8 ff ff ff ff eb bc 0f 1f 44 00 00 f3 0f 1e fa b8 31 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d 3d 8b 0c 00 f7 d8 64 89 01 48
+[420160.855410] RSP: 002b:00007ffc1146f618 EFLAGS: 00000246 ORIG_RAX: 0000000000000031
+[420160.866366] RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007fa86a0dd2fb
+[420160.876957] RDX: 0000000000000010 RSI: 00007ffc1146f680 RDI: 0000000000000003
+[420160.887604] RBP: 000055d7113a0520 R08: 00007fa868fb8000 R09: 0000000080000000
+[420160.898293] R10: 0000000000008001 R11: 0000000000000246 R12: 000055d7113a04e0
+[420160.909038] R13: 000055d7113a0320 R14: 000000000000000a R15: 0000000000000000
+[420160.919817]  </TASK>
+[420160.925659] Modules linked in: ice(OE) af_packet binfmt_misc nls_iso8859_1 ipmi_ssif intel_rapl_msr intel_rapl_common x86_pkg_temp_thermal intel_powerclamp mei_me coretemp ioatdma mei ipmi_si wmi ipmi_msghandler acpi_pad acpi_power_meter ip_tables x_tables autofs4 ixgbe i40e crct10dif_pclmul crc32_pclmul ghash_clmulni_intel aesni_intel crypto_simd cryptd ahci mdio dca libahci lpc_ich [last unloaded: ice]
+[420160.977576] CR2: 0000000000000082
+[420160.985037] ---[ end trace 0000000000000000 ]---
+[420161.097724] RIP: 0010:ice_xsk_pool_setup+0x44/0x7d0 [ice]
+[420161.107341] Code: f3 48 83 ec 40 48 8b 4f 20 48 8b 3f 65 48 8b 04 25 28 00 00 00 48 89 44 24 38 31 c0 48 8d 04 ed 00 00 00 00 48 01 c1 48 8b 11 <0f> b7 92 82 00 00 00 48 85 d2 0f 84 2d 75 00 00 48 8d 72 ff 48 85
+[420161.134741] RSP: 0018:ffffc9002d2afd48 EFLAGS: 00010282
+[420161.144274] RAX: 0000000000000050 RBX: ffff88811d8bdd00 RCX: ffff888112c14ff8
+[420161.155690] RDX: 0000000000000000 RSI: ffff88811d8bdd00 RDI: ffff888109861000
+[420161.168088] RBP: 000000000000000a R08: 000000000000000a R09: 0000000000000000
+[420161.179295] R10: 000000000000889f R11: 0000000000000000 R12: 000000000000000a
+[420161.190420] R13: 000000000000000a R14: 0000000000000000 R15: ffff888117611828
+[420161.201505] FS:  00007fa869fc1f80(0000) GS:ffff8897e0880000(0000) knlGS:0000000000000000
+[420161.213628] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+[420161.223413] CR2: 0000000000000082 CR3: 00000001d076c001 CR4: 00000000007706e0
+[420161.234653] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
+[420161.245893] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
+[420161.257052] PKRU: 55555554
+
+Fixes: 2d4238f55697 ("ice: Add support for AF_XDP")
+Signed-off-by: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
+Tested-by: George Kuruvinakunnel <george.kuruvinakunnel@intel.com>
+Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/intel/ice/ice_xsk.c | 6 ++++++
+ 1 file changed, 6 insertions(+)
+
+diff --git a/drivers/net/ethernet/intel/ice/ice_xsk.c b/drivers/net/ethernet/intel/ice/ice_xsk.c
+index 4bb62950d92de..59963b901be0f 100644
+--- a/drivers/net/ethernet/intel/ice/ice_xsk.c
++++ b/drivers/net/ethernet/intel/ice/ice_xsk.c
+@@ -371,6 +371,12 @@ int ice_xsk_pool_setup(struct ice_vsi *vsi, struct xsk_buff_pool *pool, u16 qid)
+       bool if_running, pool_present = !!pool;
+       int ret = 0, pool_failure = 0;
++      if (qid >= vsi->num_rxq || qid >= vsi->num_txq) {
++              netdev_err(vsi->netdev, "Please use queue id in scope of combined queues count\n");
++              pool_failure = -EINVAL;
++              goto failure;
++      }
++
+       if (!is_power_of_2(vsi->rx_rings[qid]->count) ||
+           !is_power_of_2(vsi->tx_rings[qid]->count)) {
+               netdev_err(vsi->netdev, "Please align ring sizes to power of 2\n");
+-- 
+2.35.1
+
diff --git a/queue-5.10/ionic-fix-up-issues-with-handling-eagain-on-fw-cmds.patch b/queue-5.10/ionic-fix-up-issues-with-handling-eagain-on-fw-cmds.patch
new file mode 100644 (file)
index 0000000..32deb27
--- /dev/null
@@ -0,0 +1,53 @@
+From 28e919a39fee125280baecdb6e9821c399a972a8 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 24 Aug 2022 09:50:50 -0700
+Subject: ionic: fix up issues with handling EAGAIN on FW cmds
+
+From: Shannon Nelson <snelson@pensando.io>
+
+[ Upstream commit 0fc4dd452d6c14828eed6369155c75c0ac15bab3 ]
+
+In looping on FW update tests we occasionally see the
+FW_ACTIVATE_STATUS command fail while it is in its EAGAIN loop
+waiting for the FW activate step to finsh inside the FW.  The
+firmware is complaining that the done bit is set when a new
+dev_cmd is going to be processed.
+
+Doing a clean on the cmd registers and doorbell before exiting
+the wait-for-done and cleaning the done bit before the sleep
+prevents this from occurring.
+
+Fixes: fbfb8031533c ("ionic: Add hardware init and device commands")
+Signed-off-by: Shannon Nelson <snelson@pensando.io>
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/pensando/ionic/ionic_main.c | 4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+diff --git a/drivers/net/ethernet/pensando/ionic/ionic_main.c b/drivers/net/ethernet/pensando/ionic/ionic_main.c
+index e14869a2e24a5..f60ffef33e0ce 100644
+--- a/drivers/net/ethernet/pensando/ionic/ionic_main.c
++++ b/drivers/net/ethernet/pensando/ionic/ionic_main.c
+@@ -378,8 +378,8 @@ int ionic_dev_cmd_wait(struct ionic *ionic, unsigned long max_seconds)
+                               ionic_opcode_to_str(opcode), opcode,
+                               ionic_error_to_str(err), err);
+-                      msleep(1000);
+                       iowrite32(0, &idev->dev_cmd_regs->done);
++                      msleep(1000);
+                       iowrite32(1, &idev->dev_cmd_regs->doorbell);
+                       goto try_again;
+               }
+@@ -392,6 +392,8 @@ int ionic_dev_cmd_wait(struct ionic *ionic, unsigned long max_seconds)
+               return ionic_error_to_errno(err);
+       }
++      ionic_dev_cmd_clean(ionic);
++
+       return 0;
+ }
+-- 
+2.35.1
+
diff --git a/queue-5.10/ixgbe-stop-resetting-systime-in-ixgbe_ptp_start_cycl.patch b/queue-5.10/ixgbe-stop-resetting-systime-in-ixgbe_ptp_start_cycl.patch
new file mode 100644 (file)
index 0000000..188e189
--- /dev/null
@@ -0,0 +1,137 @@
+From 10a24f61edda75898e42cda47e06f6e420400905 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 1 Aug 2022 17:24:19 -0700
+Subject: ixgbe: stop resetting SYSTIME in ixgbe_ptp_start_cyclecounter
+
+From: Jacob Keller <jacob.e.keller@intel.com>
+
+[ Upstream commit 25d7a5f5a6bb15a2dae0a3f39ea5dda215024726 ]
+
+The ixgbe_ptp_start_cyclecounter is intended to be called whenever the
+cyclecounter parameters need to be changed.
+
+Since commit a9763f3cb54c ("ixgbe: Update PTP to support X550EM_x
+devices"), this function has cleared the SYSTIME registers and reset the
+TSAUXC DISABLE_SYSTIME bit.
+
+While these need to be cleared during ixgbe_ptp_reset, it is wrong to clear
+them during ixgbe_ptp_start_cyclecounter. This function may be called
+during both reset and link status change. When link changes, the SYSTIME
+counter is still operating normally, but the cyclecounter should be updated
+to account for the possibly changed parameters.
+
+Clearing SYSTIME when link changes causes the timecounter to jump because
+the cycle counter now reads zero.
+
+Extract the SYSTIME initialization out to a new function and call this
+during ixgbe_ptp_reset. This prevents the timecounter adjustment and avoids
+an unnecessary reset of the current time.
+
+This also restores the original SYSTIME clearing that occurred during
+ixgbe_ptp_reset before the commit above.
+
+Reported-by: Steve Payne <spayne@aurora.tech>
+Reported-by: Ilya Evenbach <ievenbach@aurora.tech>
+Fixes: a9763f3cb54c ("ixgbe: Update PTP to support X550EM_x devices")
+Signed-off-by: Jacob Keller <jacob.e.keller@intel.com>
+Tested-by: Gurucharan <gurucharanx.g@intel.com> (A Contingent worker at Intel)
+Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c | 59 +++++++++++++++-----
+ 1 file changed, 46 insertions(+), 13 deletions(-)
+
+diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c
+index 22a874eee2e84..8b7f300355710 100644
+--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c
++++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c
+@@ -1211,7 +1211,6 @@ void ixgbe_ptp_start_cyclecounter(struct ixgbe_adapter *adapter)
+       struct cyclecounter cc;
+       unsigned long flags;
+       u32 incval = 0;
+-      u32 tsauxc = 0;
+       u32 fuse0 = 0;
+       /* For some of the boards below this mask is technically incorrect.
+@@ -1246,18 +1245,6 @@ void ixgbe_ptp_start_cyclecounter(struct ixgbe_adapter *adapter)
+       case ixgbe_mac_x550em_a:
+       case ixgbe_mac_X550:
+               cc.read = ixgbe_ptp_read_X550;
+-
+-              /* enable SYSTIME counter */
+-              IXGBE_WRITE_REG(hw, IXGBE_SYSTIMR, 0);
+-              IXGBE_WRITE_REG(hw, IXGBE_SYSTIML, 0);
+-              IXGBE_WRITE_REG(hw, IXGBE_SYSTIMH, 0);
+-              tsauxc = IXGBE_READ_REG(hw, IXGBE_TSAUXC);
+-              IXGBE_WRITE_REG(hw, IXGBE_TSAUXC,
+-                              tsauxc & ~IXGBE_TSAUXC_DISABLE_SYSTIME);
+-              IXGBE_WRITE_REG(hw, IXGBE_TSIM, IXGBE_TSIM_TXTS);
+-              IXGBE_WRITE_REG(hw, IXGBE_EIMS, IXGBE_EIMS_TIMESYNC);
+-
+-              IXGBE_WRITE_FLUSH(hw);
+               break;
+       case ixgbe_mac_X540:
+               cc.read = ixgbe_ptp_read_82599;
+@@ -1289,6 +1276,50 @@ void ixgbe_ptp_start_cyclecounter(struct ixgbe_adapter *adapter)
+       spin_unlock_irqrestore(&adapter->tmreg_lock, flags);
+ }
++/**
++ * ixgbe_ptp_init_systime - Initialize SYSTIME registers
++ * @adapter: the ixgbe private board structure
++ *
++ * Initialize and start the SYSTIME registers.
++ */
++static void ixgbe_ptp_init_systime(struct ixgbe_adapter *adapter)
++{
++      struct ixgbe_hw *hw = &adapter->hw;
++      u32 tsauxc;
++
++      switch (hw->mac.type) {
++      case ixgbe_mac_X550EM_x:
++      case ixgbe_mac_x550em_a:
++      case ixgbe_mac_X550:
++              tsauxc = IXGBE_READ_REG(hw, IXGBE_TSAUXC);
++
++              /* Reset SYSTIME registers to 0 */
++              IXGBE_WRITE_REG(hw, IXGBE_SYSTIMR, 0);
++              IXGBE_WRITE_REG(hw, IXGBE_SYSTIML, 0);
++              IXGBE_WRITE_REG(hw, IXGBE_SYSTIMH, 0);
++
++              /* Reset interrupt settings */
++              IXGBE_WRITE_REG(hw, IXGBE_TSIM, IXGBE_TSIM_TXTS);
++              IXGBE_WRITE_REG(hw, IXGBE_EIMS, IXGBE_EIMS_TIMESYNC);
++
++              /* Activate the SYSTIME counter */
++              IXGBE_WRITE_REG(hw, IXGBE_TSAUXC,
++                              tsauxc & ~IXGBE_TSAUXC_DISABLE_SYSTIME);
++              break;
++      case ixgbe_mac_X540:
++      case ixgbe_mac_82599EB:
++              /* Reset SYSTIME registers to 0 */
++              IXGBE_WRITE_REG(hw, IXGBE_SYSTIML, 0);
++              IXGBE_WRITE_REG(hw, IXGBE_SYSTIMH, 0);
++              break;
++      default:
++              /* Other devices aren't supported */
++              return;
++      };
++
++      IXGBE_WRITE_FLUSH(hw);
++}
++
+ /**
+  * ixgbe_ptp_reset
+  * @adapter: the ixgbe private board structure
+@@ -1315,6 +1346,8 @@ void ixgbe_ptp_reset(struct ixgbe_adapter *adapter)
+       ixgbe_ptp_start_cyclecounter(adapter);
++      ixgbe_ptp_init_systime(adapter);
++
+       spin_lock_irqsave(&adapter->tmreg_lock, flags);
+       timecounter_init(&adapter->hw_tc, &adapter->hw_cc,
+                        ktime_to_ns(ktime_get_real()));
+-- 
+2.35.1
+
diff --git a/queue-5.10/mm-huge_memory.c-use-helper-function-migration_entry.patch b/queue-5.10/mm-huge_memory.c-use-helper-function-migration_entry.patch
new file mode 100644 (file)
index 0000000..96ae0cf
--- /dev/null
@@ -0,0 +1,59 @@
+From 62d6fafeabdba7f53ae5fdadd77f163d0e0bcedb Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 4 May 2021 18:34:08 -0700
+Subject: mm/huge_memory.c: use helper function migration_entry_to_page()
+
+From: Miaohe Lin <linmiaohe@huawei.com>
+
+[ Upstream commit a44f89dc6c5f8ba70240b81a570260d29d04bcb0 ]
+
+It's more recommended to use helper function migration_entry_to_page()
+to get the page via migration entry.  We can also enjoy the PageLocked()
+check there.
+
+Link: https://lkml.kernel.org/r/20210318122722.13135-7-linmiaohe@huawei.com
+Signed-off-by: Miaohe Lin <linmiaohe@huawei.com>
+Reviewed-by: Peter Xu <peterx@redhat.com>
+Cc: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
+Cc: Matthew Wilcox <willy@infradead.org>
+Cc: Michel Lespinasse <walken@google.com>
+Cc: Ralph Campbell <rcampbell@nvidia.com>
+Cc: Thomas Hellstrm (Intel) <thomas_os@shipmail.org>
+Cc: Vlastimil Babka <vbabka@suse.cz>
+Cc: Wei Yang <richard.weiyang@linux.alibaba.com>
+Cc: William Kucharski <william.kucharski@oracle.com>
+Cc: Yang Shi <yang.shi@linux.alibaba.com>
+Cc: yuleixzhang <yulei.kernel@gmail.com>
+Cc: Zi Yan <ziy@nvidia.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ mm/huge_memory.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/mm/huge_memory.c b/mm/huge_memory.c
+index 594368f6134f1..cb7b0aead7096 100644
+--- a/mm/huge_memory.c
++++ b/mm/huge_memory.c
+@@ -1691,7 +1691,7 @@ int zap_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
+                       VM_BUG_ON(!is_pmd_migration_entry(orig_pmd));
+                       entry = pmd_to_swp_entry(orig_pmd);
+-                      page = pfn_to_page(swp_offset(entry));
++                      page = migration_entry_to_page(entry);
+                       flush_needed = 0;
+               } else
+                       WARN_ONCE(1, "Non present huge pmd without pmd migration enabled!");
+@@ -2110,7 +2110,7 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
+               swp_entry_t entry;
+               entry = pmd_to_swp_entry(old_pmd);
+-              page = pfn_to_page(swp_offset(entry));
++              page = migration_entry_to_page(entry);
+               write = is_write_migration_entry(entry);
+               young = false;
+               soft_dirty = pmd_swp_soft_dirty(old_pmd);
+-- 
+2.35.1
+
diff --git a/queue-5.10/mm-smaps-don-t-access-young-dirty-bit-if-pte-unprese.patch b/queue-5.10/mm-smaps-don-t-access-young-dirty-bit-if-pte-unprese.patch
new file mode 100644 (file)
index 0000000..efa1dac
--- /dev/null
@@ -0,0 +1,67 @@
+From 4603835ba6cb729d6bfeecc8ceb2b16cfec721f4 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 5 Aug 2022 12:00:03 -0400
+Subject: mm/smaps: don't access young/dirty bit if pte unpresent
+
+From: Peter Xu <peterx@redhat.com>
+
+[ Upstream commit efd4149342db2df41b1bbe68972ead853b30e444 ]
+
+These bits should only be valid when the ptes are present.  Introducing
+two booleans for it and set it to false when !pte_present() for both pte
+and pmd accountings.
+
+The bug is found during code reading and no real world issue reported, but
+logically such an error can cause incorrect readings for either smaps or
+smaps_rollup output on quite a few fields.
+
+For example, it could cause over-estimate on values like Shared_Dirty,
+Private_Dirty, Referenced.  Or it could also cause under-estimate on
+values like LazyFree, Shared_Clean, Private_Clean.
+
+Link: https://lkml.kernel.org/r/20220805160003.58929-1-peterx@redhat.com
+Fixes: b1d4d9e0cbd0 ("proc/smaps: carefully handle migration entries")
+Fixes: c94b6923fa0a ("/proc/PID/smaps: Add PMD migration entry parsing")
+Signed-off-by: Peter Xu <peterx@redhat.com>
+Reviewed-by: Vlastimil Babka <vbabka@suse.cz>
+Reviewed-by: David Hildenbrand <david@redhat.com>
+Reviewed-by: Yang Shi <shy828301@gmail.com>
+Cc: Konstantin Khlebnikov <khlebnikov@openvz.org>
+Cc: Huang Ying <ying.huang@intel.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/proc/task_mmu.c | 7 ++++---
+ 1 file changed, 4 insertions(+), 3 deletions(-)
+
+diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
+index ba98371e9d164..ef18f0d71b11b 100644
+--- a/fs/proc/task_mmu.c
++++ b/fs/proc/task_mmu.c
+@@ -503,10 +503,12 @@ static void smaps_pte_entry(pte_t *pte, unsigned long addr,
+       struct vm_area_struct *vma = walk->vma;
+       bool locked = !!(vma->vm_flags & VM_LOCKED);
+       struct page *page = NULL;
+-      bool migration = false;
++      bool migration = false, young = false, dirty = false;
+       if (pte_present(*pte)) {
+               page = vm_normal_page(vma, addr, *pte);
++              young = pte_young(*pte);
++              dirty = pte_dirty(*pte);
+       } else if (is_swap_pte(*pte)) {
+               swp_entry_t swpent = pte_to_swp_entry(*pte);
+@@ -540,8 +542,7 @@ static void smaps_pte_entry(pte_t *pte, unsigned long addr,
+       if (!page)
+               return;
+-      smaps_account(mss, page, false, pte_young(*pte), pte_dirty(*pte),
+-                    locked, migration);
++      smaps_account(mss, page, false, young, dirty, locked, migration);
+ }
+ #ifdef CONFIG_TRANSPARENT_HUGEPAGE
+-- 
+2.35.1
+
diff --git a/queue-5.10/net-fix-a-data-race-around-netdev_budget.patch b/queue-5.10/net-fix-a-data-race-around-netdev_budget.patch
new file mode 100644 (file)
index 0000000..e62fd6c
--- /dev/null
@@ -0,0 +1,36 @@
+From e61d24e07fef3aae012f060932773918d08f5b0f Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 23 Aug 2022 10:46:53 -0700
+Subject: net: Fix a data-race around netdev_budget.
+
+From: Kuniyuki Iwashima <kuniyu@amazon.com>
+
+[ Upstream commit 2e0c42374ee32e72948559d2ae2f7ba3dc6b977c ]
+
+While reading netdev_budget, it can be changed concurrently.
+Thus, we need to add READ_ONCE() to its reader.
+
+Fixes: 51b0bdedb8e7 ("[NET]: Separate two usages of netdev_max_backlog.")
+Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/core/dev.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/net/core/dev.c b/net/core/dev.c
+index 1ea75768c5b23..c4eb1b666a21c 100644
+--- a/net/core/dev.c
++++ b/net/core/dev.c
+@@ -6880,7 +6880,7 @@ static __latent_entropy void net_rx_action(struct softirq_action *h)
+       struct softnet_data *sd = this_cpu_ptr(&softnet_data);
+       unsigned long time_limit = jiffies +
+               usecs_to_jiffies(netdev_budget_usecs);
+-      int budget = netdev_budget;
++      int budget = READ_ONCE(netdev_budget);
+       LIST_HEAD(list);
+       LIST_HEAD(repoll);
+-- 
+2.35.1
+
diff --git a/queue-5.10/net-fix-a-data-race-around-netdev_budget_usecs.patch b/queue-5.10/net-fix-a-data-race-around-netdev_budget_usecs.patch
new file mode 100644 (file)
index 0000000..b8402ed
--- /dev/null
@@ -0,0 +1,36 @@
+From a3ff3ae47a30f5458e193091552f130218061a7b Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 23 Aug 2022 10:46:55 -0700
+Subject: net: Fix a data-race around netdev_budget_usecs.
+
+From: Kuniyuki Iwashima <kuniyu@amazon.com>
+
+[ Upstream commit fa45d484c52c73f79db2c23b0cdfc6c6455093ad ]
+
+While reading netdev_budget_usecs, it can be changed concurrently.
+Thus, we need to add READ_ONCE() to its reader.
+
+Fixes: 7acf8a1e8a28 ("Replace 2 jiffies with sysctl netdev_budget_usecs to enable softirq tuning")
+Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/core/dev.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/net/core/dev.c b/net/core/dev.c
+index c4eb1b666a21c..8355cc5e11a98 100644
+--- a/net/core/dev.c
++++ b/net/core/dev.c
+@@ -6879,7 +6879,7 @@ static __latent_entropy void net_rx_action(struct softirq_action *h)
+ {
+       struct softnet_data *sd = this_cpu_ptr(&softnet_data);
+       unsigned long time_limit = jiffies +
+-              usecs_to_jiffies(netdev_budget_usecs);
++              usecs_to_jiffies(READ_ONCE(netdev_budget_usecs));
+       int budget = READ_ONCE(netdev_budget);
+       LIST_HEAD(list);
+       LIST_HEAD(repoll);
+-- 
+2.35.1
+
diff --git a/queue-5.10/net-fix-a-data-race-around-sysctl_net_busy_poll.patch b/queue-5.10/net-fix-a-data-race-around-sysctl_net_busy_poll.patch
new file mode 100644 (file)
index 0000000..29b74cf
--- /dev/null
@@ -0,0 +1,36 @@
+From f668bbf679cfcb3144736b8b6a1268aad9a81d25 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 23 Aug 2022 10:46:51 -0700
+Subject: net: Fix a data-race around sysctl_net_busy_poll.
+
+From: Kuniyuki Iwashima <kuniyu@amazon.com>
+
+[ Upstream commit c42b7cddea47503411bfb5f2f93a4154aaffa2d9 ]
+
+While reading sysctl_net_busy_poll, it can be changed concurrently.
+Thus, we need to add READ_ONCE() to its reader.
+
+Fixes: 060212928670 ("net: add low latency socket poll")
+Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/net/busy_poll.h | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/include/net/busy_poll.h b/include/net/busy_poll.h
+index 716b7c5f6fdd9..36e5e75e71720 100644
+--- a/include/net/busy_poll.h
++++ b/include/net/busy_poll.h
+@@ -31,7 +31,7 @@ extern unsigned int sysctl_net_busy_poll __read_mostly;
+ static inline bool net_busy_loop_on(void)
+ {
+-      return sysctl_net_busy_poll;
++      return READ_ONCE(sysctl_net_busy_poll);
+ }
+ static inline bool sk_can_busy_loop(const struct sock *sk)
+-- 
+2.35.1
+
diff --git a/queue-5.10/net-fix-a-data-race-around-sysctl_net_busy_read.patch b/queue-5.10/net-fix-a-data-race-around-sysctl_net_busy_read.patch
new file mode 100644 (file)
index 0000000..48f9873
--- /dev/null
@@ -0,0 +1,36 @@
+From 1acc46a9a100dcaf7ec1a4510da4a03ac8bb63e2 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 23 Aug 2022 10:46:52 -0700
+Subject: net: Fix a data-race around sysctl_net_busy_read.
+
+From: Kuniyuki Iwashima <kuniyu@amazon.com>
+
+[ Upstream commit e59ef36f0795696ab229569c153936bfd068d21c ]
+
+While reading sysctl_net_busy_read, it can be changed concurrently.
+Thus, we need to add READ_ONCE() to its reader.
+
+Fixes: 2d48d67fa8cd ("net: poll/select low latency socket support")
+Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/core/sock.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/net/core/sock.c b/net/core/sock.c
+index f01e71c98d5be..1bb6a003323b3 100644
+--- a/net/core/sock.c
++++ b/net/core/sock.c
+@@ -3032,7 +3032,7 @@ void sock_init_data(struct socket *sock, struct sock *sk)
+ #ifdef CONFIG_NET_RX_BUSY_POLL
+       sk->sk_napi_id          =       0;
+-      sk->sk_ll_usec          =       sysctl_net_busy_read;
++      sk->sk_ll_usec          =       READ_ONCE(sysctl_net_busy_read);
+ #endif
+       sk->sk_max_pacing_rate = ~0UL;
+-- 
+2.35.1
+
diff --git a/queue-5.10/net-fix-a-data-race-around-sysctl_somaxconn.patch b/queue-5.10/net-fix-a-data-race-around-sysctl_somaxconn.patch
new file mode 100644 (file)
index 0000000..cb71363
--- /dev/null
@@ -0,0 +1,36 @@
+From 1138e016d836ce791890229ff2206c82ec44859f Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 23 Aug 2022 10:47:00 -0700
+Subject: net: Fix a data-race around sysctl_somaxconn.
+
+From: Kuniyuki Iwashima <kuniyu@amazon.com>
+
+[ Upstream commit 3c9ba81d72047f2e81bb535d42856517b613aba7 ]
+
+While reading sysctl_somaxconn, it can be changed concurrently.
+Thus, we need to add READ_ONCE() to its reader.
+
+Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
+Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/socket.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/net/socket.c b/net/socket.c
+index d52c265ad449b..bcf68b150fe29 100644
+--- a/net/socket.c
++++ b/net/socket.c
+@@ -1670,7 +1670,7 @@ int __sys_listen(int fd, int backlog)
+       sock = sockfd_lookup_light(fd, &err, &fput_needed);
+       if (sock) {
+-              somaxconn = sock_net(sock->sk)->core.sysctl_somaxconn;
++              somaxconn = READ_ONCE(sock_net(sock->sk)->core.sysctl_somaxconn);
+               if ((unsigned int)backlog > somaxconn)
+                       backlog = somaxconn;
+-- 
+2.35.1
+
diff --git a/queue-5.10/net-fix-a-data-race-around-sysctl_tstamp_allow_data.patch b/queue-5.10/net-fix-a-data-race-around-sysctl_tstamp_allow_data.patch
new file mode 100644 (file)
index 0000000..29e6fc9
--- /dev/null
@@ -0,0 +1,36 @@
+From 7a218057c6471bcc033912ef9103968158daf3bf Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 23 Aug 2022 10:46:50 -0700
+Subject: net: Fix a data-race around sysctl_tstamp_allow_data.
+
+From: Kuniyuki Iwashima <kuniyu@amazon.com>
+
+[ Upstream commit d2154b0afa73c0159b2856f875c6b4fe7cf6a95e ]
+
+While reading sysctl_tstamp_allow_data, it can be changed
+concurrently.  Thus, we need to add READ_ONCE() to its reader.
+
+Fixes: b245be1f4db1 ("net-timestamp: no-payload only sysctl")
+Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/core/skbuff.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/net/core/skbuff.c b/net/core/skbuff.c
+index 48b6438f2a3d9..635cabcf8794f 100644
+--- a/net/core/skbuff.c
++++ b/net/core/skbuff.c
+@@ -4691,7 +4691,7 @@ static bool skb_may_tx_timestamp(struct sock *sk, bool tsonly)
+ {
+       bool ret;
+-      if (likely(sysctl_tstamp_allow_data || tsonly))
++      if (likely(READ_ONCE(sysctl_tstamp_allow_data) || tsonly))
+               return true;
+       read_lock_bh(&sk->sk_callback_lock);
+-- 
+2.35.1
+
diff --git a/queue-5.10/net-fix-data-races-around-netdev_max_backlog.patch b/queue-5.10/net-fix-data-races-around-netdev_max_backlog.patch
new file mode 100644 (file)
index 0000000..5e9d03c
--- /dev/null
@@ -0,0 +1,103 @@
+From b69559976b9c6b71299cf6df2b55e01d78e79677 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 23 Aug 2022 10:46:46 -0700
+Subject: net: Fix data-races around netdev_max_backlog.
+
+From: Kuniyuki Iwashima <kuniyu@amazon.com>
+
+[ Upstream commit 5dcd08cd19912892586c6082d56718333e2d19db ]
+
+While reading netdev_max_backlog, it can be changed concurrently.
+Thus, we need to add READ_ONCE() to its readers.
+
+While at it, we remove the unnecessary spaces in the doc.
+
+Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
+Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ Documentation/admin-guide/sysctl/net.rst | 2 +-
+ net/core/dev.c                           | 4 ++--
+ net/core/gro_cells.c                     | 2 +-
+ net/xfrm/espintcp.c                      | 2 +-
+ net/xfrm/xfrm_input.c                    | 2 +-
+ 5 files changed, 6 insertions(+), 6 deletions(-)
+
+diff --git a/Documentation/admin-guide/sysctl/net.rst b/Documentation/admin-guide/sysctl/net.rst
+index f2ab8a5b6a4b8..7f553859dba82 100644
+--- a/Documentation/admin-guide/sysctl/net.rst
++++ b/Documentation/admin-guide/sysctl/net.rst
+@@ -271,7 +271,7 @@ poll cycle or the number of packets processed reaches netdev_budget.
+ netdev_max_backlog
+ ------------------
+-Maximum number  of  packets,  queued  on  the  INPUT  side, when the interface
++Maximum number of packets, queued on the INPUT side, when the interface
+ receives packets faster than kernel can process them.
+ netdev_rss_key
+diff --git a/net/core/dev.c b/net/core/dev.c
+index 701a1afc91ff1..215c43aecc67e 100644
+--- a/net/core/dev.c
++++ b/net/core/dev.c
+@@ -4516,7 +4516,7 @@ static bool skb_flow_limit(struct sk_buff *skb, unsigned int qlen)
+       struct softnet_data *sd;
+       unsigned int old_flow, new_flow;
+-      if (qlen < (netdev_max_backlog >> 1))
++      if (qlen < (READ_ONCE(netdev_max_backlog) >> 1))
+               return false;
+       sd = this_cpu_ptr(&softnet_data);
+@@ -4564,7 +4564,7 @@ static int enqueue_to_backlog(struct sk_buff *skb, int cpu,
+       if (!netif_running(skb->dev))
+               goto drop;
+       qlen = skb_queue_len(&sd->input_pkt_queue);
+-      if (qlen <= netdev_max_backlog && !skb_flow_limit(skb, qlen)) {
++      if (qlen <= READ_ONCE(netdev_max_backlog) && !skb_flow_limit(skb, qlen)) {
+               if (qlen) {
+ enqueue:
+                       __skb_queue_tail(&sd->input_pkt_queue, skb);
+diff --git a/net/core/gro_cells.c b/net/core/gro_cells.c
+index 6eb2e5ec2c506..2f66f3f295630 100644
+--- a/net/core/gro_cells.c
++++ b/net/core/gro_cells.c
+@@ -26,7 +26,7 @@ int gro_cells_receive(struct gro_cells *gcells, struct sk_buff *skb)
+       cell = this_cpu_ptr(gcells->cells);
+-      if (skb_queue_len(&cell->napi_skbs) > netdev_max_backlog) {
++      if (skb_queue_len(&cell->napi_skbs) > READ_ONCE(netdev_max_backlog)) {
+ drop:
+               atomic_long_inc(&dev->rx_dropped);
+               kfree_skb(skb);
+diff --git a/net/xfrm/espintcp.c b/net/xfrm/espintcp.c
+index 1f08ebf7d80c5..24ca49ecebea3 100644
+--- a/net/xfrm/espintcp.c
++++ b/net/xfrm/espintcp.c
+@@ -170,7 +170,7 @@ int espintcp_queue_out(struct sock *sk, struct sk_buff *skb)
+ {
+       struct espintcp_ctx *ctx = espintcp_getctx(sk);
+-      if (skb_queue_len(&ctx->out_queue) >= netdev_max_backlog)
++      if (skb_queue_len(&ctx->out_queue) >= READ_ONCE(netdev_max_backlog))
+               return -ENOBUFS;
+       __skb_queue_tail(&ctx->out_queue, skb);
+diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c
+index 61e6220ddd5ae..77e82033ad700 100644
+--- a/net/xfrm/xfrm_input.c
++++ b/net/xfrm/xfrm_input.c
+@@ -782,7 +782,7 @@ int xfrm_trans_queue_net(struct net *net, struct sk_buff *skb,
+       trans = this_cpu_ptr(&xfrm_trans_tasklet);
+-      if (skb_queue_len(&trans->queue) >= netdev_max_backlog)
++      if (skb_queue_len(&trans->queue) >= READ_ONCE(netdev_max_backlog))
+               return -ENOBUFS;
+       BUILD_BUG_ON(sizeof(struct xfrm_trans_cb) > sizeof(skb->cb));
+-- 
+2.35.1
+
diff --git a/queue-5.10/net-fix-data-races-around-netdev_tstamp_prequeue.patch b/queue-5.10/net-fix-data-races-around-netdev_tstamp_prequeue.patch
new file mode 100644 (file)
index 0000000..91f5ccb
--- /dev/null
@@ -0,0 +1,63 @@
+From 6f004c7c3b7a7966a4f57bd71cfa0e2ec773a64d Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 23 Aug 2022 10:46:47 -0700
+Subject: net: Fix data-races around netdev_tstamp_prequeue.
+
+From: Kuniyuki Iwashima <kuniyu@amazon.com>
+
+[ Upstream commit 61adf447e38664447526698872e21c04623afb8e ]
+
+While reading netdev_tstamp_prequeue, it can be changed concurrently.
+Thus, we need to add READ_ONCE() to its readers.
+
+Fixes: 3b098e2d7c69 ("net: Consistent skb timestamping")
+Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/core/dev.c | 8 ++++----
+ 1 file changed, 4 insertions(+), 4 deletions(-)
+
+diff --git a/net/core/dev.c b/net/core/dev.c
+index 215c43aecc67e..1ea75768c5b23 100644
+--- a/net/core/dev.c
++++ b/net/core/dev.c
+@@ -4795,7 +4795,7 @@ static int netif_rx_internal(struct sk_buff *skb)
+ {
+       int ret;
+-      net_timestamp_check(netdev_tstamp_prequeue, skb);
++      net_timestamp_check(READ_ONCE(netdev_tstamp_prequeue), skb);
+       trace_netif_rx(skb);
+@@ -5156,7 +5156,7 @@ static int __netif_receive_skb_core(struct sk_buff **pskb, bool pfmemalloc,
+       int ret = NET_RX_DROP;
+       __be16 type;
+-      net_timestamp_check(!netdev_tstamp_prequeue, skb);
++      net_timestamp_check(!READ_ONCE(netdev_tstamp_prequeue), skb);
+       trace_netif_receive_skb(skb);
+@@ -5558,7 +5558,7 @@ static int netif_receive_skb_internal(struct sk_buff *skb)
+ {
+       int ret;
+-      net_timestamp_check(netdev_tstamp_prequeue, skb);
++      net_timestamp_check(READ_ONCE(netdev_tstamp_prequeue), skb);
+       if (skb_defer_rx_timestamp(skb))
+               return NET_RX_SUCCESS;
+@@ -5588,7 +5588,7 @@ static void netif_receive_skb_list_internal(struct list_head *head)
+       INIT_LIST_HEAD(&sublist);
+       list_for_each_entry_safe(skb, next, head, list) {
+-              net_timestamp_check(netdev_tstamp_prequeue, skb);
++              net_timestamp_check(READ_ONCE(netdev_tstamp_prequeue), skb);
+               skb_list_del_init(skb);
+               if (!skb_defer_rx_timestamp(skb))
+                       list_add_tail(&skb->list, &sublist);
+-- 
+2.35.1
+
diff --git a/queue-5.10/net-fix-data-races-around-sysctl_-rw-mem-_offset.patch b/queue-5.10/net-fix-data-races-around-sysctl_-rw-mem-_offset.patch
new file mode 100644 (file)
index 0000000..26c51b0
--- /dev/null
@@ -0,0 +1,207 @@
+From bc1ecf23ef8f8689e1e0826b077f050bd6310aa4 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 22 Jul 2022 11:22:00 -0700
+Subject: net: Fix data-races around sysctl_[rw]mem(_offset)?.
+
+From: Kuniyuki Iwashima <kuniyu@amazon.com>
+
+[ Upstream commit 02739545951ad4c1215160db7fbf9b7a918d3c0b ]
+
+While reading these sysctl variables, they can be changed concurrently.
+Thus, we need to add READ_ONCE() to their readers.
+
+  - .sysctl_rmem
+  - .sysctl_rwmem
+  - .sysctl_rmem_offset
+  - .sysctl_wmem_offset
+  - sysctl_tcp_rmem[1, 2]
+  - sysctl_tcp_wmem[1, 2]
+  - sysctl_decnet_rmem[1]
+  - sysctl_decnet_wmem[1]
+  - sysctl_tipc_rmem[1]
+
+Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
+Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/net/sock.h     |  8 ++++----
+ net/decnet/af_decnet.c |  4 ++--
+ net/ipv4/tcp.c         |  6 +++---
+ net/ipv4/tcp_input.c   | 13 +++++++------
+ net/ipv4/tcp_output.c  |  2 +-
+ net/mptcp/protocol.c   |  6 +++---
+ net/tipc/socket.c      |  2 +-
+ 7 files changed, 21 insertions(+), 20 deletions(-)
+
+diff --git a/include/net/sock.h b/include/net/sock.h
+index 333131f47ac13..d31c2b9107e54 100644
+--- a/include/net/sock.h
++++ b/include/net/sock.h
+@@ -2678,18 +2678,18 @@ static inline int sk_get_wmem0(const struct sock *sk, const struct proto *proto)
+ {
+       /* Does this proto have per netns sysctl_wmem ? */
+       if (proto->sysctl_wmem_offset)
+-              return *(int *)((void *)sock_net(sk) + proto->sysctl_wmem_offset);
++              return READ_ONCE(*(int *)((void *)sock_net(sk) + proto->sysctl_wmem_offset));
+-      return *proto->sysctl_wmem;
++      return READ_ONCE(*proto->sysctl_wmem);
+ }
+ static inline int sk_get_rmem0(const struct sock *sk, const struct proto *proto)
+ {
+       /* Does this proto have per netns sysctl_rmem ? */
+       if (proto->sysctl_rmem_offset)
+-              return *(int *)((void *)sock_net(sk) + proto->sysctl_rmem_offset);
++              return READ_ONCE(*(int *)((void *)sock_net(sk) + proto->sysctl_rmem_offset));
+-      return *proto->sysctl_rmem;
++      return READ_ONCE(*proto->sysctl_rmem);
+ }
+ /* Default TCP Small queue budget is ~1 ms of data (1sec >> 10)
+diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c
+index dc92a67baea39..7d542eb461729 100644
+--- a/net/decnet/af_decnet.c
++++ b/net/decnet/af_decnet.c
+@@ -480,8 +480,8 @@ static struct sock *dn_alloc_sock(struct net *net, struct socket *sock, gfp_t gf
+       sk->sk_family      = PF_DECnet;
+       sk->sk_protocol    = 0;
+       sk->sk_allocation  = gfp;
+-      sk->sk_sndbuf      = sysctl_decnet_wmem[1];
+-      sk->sk_rcvbuf      = sysctl_decnet_rmem[1];
++      sk->sk_sndbuf      = READ_ONCE(sysctl_decnet_wmem[1]);
++      sk->sk_rcvbuf      = READ_ONCE(sysctl_decnet_rmem[1]);
+       /* Initialization of DECnet Session Control Port                */
+       scp = DN_SK(sk);
+diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
+index 78460eb39b3af..bfeb05f62b94f 100644
+--- a/net/ipv4/tcp.c
++++ b/net/ipv4/tcp.c
+@@ -451,8 +451,8 @@ void tcp_init_sock(struct sock *sk)
+       icsk->icsk_sync_mss = tcp_sync_mss;
+-      WRITE_ONCE(sk->sk_sndbuf, sock_net(sk)->ipv4.sysctl_tcp_wmem[1]);
+-      WRITE_ONCE(sk->sk_rcvbuf, sock_net(sk)->ipv4.sysctl_tcp_rmem[1]);
++      WRITE_ONCE(sk->sk_sndbuf, READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_wmem[1]));
++      WRITE_ONCE(sk->sk_rcvbuf, READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_rmem[1]));
+       sk_sockets_allocated_inc(sk);
+       sk->sk_route_forced_caps = NETIF_F_GSO;
+@@ -1711,7 +1711,7 @@ int tcp_set_rcvlowat(struct sock *sk, int val)
+       if (sk->sk_userlocks & SOCK_RCVBUF_LOCK)
+               cap = sk->sk_rcvbuf >> 1;
+       else
+-              cap = sock_net(sk)->ipv4.sysctl_tcp_rmem[2] >> 1;
++              cap = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_rmem[2]) >> 1;
+       val = min(val, cap);
+       WRITE_ONCE(sk->sk_rcvlowat, val ? : 1);
+diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
+index 33a3fb04ac4df..41b44b311e8a0 100644
+--- a/net/ipv4/tcp_input.c
++++ b/net/ipv4/tcp_input.c
+@@ -425,7 +425,7 @@ static void tcp_sndbuf_expand(struct sock *sk)
+       if (sk->sk_sndbuf < sndmem)
+               WRITE_ONCE(sk->sk_sndbuf,
+-                         min(sndmem, sock_net(sk)->ipv4.sysctl_tcp_wmem[2]));
++                         min(sndmem, READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_wmem[2])));
+ }
+ /* 2. Tuning advertised window (window_clamp, rcv_ssthresh)
+@@ -460,7 +460,7 @@ static int __tcp_grow_window(const struct sock *sk, const struct sk_buff *skb,
+       struct tcp_sock *tp = tcp_sk(sk);
+       /* Optimize this! */
+       int truesize = tcp_win_from_space(sk, skbtruesize) >> 1;
+-      int window = tcp_win_from_space(sk, sock_net(sk)->ipv4.sysctl_tcp_rmem[2]) >> 1;
++      int window = tcp_win_from_space(sk, READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_rmem[2])) >> 1;
+       while (tp->rcv_ssthresh <= window) {
+               if (truesize <= skb->len)
+@@ -565,16 +565,17 @@ static void tcp_clamp_window(struct sock *sk)
+       struct tcp_sock *tp = tcp_sk(sk);
+       struct inet_connection_sock *icsk = inet_csk(sk);
+       struct net *net = sock_net(sk);
++      int rmem2;
+       icsk->icsk_ack.quick = 0;
++      rmem2 = READ_ONCE(net->ipv4.sysctl_tcp_rmem[2]);
+-      if (sk->sk_rcvbuf < net->ipv4.sysctl_tcp_rmem[2] &&
++      if (sk->sk_rcvbuf < rmem2 &&
+           !(sk->sk_userlocks & SOCK_RCVBUF_LOCK) &&
+           !tcp_under_memory_pressure(sk) &&
+           sk_memory_allocated(sk) < sk_prot_mem_limits(sk, 0)) {
+               WRITE_ONCE(sk->sk_rcvbuf,
+-                         min(atomic_read(&sk->sk_rmem_alloc),
+-                             net->ipv4.sysctl_tcp_rmem[2]));
++                         min(atomic_read(&sk->sk_rmem_alloc), rmem2));
+       }
+       if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf)
+               tp->rcv_ssthresh = min(tp->window_clamp, 2U * tp->advmss);
+@@ -736,7 +737,7 @@ void tcp_rcv_space_adjust(struct sock *sk)
+               do_div(rcvwin, tp->advmss);
+               rcvbuf = min_t(u64, rcvwin * rcvmem,
+-                             sock_net(sk)->ipv4.sysctl_tcp_rmem[2]);
++                             READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_rmem[2]));
+               if (rcvbuf > sk->sk_rcvbuf) {
+                       WRITE_ONCE(sk->sk_rcvbuf, rcvbuf);
+diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
+index 4c9274cb92d55..c90c9541996bf 100644
+--- a/net/ipv4/tcp_output.c
++++ b/net/ipv4/tcp_output.c
+@@ -238,7 +238,7 @@ void tcp_select_initial_window(const struct sock *sk, int __space, __u32 mss,
+       *rcv_wscale = 0;
+       if (wscale_ok) {
+               /* Set window scaling on max possible window */
+-              space = max_t(u32, space, sock_net(sk)->ipv4.sysctl_tcp_rmem[2]);
++              space = max_t(u32, space, READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_rmem[2]));
+               space = max_t(u32, space, sysctl_rmem_max);
+               space = min_t(u32, space, *window_clamp);
+               *rcv_wscale = clamp_t(int, ilog2(space) - 15,
+diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
+index d0e91aa7b30e5..e61c85873ea2f 100644
+--- a/net/mptcp/protocol.c
++++ b/net/mptcp/protocol.c
+@@ -1439,7 +1439,7 @@ static void mptcp_rcv_space_adjust(struct mptcp_sock *msk, int copied)
+               do_div(rcvwin, advmss);
+               rcvbuf = min_t(u64, rcvwin * rcvmem,
+-                             sock_net(sk)->ipv4.sysctl_tcp_rmem[2]);
++                             READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_rmem[2]));
+               if (rcvbuf > sk->sk_rcvbuf) {
+                       u32 window_clamp;
+@@ -1872,8 +1872,8 @@ static int mptcp_init_sock(struct sock *sk)
+               return ret;
+       sk_sockets_allocated_inc(sk);
+-      sk->sk_rcvbuf = sock_net(sk)->ipv4.sysctl_tcp_rmem[1];
+-      sk->sk_sndbuf = sock_net(sk)->ipv4.sysctl_tcp_wmem[1];
++      sk->sk_rcvbuf = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_rmem[1]);
++      sk->sk_sndbuf = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_wmem[1]);
+       return 0;
+ }
+diff --git a/net/tipc/socket.c b/net/tipc/socket.c
+index 38256aabf4f1d..8f3c9fbb99165 100644
+--- a/net/tipc/socket.c
++++ b/net/tipc/socket.c
+@@ -504,7 +504,7 @@ static int tipc_sk_create(struct net *net, struct socket *sock,
+       timer_setup(&sk->sk_timer, tipc_sk_timeout, 0);
+       sk->sk_shutdown = 0;
+       sk->sk_backlog_rcv = tipc_sk_backlog_rcv;
+-      sk->sk_rcvbuf = sysctl_tipc_rmem[1];
++      sk->sk_rcvbuf = READ_ONCE(sysctl_tipc_rmem[1]);
+       sk->sk_data_ready = tipc_data_ready;
+       sk->sk_write_space = tipc_write_space;
+       sk->sk_destruct = tipc_sock_destruct;
+-- 
+2.35.1
+
diff --git a/queue-5.10/net-fix-data-races-around-sysctl_-rw-mem_-max-defaul.patch b/queue-5.10/net-fix-data-races-around-sysctl_-rw-mem_-max-defaul.patch
new file mode 100644 (file)
index 0000000..a553775
--- /dev/null
@@ -0,0 +1,126 @@
+From 068ed5c51cce4f9f03c8758072ba7019cf0b4525 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 23 Aug 2022 10:46:44 -0700
+Subject: net: Fix data-races around sysctl_[rw]mem_(max|default).
+
+From: Kuniyuki Iwashima <kuniyu@amazon.com>
+
+[ Upstream commit 1227c1771dd2ad44318aa3ab9e3a293b3f34ff2a ]
+
+While reading sysctl_[rw]mem_(max|default), they can be changed
+concurrently.  Thus, we need to add READ_ONCE() to its readers.
+
+Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
+Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/core/filter.c               | 4 ++--
+ net/core/sock.c                 | 8 ++++----
+ net/ipv4/ip_output.c            | 2 +-
+ net/ipv4/tcp_output.c           | 2 +-
+ net/netfilter/ipvs/ip_vs_sync.c | 4 ++--
+ 5 files changed, 10 insertions(+), 10 deletions(-)
+
+diff --git a/net/core/filter.c b/net/core/filter.c
+index 815edf7bc4390..6a90c1eb6f67e 100644
+--- a/net/core/filter.c
++++ b/net/core/filter.c
+@@ -4713,14 +4713,14 @@ static int _bpf_setsockopt(struct sock *sk, int level, int optname,
+               /* Only some socketops are supported */
+               switch (optname) {
+               case SO_RCVBUF:
+-                      val = min_t(u32, val, sysctl_rmem_max);
++                      val = min_t(u32, val, READ_ONCE(sysctl_rmem_max));
+                       val = min_t(int, val, INT_MAX / 2);
+                       sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
+                       WRITE_ONCE(sk->sk_rcvbuf,
+                                  max_t(int, val * 2, SOCK_MIN_RCVBUF));
+                       break;
+               case SO_SNDBUF:
+-                      val = min_t(u32, val, sysctl_wmem_max);
++                      val = min_t(u32, val, READ_ONCE(sysctl_wmem_max));
+                       val = min_t(int, val, INT_MAX / 2);
+                       sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
+                       WRITE_ONCE(sk->sk_sndbuf,
+diff --git a/net/core/sock.c b/net/core/sock.c
+index 6d9af4ef93d7a..25d25dcd0c3db 100644
+--- a/net/core/sock.c
++++ b/net/core/sock.c
+@@ -887,7 +887,7 @@ int sock_setsockopt(struct socket *sock, int level, int optname,
+                * play 'guess the biggest size' games. RCVBUF/SNDBUF
+                * are treated in BSD as hints
+                */
+-              val = min_t(u32, val, sysctl_wmem_max);
++              val = min_t(u32, val, READ_ONCE(sysctl_wmem_max));
+ set_sndbuf:
+               /* Ensure val * 2 fits into an int, to prevent max_t()
+                * from treating it as a negative value.
+@@ -919,7 +919,7 @@ int sock_setsockopt(struct socket *sock, int level, int optname,
+                * play 'guess the biggest size' games. RCVBUF/SNDBUF
+                * are treated in BSD as hints
+                */
+-              __sock_set_rcvbuf(sk, min_t(u32, val, sysctl_rmem_max));
++              __sock_set_rcvbuf(sk, min_t(u32, val, READ_ONCE(sysctl_rmem_max)));
+               break;
+       case SO_RCVBUFFORCE:
+@@ -2974,8 +2974,8 @@ void sock_init_data(struct socket *sock, struct sock *sk)
+       timer_setup(&sk->sk_timer, NULL, 0);
+       sk->sk_allocation       =       GFP_KERNEL;
+-      sk->sk_rcvbuf           =       sysctl_rmem_default;
+-      sk->sk_sndbuf           =       sysctl_wmem_default;
++      sk->sk_rcvbuf           =       READ_ONCE(sysctl_rmem_default);
++      sk->sk_sndbuf           =       READ_ONCE(sysctl_wmem_default);
+       sk->sk_state            =       TCP_CLOSE;
+       sk_set_socket(sk, sock);
+diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
+index f77b0af3cb657..0dbf950de832f 100644
+--- a/net/ipv4/ip_output.c
++++ b/net/ipv4/ip_output.c
+@@ -1721,7 +1721,7 @@ void ip_send_unicast_reply(struct sock *sk, struct sk_buff *skb,
+       sk->sk_protocol = ip_hdr(skb)->protocol;
+       sk->sk_bound_dev_if = arg->bound_dev_if;
+-      sk->sk_sndbuf = sysctl_wmem_default;
++      sk->sk_sndbuf = READ_ONCE(sysctl_wmem_default);
+       ipc.sockc.mark = fl4.flowi4_mark;
+       err = ip_append_data(sk, &fl4, ip_reply_glue_bits, arg->iov->iov_base,
+                            len, 0, &ipc, &rt, MSG_DONTWAIT);
+diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
+index c90c9541996bf..48fce999dc612 100644
+--- a/net/ipv4/tcp_output.c
++++ b/net/ipv4/tcp_output.c
+@@ -239,7 +239,7 @@ void tcp_select_initial_window(const struct sock *sk, int __space, __u32 mss,
+       if (wscale_ok) {
+               /* Set window scaling on max possible window */
+               space = max_t(u32, space, READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_rmem[2]));
+-              space = max_t(u32, space, sysctl_rmem_max);
++              space = max_t(u32, space, READ_ONCE(sysctl_rmem_max));
+               space = min_t(u32, space, *window_clamp);
+               *rcv_wscale = clamp_t(int, ilog2(space) - 15,
+                                     0, TCP_MAX_WSCALE);
+diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c
+index 16b48064f715e..daab857c52a80 100644
+--- a/net/netfilter/ipvs/ip_vs_sync.c
++++ b/net/netfilter/ipvs/ip_vs_sync.c
+@@ -1280,12 +1280,12 @@ static void set_sock_size(struct sock *sk, int mode, int val)
+       lock_sock(sk);
+       if (mode) {
+               val = clamp_t(int, val, (SOCK_MIN_SNDBUF + 1) / 2,
+-                            sysctl_wmem_max);
++                            READ_ONCE(sysctl_wmem_max));
+               sk->sk_sndbuf = val * 2;
+               sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
+       } else {
+               val = clamp_t(int, val, (SOCK_MIN_RCVBUF + 1) / 2,
+-                            sysctl_rmem_max);
++                            READ_ONCE(sysctl_rmem_max));
+               sk->sk_rcvbuf = val * 2;
+               sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
+       }
+-- 
+2.35.1
+
diff --git a/queue-5.10/net-fix-data-races-around-sysctl_devconf_inherit_ini.patch b/queue-5.10/net-fix-data-races-around-sysctl_devconf_inherit_ini.patch
new file mode 100644 (file)
index 0000000..5529ad9
--- /dev/null
@@ -0,0 +1,99 @@
+From bb8b5cd7c4c454ca32a9c647948dd31da0c44dcd Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 23 Aug 2022 10:46:57 -0700
+Subject: net: Fix data-races around sysctl_devconf_inherit_init_net.
+
+From: Kuniyuki Iwashima <kuniyu@amazon.com>
+
+[ Upstream commit a5612ca10d1aa05624ebe72633e0c8c792970833 ]
+
+While reading sysctl_devconf_inherit_init_net, it can be changed
+concurrently.  Thus, we need to add READ_ONCE() to its readers.
+
+Fixes: 856c395cfa63 ("net: introduce a knob to control whether to inherit devconf config")
+Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/linux/netdevice.h |  9 +++++++++
+ net/ipv4/devinet.c        | 16 ++++++++++------
+ net/ipv6/addrconf.c       |  5 ++---
+ 3 files changed, 21 insertions(+), 9 deletions(-)
+
+diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
+index c05701f89d6dd..6564fb4ac49e1 100644
+--- a/include/linux/netdevice.h
++++ b/include/linux/netdevice.h
+@@ -643,6 +643,15 @@ static inline bool net_has_fallback_tunnels(const struct net *net)
+ #endif
+ }
++static inline int net_inherit_devconf(void)
++{
++#if IS_ENABLED(CONFIG_SYSCTL)
++      return READ_ONCE(sysctl_devconf_inherit_init_net);
++#else
++      return 0;
++#endif
++}
++
+ static inline int netdev_queue_numa_node_read(const struct netdev_queue *q)
+ {
+ #if defined(CONFIG_XPS) && defined(CONFIG_NUMA)
+diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
+index 148ef484a66ce..8f17538755507 100644
+--- a/net/ipv4/devinet.c
++++ b/net/ipv4/devinet.c
+@@ -2668,23 +2668,27 @@ static __net_init int devinet_init_net(struct net *net)
+ #endif
+       if (!net_eq(net, &init_net)) {
+-              if (IS_ENABLED(CONFIG_SYSCTL) &&
+-                  sysctl_devconf_inherit_init_net == 3) {
++              switch (net_inherit_devconf()) {
++              case 3:
+                       /* copy from the current netns */
+                       memcpy(all, current->nsproxy->net_ns->ipv4.devconf_all,
+                              sizeof(ipv4_devconf));
+                       memcpy(dflt,
+                              current->nsproxy->net_ns->ipv4.devconf_dflt,
+                              sizeof(ipv4_devconf_dflt));
+-              } else if (!IS_ENABLED(CONFIG_SYSCTL) ||
+-                         sysctl_devconf_inherit_init_net != 2) {
+-                      /* inherit == 0 or 1: copy from init_net */
++                      break;
++              case 0:
++              case 1:
++                      /* copy from init_net */
+                       memcpy(all, init_net.ipv4.devconf_all,
+                              sizeof(ipv4_devconf));
+                       memcpy(dflt, init_net.ipv4.devconf_dflt,
+                              sizeof(ipv4_devconf_dflt));
++                      break;
++              case 2:
++                      /* use compiled values */
++                      break;
+               }
+-              /* else inherit == 2: use compiled values */
+       }
+ #ifdef CONFIG_SYSCTL
+diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
+index 05317e6f48f8a..ed1e5bfc97b31 100644
+--- a/net/ipv6/addrconf.c
++++ b/net/ipv6/addrconf.c
+@@ -7042,9 +7042,8 @@ static int __net_init addrconf_init_net(struct net *net)
+       if (!dflt)
+               goto err_alloc_dflt;
+-      if (IS_ENABLED(CONFIG_SYSCTL) &&
+-          !net_eq(net, &init_net)) {
+-              switch (sysctl_devconf_inherit_init_net) {
++      if (!net_eq(net, &init_net)) {
++              switch (net_inherit_devconf()) {
+               case 1:  /* copy from init_net */
+                       memcpy(all, init_net.ipv6.devconf_all,
+                              sizeof(ipv6_devconf));
+-- 
+2.35.1
+
diff --git a/queue-5.10/net-fix-data-races-around-sysctl_fb_tunnels_only_for.patch b/queue-5.10/net-fix-data-races-around-sysctl_fb_tunnels_only_for.patch
new file mode 100644 (file)
index 0000000..90b1ac5
--- /dev/null
@@ -0,0 +1,45 @@
+From 3231c22ae1e7f41ce29e937c4d71cd32b5bb602e Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 23 Aug 2022 10:46:56 -0700
+Subject: net: Fix data-races around sysctl_fb_tunnels_only_for_init_net.
+
+From: Kuniyuki Iwashima <kuniyu@amazon.com>
+
+[ Upstream commit af67508ea6cbf0e4ea27f8120056fa2efce127dd ]
+
+While reading sysctl_fb_tunnels_only_for_init_net, it can be changed
+concurrently.  Thus, we need to add READ_ONCE() to its readers.
+
+Fixes: 79134e6ce2c9 ("net: do not create fallback tunnels for non-default namespaces")
+Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/linux/netdevice.h | 11 ++++++++---
+ 1 file changed, 8 insertions(+), 3 deletions(-)
+
+diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
+index ed2d531400051..c05701f89d6dd 100644
+--- a/include/linux/netdevice.h
++++ b/include/linux/netdevice.h
+@@ -633,9 +633,14 @@ extern int sysctl_devconf_inherit_init_net;
+  */
+ static inline bool net_has_fallback_tunnels(const struct net *net)
+ {
+-      return !IS_ENABLED(CONFIG_SYSCTL) ||
+-             !sysctl_fb_tunnels_only_for_init_net ||
+-             (net == &init_net && sysctl_fb_tunnels_only_for_init_net == 1);
++#if IS_ENABLED(CONFIG_SYSCTL)
++      int fb_tunnels_only_for_init_net = READ_ONCE(sysctl_fb_tunnels_only_for_init_net);
++
++      return !fb_tunnels_only_for_init_net ||
++              (net_eq(net, &init_net) && fb_tunnels_only_for_init_net == 1);
++#else
++      return true;
++#endif
+ }
+ static inline int netdev_queue_numa_node_read(const struct netdev_queue *q)
+-- 
+2.35.1
+
diff --git a/queue-5.10/net-fix-data-races-around-sysctl_optmem_max.patch b/queue-5.10/net-fix-data-races-around-sysctl_optmem_max.patch
new file mode 100644 (file)
index 0000000..a065056
--- /dev/null
@@ -0,0 +1,161 @@
+From 2faafa5e6d66976e21b92c4abb52e3fb3c95e681 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 23 Aug 2022 10:46:49 -0700
+Subject: net: Fix data-races around sysctl_optmem_max.
+
+From: Kuniyuki Iwashima <kuniyu@amazon.com>
+
+[ Upstream commit 7de6d09f51917c829af2b835aba8bb5040f8e86a ]
+
+While reading sysctl_optmem_max, it can be changed concurrently.
+Thus, we need to add READ_ONCE() to its readers.
+
+Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
+Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/core/bpf_sk_storage.c | 5 +++--
+ net/core/filter.c         | 9 +++++----
+ net/core/sock.c           | 8 +++++---
+ net/ipv4/ip_sockglue.c    | 6 +++---
+ net/ipv6/ipv6_sockglue.c  | 4 ++--
+ 5 files changed, 18 insertions(+), 14 deletions(-)
+
+diff --git a/net/core/bpf_sk_storage.c b/net/core/bpf_sk_storage.c
+index 39c5a059d1c2b..d67d06d6b817c 100644
+--- a/net/core/bpf_sk_storage.c
++++ b/net/core/bpf_sk_storage.c
+@@ -304,11 +304,12 @@ BPF_CALL_2(bpf_sk_storage_delete, struct bpf_map *, map, struct sock *, sk)
+ static int sk_storage_charge(struct bpf_local_storage_map *smap,
+                            void *owner, u32 size)
+ {
++      int optmem_max = READ_ONCE(sysctl_optmem_max);
+       struct sock *sk = (struct sock *)owner;
+       /* same check as in sock_kmalloc() */
+-      if (size <= sysctl_optmem_max &&
+-          atomic_read(&sk->sk_omem_alloc) + size < sysctl_optmem_max) {
++      if (size <= optmem_max &&
++          atomic_read(&sk->sk_omem_alloc) + size < optmem_max) {
+               atomic_add(size, &sk->sk_omem_alloc);
+               return 0;
+       }
+diff --git a/net/core/filter.c b/net/core/filter.c
+index 6a90c1eb6f67e..4c22e6d1da746 100644
+--- a/net/core/filter.c
++++ b/net/core/filter.c
+@@ -1212,10 +1212,11 @@ void sk_filter_uncharge(struct sock *sk, struct sk_filter *fp)
+ static bool __sk_filter_charge(struct sock *sk, struct sk_filter *fp)
+ {
+       u32 filter_size = bpf_prog_size(fp->prog->len);
++      int optmem_max = READ_ONCE(sysctl_optmem_max);
+       /* same check as in sock_kmalloc() */
+-      if (filter_size <= sysctl_optmem_max &&
+-          atomic_read(&sk->sk_omem_alloc) + filter_size < sysctl_optmem_max) {
++      if (filter_size <= optmem_max &&
++          atomic_read(&sk->sk_omem_alloc) + filter_size < optmem_max) {
+               atomic_add(filter_size, &sk->sk_omem_alloc);
+               return true;
+       }
+@@ -1547,7 +1548,7 @@ int sk_reuseport_attach_filter(struct sock_fprog *fprog, struct sock *sk)
+       if (IS_ERR(prog))
+               return PTR_ERR(prog);
+-      if (bpf_prog_size(prog->len) > sysctl_optmem_max)
++      if (bpf_prog_size(prog->len) > READ_ONCE(sysctl_optmem_max))
+               err = -ENOMEM;
+       else
+               err = reuseport_attach_prog(sk, prog);
+@@ -1614,7 +1615,7 @@ int sk_reuseport_attach_bpf(u32 ufd, struct sock *sk)
+               }
+       } else {
+               /* BPF_PROG_TYPE_SOCKET_FILTER */
+-              if (bpf_prog_size(prog->len) > sysctl_optmem_max) {
++              if (bpf_prog_size(prog->len) > READ_ONCE(sysctl_optmem_max)) {
+                       err = -ENOMEM;
+                       goto err_prog_put;
+               }
+diff --git a/net/core/sock.c b/net/core/sock.c
+index 25d25dcd0c3db..f01e71c98d5be 100644
+--- a/net/core/sock.c
++++ b/net/core/sock.c
+@@ -2219,7 +2219,7 @@ struct sk_buff *sock_omalloc(struct sock *sk, unsigned long size,
+       /* small safe race: SKB_TRUESIZE may differ from final skb->truesize */
+       if (atomic_read(&sk->sk_omem_alloc) + SKB_TRUESIZE(size) >
+-          sysctl_optmem_max)
++          READ_ONCE(sysctl_optmem_max))
+               return NULL;
+       skb = alloc_skb(size, priority);
+@@ -2237,8 +2237,10 @@ struct sk_buff *sock_omalloc(struct sock *sk, unsigned long size,
+  */
+ void *sock_kmalloc(struct sock *sk, int size, gfp_t priority)
+ {
+-      if ((unsigned int)size <= sysctl_optmem_max &&
+-          atomic_read(&sk->sk_omem_alloc) + size < sysctl_optmem_max) {
++      int optmem_max = READ_ONCE(sysctl_optmem_max);
++
++      if ((unsigned int)size <= optmem_max &&
++          atomic_read(&sk->sk_omem_alloc) + size < optmem_max) {
+               void *mem;
+               /* First do the add, to avoid the race if kmalloc
+                * might sleep.
+diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
+index 22507a6a3f71c..4cc39c62af55d 100644
+--- a/net/ipv4/ip_sockglue.c
++++ b/net/ipv4/ip_sockglue.c
+@@ -773,7 +773,7 @@ static int ip_set_mcast_msfilter(struct sock *sk, sockptr_t optval, int optlen)
+       if (optlen < GROUP_FILTER_SIZE(0))
+               return -EINVAL;
+-      if (optlen > sysctl_optmem_max)
++      if (optlen > READ_ONCE(sysctl_optmem_max))
+               return -ENOBUFS;
+       gsf = memdup_sockptr(optval, optlen);
+@@ -808,7 +808,7 @@ static int compat_ip_set_mcast_msfilter(struct sock *sk, sockptr_t optval,
+       if (optlen < size0)
+               return -EINVAL;
+-      if (optlen > sysctl_optmem_max - 4)
++      if (optlen > READ_ONCE(sysctl_optmem_max) - 4)
+               return -ENOBUFS;
+       p = kmalloc(optlen + 4, GFP_KERNEL);
+@@ -1231,7 +1231,7 @@ static int do_ip_setsockopt(struct sock *sk, int level, int optname,
+               if (optlen < IP_MSFILTER_SIZE(0))
+                       goto e_inval;
+-              if (optlen > sysctl_optmem_max) {
++              if (optlen > READ_ONCE(sysctl_optmem_max)) {
+                       err = -ENOBUFS;
+                       break;
+               }
+diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
+index 43a894bf9a1be..6fa118bf40cdd 100644
+--- a/net/ipv6/ipv6_sockglue.c
++++ b/net/ipv6/ipv6_sockglue.c
+@@ -208,7 +208,7 @@ static int ipv6_set_mcast_msfilter(struct sock *sk, sockptr_t optval,
+       if (optlen < GROUP_FILTER_SIZE(0))
+               return -EINVAL;
+-      if (optlen > sysctl_optmem_max)
++      if (optlen > READ_ONCE(sysctl_optmem_max))
+               return -ENOBUFS;
+       gsf = memdup_sockptr(optval, optlen);
+@@ -242,7 +242,7 @@ static int compat_ipv6_set_mcast_msfilter(struct sock *sk, sockptr_t optval,
+       if (optlen < size0)
+               return -EINVAL;
+-      if (optlen > sysctl_optmem_max - 4)
++      if (optlen > READ_ONCE(sysctl_optmem_max) - 4)
+               return -ENOBUFS;
+       p = kmalloc(optlen + 4, GFP_KERNEL);
+-- 
+2.35.1
+
diff --git a/queue-5.10/net-fix-data-races-around-weight_p-and-dev_weight_-r.patch b/queue-5.10/net-fix-data-races-around-weight_p-and-dev_weight_-r.patch
new file mode 100644 (file)
index 0000000..ea05593
--- /dev/null
@@ -0,0 +1,85 @@
+From c750bdc6664ec8fa668b22c2f5ede2f177c4ae20 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 23 Aug 2022 10:46:45 -0700
+Subject: net: Fix data-races around weight_p and dev_weight_[rt]x_bias.
+
+From: Kuniyuki Iwashima <kuniyu@amazon.com>
+
+[ Upstream commit bf955b5ab8f6f7b0632cdef8e36b14e4f6e77829 ]
+
+While reading weight_p, it can be changed concurrently.  Thus, we need
+to add READ_ONCE() to its reader.
+
+Also, dev_[rt]x_weight can be read/written at the same time.  So, we
+need to use READ_ONCE() and WRITE_ONCE() for its access.  Moreover, to
+use the same weight_p while changing dev_[rt]x_weight, we add a mutex
+in proc_do_dev_weight().
+
+Fixes: 3d48b53fb2ae ("net: dev_weight: TX/RX orthogonality")
+Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
+Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/core/dev.c             |  2 +-
+ net/core/sysctl_net_core.c | 15 +++++++++------
+ net/sched/sch_generic.c    |  2 +-
+ 3 files changed, 11 insertions(+), 8 deletions(-)
+
+diff --git a/net/core/dev.c b/net/core/dev.c
+index 637bc576fbd26..701a1afc91ff1 100644
+--- a/net/core/dev.c
++++ b/net/core/dev.c
+@@ -6371,7 +6371,7 @@ static int process_backlog(struct napi_struct *napi, int quota)
+               net_rps_action_and_irq_enable(sd);
+       }
+-      napi->weight = dev_rx_weight;
++      napi->weight = READ_ONCE(dev_rx_weight);
+       while (again) {
+               struct sk_buff *skb;
+diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
+index 2e0a4378e778a..0dfe9f255ab3a 100644
+--- a/net/core/sysctl_net_core.c
++++ b/net/core/sysctl_net_core.c
+@@ -235,14 +235,17 @@ static int set_default_qdisc(struct ctl_table *table, int write,
+ static int proc_do_dev_weight(struct ctl_table *table, int write,
+                          void *buffer, size_t *lenp, loff_t *ppos)
+ {
+-      int ret;
++      static DEFINE_MUTEX(dev_weight_mutex);
++      int ret, weight;
++      mutex_lock(&dev_weight_mutex);
+       ret = proc_dointvec(table, write, buffer, lenp, ppos);
+-      if (ret != 0)
+-              return ret;
+-
+-      dev_rx_weight = weight_p * dev_weight_rx_bias;
+-      dev_tx_weight = weight_p * dev_weight_tx_bias;
++      if (!ret && write) {
++              weight = READ_ONCE(weight_p);
++              WRITE_ONCE(dev_rx_weight, weight * dev_weight_rx_bias);
++              WRITE_ONCE(dev_tx_weight, weight * dev_weight_tx_bias);
++      }
++      mutex_unlock(&dev_weight_mutex);
+       return ret;
+ }
+diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
+index 5d5391adb667c..68f1e89430b3b 100644
+--- a/net/sched/sch_generic.c
++++ b/net/sched/sch_generic.c
+@@ -403,7 +403,7 @@ static inline bool qdisc_restart(struct Qdisc *q, int *packets)
+ void __qdisc_run(struct Qdisc *q)
+ {
+-      int quota = dev_tx_weight;
++      int quota = READ_ONCE(dev_tx_weight);
+       int packets;
+       while (qdisc_restart(q, &packets)) {
+-- 
+2.35.1
+
diff --git a/queue-5.10/net-ipa-don-t-assume-smem-is-page-aligned.patch b/queue-5.10/net-ipa-don-t-assume-smem-is-page-aligned.patch
new file mode 100644 (file)
index 0000000..593130b
--- /dev/null
@@ -0,0 +1,48 @@
+From 99ff79d814344e8966664fb71caf61369aa882f7 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 18 Aug 2022 08:42:05 -0500
+Subject: net: ipa: don't assume SMEM is page-aligned
+
+From: Alex Elder <elder@linaro.org>
+
+[ Upstream commit b8d4380365c515d8e0351f2f46d371738dd19be1 ]
+
+In ipa_smem_init(), a Qualcomm SMEM region is allocated (if needed)
+and then its virtual address is fetched using qcom_smem_get().  The
+physical address associated with that region is also fetched.
+
+The physical address is adjusted so that it is page-aligned, and an
+attempt is made to update the size of the region to compensate for
+any non-zero adjustment.
+
+But that adjustment isn't done properly.  The physical address is
+aligned twice, and as a result the size is never actually adjusted.
+
+Fix this by *not* aligning the "addr" local variable, and instead
+making the "phys" local variable be the adjusted "addr" value.
+
+Fixes: a0036bb413d5b ("net: ipa: define SMEM memory region for IPA")
+Signed-off-by: Alex Elder <elder@linaro.org>
+Link: https://lore.kernel.org/r/20220818134206.567618-1-elder@linaro.org
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ipa/ipa_mem.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/drivers/net/ipa/ipa_mem.c b/drivers/net/ipa/ipa_mem.c
+index a78d66051a17d..25a8d029f2075 100644
+--- a/drivers/net/ipa/ipa_mem.c
++++ b/drivers/net/ipa/ipa_mem.c
+@@ -414,7 +414,7 @@ static int ipa_smem_init(struct ipa *ipa, u32 item, size_t size)
+       }
+       /* Align the address down and the size up to a page boundary */
+-      addr = qcom_smem_virt_to_phys(virt) & PAGE_MASK;
++      addr = qcom_smem_virt_to_phys(virt);
+       phys = addr & PAGE_MASK;
+       size = PAGE_ALIGN(size + addr - phys);
+       iova = phys;    /* We just want a direct mapping */
+-- 
+2.35.1
+
diff --git a/queue-5.10/net-ipvtap-add-__init-__exit-annotations-to-module-i.patch b/queue-5.10/net-ipvtap-add-__init-__exit-annotations-to-module-i.patch
new file mode 100644 (file)
index 0000000..235cf69
--- /dev/null
@@ -0,0 +1,50 @@
+From 45a018d7a1f5e49ada660ab98cd37122afa13ce2 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sun, 21 Aug 2022 06:08:08 -0700
+Subject: net: ipvtap - add __init/__exit annotations to module init/exit funcs
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Maciej Å»enczykowski <maze@google.com>
+
+[ Upstream commit 4b2e3a17e9f279325712b79fb01d1493f9e3e005 ]
+
+Looks to have been left out in an oversight.
+
+Cc: Mahesh Bandewar <maheshb@google.com>
+Cc: Sainath Grandhi <sainath.grandhi@intel.com>
+Fixes: 235a9d89da97 ('ipvtap: IP-VLAN based tap driver')
+Signed-off-by: Maciej Å»enczykowski <maze@google.com>
+Link: https://lore.kernel.org/r/20220821130808.12143-1-zenczykowski@gmail.com
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ipvlan/ipvtap.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/net/ipvlan/ipvtap.c b/drivers/net/ipvlan/ipvtap.c
+index 1cedb634f4f7b..f01078b2581ce 100644
+--- a/drivers/net/ipvlan/ipvtap.c
++++ b/drivers/net/ipvlan/ipvtap.c
+@@ -194,7 +194,7 @@ static struct notifier_block ipvtap_notifier_block __read_mostly = {
+       .notifier_call  = ipvtap_device_event,
+ };
+-static int ipvtap_init(void)
++static int __init ipvtap_init(void)
+ {
+       int err;
+@@ -228,7 +228,7 @@ static int ipvtap_init(void)
+ }
+ module_init(ipvtap_init);
+-static void ipvtap_exit(void)
++static void __exit ipvtap_exit(void)
+ {
+       rtnl_link_unregister(&ipvtap_link_ops);
+       unregister_netdevice_notifier(&ipvtap_notifier_block);
+-- 
+2.35.1
+
diff --git a/queue-5.10/net-mlx5e-properly-disable-vlan-strip-on-non-ul-reps.patch b/queue-5.10/net-mlx5e-properly-disable-vlan-strip-on-non-ul-reps.patch
new file mode 100644 (file)
index 0000000..84854b8
--- /dev/null
@@ -0,0 +1,40 @@
+From dd8c644722e874b57c45997539c1a65896119670 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 15 Jul 2022 21:41:48 +0200
+Subject: net/mlx5e: Properly disable vlan strip on non-UL reps
+
+From: Vlad Buslov <vladbu@nvidia.com>
+
+[ Upstream commit f37044fd759b6bc40b6398a978e0b1acdf717372 ]
+
+When querying mlx5 non-uplink representors capabilities with ethtool
+rx-vlan-offload is marked as "off [fixed]". However, it is actually always
+enabled because mlx5e_params->vlan_strip_disable is 0 by default when
+initializing struct mlx5e_params instance. Fix the issue by explicitly
+setting the vlan_strip_disable to 'true' for non-uplink representors.
+
+Fixes: cb67b832921c ("net/mlx5e: Introduce SRIOV VF representors")
+Signed-off-by: Vlad Buslov <vladbu@nvidia.com>
+Reviewed-by: Roi Dayan <roid@nvidia.com>
+Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/en_rep.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
+index 304435e561170..b991f03c7e991 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
+@@ -706,6 +706,8 @@ static void mlx5e_build_rep_params(struct net_device *netdev)
+       params->num_tc                = 1;
+       params->tunneled_offload_en = false;
++      if (rep->vport != MLX5_VPORT_UPLINK)
++              params->vlan_strip_disable = true;
+       mlx5_query_min_inline(mdev, &params->tx_min_inline_mode);
+-- 
+2.35.1
+
diff --git a/queue-5.10/net-moxa-get-rid-of-asymmetry-in-dma-mapping-unmappi.patch b/queue-5.10/net-moxa-get-rid-of-asymmetry-in-dma-mapping-unmappi.patch
new file mode 100644 (file)
index 0000000..d6370a5
--- /dev/null
@@ -0,0 +1,104 @@
+From 71ef23c484a96fe95fc0fd75544d9a4d111e3893 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 19 Aug 2022 14:05:19 +0300
+Subject: net: moxa: get rid of asymmetry in DMA mapping/unmapping
+
+From: Sergei Antonov <saproj@gmail.com>
+
+[ Upstream commit 0ee7828dfc56e97d71e51e6374dc7b4eb2b6e081 ]
+
+Since priv->rx_mapping[i] is maped in moxart_mac_open(), we
+should unmap it from moxart_mac_stop(). Fixes 2 warnings.
+
+1. During error unwinding in moxart_mac_probe(): "goto init_fail;",
+then moxart_mac_free_memory() calls dma_unmap_single() with
+priv->rx_mapping[i] pointers zeroed.
+
+WARNING: CPU: 0 PID: 1 at kernel/dma/debug.c:963 check_unmap+0x704/0x980
+DMA-API: moxart-ethernet 92000000.mac: device driver tries to free DMA memory it has not allocated [device address=0x0000000000000000] [size=1600 bytes]
+CPU: 0 PID: 1 Comm: swapper Not tainted 5.19.0+ #60
+Hardware name: Generic DT based system
+ unwind_backtrace from show_stack+0x10/0x14
+ show_stack from dump_stack_lvl+0x34/0x44
+ dump_stack_lvl from __warn+0xbc/0x1f0
+ __warn from warn_slowpath_fmt+0x94/0xc8
+ warn_slowpath_fmt from check_unmap+0x704/0x980
+ check_unmap from debug_dma_unmap_page+0x8c/0x9c
+ debug_dma_unmap_page from moxart_mac_free_memory+0x3c/0xa8
+ moxart_mac_free_memory from moxart_mac_probe+0x190/0x218
+ moxart_mac_probe from platform_probe+0x48/0x88
+ platform_probe from really_probe+0xc0/0x2e4
+
+2. After commands:
+ ip link set dev eth0 down
+ ip link set dev eth0 up
+
+WARNING: CPU: 0 PID: 55 at kernel/dma/debug.c:570 add_dma_entry+0x204/0x2ec
+DMA-API: moxart-ethernet 92000000.mac: cacheline tracking EEXIST, overlapping mappings aren't supported
+CPU: 0 PID: 55 Comm: ip Not tainted 5.19.0+ #57
+Hardware name: Generic DT based system
+ unwind_backtrace from show_stack+0x10/0x14
+ show_stack from dump_stack_lvl+0x34/0x44
+ dump_stack_lvl from __warn+0xbc/0x1f0
+ __warn from warn_slowpath_fmt+0x94/0xc8
+ warn_slowpath_fmt from add_dma_entry+0x204/0x2ec
+ add_dma_entry from dma_map_page_attrs+0x110/0x328
+ dma_map_page_attrs from moxart_mac_open+0x134/0x320
+ moxart_mac_open from __dev_open+0x11c/0x1ec
+ __dev_open from __dev_change_flags+0x194/0x22c
+ __dev_change_flags from dev_change_flags+0x14/0x44
+ dev_change_flags from devinet_ioctl+0x6d4/0x93c
+ devinet_ioctl from inet_ioctl+0x1ac/0x25c
+
+v1 -> v2:
+Extraneous change removed.
+
+Fixes: 6c821bd9edc9 ("net: Add MOXA ART SoCs ethernet driver")
+Signed-off-by: Sergei Antonov <saproj@gmail.com>
+Reviewed-by: Andrew Lunn <andrew@lunn.ch>
+Link: https://lore.kernel.org/r/20220819110519.1230877-1-saproj@gmail.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/moxa/moxart_ether.c | 11 ++++++-----
+ 1 file changed, 6 insertions(+), 5 deletions(-)
+
+diff --git a/drivers/net/ethernet/moxa/moxart_ether.c b/drivers/net/ethernet/moxa/moxart_ether.c
+index 6137000b11c5c..73aac97fb5c96 100644
+--- a/drivers/net/ethernet/moxa/moxart_ether.c
++++ b/drivers/net/ethernet/moxa/moxart_ether.c
+@@ -74,11 +74,6 @@ static int moxart_set_mac_address(struct net_device *ndev, void *addr)
+ static void moxart_mac_free_memory(struct net_device *ndev)
+ {
+       struct moxart_mac_priv_t *priv = netdev_priv(ndev);
+-      int i;
+-
+-      for (i = 0; i < RX_DESC_NUM; i++)
+-              dma_unmap_single(&priv->pdev->dev, priv->rx_mapping[i],
+-                               priv->rx_buf_size, DMA_FROM_DEVICE);
+       if (priv->tx_desc_base)
+               dma_free_coherent(&priv->pdev->dev,
+@@ -193,6 +188,7 @@ static int moxart_mac_open(struct net_device *ndev)
+ static int moxart_mac_stop(struct net_device *ndev)
+ {
+       struct moxart_mac_priv_t *priv = netdev_priv(ndev);
++      int i;
+       napi_disable(&priv->napi);
+@@ -204,6 +200,11 @@ static int moxart_mac_stop(struct net_device *ndev)
+       /* disable all functions */
+       writel(0, priv->base + REG_MAC_CTRL);
++      /* unmap areas mapped in moxart_mac_setup_desc_ring() */
++      for (i = 0; i < RX_DESC_NUM; i++)
++              dma_unmap_single(&priv->pdev->dev, priv->rx_mapping[i],
++                               priv->rx_buf_size, DMA_FROM_DEVICE);
++
+       return 0;
+ }
+-- 
+2.35.1
+
diff --git a/queue-5.10/netfilter-bitwise-improve-error-goto-labels.patch b/queue-5.10/netfilter-bitwise-improve-error-goto-labels.patch
new file mode 100644 (file)
index 0000000..d3d4193
--- /dev/null
@@ -0,0 +1,54 @@
+From c09e0c7f4e13fc06707260ec08e2ea33daabf82c Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 4 Apr 2022 13:04:15 +0100
+Subject: netfilter: bitwise: improve error goto labels
+
+From: Jeremy Sowden <jeremy@azazel.net>
+
+[ Upstream commit 00bd435208e5201eb935d273052930bd3b272b6f ]
+
+Replace two labels (`err1` and `err2`) with more informative ones.
+
+Signed-off-by: Jeremy Sowden <jeremy@azazel.net>
+Signed-off-by: Florian Westphal <fw@strlen.de>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/netfilter/nft_bitwise.c | 11 ++++++-----
+ 1 file changed, 6 insertions(+), 5 deletions(-)
+
+diff --git a/net/netfilter/nft_bitwise.c b/net/netfilter/nft_bitwise.c
+index 47b0dba95054f..d0c648b64cd40 100644
+--- a/net/netfilter/nft_bitwise.c
++++ b/net/netfilter/nft_bitwise.c
+@@ -109,22 +109,23 @@ static int nft_bitwise_init_bool(struct nft_bitwise *priv,
+               return err;
+       if (mask.type != NFT_DATA_VALUE || mask.len != priv->len) {
+               err = -EINVAL;
+-              goto err1;
++              goto err_mask_release;
+       }
+       err = nft_data_init(NULL, &priv->xor, sizeof(priv->xor), &xor,
+                           tb[NFTA_BITWISE_XOR]);
+       if (err < 0)
+-              goto err1;
++              goto err_mask_release;
+       if (xor.type != NFT_DATA_VALUE || xor.len != priv->len) {
+               err = -EINVAL;
+-              goto err2;
++              goto err_xor_release;
+       }
+       return 0;
+-err2:
++
++err_xor_release:
+       nft_data_release(&priv->xor, xor.type);
+-err1:
++err_mask_release:
+       nft_data_release(&priv->mask, mask.type);
+       return err;
+ }
+-- 
+2.35.1
+
diff --git a/queue-5.10/netfilter-ebtables-reject-blobs-that-don-t-provide-a.patch b/queue-5.10/netfilter-ebtables-reject-blobs-that-don-t-provide-a.patch
new file mode 100644 (file)
index 0000000..3051493
--- /dev/null
@@ -0,0 +1,165 @@
+From ea3b3e8dff3204b6f0a3c4c2db34e59a2ec199b9 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 20 Aug 2022 17:38:37 +0200
+Subject: netfilter: ebtables: reject blobs that don't provide all entry points
+
+From: Florian Westphal <fw@strlen.de>
+
+[ Upstream commit 7997eff82828304b780dc0a39707e1946d6f1ebf ]
+
+Harshit Mogalapalli says:
+ In ebt_do_table() function dereferencing 'private->hook_entry[hook]'
+ can lead to NULL pointer dereference. [..] Kernel panic:
+
+general protection fault, probably for non-canonical address 0xdffffc0000000005: 0000 [#1] PREEMPT SMP KASAN
+KASAN: null-ptr-deref in range [0x0000000000000028-0x000000000000002f]
+[..]
+RIP: 0010:ebt_do_table+0x1dc/0x1ce0
+Code: 89 fa 48 c1 ea 03 80 3c 02 00 0f 85 5c 16 00 00 48 b8 00 00 00 00 00 fc ff df 49 8b 6c df 08 48 8d 7d 2c 48 89 fa 48 c1 ea 03 <0f> b6 14 02 48 89 f8 83 e0 07 83 c0 03 38 d0 7c 08 84 d2 0f 85 88
+[..]
+Call Trace:
+ nf_hook_slow+0xb1/0x170
+ __br_forward+0x289/0x730
+ maybe_deliver+0x24b/0x380
+ br_flood+0xc6/0x390
+ br_dev_xmit+0xa2e/0x12c0
+
+For some reason ebtables rejects blobs that provide entry points that are
+not supported by the table, but what it should instead reject is the
+opposite: blobs that DO NOT provide an entry point supported by the table.
+
+t->valid_hooks is the bitmask of hooks (input, forward ...) that will see
+packets.  Providing an entry point that is not support is harmless
+(never called/used), but the inverse isn't: it results in a crash
+because the ebtables traverser doesn't expect a NULL blob for a location
+its receiving packets for.
+
+Instead of fixing all the individual checks, do what iptables is doing and
+reject all blobs that differ from the expected hooks.
+
+Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
+Reported-by: Harshit Mogalapalli <harshit.m.mogalapalli@oracle.com>
+Reported-by: syzkaller <syzkaller@googlegroups.com>
+Signed-off-by: Florian Westphal <fw@strlen.de>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/linux/netfilter_bridge/ebtables.h | 4 ----
+ net/bridge/netfilter/ebtable_broute.c     | 8 --------
+ net/bridge/netfilter/ebtable_filter.c     | 8 --------
+ net/bridge/netfilter/ebtable_nat.c        | 8 --------
+ net/bridge/netfilter/ebtables.c           | 8 +-------
+ 5 files changed, 1 insertion(+), 35 deletions(-)
+
+diff --git a/include/linux/netfilter_bridge/ebtables.h b/include/linux/netfilter_bridge/ebtables.h
+index 3a956145a25cb..a18fb73a2b772 100644
+--- a/include/linux/netfilter_bridge/ebtables.h
++++ b/include/linux/netfilter_bridge/ebtables.h
+@@ -94,10 +94,6 @@ struct ebt_table {
+       struct ebt_replace_kernel *table;
+       unsigned int valid_hooks;
+       rwlock_t lock;
+-      /* e.g. could be the table explicitly only allows certain
+-       * matches, targets, ... 0 == let it in */
+-      int (*check)(const struct ebt_table_info *info,
+-         unsigned int valid_hooks);
+       /* the data used by the kernel */
+       struct ebt_table_info *private;
+       struct module *me;
+diff --git a/net/bridge/netfilter/ebtable_broute.c b/net/bridge/netfilter/ebtable_broute.c
+index 32bc2821027f3..57f91efce0f73 100644
+--- a/net/bridge/netfilter/ebtable_broute.c
++++ b/net/bridge/netfilter/ebtable_broute.c
+@@ -36,18 +36,10 @@ static struct ebt_replace_kernel initial_table = {
+       .entries        = (char *)&initial_chain,
+ };
+-static int check(const struct ebt_table_info *info, unsigned int valid_hooks)
+-{
+-      if (valid_hooks & ~(1 << NF_BR_BROUTING))
+-              return -EINVAL;
+-      return 0;
+-}
+-
+ static const struct ebt_table broute_table = {
+       .name           = "broute",
+       .table          = &initial_table,
+       .valid_hooks    = 1 << NF_BR_BROUTING,
+-      .check          = check,
+       .me             = THIS_MODULE,
+ };
+diff --git a/net/bridge/netfilter/ebtable_filter.c b/net/bridge/netfilter/ebtable_filter.c
+index bcf982e12f16b..7f2e620f4978f 100644
+--- a/net/bridge/netfilter/ebtable_filter.c
++++ b/net/bridge/netfilter/ebtable_filter.c
+@@ -43,18 +43,10 @@ static struct ebt_replace_kernel initial_table = {
+       .entries        = (char *)initial_chains,
+ };
+-static int check(const struct ebt_table_info *info, unsigned int valid_hooks)
+-{
+-      if (valid_hooks & ~FILTER_VALID_HOOKS)
+-              return -EINVAL;
+-      return 0;
+-}
+-
+ static const struct ebt_table frame_filter = {
+       .name           = "filter",
+       .table          = &initial_table,
+       .valid_hooks    = FILTER_VALID_HOOKS,
+-      .check          = check,
+       .me             = THIS_MODULE,
+ };
+diff --git a/net/bridge/netfilter/ebtable_nat.c b/net/bridge/netfilter/ebtable_nat.c
+index 0d092773f8161..1743a105485c4 100644
+--- a/net/bridge/netfilter/ebtable_nat.c
++++ b/net/bridge/netfilter/ebtable_nat.c
+@@ -43,18 +43,10 @@ static struct ebt_replace_kernel initial_table = {
+       .entries        = (char *)initial_chains,
+ };
+-static int check(const struct ebt_table_info *info, unsigned int valid_hooks)
+-{
+-      if (valid_hooks & ~NAT_VALID_HOOKS)
+-              return -EINVAL;
+-      return 0;
+-}
+-
+ static const struct ebt_table frame_nat = {
+       .name           = "nat",
+       .table          = &initial_table,
+       .valid_hooks    = NAT_VALID_HOOKS,
+-      .check          = check,
+       .me             = THIS_MODULE,
+ };
+diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c
+index d481ff24a1501..310740cc684ad 100644
+--- a/net/bridge/netfilter/ebtables.c
++++ b/net/bridge/netfilter/ebtables.c
+@@ -999,8 +999,7 @@ static int do_replace_finish(struct net *net, struct ebt_replace *repl,
+               goto free_iterate;
+       }
+-      /* the table doesn't like it */
+-      if (t->check && (ret = t->check(newinfo, repl->valid_hooks)))
++      if (repl->valid_hooks != t->valid_hooks)
+               goto free_unlock;
+       if (repl->num_counters && repl->num_counters != t->private->nentries) {
+@@ -1186,11 +1185,6 @@ int ebt_register_table(struct net *net, const struct ebt_table *input_table,
+       if (ret != 0)
+               goto free_chainstack;
+-      if (table->check && table->check(newinfo, table->valid_hooks)) {
+-              ret = -EINVAL;
+-              goto free_chainstack;
+-      }
+-
+       table->private = newinfo;
+       rwlock_init(&table->lock);
+       mutex_lock(&ebt_mutex);
+-- 
+2.35.1
+
diff --git a/queue-5.10/netfilter-nf_tables-consolidate-rule-verdict-trace-c.patch b/queue-5.10/netfilter-nf_tables-consolidate-rule-verdict-trace-c.patch
new file mode 100644 (file)
index 0000000..e2793e5
--- /dev/null
@@ -0,0 +1,93 @@
+From 9897d8db8f4b1accb104f60db842df69c6600ae6 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 10 Dec 2021 00:10:12 +0100
+Subject: netfilter: nf_tables: consolidate rule verdict trace call
+
+From: Pablo Neira Ayuso <pablo@netfilter.org>
+
+[ Upstream commit 4765473fefd4403b5eeca371637065b561522c50 ]
+
+Add function to consolidate verdict tracing.
+
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/netfilter/nf_tables_core.c | 39 ++++++++++++++++++++++++++++------
+ 1 file changed, 32 insertions(+), 7 deletions(-)
+
+diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c
+index a61b5bf5aa0fb..6dd27c8cd4253 100644
+--- a/net/netfilter/nf_tables_core.c
++++ b/net/netfilter/nf_tables_core.c
+@@ -67,6 +67,36 @@ static void nft_cmp_fast_eval(const struct nft_expr *expr,
+       regs->verdict.code = NFT_BREAK;
+ }
++static noinline void __nft_trace_verdict(struct nft_traceinfo *info,
++                                       const struct nft_chain *chain,
++                                       const struct nft_regs *regs)
++{
++      enum nft_trace_types type;
++
++      switch (regs->verdict.code) {
++      case NFT_CONTINUE:
++      case NFT_RETURN:
++              type = NFT_TRACETYPE_RETURN;
++              break;
++      default:
++              type = NFT_TRACETYPE_RULE;
++              break;
++      }
++
++      __nft_trace_packet(info, chain, type);
++}
++
++static inline void nft_trace_verdict(struct nft_traceinfo *info,
++                                   const struct nft_chain *chain,
++                                   const struct nft_rule *rule,
++                                   const struct nft_regs *regs)
++{
++      if (static_branch_unlikely(&nft_trace_enabled)) {
++              info->rule = rule;
++              __nft_trace_verdict(info, chain, regs);
++      }
++}
++
+ static bool nft_payload_fast_eval(const struct nft_expr *expr,
+                                 struct nft_regs *regs,
+                                 const struct nft_pktinfo *pkt)
+@@ -207,13 +237,13 @@ nft_do_chain(struct nft_pktinfo *pkt, void *priv)
+               break;
+       }
++      nft_trace_verdict(&info, chain, rule, &regs);
++
+       switch (regs.verdict.code & NF_VERDICT_MASK) {
+       case NF_ACCEPT:
+       case NF_DROP:
+       case NF_QUEUE:
+       case NF_STOLEN:
+-              nft_trace_packet(&info, chain, rule,
+-                               NFT_TRACETYPE_RULE);
+               return regs.verdict.code;
+       }
+@@ -226,15 +256,10 @@ nft_do_chain(struct nft_pktinfo *pkt, void *priv)
+               stackptr++;
+               fallthrough;
+       case NFT_GOTO:
+-              nft_trace_packet(&info, chain, rule,
+-                               NFT_TRACETYPE_RULE);
+-
+               chain = regs.verdict.chain;
+               goto do_chain;
+       case NFT_CONTINUE:
+       case NFT_RETURN:
+-              nft_trace_packet(&info, chain, rule,
+-                               NFT_TRACETYPE_RETURN);
+               break;
+       default:
+               WARN_ON(1);
+-- 
+2.35.1
+
diff --git a/queue-5.10/netfilter-nf_tables-disallow-binding-to-already-boun.patch b/queue-5.10/netfilter-nf_tables-disallow-binding-to-already-boun.patch
new file mode 100644 (file)
index 0000000..4b9eb02
--- /dev/null
@@ -0,0 +1,35 @@
+From 519a7eef6767c278ee0d56f4e7ebad4f59ae7433 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 22 Aug 2022 11:06:39 +0200
+Subject: netfilter: nf_tables: disallow binding to already bound chain
+
+From: Pablo Neira Ayuso <pablo@netfilter.org>
+
+[ Upstream commit e02f0d3970404bfea385b6edb86f2d936db0ea2b ]
+
+Update nft_data_init() to report EINVAL if chain is already bound.
+
+Fixes: d0e2c7de92c7 ("netfilter: nf_tables: add NFT_CHAIN_BINDING")
+Reported-by: Gwangun Jung <exsociety@gmail.com>
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/netfilter/nf_tables_api.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
+index b36728cfc5d81..1b039476e4d6a 100644
+--- a/net/netfilter/nf_tables_api.c
++++ b/net/netfilter/nf_tables_api.c
+@@ -8678,6 +8678,8 @@ static int nft_verdict_init(const struct nft_ctx *ctx, struct nft_data *data,
+                       return PTR_ERR(chain);
+               if (nft_is_base_chain(chain))
+                       return -EOPNOTSUPP;
++              if (nft_chain_is_bound(chain))
++                      return -EINVAL;
+               if (desc->flags & NFT_DATA_DESC_SETELEM &&
+                   chain->flags & NFT_CHAIN_BINDING)
+                       return -EINVAL;
+-- 
+2.35.1
+
diff --git a/queue-5.10/netfilter-nf_tables-disallow-jump-to-implicit-chain-.patch b/queue-5.10/netfilter-nf_tables-disallow-jump-to-implicit-chain-.patch
new file mode 100644 (file)
index 0000000..0cbc602
--- /dev/null
@@ -0,0 +1,69 @@
+From e44aa17048b59b4953c11717909a815e70b716dd Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 8 Aug 2022 19:30:07 +0200
+Subject: netfilter: nf_tables: disallow jump to implicit chain from set
+ element
+
+From: Pablo Neira Ayuso <pablo@netfilter.org>
+
+[ Upstream commit f323ef3a0d49e147365284bc1f02212e617b7f09 ]
+
+Extend struct nft_data_desc to add a flag field that specifies
+nft_data_init() is being called for set element data.
+
+Use it to disallow jump to implicit chain from set element, only jump
+to chain via immediate expression is allowed.
+
+Fixes: d0e2c7de92c7 ("netfilter: nf_tables: add NFT_CHAIN_BINDING")
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/net/netfilter/nf_tables.h | 5 +++++
+ net/netfilter/nf_tables_api.c     | 4 ++++
+ 2 files changed, 9 insertions(+)
+
+diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
+index 6c062b2509b9b..e66fee99ed3ea 100644
+--- a/include/net/netfilter/nf_tables.h
++++ b/include/net/netfilter/nf_tables.h
+@@ -176,10 +176,15 @@ struct nft_ctx {
+       bool                            report;
+ };
++enum nft_data_desc_flags {
++      NFT_DATA_DESC_SETELEM   = (1 << 0),
++};
++
+ struct nft_data_desc {
+       enum nft_data_types             type;
+       unsigned int                    size;
+       unsigned int                    len;
++      unsigned int                    flags;
+ };
+ int nft_data_init(const struct nft_ctx *ctx, struct nft_data *data,
+diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
+index df79ea6004a59..b36728cfc5d81 100644
+--- a/net/netfilter/nf_tables_api.c
++++ b/net/netfilter/nf_tables_api.c
+@@ -4865,6 +4865,7 @@ static int nft_setelem_parse_data(struct nft_ctx *ctx, struct nft_set *set,
+       desc->type = dtype;
+       desc->size = NFT_DATA_VALUE_MAXLEN;
+       desc->len = set->dlen;
++      desc->flags = NFT_DATA_DESC_SETELEM;
+       return nft_data_init(ctx, data, desc, attr);
+ }
+@@ -8677,6 +8678,9 @@ static int nft_verdict_init(const struct nft_ctx *ctx, struct nft_data *data,
+                       return PTR_ERR(chain);
+               if (nft_is_base_chain(chain))
+                       return -EOPNOTSUPP;
++              if (desc->flags & NFT_DATA_DESC_SETELEM &&
++                  chain->flags & NFT_CHAIN_BINDING)
++                      return -EINVAL;
+               chain->use++;
+               data->verdict.chain = chain;
+-- 
+2.35.1
+
diff --git a/queue-5.10/netfilter-nf_tables-do-not-leave-chain-stats-enabled.patch b/queue-5.10/netfilter-nf_tables-do-not-leave-chain-stats-enabled.patch
new file mode 100644 (file)
index 0000000..133c10c
--- /dev/null
@@ -0,0 +1,55 @@
+From 897ac6107363a4708c803d2e7392dad81423f5d3 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sun, 21 Aug 2022 12:41:33 +0200
+Subject: netfilter: nf_tables: do not leave chain stats enabled on error
+
+From: Pablo Neira Ayuso <pablo@netfilter.org>
+
+[ Upstream commit 43eb8949cfdffa764b92bc6c54b87cbe5b0003fe ]
+
+Error might occur later in the nf_tables_addchain() codepath, enable
+static key only after transaction has been created.
+
+Fixes: 9f08ea848117 ("netfilter: nf_tables: keep chain counters away from hot path")
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/netfilter/nf_tables_api.c | 6 ++++--
+ 1 file changed, 4 insertions(+), 2 deletions(-)
+
+diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
+index 30bd4b867912c..456988b5c076e 100644
+--- a/net/netfilter/nf_tables_api.c
++++ b/net/netfilter/nf_tables_api.c
+@@ -1999,9 +1999,9 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask,
+                             u8 policy, u32 flags)
+ {
+       const struct nlattr * const *nla = ctx->nla;
++      struct nft_stats __percpu *stats = NULL;
+       struct nft_table *table = ctx->table;
+       struct nft_base_chain *basechain;
+-      struct nft_stats __percpu *stats;
+       struct net *net = ctx->net;
+       char name[NFT_NAME_MAXLEN];
+       struct nft_trans *trans;
+@@ -2037,7 +2037,6 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask,
+                               return PTR_ERR(stats);
+                       }
+                       rcu_assign_pointer(basechain->stats, stats);
+-                      static_branch_inc(&nft_counters_enabled);
+               }
+               err = nft_basechain_init(basechain, family, &hook, flags);
+@@ -2120,6 +2119,9 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask,
+               goto err_unregister_hook;
+       }
++      if (stats)
++              static_branch_inc(&nft_counters_enabled);
++
+       table->use++;
+       return 0;
+-- 
+2.35.1
+
diff --git a/queue-5.10/netfilter-nf_tables-upfront-validation-of-data-via-n.patch b/queue-5.10/netfilter-nf_tables-upfront-validation-of-data-via-n.patch
new file mode 100644 (file)
index 0000000..1e99b45
--- /dev/null
@@ -0,0 +1,528 @@
+From 5f7cc0e83bd203fb17a7cebb53d0eed16992ed8f Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 8 Aug 2022 19:30:06 +0200
+Subject: netfilter: nf_tables: upfront validation of data via nft_data_init()
+
+From: Pablo Neira Ayuso <pablo@netfilter.org>
+
+[ Upstream commit 341b6941608762d8235f3fd1e45e4d7114ed8c2c ]
+
+Instead of parsing the data and then validate that type and length are
+correct, pass a description of the expected data so it can be validated
+upfront before parsing it to bail out earlier.
+
+This patch adds a new .size field to specify the maximum size of the
+data area. The .len field is optional and it is used as an input/output
+field, it provides the specific length of the expected data in the input
+path. If then .len field is not specified, then obtained length from the
+netlink attribute is stored. This is required by cmp, bitwise, range and
+immediate, which provide no netlink attribute that describes the data
+length. The immediate expression uses the destination register type to
+infer the expected data type.
+
+Relying on opencoded validation of the expected data might lead to
+subtle bugs as described in 7e6bc1f6cabc ("netfilter: nf_tables:
+stricter validation of element data").
+
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/net/netfilter/nf_tables.h |  4 +-
+ net/netfilter/nf_tables_api.c     | 78 ++++++++++++++++---------------
+ net/netfilter/nft_bitwise.c       | 66 +++++++++++++-------------
+ net/netfilter/nft_cmp.c           | 44 ++++++++---------
+ net/netfilter/nft_immediate.c     | 22 +++++++--
+ net/netfilter/nft_range.c         | 27 +++++------
+ 6 files changed, 126 insertions(+), 115 deletions(-)
+
+diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
+index b9948e7861f22..6c062b2509b9b 100644
+--- a/include/net/netfilter/nf_tables.h
++++ b/include/net/netfilter/nf_tables.h
+@@ -178,11 +178,11 @@ struct nft_ctx {
+ struct nft_data_desc {
+       enum nft_data_types             type;
++      unsigned int                    size;
+       unsigned int                    len;
+ };
+-int nft_data_init(const struct nft_ctx *ctx,
+-                struct nft_data *data, unsigned int size,
++int nft_data_init(const struct nft_ctx *ctx, struct nft_data *data,
+                 struct nft_data_desc *desc, const struct nlattr *nla);
+ void nft_data_hold(const struct nft_data *data, enum nft_data_types type);
+ void nft_data_release(const struct nft_data *data, enum nft_data_types type);
+diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
+index 456988b5c076e..df79ea6004a59 100644
+--- a/net/netfilter/nf_tables_api.c
++++ b/net/netfilter/nf_tables_api.c
+@@ -4841,19 +4841,13 @@ static int nft_setelem_parse_flags(const struct nft_set *set,
+ static int nft_setelem_parse_key(struct nft_ctx *ctx, struct nft_set *set,
+                                struct nft_data *key, struct nlattr *attr)
+ {
+-      struct nft_data_desc desc;
+-      int err;
+-
+-      err = nft_data_init(ctx, key, NFT_DATA_VALUE_MAXLEN, &desc, attr);
+-      if (err < 0)
+-              return err;
+-
+-      if (desc.type != NFT_DATA_VALUE || desc.len != set->klen) {
+-              nft_data_release(key, desc.type);
+-              return -EINVAL;
+-      }
++      struct nft_data_desc desc = {
++              .type   = NFT_DATA_VALUE,
++              .size   = NFT_DATA_VALUE_MAXLEN,
++              .len    = set->klen,
++      };
+-      return 0;
++      return nft_data_init(ctx, key, &desc, attr);
+ }
+ static int nft_setelem_parse_data(struct nft_ctx *ctx, struct nft_set *set,
+@@ -4862,24 +4856,17 @@ static int nft_setelem_parse_data(struct nft_ctx *ctx, struct nft_set *set,
+                                 struct nlattr *attr)
+ {
+       u32 dtype;
+-      int err;
+-
+-      err = nft_data_init(ctx, data, NFT_DATA_VALUE_MAXLEN, desc, attr);
+-      if (err < 0)
+-              return err;
+       if (set->dtype == NFT_DATA_VERDICT)
+               dtype = NFT_DATA_VERDICT;
+       else
+               dtype = NFT_DATA_VALUE;
+-      if (dtype != desc->type ||
+-          set->dlen != desc->len) {
+-              nft_data_release(data, desc->type);
+-              return -EINVAL;
+-      }
++      desc->type = dtype;
++      desc->size = NFT_DATA_VALUE_MAXLEN;
++      desc->len = set->dlen;
+-      return 0;
++      return nft_data_init(ctx, data, desc, attr);
+ }
+ static int nft_get_set_elem(struct nft_ctx *ctx, struct nft_set *set,
+@@ -8697,7 +8684,7 @@ static int nft_verdict_init(const struct nft_ctx *ctx, struct nft_data *data,
+       }
+       desc->len = sizeof(data->verdict);
+-      desc->type = NFT_DATA_VERDICT;
++
+       return 0;
+ }
+@@ -8750,20 +8737,25 @@ int nft_verdict_dump(struct sk_buff *skb, int type, const struct nft_verdict *v)
+ }
+ static int nft_value_init(const struct nft_ctx *ctx,
+-                        struct nft_data *data, unsigned int size,
+-                        struct nft_data_desc *desc, const struct nlattr *nla)
++                        struct nft_data *data, struct nft_data_desc *desc,
++                        const struct nlattr *nla)
+ {
+       unsigned int len;
+       len = nla_len(nla);
+       if (len == 0)
+               return -EINVAL;
+-      if (len > size)
++      if (len > desc->size)
+               return -EOVERFLOW;
++      if (desc->len) {
++              if (len != desc->len)
++                      return -EINVAL;
++      } else {
++              desc->len = len;
++      }
+       nla_memcpy(data->data, nla, len);
+-      desc->type = NFT_DATA_VALUE;
+-      desc->len  = len;
++
+       return 0;
+ }
+@@ -8783,7 +8775,6 @@ static const struct nla_policy nft_data_policy[NFTA_DATA_MAX + 1] = {
+  *
+  *    @ctx: context of the expression using the data
+  *    @data: destination struct nft_data
+- *    @size: maximum data length
+  *    @desc: data description
+  *    @nla: netlink attribute containing data
+  *
+@@ -8793,24 +8784,35 @@ static const struct nla_policy nft_data_policy[NFTA_DATA_MAX + 1] = {
+  *    The caller can indicate that it only wants to accept data of type
+  *    NFT_DATA_VALUE by passing NULL for the ctx argument.
+  */
+-int nft_data_init(const struct nft_ctx *ctx,
+-                struct nft_data *data, unsigned int size,
++int nft_data_init(const struct nft_ctx *ctx, struct nft_data *data,
+                 struct nft_data_desc *desc, const struct nlattr *nla)
+ {
+       struct nlattr *tb[NFTA_DATA_MAX + 1];
+       int err;
++      if (WARN_ON_ONCE(!desc->size))
++              return -EINVAL;
++
+       err = nla_parse_nested_deprecated(tb, NFTA_DATA_MAX, nla,
+                                         nft_data_policy, NULL);
+       if (err < 0)
+               return err;
+-      if (tb[NFTA_DATA_VALUE])
+-              return nft_value_init(ctx, data, size, desc,
+-                                    tb[NFTA_DATA_VALUE]);
+-      if (tb[NFTA_DATA_VERDICT] && ctx != NULL)
+-              return nft_verdict_init(ctx, data, desc, tb[NFTA_DATA_VERDICT]);
+-      return -EINVAL;
++      if (tb[NFTA_DATA_VALUE]) {
++              if (desc->type != NFT_DATA_VALUE)
++                      return -EINVAL;
++
++              err = nft_value_init(ctx, data, desc, tb[NFTA_DATA_VALUE]);
++      } else if (tb[NFTA_DATA_VERDICT] && ctx != NULL) {
++              if (desc->type != NFT_DATA_VERDICT)
++                      return -EINVAL;
++
++              err = nft_verdict_init(ctx, data, desc, tb[NFTA_DATA_VERDICT]);
++      } else {
++              err = -EINVAL;
++      }
++
++      return err;
+ }
+ EXPORT_SYMBOL_GPL(nft_data_init);
+diff --git a/net/netfilter/nft_bitwise.c b/net/netfilter/nft_bitwise.c
+index d0c648b64cd40..d6ab7aa14adc2 100644
+--- a/net/netfilter/nft_bitwise.c
++++ b/net/netfilter/nft_bitwise.c
+@@ -93,7 +93,16 @@ static const struct nla_policy nft_bitwise_policy[NFTA_BITWISE_MAX + 1] = {
+ static int nft_bitwise_init_bool(struct nft_bitwise *priv,
+                                const struct nlattr *const tb[])
+ {
+-      struct nft_data_desc mask, xor;
++      struct nft_data_desc mask = {
++              .type   = NFT_DATA_VALUE,
++              .size   = sizeof(priv->mask),
++              .len    = priv->len,
++      };
++      struct nft_data_desc xor = {
++              .type   = NFT_DATA_VALUE,
++              .size   = sizeof(priv->xor),
++              .len    = priv->len,
++      };
+       int err;
+       if (tb[NFTA_BITWISE_DATA])
+@@ -103,37 +112,30 @@ static int nft_bitwise_init_bool(struct nft_bitwise *priv,
+           !tb[NFTA_BITWISE_XOR])
+               return -EINVAL;
+-      err = nft_data_init(NULL, &priv->mask, sizeof(priv->mask), &mask,
+-                          tb[NFTA_BITWISE_MASK]);
++      err = nft_data_init(NULL, &priv->mask, &mask, tb[NFTA_BITWISE_MASK]);
+       if (err < 0)
+               return err;
+-      if (mask.type != NFT_DATA_VALUE || mask.len != priv->len) {
+-              err = -EINVAL;
+-              goto err_mask_release;
+-      }
+-      err = nft_data_init(NULL, &priv->xor, sizeof(priv->xor), &xor,
+-                          tb[NFTA_BITWISE_XOR]);
++      err = nft_data_init(NULL, &priv->xor, &xor, tb[NFTA_BITWISE_XOR]);
+       if (err < 0)
+-              goto err_mask_release;
+-      if (xor.type != NFT_DATA_VALUE || xor.len != priv->len) {
+-              err = -EINVAL;
+-              goto err_xor_release;
+-      }
++              goto err_xor_err;
+       return 0;
+-err_xor_release:
+-      nft_data_release(&priv->xor, xor.type);
+-err_mask_release:
++err_xor_err:
+       nft_data_release(&priv->mask, mask.type);
++
+       return err;
+ }
+ static int nft_bitwise_init_shift(struct nft_bitwise *priv,
+                                 const struct nlattr *const tb[])
+ {
+-      struct nft_data_desc d;
++      struct nft_data_desc desc = {
++              .type   = NFT_DATA_VALUE,
++              .size   = sizeof(priv->data),
++              .len    = sizeof(u32),
++      };
+       int err;
+       if (tb[NFTA_BITWISE_MASK] ||
+@@ -143,13 +145,12 @@ static int nft_bitwise_init_shift(struct nft_bitwise *priv,
+       if (!tb[NFTA_BITWISE_DATA])
+               return -EINVAL;
+-      err = nft_data_init(NULL, &priv->data, sizeof(priv->data), &d,
+-                          tb[NFTA_BITWISE_DATA]);
++      err = nft_data_init(NULL, &priv->data, &desc, tb[NFTA_BITWISE_DATA]);
+       if (err < 0)
+               return err;
+-      if (d.type != NFT_DATA_VALUE || d.len != sizeof(u32) ||
+-          priv->data.data[0] >= BITS_PER_TYPE(u32)) {
+-              nft_data_release(&priv->data, d.type);
++
++      if (priv->data.data[0] >= BITS_PER_TYPE(u32)) {
++              nft_data_release(&priv->data, desc.type);
+               return -EINVAL;
+       }
+@@ -291,22 +292,21 @@ static const struct nft_expr_ops nft_bitwise_ops = {
+ static int
+ nft_bitwise_extract_u32_data(const struct nlattr * const tb, u32 *out)
+ {
+-      struct nft_data_desc desc;
+       struct nft_data data;
+-      int err = 0;
++      struct nft_data_desc desc = {
++              .type   = NFT_DATA_VALUE,
++              .size   = sizeof(data),
++              .len    = sizeof(u32),
++      };
++      int err;
+-      err = nft_data_init(NULL, &data, sizeof(data), &desc, tb);
++      err = nft_data_init(NULL, &data, &desc, tb);
+       if (err < 0)
+               return err;
+-      if (desc.type != NFT_DATA_VALUE || desc.len != sizeof(u32)) {
+-              err = -EINVAL;
+-              goto err;
+-      }
+       *out = data.data[0];
+-err:
+-      nft_data_release(&data, desc.type);
+-      return err;
++
++      return 0;
+ }
+ static int nft_bitwise_fast_init(const struct nft_ctx *ctx,
+diff --git a/net/netfilter/nft_cmp.c b/net/netfilter/nft_cmp.c
+index 917072af09df9..461763a571f20 100644
+--- a/net/netfilter/nft_cmp.c
++++ b/net/netfilter/nft_cmp.c
+@@ -73,20 +73,16 @@ static int nft_cmp_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
+                       const struct nlattr * const tb[])
+ {
+       struct nft_cmp_expr *priv = nft_expr_priv(expr);
+-      struct nft_data_desc desc;
++      struct nft_data_desc desc = {
++              .type   = NFT_DATA_VALUE,
++              .size   = sizeof(priv->data),
++      };
+       int err;
+-      err = nft_data_init(NULL, &priv->data, sizeof(priv->data), &desc,
+-                          tb[NFTA_CMP_DATA]);
++      err = nft_data_init(NULL, &priv->data, &desc, tb[NFTA_CMP_DATA]);
+       if (err < 0)
+               return err;
+-      if (desc.type != NFT_DATA_VALUE) {
+-              err = -EINVAL;
+-              nft_data_release(&priv->data, desc.type);
+-              return err;
+-      }
+-
+       err = nft_parse_register_load(tb[NFTA_CMP_SREG], &priv->sreg, desc.len);
+       if (err < 0)
+               return err;
+@@ -201,12 +197,14 @@ static int nft_cmp_fast_init(const struct nft_ctx *ctx,
+                            const struct nlattr * const tb[])
+ {
+       struct nft_cmp_fast_expr *priv = nft_expr_priv(expr);
+-      struct nft_data_desc desc;
+       struct nft_data data;
++      struct nft_data_desc desc = {
++              .type   = NFT_DATA_VALUE,
++              .size   = sizeof(data),
++      };
+       int err;
+-      err = nft_data_init(NULL, &data, sizeof(data), &desc,
+-                          tb[NFTA_CMP_DATA]);
++      err = nft_data_init(NULL, &data, &desc, tb[NFTA_CMP_DATA]);
+       if (err < 0)
+               return err;
+@@ -299,11 +297,13 @@ static int nft_cmp16_fast_init(const struct nft_ctx *ctx,
+                              const struct nlattr * const tb[])
+ {
+       struct nft_cmp16_fast_expr *priv = nft_expr_priv(expr);
+-      struct nft_data_desc desc;
++      struct nft_data_desc desc = {
++              .type   = NFT_DATA_VALUE,
++              .size   = sizeof(priv->data),
++      };
+       int err;
+-      err = nft_data_init(NULL, &priv->data, sizeof(priv->data), &desc,
+-                          tb[NFTA_CMP_DATA]);
++      err = nft_data_init(NULL, &priv->data, &desc, tb[NFTA_CMP_DATA]);
+       if (err < 0)
+               return err;
+@@ -365,8 +365,11 @@ const struct nft_expr_ops nft_cmp16_fast_ops = {
+ static const struct nft_expr_ops *
+ nft_cmp_select_ops(const struct nft_ctx *ctx, const struct nlattr * const tb[])
+ {
+-      struct nft_data_desc desc;
+       struct nft_data data;
++      struct nft_data_desc desc = {
++              .type   = NFT_DATA_VALUE,
++              .size   = sizeof(data),
++      };
+       enum nft_cmp_ops op;
+       u8 sreg;
+       int err;
+@@ -389,14 +392,10 @@ nft_cmp_select_ops(const struct nft_ctx *ctx, const struct nlattr * const tb[])
+               return ERR_PTR(-EINVAL);
+       }
+-      err = nft_data_init(NULL, &data, sizeof(data), &desc,
+-                          tb[NFTA_CMP_DATA]);
++      err = nft_data_init(NULL, &data, &desc, tb[NFTA_CMP_DATA]);
+       if (err < 0)
+               return ERR_PTR(err);
+-      if (desc.type != NFT_DATA_VALUE)
+-              goto err1;
+-
+       sreg = ntohl(nla_get_be32(tb[NFTA_CMP_SREG]));
+       if (op == NFT_CMP_EQ || op == NFT_CMP_NEQ) {
+@@ -408,9 +407,6 @@ nft_cmp_select_ops(const struct nft_ctx *ctx, const struct nlattr * const tb[])
+                       return &nft_cmp16_fast_ops;
+       }
+       return &nft_cmp_ops;
+-err1:
+-      nft_data_release(&data, desc.type);
+-      return ERR_PTR(-EINVAL);
+ }
+ struct nft_expr_type nft_cmp_type __read_mostly = {
+diff --git a/net/netfilter/nft_immediate.c b/net/netfilter/nft_immediate.c
+index d0f67d325bdfd..fcdbc5ed3f367 100644
+--- a/net/netfilter/nft_immediate.c
++++ b/net/netfilter/nft_immediate.c
+@@ -29,20 +29,36 @@ static const struct nla_policy nft_immediate_policy[NFTA_IMMEDIATE_MAX + 1] = {
+       [NFTA_IMMEDIATE_DATA]   = { .type = NLA_NESTED },
+ };
++static enum nft_data_types nft_reg_to_type(const struct nlattr *nla)
++{
++      enum nft_data_types type;
++      u8 reg;
++
++      reg = ntohl(nla_get_be32(nla));
++      if (reg == NFT_REG_VERDICT)
++              type = NFT_DATA_VERDICT;
++      else
++              type = NFT_DATA_VALUE;
++
++      return type;
++}
++
+ static int nft_immediate_init(const struct nft_ctx *ctx,
+                             const struct nft_expr *expr,
+                             const struct nlattr * const tb[])
+ {
+       struct nft_immediate_expr *priv = nft_expr_priv(expr);
+-      struct nft_data_desc desc;
++      struct nft_data_desc desc = {
++              .size   = sizeof(priv->data),
++      };
+       int err;
+       if (tb[NFTA_IMMEDIATE_DREG] == NULL ||
+           tb[NFTA_IMMEDIATE_DATA] == NULL)
+               return -EINVAL;
+-      err = nft_data_init(ctx, &priv->data, sizeof(priv->data), &desc,
+-                          tb[NFTA_IMMEDIATE_DATA]);
++      desc.type = nft_reg_to_type(tb[NFTA_IMMEDIATE_DREG]);
++      err = nft_data_init(ctx, &priv->data, &desc, tb[NFTA_IMMEDIATE_DATA]);
+       if (err < 0)
+               return err;
+diff --git a/net/netfilter/nft_range.c b/net/netfilter/nft_range.c
+index e4a1c44d7f513..e6bbe32c323df 100644
+--- a/net/netfilter/nft_range.c
++++ b/net/netfilter/nft_range.c
+@@ -51,7 +51,14 @@ static int nft_range_init(const struct nft_ctx *ctx, const struct nft_expr *expr
+                       const struct nlattr * const tb[])
+ {
+       struct nft_range_expr *priv = nft_expr_priv(expr);
+-      struct nft_data_desc desc_from, desc_to;
++      struct nft_data_desc desc_from = {
++              .type   = NFT_DATA_VALUE,
++              .size   = sizeof(priv->data_from),
++      };
++      struct nft_data_desc desc_to = {
++              .type   = NFT_DATA_VALUE,
++              .size   = sizeof(priv->data_to),
++      };
+       int err;
+       u32 op;
+@@ -61,26 +68,16 @@ static int nft_range_init(const struct nft_ctx *ctx, const struct nft_expr *expr
+           !tb[NFTA_RANGE_TO_DATA])
+               return -EINVAL;
+-      err = nft_data_init(NULL, &priv->data_from, sizeof(priv->data_from),
+-                          &desc_from, tb[NFTA_RANGE_FROM_DATA]);
++      err = nft_data_init(NULL, &priv->data_from, &desc_from,
++                          tb[NFTA_RANGE_FROM_DATA]);
+       if (err < 0)
+               return err;
+-      if (desc_from.type != NFT_DATA_VALUE) {
+-              err = -EINVAL;
+-              goto err1;
+-      }
+-
+-      err = nft_data_init(NULL, &priv->data_to, sizeof(priv->data_to),
+-                          &desc_to, tb[NFTA_RANGE_TO_DATA]);
++      err = nft_data_init(NULL, &priv->data_to, &desc_to,
++                          tb[NFTA_RANGE_TO_DATA]);
+       if (err < 0)
+               goto err1;
+-      if (desc_to.type != NFT_DATA_VALUE) {
+-              err = -EINVAL;
+-              goto err2;
+-      }
+-
+       if (desc_from.len != desc_to.len) {
+               err = -EINVAL;
+               goto err2;
+-- 
+2.35.1
+
diff --git a/queue-5.10/netfilter-nft_cmp-optimize-comparison-for-16-bytes.patch b/queue-5.10/netfilter-nft_cmp-optimize-comparison-for-16-bytes.patch
new file mode 100644 (file)
index 0000000..f81057e
--- /dev/null
@@ -0,0 +1,213 @@
+From 40c132aa61b83a42e0a7b609f510d974069cceaf Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 7 Feb 2022 19:25:08 +0100
+Subject: netfilter: nft_cmp: optimize comparison for 16-bytes
+
+From: Pablo Neira Ayuso <pablo@netfilter.org>
+
+[ Upstream commit 23f68d462984bfda47c7bf663dca347e8e3df549 ]
+
+Allow up to 16-byte comparisons with a new cmp fast version. Use two
+64-bit words and calculate the mask representing the bits to be
+compared. Make sure the comparison is 64-bit aligned and avoid
+out-of-bound memory access on registers.
+
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/net/netfilter/nf_tables_core.h |   9 +++
+ net/netfilter/nf_tables_core.c         |  16 ++++
+ net/netfilter/nft_cmp.c                | 102 ++++++++++++++++++++++++-
+ 3 files changed, 125 insertions(+), 2 deletions(-)
+
+diff --git a/include/net/netfilter/nf_tables_core.h b/include/net/netfilter/nf_tables_core.h
+index fd10a7862fdc6..ce75121782bf7 100644
+--- a/include/net/netfilter/nf_tables_core.h
++++ b/include/net/netfilter/nf_tables_core.h
+@@ -38,6 +38,14 @@ struct nft_cmp_fast_expr {
+       bool                    inv;
+ };
++struct nft_cmp16_fast_expr {
++      struct nft_data         data;
++      struct nft_data         mask;
++      u8                      sreg;
++      u8                      len;
++      bool                    inv;
++};
++
+ struct nft_immediate_expr {
+       struct nft_data         data;
+       u8                      dreg;
+@@ -55,6 +63,7 @@ static inline u32 nft_cmp_fast_mask(unsigned int len)
+ }
+ extern const struct nft_expr_ops nft_cmp_fast_ops;
++extern const struct nft_expr_ops nft_cmp16_fast_ops;
+ struct nft_payload {
+       enum nft_payload_bases  base:8;
+diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c
+index 6dd27c8cd4253..9dc18429ed875 100644
+--- a/net/netfilter/nf_tables_core.c
++++ b/net/netfilter/nf_tables_core.c
+@@ -67,6 +67,20 @@ static void nft_cmp_fast_eval(const struct nft_expr *expr,
+       regs->verdict.code = NFT_BREAK;
+ }
++static void nft_cmp16_fast_eval(const struct nft_expr *expr,
++                              struct nft_regs *regs)
++{
++      const struct nft_cmp16_fast_expr *priv = nft_expr_priv(expr);
++      const u64 *reg_data = (const u64 *)&regs->data[priv->sreg];
++      const u64 *mask = (const u64 *)&priv->mask;
++      const u64 *data = (const u64 *)&priv->data;
++
++      if (((reg_data[0] & mask[0]) == data[0] &&
++          ((reg_data[1] & mask[1]) == data[1])) ^ priv->inv)
++              return;
++      regs->verdict.code = NFT_BREAK;
++}
++
+ static noinline void __nft_trace_verdict(struct nft_traceinfo *info,
+                                        const struct nft_chain *chain,
+                                        const struct nft_regs *regs)
+@@ -215,6 +229,8 @@ nft_do_chain(struct nft_pktinfo *pkt, void *priv)
+               nft_rule_for_each_expr(expr, last, rule) {
+                       if (expr->ops == &nft_cmp_fast_ops)
+                               nft_cmp_fast_eval(expr, &regs);
++                      else if (expr->ops == &nft_cmp16_fast_ops)
++                              nft_cmp16_fast_eval(expr, &regs);
+                       else if (expr->ops == &nft_bitwise_fast_ops)
+                               nft_bitwise_fast_eval(expr, &regs);
+                       else if (expr->ops != &nft_payload_fast_ops ||
+diff --git a/net/netfilter/nft_cmp.c b/net/netfilter/nft_cmp.c
+index 47b6d05f1ae69..917072af09df9 100644
+--- a/net/netfilter/nft_cmp.c
++++ b/net/netfilter/nft_cmp.c
+@@ -272,12 +272,103 @@ const struct nft_expr_ops nft_cmp_fast_ops = {
+       .offload        = nft_cmp_fast_offload,
+ };
++static u32 nft_cmp_mask(u32 bitlen)
++{
++      return (__force u32)cpu_to_le32(~0U >> (sizeof(u32) * BITS_PER_BYTE - bitlen));
++}
++
++static void nft_cmp16_fast_mask(struct nft_data *data, unsigned int bitlen)
++{
++      int len = bitlen / BITS_PER_BYTE;
++      int i, words = len / sizeof(u32);
++
++      for (i = 0; i < words; i++) {
++              data->data[i] = 0xffffffff;
++              bitlen -= sizeof(u32) * BITS_PER_BYTE;
++      }
++
++      if (len % sizeof(u32))
++              data->data[i++] = nft_cmp_mask(bitlen);
++
++      for (; i < 4; i++)
++              data->data[i] = 0;
++}
++
++static int nft_cmp16_fast_init(const struct nft_ctx *ctx,
++                             const struct nft_expr *expr,
++                             const struct nlattr * const tb[])
++{
++      struct nft_cmp16_fast_expr *priv = nft_expr_priv(expr);
++      struct nft_data_desc desc;
++      int err;
++
++      err = nft_data_init(NULL, &priv->data, sizeof(priv->data), &desc,
++                          tb[NFTA_CMP_DATA]);
++      if (err < 0)
++              return err;
++
++      err = nft_parse_register_load(tb[NFTA_CMP_SREG], &priv->sreg, desc.len);
++      if (err < 0)
++              return err;
++
++      nft_cmp16_fast_mask(&priv->mask, desc.len * BITS_PER_BYTE);
++      priv->inv = ntohl(nla_get_be32(tb[NFTA_CMP_OP])) != NFT_CMP_EQ;
++      priv->len = desc.len;
++
++      return 0;
++}
++
++static int nft_cmp16_fast_offload(struct nft_offload_ctx *ctx,
++                                struct nft_flow_rule *flow,
++                                const struct nft_expr *expr)
++{
++      const struct nft_cmp16_fast_expr *priv = nft_expr_priv(expr);
++      struct nft_cmp_expr cmp = {
++              .data   = priv->data,
++              .sreg   = priv->sreg,
++              .len    = priv->len,
++              .op     = priv->inv ? NFT_CMP_NEQ : NFT_CMP_EQ,
++      };
++
++      return __nft_cmp_offload(ctx, flow, &cmp);
++}
++
++static int nft_cmp16_fast_dump(struct sk_buff *skb, const struct nft_expr *expr)
++{
++      const struct nft_cmp16_fast_expr *priv = nft_expr_priv(expr);
++      enum nft_cmp_ops op = priv->inv ? NFT_CMP_NEQ : NFT_CMP_EQ;
++
++      if (nft_dump_register(skb, NFTA_CMP_SREG, priv->sreg))
++              goto nla_put_failure;
++      if (nla_put_be32(skb, NFTA_CMP_OP, htonl(op)))
++              goto nla_put_failure;
++
++      if (nft_data_dump(skb, NFTA_CMP_DATA, &priv->data,
++                        NFT_DATA_VALUE, priv->len) < 0)
++              goto nla_put_failure;
++      return 0;
++
++nla_put_failure:
++      return -1;
++}
++
++
++const struct nft_expr_ops nft_cmp16_fast_ops = {
++      .type           = &nft_cmp_type,
++      .size           = NFT_EXPR_SIZE(sizeof(struct nft_cmp16_fast_expr)),
++      .eval           = NULL, /* inlined */
++      .init           = nft_cmp16_fast_init,
++      .dump           = nft_cmp16_fast_dump,
++      .offload        = nft_cmp16_fast_offload,
++};
++
+ static const struct nft_expr_ops *
+ nft_cmp_select_ops(const struct nft_ctx *ctx, const struct nlattr * const tb[])
+ {
+       struct nft_data_desc desc;
+       struct nft_data data;
+       enum nft_cmp_ops op;
++      u8 sreg;
+       int err;
+       if (tb[NFTA_CMP_SREG] == NULL ||
+@@ -306,9 +397,16 @@ nft_cmp_select_ops(const struct nft_ctx *ctx, const struct nlattr * const tb[])
+       if (desc.type != NFT_DATA_VALUE)
+               goto err1;
+-      if (desc.len <= sizeof(u32) && (op == NFT_CMP_EQ || op == NFT_CMP_NEQ))
+-              return &nft_cmp_fast_ops;
++      sreg = ntohl(nla_get_be32(tb[NFTA_CMP_SREG]));
++      if (op == NFT_CMP_EQ || op == NFT_CMP_NEQ) {
++              if (desc.len <= sizeof(u32))
++                      return &nft_cmp_fast_ops;
++              else if (desc.len <= sizeof(data) &&
++                       ((sreg >= NFT_REG_1 && sreg <= NFT_REG_4) ||
++                        (sreg >= NFT_REG32_00 && sreg <= NFT_REG32_12 && sreg % 2 == 0)))
++                      return &nft_cmp16_fast_ops;
++      }
+       return &nft_cmp_ops;
+ err1:
+       nft_data_release(&data, desc.type);
+-- 
+2.35.1
+
diff --git a/queue-5.10/netfilter-nft_osf-restrict-osf-to-ipv4-ipv6-and-inet.patch b/queue-5.10/netfilter-nft_osf-restrict-osf-to-ipv4-ipv6-and-inet.patch
new file mode 100644 (file)
index 0000000..37174e6
--- /dev/null
@@ -0,0 +1,50 @@
+From f1d01f6bd83645fa24e72e3a54556d0ed10e9477 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sun, 21 Aug 2022 16:25:07 +0200
+Subject: netfilter: nft_osf: restrict osf to ipv4, ipv6 and inet families
+
+From: Pablo Neira Ayuso <pablo@netfilter.org>
+
+[ Upstream commit 5f3b7aae14a706d0d7da9f9e39def52ff5fc3d39 ]
+
+As it was originally intended, restrict extension to supported families.
+
+Fixes: b96af92d6eaf ("netfilter: nf_tables: implement Passive OS fingerprint module in nft_osf")
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/netfilter/nft_osf.c | 18 +++++++++++++++---
+ 1 file changed, 15 insertions(+), 3 deletions(-)
+
+diff --git a/net/netfilter/nft_osf.c b/net/netfilter/nft_osf.c
+index d82677e83400b..720dc9fba6d4f 100644
+--- a/net/netfilter/nft_osf.c
++++ b/net/netfilter/nft_osf.c
+@@ -115,9 +115,21 @@ static int nft_osf_validate(const struct nft_ctx *ctx,
+                           const struct nft_expr *expr,
+                           const struct nft_data **data)
+ {
+-      return nft_chain_validate_hooks(ctx->chain, (1 << NF_INET_LOCAL_IN) |
+-                                                  (1 << NF_INET_PRE_ROUTING) |
+-                                                  (1 << NF_INET_FORWARD));
++      unsigned int hooks;
++
++      switch (ctx->family) {
++      case NFPROTO_IPV4:
++      case NFPROTO_IPV6:
++      case NFPROTO_INET:
++              hooks = (1 << NF_INET_LOCAL_IN) |
++                      (1 << NF_INET_PRE_ROUTING) |
++                      (1 << NF_INET_FORWARD);
++              break;
++      default:
++              return -EOPNOTSUPP;
++      }
++
++      return nft_chain_validate_hooks(ctx->chain, hooks);
+ }
+ static struct nft_expr_type nft_osf_type;
+-- 
+2.35.1
+
diff --git a/queue-5.10/netfilter-nft_payload-do-not-truncate-csum_offset-an.patch b/queue-5.10/netfilter-nft_payload-do-not-truncate-csum_offset-an.patch
new file mode 100644 (file)
index 0000000..c65e574
--- /dev/null
@@ -0,0 +1,72 @@
+From 6536d80fad58b7b9922112ea5e0d8fc1ce87ef0d Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sun, 21 Aug 2022 11:55:19 +0200
+Subject: netfilter: nft_payload: do not truncate csum_offset and csum_type
+
+From: Pablo Neira Ayuso <pablo@netfilter.org>
+
+[ Upstream commit 7044ab281febae9e2fa9b0b247693d6026166293 ]
+
+Instead report ERANGE if csum_offset is too long, and EOPNOTSUPP if type
+is not support.
+
+Fixes: 7ec3f7b47b8d ("netfilter: nft_payload: add packet mangling support")
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/netfilter/nft_payload.c | 19 +++++++++++++------
+ 1 file changed, 13 insertions(+), 6 deletions(-)
+
+diff --git a/net/netfilter/nft_payload.c b/net/netfilter/nft_payload.c
+index bdb07362c0ef0..551e0d6cf63d4 100644
+--- a/net/netfilter/nft_payload.c
++++ b/net/netfilter/nft_payload.c
+@@ -660,17 +660,23 @@ static int nft_payload_set_init(const struct nft_ctx *ctx,
+                               const struct nlattr * const tb[])
+ {
+       struct nft_payload_set *priv = nft_expr_priv(expr);
++      u32 csum_offset, csum_type = NFT_PAYLOAD_CSUM_NONE;
++      int err;
+       priv->base        = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_BASE]));
+       priv->offset      = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_OFFSET]));
+       priv->len         = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_LEN]));
+       if (tb[NFTA_PAYLOAD_CSUM_TYPE])
+-              priv->csum_type =
+-                      ntohl(nla_get_be32(tb[NFTA_PAYLOAD_CSUM_TYPE]));
+-      if (tb[NFTA_PAYLOAD_CSUM_OFFSET])
+-              priv->csum_offset =
+-                      ntohl(nla_get_be32(tb[NFTA_PAYLOAD_CSUM_OFFSET]));
++              csum_type = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_CSUM_TYPE]));
++      if (tb[NFTA_PAYLOAD_CSUM_OFFSET]) {
++              err = nft_parse_u32_check(tb[NFTA_PAYLOAD_CSUM_OFFSET], U8_MAX,
++                                        &csum_offset);
++              if (err < 0)
++                      return err;
++
++              priv->csum_offset = csum_offset;
++      }
+       if (tb[NFTA_PAYLOAD_CSUM_FLAGS]) {
+               u32 flags;
+@@ -681,7 +687,7 @@ static int nft_payload_set_init(const struct nft_ctx *ctx,
+               priv->csum_flags = flags;
+       }
+-      switch (priv->csum_type) {
++      switch (csum_type) {
+       case NFT_PAYLOAD_CSUM_NONE:
+       case NFT_PAYLOAD_CSUM_INET:
+               break;
+@@ -695,6 +701,7 @@ static int nft_payload_set_init(const struct nft_ctx *ctx,
+       default:
+               return -EOPNOTSUPP;
+       }
++      priv->csum_type = csum_type;
+       return nft_parse_register_load(tb[NFTA_PAYLOAD_SREG], &priv->sreg,
+                                      priv->len);
+-- 
+2.35.1
+
diff --git a/queue-5.10/netfilter-nft_payload-report-erange-for-too-long-off.patch b/queue-5.10/netfilter-nft_payload-report-erange-for-too-long-off.patch
new file mode 100644 (file)
index 0000000..773a11e
--- /dev/null
@@ -0,0 +1,49 @@
+From f31d45f1257a4a8a4bdec4505b73daf9acbc75b3 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sun, 21 Aug 2022 11:47:04 +0200
+Subject: netfilter: nft_payload: report ERANGE for too long offset and length
+
+From: Pablo Neira Ayuso <pablo@netfilter.org>
+
+[ Upstream commit 94254f990c07e9ddf1634e0b727fab821c3b5bf9 ]
+
+Instead of offset and length are truncation to u8, report ERANGE.
+
+Fixes: 96518518cc41 ("netfilter: add nftables")
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/netfilter/nft_payload.c | 10 ++++++++--
+ 1 file changed, 8 insertions(+), 2 deletions(-)
+
+diff --git a/net/netfilter/nft_payload.c b/net/netfilter/nft_payload.c
+index 01878c16418c2..bdb07362c0ef0 100644
+--- a/net/netfilter/nft_payload.c
++++ b/net/netfilter/nft_payload.c
+@@ -733,6 +733,7 @@ nft_payload_select_ops(const struct nft_ctx *ctx,
+ {
+       enum nft_payload_bases base;
+       unsigned int offset, len;
++      int err;
+       if (tb[NFTA_PAYLOAD_BASE] == NULL ||
+           tb[NFTA_PAYLOAD_OFFSET] == NULL ||
+@@ -758,8 +759,13 @@ nft_payload_select_ops(const struct nft_ctx *ctx,
+       if (tb[NFTA_PAYLOAD_DREG] == NULL)
+               return ERR_PTR(-EINVAL);
+-      offset = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_OFFSET]));
+-      len    = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_LEN]));
++      err = nft_parse_u32_check(tb[NFTA_PAYLOAD_OFFSET], U8_MAX, &offset);
++      if (err < 0)
++              return ERR_PTR(err);
++
++      err = nft_parse_u32_check(tb[NFTA_PAYLOAD_LEN], U8_MAX, &len);
++      if (err < 0)
++              return ERR_PTR(err);
+       if (len <= 4 && is_power_of_2(len) && IS_ALIGNED(offset, len) &&
+           base != NFT_PAYLOAD_LL_HEADER)
+-- 
+2.35.1
+
diff --git a/queue-5.10/netfilter-nft_tunnel-restrict-it-to-netdev-family.patch b/queue-5.10/netfilter-nft_tunnel-restrict-it-to-netdev-family.patch
new file mode 100644 (file)
index 0000000..0e4352b
--- /dev/null
@@ -0,0 +1,33 @@
+From 196082d7126928c567af17a3487a7b418df81c6e Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sun, 21 Aug 2022 16:32:44 +0200
+Subject: netfilter: nft_tunnel: restrict it to netdev family
+
+From: Pablo Neira Ayuso <pablo@netfilter.org>
+
+[ Upstream commit 01e4092d53bc4fe122a6e4b6d664adbd57528ca3 ]
+
+Only allow to use this expression from NFPROTO_NETDEV family.
+
+Fixes: af308b94a2a4 ("netfilter: nf_tables: add tunnel support")
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/netfilter/nft_tunnel.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/net/netfilter/nft_tunnel.c b/net/netfilter/nft_tunnel.c
+index 3b27926d5382c..2ee50996da8cc 100644
+--- a/net/netfilter/nft_tunnel.c
++++ b/net/netfilter/nft_tunnel.c
+@@ -133,6 +133,7 @@ static const struct nft_expr_ops nft_tunnel_get_ops = {
+ static struct nft_expr_type nft_tunnel_type __read_mostly = {
+       .name           = "tunnel",
++      .family         = NFPROTO_NETDEV,
+       .ops            = &nft_tunnel_get_ops,
+       .policy         = nft_tunnel_policy,
+       .maxattr        = NFTA_TUNNEL_MAX,
+-- 
+2.35.1
+
diff --git a/queue-5.10/netfilter-nftables-remove-redundant-assignment-of-va.patch b/queue-5.10/netfilter-nftables-remove-redundant-assignment-of-va.patch
new file mode 100644 (file)
index 0000000..9f86ebf
--- /dev/null
@@ -0,0 +1,40 @@
+From 02de40047b17dc26955f97fa3529ae05404427a5 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 28 Jan 2021 17:59:23 +0000
+Subject: netfilter: nftables: remove redundant assignment of variable err
+
+From: Colin Ian King <colin.king@canonical.com>
+
+[ Upstream commit 626899a02e6afcd4b2ce5c0551092e3554cec4aa ]
+
+The variable err is being assigned a value that is never read,
+the same error number is being returned at the error return
+path via label err1.  Clean up the code by removing the assignment.
+
+Addresses-Coverity: ("Unused value")
+Signed-off-by: Colin Ian King <colin.king@canonical.com>
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/netfilter/nft_cmp.c | 4 +---
+ 1 file changed, 1 insertion(+), 3 deletions(-)
+
+diff --git a/net/netfilter/nft_cmp.c b/net/netfilter/nft_cmp.c
+index b529c0e865466..47b6d05f1ae69 100644
+--- a/net/netfilter/nft_cmp.c
++++ b/net/netfilter/nft_cmp.c
+@@ -303,10 +303,8 @@ nft_cmp_select_ops(const struct nft_ctx *ctx, const struct nlattr * const tb[])
+       if (err < 0)
+               return ERR_PTR(err);
+-      if (desc.type != NFT_DATA_VALUE) {
+-              err = -EINVAL;
++      if (desc.type != NFT_DATA_VALUE)
+               goto err1;
+-      }
+       if (desc.len <= sizeof(u32) && (op == NFT_CMP_EQ || op == NFT_CMP_NEQ))
+               return &nft_cmp_fast_ops;
+-- 
+2.35.1
+
diff --git a/queue-5.10/nfc-pn533-fix-use-after-free-bugs-caused-by-pn532_cm.patch b/queue-5.10/nfc-pn533-fix-use-after-free-bugs-caused-by-pn532_cm.patch
new file mode 100644 (file)
index 0000000..1002a37
--- /dev/null
@@ -0,0 +1,50 @@
+From a7c333dd8d8a0834567ab60b7285a81835b2567f Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 18 Aug 2022 17:06:21 +0800
+Subject: nfc: pn533: Fix use-after-free bugs caused by pn532_cmd_timeout
+
+From: Duoming Zhou <duoming@zju.edu.cn>
+
+[ Upstream commit f1e941dbf80a9b8bab0bffbc4cbe41cc7f4c6fb6 ]
+
+When the pn532 uart device is detaching, the pn532_uart_remove()
+is called. But there are no functions in pn532_uart_remove() that
+could delete the cmd_timeout timer, which will cause use-after-free
+bugs. The process is shown below:
+
+    (thread 1)                  |        (thread 2)
+                                |  pn532_uart_send_frame
+pn532_uart_remove               |    mod_timer(&pn532->cmd_timeout,...)
+  ...                           |    (wait a time)
+  kfree(pn532) //FREE           |    pn532_cmd_timeout
+                                |      pn532_uart_send_frame
+                                |        pn532->... //USE
+
+This patch adds del_timer_sync() in pn532_uart_remove() in order to
+prevent the use-after-free bugs. What's more, the pn53x_unregister_nfc()
+is well synchronized, it sets nfc_dev->shutting_down to true and there
+are no syscalls could restart the cmd_timeout timer.
+
+Fixes: c656aa4c27b1 ("nfc: pn533: add UART phy driver")
+Signed-off-by: Duoming Zhou <duoming@zju.edu.cn>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/nfc/pn533/uart.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/drivers/nfc/pn533/uart.c b/drivers/nfc/pn533/uart.c
+index a0665d8ea85bc..e92535ebb5287 100644
+--- a/drivers/nfc/pn533/uart.c
++++ b/drivers/nfc/pn533/uart.c
+@@ -310,6 +310,7 @@ static void pn532_uart_remove(struct serdev_device *serdev)
+       pn53x_unregister_nfc(pn532->priv);
+       serdev_device_close(serdev);
+       pn53x_common_clean(pn532->priv);
++      del_timer_sync(&pn532->cmd_timeout);
+       kfree_skb(pn532->recv_skb);
+       kfree(pn532);
+ }
+-- 
+2.35.1
+
diff --git a/queue-5.10/nfs-don-t-allocate-nfs_fattr-on-the-stack-in-__nfs42.patch b/queue-5.10/nfs-don-t-allocate-nfs_fattr-on-the-stack-in-__nfs42.patch
new file mode 100644 (file)
index 0000000..3ae0ff4
--- /dev/null
@@ -0,0 +1,64 @@
+From 9f85bf8ae126a1f539ddae6637c6c4e319083946 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 5 Nov 2021 14:23:30 -0400
+Subject: NFS: Don't allocate nfs_fattr on the stack in __nfs42_ssc_open()
+
+From: Trond Myklebust <trond.myklebust@hammerspace.com>
+
+[ Upstream commit 156cd28562a4e8ca454d11b234d9f634a45d6390 ]
+
+The preferred behaviour is always to allocate struct nfs_fattr from the
+slab.
+
+Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/nfs/nfs4file.c | 10 ++++++----
+ 1 file changed, 6 insertions(+), 4 deletions(-)
+
+diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c
+index 9fdecd9090493..4928eaa0d4c02 100644
+--- a/fs/nfs/nfs4file.c
++++ b/fs/nfs/nfs4file.c
+@@ -321,7 +321,7 @@ static int read_name_gen = 1;
+ static struct file *__nfs42_ssc_open(struct vfsmount *ss_mnt,
+               struct nfs_fh *src_fh, nfs4_stateid *stateid)
+ {
+-      struct nfs_fattr fattr;
++      struct nfs_fattr *fattr = nfs_alloc_fattr();
+       struct file *filep, *res;
+       struct nfs_server *server;
+       struct inode *r_ino = NULL;
+@@ -332,9 +332,10 @@ static struct file *__nfs42_ssc_open(struct vfsmount *ss_mnt,
+       server = NFS_SERVER(ss_mnt->mnt_root->d_inode);
+-      nfs_fattr_init(&fattr);
++      if (!fattr)
++              return ERR_PTR(-ENOMEM);
+-      status = nfs4_proc_getattr(server, src_fh, &fattr, NULL, NULL);
++      status = nfs4_proc_getattr(server, src_fh, fattr, NULL, NULL);
+       if (status < 0) {
+               res = ERR_PTR(status);
+               goto out;
+@@ -347,7 +348,7 @@ static struct file *__nfs42_ssc_open(struct vfsmount *ss_mnt,
+               goto out;
+       snprintf(read_name, len, SSC_READ_NAME_BODY, read_name_gen++);
+-      r_ino = nfs_fhget(ss_mnt->mnt_root->d_inode->i_sb, src_fh, &fattr,
++      r_ino = nfs_fhget(ss_mnt->mnt_root->d_inode->i_sb, src_fh, fattr,
+                       NULL);
+       if (IS_ERR(r_ino)) {
+               res = ERR_CAST(r_ino);
+@@ -392,6 +393,7 @@ static struct file *__nfs42_ssc_open(struct vfsmount *ss_mnt,
+ out_free_name:
+       kfree(read_name);
+ out:
++      nfs_free_fattr(fattr);
+       return res;
+ out_stateowner:
+       nfs4_put_state_owner(sp);
+-- 
+2.35.1
+
diff --git a/queue-5.10/nfsv4.2-fix-problems-with-__nfs42_ssc_open.patch b/queue-5.10/nfsv4.2-fix-problems-with-__nfs42_ssc_open.patch
new file mode 100644 (file)
index 0000000..23b23f7
--- /dev/null
@@ -0,0 +1,51 @@
+From eb9ce5f2add2c1620884d94341629c9bc57d54af Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 18 Aug 2022 15:07:05 -0400
+Subject: NFSv4.2 fix problems with __nfs42_ssc_open
+
+From: Olga Kornievskaia <kolga@netapp.com>
+
+[ Upstream commit fcfc8be1e9cf2f12b50dce8b579b3ae54443a014 ]
+
+A destination server while doing a COPY shouldn't accept using the
+passed in filehandle if it's not a regular filehandle.
+
+If alloc_file_pseudo() has failed, we need to decrement a reference
+on the newly created inode, otherwise it leaks.
+
+Reported-by: Al Viro <viro@zeniv.linux.org.uk>
+Fixes: ec4b092508982 ("NFS: inter ssc open")
+Signed-off-by: Olga Kornievskaia <kolga@netapp.com>
+Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/nfs/nfs4file.c | 6 ++++++
+ 1 file changed, 6 insertions(+)
+
+diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c
+index 4928eaa0d4c02..70cd0d764c447 100644
+--- a/fs/nfs/nfs4file.c
++++ b/fs/nfs/nfs4file.c
+@@ -341,6 +341,11 @@ static struct file *__nfs42_ssc_open(struct vfsmount *ss_mnt,
+               goto out;
+       }
++      if (!S_ISREG(fattr->mode)) {
++              res = ERR_PTR(-EBADF);
++              goto out;
++      }
++
+       res = ERR_PTR(-ENOMEM);
+       len = strlen(SSC_READ_NAME_BODY) + 16;
+       read_name = kzalloc(len, GFP_NOFS);
+@@ -359,6 +364,7 @@ static struct file *__nfs42_ssc_open(struct vfsmount *ss_mnt,
+                                    r_ino->i_fop);
+       if (IS_ERR(filep)) {
+               res = ERR_CAST(filep);
++              iput(r_ino);
+               goto out_free_name;
+       }
+       filep->f_mode |= FMODE_READ;
+-- 
+2.35.1
+
diff --git a/queue-5.10/ratelimit-fix-data-races-in-___ratelimit.patch b/queue-5.10/ratelimit-fix-data-races-in-___ratelimit.patch
new file mode 100644 (file)
index 0000000..a13035a
--- /dev/null
@@ -0,0 +1,64 @@
+From 5b5e65fd370ae74da3776865840f037f50c4b3bb Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 23 Aug 2022 10:46:48 -0700
+Subject: ratelimit: Fix data-races in ___ratelimit().
+
+From: Kuniyuki Iwashima <kuniyu@amazon.com>
+
+[ Upstream commit 6bae8ceb90ba76cdba39496db936164fa672b9be ]
+
+While reading rs->interval and rs->burst, they can be changed
+concurrently via sysctl (e.g. net_ratelimit_state).  Thus, we
+need to add READ_ONCE() to their readers.
+
+Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
+Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ lib/ratelimit.c | 12 +++++++++---
+ 1 file changed, 9 insertions(+), 3 deletions(-)
+
+diff --git a/lib/ratelimit.c b/lib/ratelimit.c
+index e01a93f46f833..ce945c17980b9 100644
+--- a/lib/ratelimit.c
++++ b/lib/ratelimit.c
+@@ -26,10 +26,16 @@
+  */
+ int ___ratelimit(struct ratelimit_state *rs, const char *func)
+ {
++      /* Paired with WRITE_ONCE() in .proc_handler().
++       * Changing two values seperately could be inconsistent
++       * and some message could be lost.  (See: net_ratelimit_state).
++       */
++      int interval = READ_ONCE(rs->interval);
++      int burst = READ_ONCE(rs->burst);
+       unsigned long flags;
+       int ret;
+-      if (!rs->interval)
++      if (!interval)
+               return 1;
+       /*
+@@ -44,7 +50,7 @@ int ___ratelimit(struct ratelimit_state *rs, const char *func)
+       if (!rs->begin)
+               rs->begin = jiffies;
+-      if (time_is_before_jiffies(rs->begin + rs->interval)) {
++      if (time_is_before_jiffies(rs->begin + interval)) {
+               if (rs->missed) {
+                       if (!(rs->flags & RATELIMIT_MSG_ON_RELEASE)) {
+                               printk_deferred(KERN_WARNING
+@@ -56,7 +62,7 @@ int ___ratelimit(struct ratelimit_state *rs, const char *func)
+               rs->begin   = jiffies;
+               rs->printed = 0;
+       }
+-      if (rs->burst && rs->burst > rs->printed) {
++      if (burst && burst > rs->printed) {
+               rs->printed++;
+               ret = 1;
+       } else {
+-- 
+2.35.1
+
diff --git a/queue-5.10/rose-check-null-rose_loopback_neigh-loopback.patch b/queue-5.10/rose-check-null-rose_loopback_neigh-loopback.patch
new file mode 100644 (file)
index 0000000..fcbaee1
--- /dev/null
@@ -0,0 +1,69 @@
+From f57cdfca43f3f01432a27f391d151d4f8ff33edf Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 18 Aug 2022 02:02:13 +0200
+Subject: rose: check NULL rose_loopback_neigh->loopback
+
+From: Bernard Pidoux <f6bvp@free.fr>
+
+[ Upstream commit 3c53cd65dece47dd1f9d3a809f32e59d1d87b2b8 ]
+
+Commit 3b3fd068c56e3fbea30090859216a368398e39bf added NULL check for
+`rose_loopback_neigh->dev` in rose_loopback_timer() but omitted to
+check rose_loopback_neigh->loopback.
+
+It thus prevents *all* rose connect.
+
+The reason is that a special rose_neigh loopback has a NULL device.
+
+/proc/net/rose_neigh illustrates it via rose_neigh_show() function :
+[...]
+seq_printf(seq, "%05d %-9s %-4s   %3d %3d  %3s     %3s %3lu %3lu",
+          rose_neigh->number,
+          (rose_neigh->loopback) ? "RSLOOP-0" : ax2asc(buf, &rose_neigh->callsign),
+          rose_neigh->dev ? rose_neigh->dev->name : "???",
+          rose_neigh->count,
+
+/proc/net/rose_neigh displays special rose_loopback_neigh->loopback as
+callsign RSLOOP-0:
+
+addr  callsign  dev  count use mode restart  t0  tf digipeaters
+00001 RSLOOP-0  ???      1   2  DCE     yes   0   0
+
+By checking rose_loopback_neigh->loopback, rose_rx_call_request() is called
+even in case rose_loopback_neigh->dev is NULL. This repairs rose connections.
+
+Verification with rose client application FPAC:
+
+FPAC-Node v 4.1.3 (built Aug  5 2022) for LINUX (help = h)
+F6BVP-4 (Commands = ?) : u
+Users - AX.25 Level 2 sessions :
+Port   Callsign     Callsign  AX.25 state  ROSE state  NetRom status
+axudp  F6BVP-5   -> F6BVP-9   Connected    Connected   ---------
+
+Fixes: 3b3fd068c56e ("rose: Fix Null pointer dereference in rose_send_frame()")
+Signed-off-by: Bernard Pidoux <f6bvp@free.fr>
+Suggested-by: Francois Romieu <romieu@fr.zoreil.com>
+Cc: Thomas DL9SAU Osterried <thomas@osterried.de>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/rose/rose_loopback.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+diff --git a/net/rose/rose_loopback.c b/net/rose/rose_loopback.c
+index 11c45c8c6c164..036d92c0ad794 100644
+--- a/net/rose/rose_loopback.c
++++ b/net/rose/rose_loopback.c
+@@ -96,7 +96,8 @@ static void rose_loopback_timer(struct timer_list *unused)
+               }
+               if (frametype == ROSE_CALL_REQUEST) {
+-                      if (!rose_loopback_neigh->dev) {
++                      if (!rose_loopback_neigh->dev &&
++                          !rose_loopback_neigh->loopback) {
+                               kfree_skb(skb);
+                               continue;
+                       }
+-- 
+2.35.1
+
diff --git a/queue-5.10/rxrpc-fix-locking-in-rxrpc-s-sendmsg.patch b/queue-5.10/rxrpc-fix-locking-in-rxrpc-s-sendmsg.patch
new file mode 100644 (file)
index 0000000..61dacb5
--- /dev/null
@@ -0,0 +1,287 @@
+From 4110c15e7417804f791bc44891829590d3235bdf Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 24 Aug 2022 17:35:45 +0100
+Subject: rxrpc: Fix locking in rxrpc's sendmsg
+
+From: David Howells <dhowells@redhat.com>
+
+[ Upstream commit b0f571ecd7943423c25947439045f0d352ca3dbf ]
+
+Fix three bugs in the rxrpc's sendmsg implementation:
+
+ (1) rxrpc_new_client_call() should release the socket lock when returning
+     an error from rxrpc_get_call_slot().
+
+ (2) rxrpc_wait_for_tx_window_intr() will return without the call mutex
+     held in the event that we're interrupted by a signal whilst waiting
+     for tx space on the socket or relocking the call mutex afterwards.
+
+     Fix this by: (a) moving the unlock/lock of the call mutex up to
+     rxrpc_send_data() such that the lock is not held around all of
+     rxrpc_wait_for_tx_window*() and (b) indicating to higher callers
+     whether we return with the lock dropped.  Note that this means
+     recvmsg() will not block on this call whilst we're waiting.
+
+ (3) After dropping and regaining the call mutex, rxrpc_send_data() needs
+     to go and recheck the state of the tx_pending buffer and the
+     tx_total_len check in case we raced with another sendmsg() on the same
+     call.
+
+Thinking on this some more, it might make sense to have different locks for
+sendmsg() and recvmsg().  There's probably no need to make recvmsg() wait
+for sendmsg().  It does mean that recvmsg() can return MSG_EOR indicating
+that a call is dead before a sendmsg() to that call returns - but that can
+currently happen anyway.
+
+Without fix (2), something like the following can be induced:
+
+       WARNING: bad unlock balance detected!
+       5.16.0-rc6-syzkaller #0 Not tainted
+       -------------------------------------
+       syz-executor011/3597 is trying to release lock (&call->user_mutex) at:
+       [<ffffffff885163a3>] rxrpc_do_sendmsg+0xc13/0x1350 net/rxrpc/sendmsg.c:748
+       but there are no more locks to release!
+
+       other info that might help us debug this:
+       no locks held by syz-executor011/3597.
+       ...
+       Call Trace:
+        <TASK>
+        __dump_stack lib/dump_stack.c:88 [inline]
+        dump_stack_lvl+0xcd/0x134 lib/dump_stack.c:106
+        print_unlock_imbalance_bug include/trace/events/lock.h:58 [inline]
+        __lock_release kernel/locking/lockdep.c:5306 [inline]
+        lock_release.cold+0x49/0x4e kernel/locking/lockdep.c:5657
+        __mutex_unlock_slowpath+0x99/0x5e0 kernel/locking/mutex.c:900
+        rxrpc_do_sendmsg+0xc13/0x1350 net/rxrpc/sendmsg.c:748
+        rxrpc_sendmsg+0x420/0x630 net/rxrpc/af_rxrpc.c:561
+        sock_sendmsg_nosec net/socket.c:704 [inline]
+        sock_sendmsg+0xcf/0x120 net/socket.c:724
+        ____sys_sendmsg+0x6e8/0x810 net/socket.c:2409
+        ___sys_sendmsg+0xf3/0x170 net/socket.c:2463
+        __sys_sendmsg+0xe5/0x1b0 net/socket.c:2492
+        do_syscall_x64 arch/x86/entry/common.c:50 [inline]
+        do_syscall_64+0x35/0xb0 arch/x86/entry/common.c:80
+        entry_SYSCALL_64_after_hwframe+0x44/0xae
+
+[Thanks to Hawkins Jiawei and Khalid Masum for their attempts to fix this]
+
+Fixes: bc5e3a546d55 ("rxrpc: Use MSG_WAITALL to tell sendmsg() to temporarily ignore signals")
+Reported-by: syzbot+7f0483225d0c94cb3441@syzkaller.appspotmail.com
+Signed-off-by: David Howells <dhowells@redhat.com>
+Reviewed-by: Marc Dionne <marc.dionne@auristor.com>
+Tested-by: syzbot+7f0483225d0c94cb3441@syzkaller.appspotmail.com
+cc: Hawkins Jiawei <yin31149@gmail.com>
+cc: Khalid Masum <khalid.masum.92@gmail.com>
+cc: Dan Carpenter <dan.carpenter@oracle.com>
+cc: linux-afs@lists.infradead.org
+Link: https://lore.kernel.org/r/166135894583.600315.7170979436768124075.stgit@warthog.procyon.org.uk
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/rxrpc/call_object.c |  4 +-
+ net/rxrpc/sendmsg.c     | 92 ++++++++++++++++++++++++-----------------
+ 2 files changed, 57 insertions(+), 39 deletions(-)
+
+diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c
+index 043508fd8d8a5..150cd7b2154c8 100644
+--- a/net/rxrpc/call_object.c
++++ b/net/rxrpc/call_object.c
+@@ -285,8 +285,10 @@ struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *rx,
+       _enter("%p,%lx", rx, p->user_call_ID);
+       limiter = rxrpc_get_call_slot(p, gfp);
+-      if (!limiter)
++      if (!limiter) {
++              release_sock(&rx->sk);
+               return ERR_PTR(-ERESTARTSYS);
++      }
+       call = rxrpc_alloc_client_call(rx, srx, gfp, debug_id);
+       if (IS_ERR(call)) {
+diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c
+index aa23ba4e25662..eef3c14fd1c18 100644
+--- a/net/rxrpc/sendmsg.c
++++ b/net/rxrpc/sendmsg.c
+@@ -51,10 +51,7 @@ static int rxrpc_wait_for_tx_window_intr(struct rxrpc_sock *rx,
+                       return sock_intr_errno(*timeo);
+               trace_rxrpc_transmit(call, rxrpc_transmit_wait);
+-              mutex_unlock(&call->user_mutex);
+               *timeo = schedule_timeout(*timeo);
+-              if (mutex_lock_interruptible(&call->user_mutex) < 0)
+-                      return sock_intr_errno(*timeo);
+       }
+ }
+@@ -290,37 +287,48 @@ static int rxrpc_queue_packet(struct rxrpc_sock *rx, struct rxrpc_call *call,
+ static int rxrpc_send_data(struct rxrpc_sock *rx,
+                          struct rxrpc_call *call,
+                          struct msghdr *msg, size_t len,
+-                         rxrpc_notify_end_tx_t notify_end_tx)
++                         rxrpc_notify_end_tx_t notify_end_tx,
++                         bool *_dropped_lock)
+ {
+       struct rxrpc_skb_priv *sp;
+       struct sk_buff *skb;
+       struct sock *sk = &rx->sk;
++      enum rxrpc_call_state state;
+       long timeo;
+-      bool more;
+-      int ret, copied;
++      bool more = msg->msg_flags & MSG_MORE;
++      int ret, copied = 0;
+       timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
+       /* this should be in poll */
+       sk_clear_bit(SOCKWQ_ASYNC_NOSPACE, sk);
++reload:
++      ret = -EPIPE;
+       if (sk->sk_shutdown & SEND_SHUTDOWN)
+-              return -EPIPE;
+-
+-      more = msg->msg_flags & MSG_MORE;
+-
++              goto maybe_error;
++      state = READ_ONCE(call->state);
++      ret = -ESHUTDOWN;
++      if (state >= RXRPC_CALL_COMPLETE)
++              goto maybe_error;
++      ret = -EPROTO;
++      if (state != RXRPC_CALL_CLIENT_SEND_REQUEST &&
++          state != RXRPC_CALL_SERVER_ACK_REQUEST &&
++          state != RXRPC_CALL_SERVER_SEND_REPLY)
++              goto maybe_error;
++
++      ret = -EMSGSIZE;
+       if (call->tx_total_len != -1) {
+-              if (len > call->tx_total_len)
+-                      return -EMSGSIZE;
+-              if (!more && len != call->tx_total_len)
+-                      return -EMSGSIZE;
++              if (len - copied > call->tx_total_len)
++                      goto maybe_error;
++              if (!more && len - copied != call->tx_total_len)
++                      goto maybe_error;
+       }
+       skb = call->tx_pending;
+       call->tx_pending = NULL;
+       rxrpc_see_skb(skb, rxrpc_skb_seen);
+-      copied = 0;
+       do {
+               /* Check to see if there's a ping ACK to reply to. */
+               if (call->ackr_reason == RXRPC_ACK_PING_RESPONSE)
+@@ -331,16 +339,8 @@ static int rxrpc_send_data(struct rxrpc_sock *rx,
+                       _debug("alloc");
+-                      if (!rxrpc_check_tx_space(call, NULL)) {
+-                              ret = -EAGAIN;
+-                              if (msg->msg_flags & MSG_DONTWAIT)
+-                                      goto maybe_error;
+-                              ret = rxrpc_wait_for_tx_window(rx, call,
+-                                                             &timeo,
+-                                                             msg->msg_flags & MSG_WAITALL);
+-                              if (ret < 0)
+-                                      goto maybe_error;
+-                      }
++                      if (!rxrpc_check_tx_space(call, NULL))
++                              goto wait_for_space;
+                       max = RXRPC_JUMBO_DATALEN;
+                       max -= call->conn->security_size;
+@@ -485,6 +485,27 @@ static int rxrpc_send_data(struct rxrpc_sock *rx,
+ efault:
+       ret = -EFAULT;
+       goto out;
++
++wait_for_space:
++      ret = -EAGAIN;
++      if (msg->msg_flags & MSG_DONTWAIT)
++              goto maybe_error;
++      mutex_unlock(&call->user_mutex);
++      *_dropped_lock = true;
++      ret = rxrpc_wait_for_tx_window(rx, call, &timeo,
++                                     msg->msg_flags & MSG_WAITALL);
++      if (ret < 0)
++              goto maybe_error;
++      if (call->interruptibility == RXRPC_INTERRUPTIBLE) {
++              if (mutex_lock_interruptible(&call->user_mutex) < 0) {
++                      ret = sock_intr_errno(timeo);
++                      goto maybe_error;
++              }
++      } else {
++              mutex_lock(&call->user_mutex);
++      }
++      *_dropped_lock = false;
++      goto reload;
+ }
+ /*
+@@ -646,6 +667,7 @@ int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len)
+       enum rxrpc_call_state state;
+       struct rxrpc_call *call;
+       unsigned long now, j;
++      bool dropped_lock = false;
+       int ret;
+       struct rxrpc_send_params p = {
+@@ -754,21 +776,13 @@ int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len)
+                       ret = rxrpc_send_abort_packet(call);
+       } else if (p.command != RXRPC_CMD_SEND_DATA) {
+               ret = -EINVAL;
+-      } else if (rxrpc_is_client_call(call) &&
+-                 state != RXRPC_CALL_CLIENT_SEND_REQUEST) {
+-              /* request phase complete for this client call */
+-              ret = -EPROTO;
+-      } else if (rxrpc_is_service_call(call) &&
+-                 state != RXRPC_CALL_SERVER_ACK_REQUEST &&
+-                 state != RXRPC_CALL_SERVER_SEND_REPLY) {
+-              /* Reply phase not begun or not complete for service call. */
+-              ret = -EPROTO;
+       } else {
+-              ret = rxrpc_send_data(rx, call, msg, len, NULL);
++              ret = rxrpc_send_data(rx, call, msg, len, NULL, &dropped_lock);
+       }
+ out_put_unlock:
+-      mutex_unlock(&call->user_mutex);
++      if (!dropped_lock)
++              mutex_unlock(&call->user_mutex);
+ error_put:
+       rxrpc_put_call(call, rxrpc_call_put);
+       _leave(" = %d", ret);
+@@ -796,6 +810,7 @@ int rxrpc_kernel_send_data(struct socket *sock, struct rxrpc_call *call,
+                          struct msghdr *msg, size_t len,
+                          rxrpc_notify_end_tx_t notify_end_tx)
+ {
++      bool dropped_lock = false;
+       int ret;
+       _enter("{%d,%s},", call->debug_id, rxrpc_call_states[call->state]);
+@@ -813,7 +828,7 @@ int rxrpc_kernel_send_data(struct socket *sock, struct rxrpc_call *call,
+       case RXRPC_CALL_SERVER_ACK_REQUEST:
+       case RXRPC_CALL_SERVER_SEND_REPLY:
+               ret = rxrpc_send_data(rxrpc_sk(sock->sk), call, msg, len,
+-                                    notify_end_tx);
++                                    notify_end_tx, &dropped_lock);
+               break;
+       case RXRPC_CALL_COMPLETE:
+               read_lock_bh(&call->state_lock);
+@@ -827,7 +842,8 @@ int rxrpc_kernel_send_data(struct socket *sock, struct rxrpc_call *call,
+               break;
+       }
+-      mutex_unlock(&call->user_mutex);
++      if (!dropped_lock)
++              mutex_unlock(&call->user_mutex);
+       _leave(" = %d", ret);
+       return ret;
+ }
+-- 
+2.35.1
+
index 99213b35c82c24e14bfc4ae51fead483d3010f30..12e6dfbd908d32a8a31110fdc5e4d1c72d709361 100644 (file)
@@ -9,3 +9,55 @@ vfs-make-sync_filesystem-return-errors-from-sync_fs.patch
 xfs-return-errors-in-xfs_fs_sync_fs.patch
 xfs-only-bother-with-sync_filesystem-during-readonly-remount.patch
 kernel-sched-remove-dl_boosted-flag-comment.patch
+xfrm-fix-refcount-leak-in-__xfrm_policy_check.patch
+xfrm-clone-missing-x-lastused-in-xfrm_do_migrate.patch
+af_key-do-not-call-xfrm_probe_algs-in-parallel.patch
+xfrm-policy-fix-metadata-dst-dev-xmit-null-pointer-d.patch
+nfs-don-t-allocate-nfs_fattr-on-the-stack-in-__nfs42.patch
+nfsv4.2-fix-problems-with-__nfs42_ssc_open.patch
+sunrpc-rpc-level-errors-should-set-task-tk_rpc_statu.patch
+mm-huge_memory.c-use-helper-function-migration_entry.patch
+mm-smaps-don-t-access-young-dirty-bit-if-pte-unprese.patch
+rose-check-null-rose_loopback_neigh-loopback.patch
+nfc-pn533-fix-use-after-free-bugs-caused-by-pn532_cm.patch
+ice-xsk-force-rings-to-be-sized-to-power-of-2.patch
+ice-xsk-prohibit-usage-of-non-balanced-queue-id.patch
+net-mlx5e-properly-disable-vlan-strip-on-non-ul-reps.patch
+net-ipa-don-t-assume-smem-is-page-aligned.patch
+net-moxa-get-rid-of-asymmetry-in-dma-mapping-unmappi.patch
+bonding-802.3ad-fix-no-transmission-of-lacpdus.patch
+net-ipvtap-add-__init-__exit-annotations-to-module-i.patch
+netfilter-ebtables-reject-blobs-that-don-t-provide-a.patch
+bnxt_en-fix-nq-resource-accounting-during-vf-creatio.patch
+netfilter-nft_payload-report-erange-for-too-long-off.patch
+netfilter-nft_payload-do-not-truncate-csum_offset-an.patch
+netfilter-nf_tables-do-not-leave-chain-stats-enabled.patch
+netfilter-nft_osf-restrict-osf-to-ipv4-ipv6-and-inet.patch
+netfilter-nft_tunnel-restrict-it-to-netdev-family.patch
+netfilter-nftables-remove-redundant-assignment-of-va.patch
+netfilter-nf_tables-consolidate-rule-verdict-trace-c.patch
+netfilter-nft_cmp-optimize-comparison-for-16-bytes.patch
+netfilter-bitwise-improve-error-goto-labels.patch
+netfilter-nf_tables-upfront-validation-of-data-via-n.patch
+netfilter-nf_tables-disallow-jump-to-implicit-chain-.patch
+netfilter-nf_tables-disallow-binding-to-already-boun.patch
+tcp-tweak-len-truesize-ratio-for-coalesce-candidates.patch
+net-fix-data-races-around-sysctl_-rw-mem-_offset.patch
+net-fix-data-races-around-sysctl_-rw-mem_-max-defaul.patch
+net-fix-data-races-around-weight_p-and-dev_weight_-r.patch
+net-fix-data-races-around-netdev_max_backlog.patch
+net-fix-data-races-around-netdev_tstamp_prequeue.patch
+ratelimit-fix-data-races-in-___ratelimit.patch
+bpf-folding-omem_charge-into-sk_storage_charge.patch
+net-fix-data-races-around-sysctl_optmem_max.patch
+net-fix-a-data-race-around-sysctl_tstamp_allow_data.patch
+net-fix-a-data-race-around-sysctl_net_busy_poll.patch
+net-fix-a-data-race-around-sysctl_net_busy_read.patch
+net-fix-a-data-race-around-netdev_budget.patch
+net-fix-a-data-race-around-netdev_budget_usecs.patch
+net-fix-data-races-around-sysctl_fb_tunnels_only_for.patch
+net-fix-data-races-around-sysctl_devconf_inherit_ini.patch
+net-fix-a-data-race-around-sysctl_somaxconn.patch
+ixgbe-stop-resetting-systime-in-ixgbe_ptp_start_cycl.patch
+rxrpc-fix-locking-in-rxrpc-s-sendmsg.patch
+ionic-fix-up-issues-with-handling-eagain-on-fw-cmds.patch
diff --git a/queue-5.10/sunrpc-rpc-level-errors-should-set-task-tk_rpc_statu.patch b/queue-5.10/sunrpc-rpc-level-errors-should-set-task-tk_rpc_statu.patch
new file mode 100644 (file)
index 0000000..a5d99f3
--- /dev/null
@@ -0,0 +1,35 @@
+From ea1829a98d1576586b285880ca1e185e94d78598 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 3 Aug 2022 14:55:03 -0400
+Subject: SUNRPC: RPC level errors should set task->tk_rpc_status
+
+From: Trond Myklebust <trond.myklebust@hammerspace.com>
+
+[ Upstream commit ed06fce0b034b2e25bd93430f5c4cbb28036cc1a ]
+
+Fix up a case in call_encode() where we're failing to set
+task->tk_rpc_status when an RPC level error occurred.
+
+Fixes: 9c5948c24869 ("SUNRPC: task should be exit if encode return EKEYEXPIRED more times")
+Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/sunrpc/clnt.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
+index c5af31312e0cf..78c6648af7827 100644
+--- a/net/sunrpc/clnt.c
++++ b/net/sunrpc/clnt.c
+@@ -1867,7 +1867,7 @@ call_encode(struct rpc_task *task)
+                       break;
+               case -EKEYEXPIRED:
+                       if (!task->tk_cred_retry) {
+-                              rpc_exit(task, task->tk_status);
++                              rpc_call_rpcerror(task, task->tk_status);
+                       } else {
+                               task->tk_action = call_refresh;
+                               task->tk_cred_retry--;
+-- 
+2.35.1
+
diff --git a/queue-5.10/tcp-tweak-len-truesize-ratio-for-coalesce-candidates.patch b/queue-5.10/tcp-tweak-len-truesize-ratio-for-coalesce-candidates.patch
new file mode 100644 (file)
index 0000000..afc0b7e
--- /dev/null
@@ -0,0 +1,147 @@
+From 641483a35e48ecf7bb55574136a57e1e62630769 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 21 Jul 2021 03:15:28 -0700
+Subject: tcp: tweak len/truesize ratio for coalesce candidates
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit 240bfd134c592791fdceba1ce7fc3f973c33df2d ]
+
+tcp_grow_window() is using skb->len/skb->truesize to increase tp->rcv_ssthresh
+which has a direct impact on advertized window sizes.
+
+We added TCP coalescing in linux-3.4 & linux-3.5:
+
+Instead of storing skbs with one or two MSS in receive queue (or OFO queue),
+we try to append segments together to reduce memory overhead.
+
+High performance network drivers tend to cook skb with 3 parts :
+
+1) sk_buff structure (256 bytes)
+2) skb->head contains room to copy headers as needed, and skb_shared_info
+3) page fragment(s) containing the ~1514 bytes frame (or more depending on MTU)
+
+Once coalesced into a previous skb, 1) and 2) are freed.
+
+We can therefore tweak the way we compute len/truesize ratio knowing
+that skb->truesize is inflated by 1) and 2) soon to be freed.
+
+This is done only for in-order skb, or skb coalesced into OFO queue.
+
+The result is that low rate flows no longer pay the memory price of having
+low GRO aggregation factor. Same result for drivers not using GRO.
+
+This is critical to allow a big enough receiver window,
+typically tcp_rmem[2] / 2.
+
+We have been using this at Google for about 5 years, it is due time
+to make it upstream.
+
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Cc: Soheil Hassas Yeganeh <soheil@google.com>
+Cc: Neal Cardwell <ncardwell@google.com>
+Cc: Yuchung Cheng <ycheng@google.com>
+Acked-by: Soheil Hassas Yeganeh <soheil@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/ipv4/tcp_input.c | 38 ++++++++++++++++++++++++++++++--------
+ 1 file changed, 30 insertions(+), 8 deletions(-)
+
+diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
+index d35e88b5ffcbe..33a3fb04ac4df 100644
+--- a/net/ipv4/tcp_input.c
++++ b/net/ipv4/tcp_input.c
+@@ -454,11 +454,12 @@ static void tcp_sndbuf_expand(struct sock *sk)
+  */
+ /* Slow part of check#2. */
+-static int __tcp_grow_window(const struct sock *sk, const struct sk_buff *skb)
++static int __tcp_grow_window(const struct sock *sk, const struct sk_buff *skb,
++                           unsigned int skbtruesize)
+ {
+       struct tcp_sock *tp = tcp_sk(sk);
+       /* Optimize this! */
+-      int truesize = tcp_win_from_space(sk, skb->truesize) >> 1;
++      int truesize = tcp_win_from_space(sk, skbtruesize) >> 1;
+       int window = tcp_win_from_space(sk, sock_net(sk)->ipv4.sysctl_tcp_rmem[2]) >> 1;
+       while (tp->rcv_ssthresh <= window) {
+@@ -471,7 +472,27 @@ static int __tcp_grow_window(const struct sock *sk, const struct sk_buff *skb)
+       return 0;
+ }
+-static void tcp_grow_window(struct sock *sk, const struct sk_buff *skb)
++/* Even if skb appears to have a bad len/truesize ratio, TCP coalescing
++ * can play nice with us, as sk_buff and skb->head might be either
++ * freed or shared with up to MAX_SKB_FRAGS segments.
++ * Only give a boost to drivers using page frag(s) to hold the frame(s),
++ * and if no payload was pulled in skb->head before reaching us.
++ */
++static u32 truesize_adjust(bool adjust, const struct sk_buff *skb)
++{
++      u32 truesize = skb->truesize;
++
++      if (adjust && !skb_headlen(skb)) {
++              truesize -= SKB_TRUESIZE(skb_end_offset(skb));
++              /* paranoid check, some drivers might be buggy */
++              if (unlikely((int)truesize < (int)skb->len))
++                      truesize = skb->truesize;
++      }
++      return truesize;
++}
++
++static void tcp_grow_window(struct sock *sk, const struct sk_buff *skb,
++                          bool adjust)
+ {
+       struct tcp_sock *tp = tcp_sk(sk);
+       int room;
+@@ -480,15 +501,16 @@ static void tcp_grow_window(struct sock *sk, const struct sk_buff *skb)
+       /* Check #1 */
+       if (room > 0 && !tcp_under_memory_pressure(sk)) {
++              unsigned int truesize = truesize_adjust(adjust, skb);
+               int incr;
+               /* Check #2. Increase window, if skb with such overhead
+                * will fit to rcvbuf in future.
+                */
+-              if (tcp_win_from_space(sk, skb->truesize) <= skb->len)
++              if (tcp_win_from_space(sk, truesize) <= skb->len)
+                       incr = 2 * tp->advmss;
+               else
+-                      incr = __tcp_grow_window(sk, skb);
++                      incr = __tcp_grow_window(sk, skb, truesize);
+               if (incr) {
+                       incr = max_t(int, incr, 2 * skb->len);
+@@ -782,7 +804,7 @@ static void tcp_event_data_recv(struct sock *sk, struct sk_buff *skb)
+       tcp_ecn_check_ce(sk, skb);
+       if (skb->len >= 128)
+-              tcp_grow_window(sk, skb);
++              tcp_grow_window(sk, skb, true);
+ }
+ /* Called to compute a smoothed rtt estimate. The data fed to this
+@@ -4761,7 +4783,7 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb)
+                * and trigger fast retransmit.
+                */
+               if (tcp_is_sack(tp))
+-                      tcp_grow_window(sk, skb);
++                      tcp_grow_window(sk, skb, true);
+               kfree_skb_partial(skb, fragstolen);
+               skb = NULL;
+               goto add_sack;
+@@ -4849,7 +4871,7 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb)
+                * and trigger fast retransmit.
+                */
+               if (tcp_is_sack(tp))
+-                      tcp_grow_window(sk, skb);
++                      tcp_grow_window(sk, skb, false);
+               skb_condense(skb);
+               skb_set_owner_r(skb, sk);
+       }
+-- 
+2.35.1
+
diff --git a/queue-5.10/xfrm-clone-missing-x-lastused-in-xfrm_do_migrate.patch b/queue-5.10/xfrm-clone-missing-x-lastused-in-xfrm_do_migrate.patch
new file mode 100644 (file)
index 0000000..7fce24b
--- /dev/null
@@ -0,0 +1,35 @@
+From 9977a0b819fc9ef637716cfcfb4cbe9cb62dfb51 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 27 Jul 2022 17:41:22 +0200
+Subject: xfrm: clone missing x->lastused in xfrm_do_migrate
+
+From: Antony Antony <antony.antony@secunet.com>
+
+[ Upstream commit 6aa811acdb76facca0b705f4e4c1d948ccb6af8b ]
+
+x->lastused was not cloned in xfrm_do_migrate. Add it to clone during
+migrate.
+
+Fixes: 80c9abaabf42 ("[XFRM]: Extension for dynamic update of endpoint address(es)")
+Signed-off-by: Antony Antony <antony.antony@secunet.com>
+Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/xfrm/xfrm_state.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
+index bc0bbb1571cef..fdbd56ed4bd52 100644
+--- a/net/xfrm/xfrm_state.c
++++ b/net/xfrm/xfrm_state.c
+@@ -1557,6 +1557,7 @@ static struct xfrm_state *xfrm_state_clone(struct xfrm_state *orig,
+       x->replay = orig->replay;
+       x->preplay = orig->preplay;
+       x->mapping_maxage = orig->mapping_maxage;
++      x->lastused = orig->lastused;
+       x->new_mapping = 0;
+       x->new_mapping_sport = 0;
+-- 
+2.35.1
+
diff --git a/queue-5.10/xfrm-fix-refcount-leak-in-__xfrm_policy_check.patch b/queue-5.10/xfrm-fix-refcount-leak-in-__xfrm_policy_check.patch
new file mode 100644 (file)
index 0000000..af01285
--- /dev/null
@@ -0,0 +1,41 @@
+From b160006a887ff63c1503626ead78c749d11b6d47 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sun, 24 Jul 2022 17:55:58 +0800
+Subject: xfrm: fix refcount leak in __xfrm_policy_check()
+
+From: Xin Xiong <xiongx18@fudan.edu.cn>
+
+[ Upstream commit 9c9cb23e00ddf45679b21b4dacc11d1ae7961ebe ]
+
+The issue happens on an error path in __xfrm_policy_check(). When the
+fetching process of the object `pols[1]` fails, the function simply
+returns 0, forgetting to decrement the reference count of `pols[0]`,
+which is incremented earlier by either xfrm_sk_policy_lookup() or
+xfrm_policy_lookup(). This may result in memory leaks.
+
+Fix it by decreasing the reference count of `pols[0]` in that path.
+
+Fixes: 134b0fc544ba ("IPsec: propagate security module errors up from flow_cache_lookup")
+Signed-off-by: Xin Xiong <xiongx18@fudan.edu.cn>
+Signed-off-by: Xin Tan <tanxin.ctf@gmail.com>
+Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/xfrm/xfrm_policy.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
+index 603b05ed7eb4c..2cd66f3e52386 100644
+--- a/net/xfrm/xfrm_policy.c
++++ b/net/xfrm/xfrm_policy.c
+@@ -3641,6 +3641,7 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
+               if (pols[1]) {
+                       if (IS_ERR(pols[1])) {
+                               XFRM_INC_STATS(net, LINUX_MIB_XFRMINPOLERROR);
++                              xfrm_pol_put(pols[0]);
+                               return 0;
+                       }
+                       pols[1]->curlft.use_time = ktime_get_real_seconds();
+-- 
+2.35.1
+
diff --git a/queue-5.10/xfrm-policy-fix-metadata-dst-dev-xmit-null-pointer-d.patch b/queue-5.10/xfrm-policy-fix-metadata-dst-dev-xmit-null-pointer-d.patch
new file mode 100644 (file)
index 0000000..1a904a3
--- /dev/null
@@ -0,0 +1,95 @@
+From 698c782651239247fd493bc57a0f75cd56428690 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 16 Aug 2022 18:30:50 +0300
+Subject: xfrm: policy: fix metadata dst->dev xmit null pointer dereference
+
+From: Nikolay Aleksandrov <razor@blackwall.org>
+
+[ Upstream commit 17ecd4a4db4783392edd4944f5e8268205083f70 ]
+
+When we try to transmit an skb with metadata_dst attached (i.e. dst->dev
+== NULL) through xfrm interface we can hit a null pointer dereference[1]
+in xfrmi_xmit2() -> xfrm_lookup_with_ifid() due to the check for a
+loopback skb device when there's no policy which dereferences dst->dev
+unconditionally. Not having dst->dev can be interpreted as it not being
+a loopback device, so just add a check for a null dst_orig->dev.
+
+With this fix xfrm interface's Tx error counters go up as usual.
+
+[1] net-next calltrace captured via netconsole:
+  BUG: kernel NULL pointer dereference, address: 00000000000000c0
+  #PF: supervisor read access in kernel mode
+  #PF: error_code(0x0000) - not-present page
+  PGD 0 P4D 0
+  Oops: 0000 [#1] PREEMPT SMP
+  CPU: 1 PID: 7231 Comm: ping Kdump: loaded Not tainted 5.19.0+ #24
+  Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.16.0-1.fc36 04/01/2014
+  RIP: 0010:xfrm_lookup_with_ifid+0x5eb/0xa60
+  Code: 8d 74 24 38 e8 26 a4 37 00 48 89 c1 e9 12 fc ff ff 49 63 ed 41 83 fd be 0f 85 be 01 00 00 41 be ff ff ff ff 45 31 ed 48 8b 03 <f6> 80 c0 00 00 00 08 75 0f 41 80 bc 24 19 0d 00 00 01 0f 84 1e 02
+  RSP: 0018:ffffb0db82c679f0 EFLAGS: 00010246
+  RAX: 0000000000000000 RBX: ffffd0db7fcad430 RCX: ffffb0db82c67a10
+  RDX: 0000000000000000 RSI: 0000000000000000 RDI: ffffb0db82c67a80
+  RBP: ffffb0db82c67a80 R08: ffffb0db82c67a14 R09: 0000000000000000
+  R10: 0000000000000000 R11: ffff8fa449667dc8 R12: ffffffff966db880
+  R13: 0000000000000000 R14: 00000000ffffffff R15: 0000000000000000
+  FS:  00007ff35c83f000(0000) GS:ffff8fa478480000(0000) knlGS:0000000000000000
+  CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+  CR2: 00000000000000c0 CR3: 000000001ebb7000 CR4: 0000000000350ee0
+  Call Trace:
+   <TASK>
+   xfrmi_xmit+0xde/0x460
+   ? tcf_bpf_act+0x13d/0x2a0
+   dev_hard_start_xmit+0x72/0x1e0
+   __dev_queue_xmit+0x251/0xd30
+   ip_finish_output2+0x140/0x550
+   ip_push_pending_frames+0x56/0x80
+   raw_sendmsg+0x663/0x10a0
+   ? try_charge_memcg+0x3fd/0x7a0
+   ? __mod_memcg_lruvec_state+0x93/0x110
+   ? sock_sendmsg+0x30/0x40
+   sock_sendmsg+0x30/0x40
+   __sys_sendto+0xeb/0x130
+   ? handle_mm_fault+0xae/0x280
+   ? do_user_addr_fault+0x1e7/0x680
+   ? kvm_read_and_reset_apf_flags+0x3b/0x50
+   __x64_sys_sendto+0x20/0x30
+   do_syscall_64+0x34/0x80
+   entry_SYSCALL_64_after_hwframe+0x46/0xb0
+  RIP: 0033:0x7ff35cac1366
+  Code: eb 0b 00 f7 d8 64 89 02 48 c7 c0 ff ff ff ff eb b8 0f 1f 00 41 89 ca 64 8b 04 25 18 00 00 00 85 c0 75 11 b8 2c 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 72 c3 90 55 48 83 ec 30 44 89 4c 24 2c 4c 89
+  RSP: 002b:00007fff738e4028 EFLAGS: 00000246 ORIG_RAX: 000000000000002c
+  RAX: ffffffffffffffda RBX: 00007fff738e57b0 RCX: 00007ff35cac1366
+  RDX: 0000000000000040 RSI: 0000557164e4b450 RDI: 0000000000000003
+  RBP: 0000557164e4b450 R08: 00007fff738e7a2c R09: 0000000000000010
+  R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000040
+  R13: 00007fff738e5770 R14: 00007fff738e4030 R15: 0000001d00000001
+   </TASK>
+  Modules linked in: netconsole veth br_netfilter bridge bonding virtio_net [last unloaded: netconsole]
+  CR2: 00000000000000c0
+
+CC: Steffen Klassert <steffen.klassert@secunet.com>
+CC: Daniel Borkmann <daniel@iogearbox.net>
+Fixes: 2d151d39073a ("xfrm: Add possibility to set the default to block if we have no policy")
+Signed-off-by: Nikolay Aleksandrov <razor@blackwall.org>
+Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/xfrm/xfrm_policy.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
+index 2cd66f3e52386..0d12bdf59d4cc 100644
+--- a/net/xfrm/xfrm_policy.c
++++ b/net/xfrm/xfrm_policy.c
+@@ -3164,7 +3164,7 @@ struct dst_entry *xfrm_lookup_with_ifid(struct net *net,
+       return dst;
+ nopol:
+-      if (!(dst_orig->dev->flags & IFF_LOOPBACK) &&
++      if ((!dst_orig->dev || !(dst_orig->dev->flags & IFF_LOOPBACK)) &&
+           net->xfrm.policy_default[dir] == XFRM_USERPOLICY_BLOCK) {
+               err = -EPERM;
+               goto error;
+-- 
+2.35.1
+