git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
Fixes for 4.19
author: Sasha Levin <sashal@kernel.org>
Mon, 1 Feb 2021 16:51:37 +0000 (11:51 -0500)
committer: Sasha Levin <sashal@kernel.org>
Mon, 1 Feb 2021 16:51:37 +0000 (11:51 -0500)
Signed-off-by: Sasha Levin <sashal@kernel.org>
queue-4.19/can-dev-prevent-potential-information-leak-in-can_fi.patch [new file with mode: 0644]
queue-4.19/iwlwifi-pcie-reschedule-in-long-running-memory-reads.patch [new file with mode: 0644]
queue-4.19/iwlwifi-pcie-use-jiffies-for-memory-read-spin-time-l.patch [new file with mode: 0644]
queue-4.19/mac80211-pause-tx-while-changing-interface-type.patch [new file with mode: 0644]
queue-4.19/net-mlx5-fix-memory-leak-on-flow-table-creation-erro.patch [new file with mode: 0644]
queue-4.19/pnfs-nfsv4-fix-a-layout-segment-leak-in-pnfs_layout_.patch [new file with mode: 0644]
queue-4.19/rdma-cxgb4-fix-the-reported-max_recv_sge-value.patch [new file with mode: 0644]
queue-4.19/series
queue-4.19/xfrm-fix-disable_xfrm-sysctl-when-used-on-xfrm-inter.patch [new file with mode: 0644]
queue-4.19/xfrm-fix-oops-in-xfrm_replay_advance_bmp.patch [new file with mode: 0644]

diff --git a/queue-4.19/can-dev-prevent-potential-information-leak-in-can_fi.patch b/queue-4.19/can-dev-prevent-potential-information-leak-in-can_fi.patch
new file mode 100644 (file)
index 0000000..1e8368d
--- /dev/null
@@ -0,0 +1,38 @@
+From 6c8c4759f36efc183bbcdefe22743a7bcdf49449 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 21 Jan 2021 09:08:05 +0300
+Subject: can: dev: prevent potential information leak in can_fill_info()
+
+From: Dan Carpenter <dan.carpenter@oracle.com>
+
+[ Upstream commit b552766c872f5b0d90323b24e4c9e8fa67486dd5 ]
+
+The "bec" struct isn't necessarily always initialized. For example, the
+mcp251xfd_get_berr_counter() function doesn't initialize anything if the
+interface is down.
+
+Fixes: 52c793f24054 ("can: netlink support for bus-error reporting and counters")
+Link: https://lore.kernel.org/r/YAkaRdRJncsJO8Ve@mwanda
+Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
+Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/can/dev.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/drivers/net/can/dev.c b/drivers/net/can/dev.c
+index 953c6fdc75cc4..1bd181b33c24f 100644
+--- a/drivers/net/can/dev.c
++++ b/drivers/net/can/dev.c
+@@ -1142,7 +1142,7 @@ static int can_fill_info(struct sk_buff *skb, const struct net_device *dev)
+ {
+       struct can_priv *priv = netdev_priv(dev);
+       struct can_ctrlmode cm = {.flags = priv->ctrlmode};
+-      struct can_berr_counter bec;
++      struct can_berr_counter bec = { };
+       enum can_state state = priv->state;
+       if (priv->do_get_state)
+-- 
+2.27.0
+
diff --git a/queue-4.19/iwlwifi-pcie-reschedule-in-long-running-memory-reads.patch b/queue-4.19/iwlwifi-pcie-reschedule-in-long-running-memory-reads.patch
new file mode 100644 (file)
index 0000000..72504d7
--- /dev/null
@@ -0,0 +1,70 @@
+From 61fb3e9751bc85bba3e9297e9bad2c6f7952a7bf Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 15 Jan 2021 13:05:58 +0200
+Subject: iwlwifi: pcie: reschedule in long-running memory reads
+
+From: Johannes Berg <johannes.berg@intel.com>
+
+[ Upstream commit 3d372c4edfd4dffb7dea71c6b096fb414782b776 ]
+
+If we spin for a long time in memory reads that (for some reason in
+hardware) take a long time, then we'll eventually get messages such
+as
+
+  watchdog: BUG: soft lockup - CPU#2 stuck for 24s! [kworker/2:2:272]
+
+This is because the reading really does take a very long time, and
+we don't schedule, so we're hogging the CPU with this task, at least
+if CONFIG_PREEMPT is not set, e.g. with CONFIG_PREEMPT_VOLUNTARY=y.
+
+Previously I misinterpreted the situation and thought that this was
+only going to happen if we had interrupts disabled, and then fixed
+this (which is good anyway, however), but that didn't always help;
+looking at it again now I realized that the spin unlock will only
+reschedule if CONFIG_PREEMPT is used.
+
+In order to avoid this issue, change the code to cond_resched() if
+we've been spinning for too long here.
+
+Signed-off-by: Johannes Berg <johannes.berg@intel.com>
+Fixes: 04516706bb99 ("iwlwifi: pcie: limit memory read spin time")
+Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
+Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
+Link: https://lore.kernel.org/r/iwlwifi.20210115130253.217a9d6a6a12.If964cb582ab0aaa94e81c4ff3b279eaafda0fd3f@changeid
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/wireless/intel/iwlwifi/pcie/trans.c | 8 +++++++-
+ 1 file changed, 7 insertions(+), 1 deletion(-)
+
+diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c
+index fe772b716a8df..fcda33482887b 100644
+--- a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c
++++ b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c
+@@ -2127,6 +2127,7 @@ static int iwl_trans_pcie_read_mem(struct iwl_trans *trans, u32 addr,
+       while (offs < dwords) {
+               /* limit the time we spin here under lock to 1/2s */
+               unsigned long end = jiffies + HZ / 2;
++              bool resched = false;
+               if (iwl_trans_grab_nic_access(trans, &flags)) {
+                       iwl_write32(trans, HBUS_TARG_MEM_RADDR,
+@@ -2137,10 +2138,15 @@ static int iwl_trans_pcie_read_mem(struct iwl_trans *trans, u32 addr,
+                                                       HBUS_TARG_MEM_RDAT);
+                               offs++;
+-                              if (time_after(jiffies, end))
++                              if (time_after(jiffies, end)) {
++                                      resched = true;
+                                       break;
++                              }
+                       }
+                       iwl_trans_release_nic_access(trans, &flags);
++
++                      if (resched)
++                              cond_resched();
+               } else {
+                       return -EBUSY;
+               }
+-- 
+2.27.0
+
diff --git a/queue-4.19/iwlwifi-pcie-use-jiffies-for-memory-read-spin-time-l.patch b/queue-4.19/iwlwifi-pcie-use-jiffies-for-memory-read-spin-time-l.patch
new file mode 100644 (file)
index 0000000..85434af
--- /dev/null
@@ -0,0 +1,56 @@
+From 03731552753026a77d739d95e48a95d822ff4071 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 15 Jan 2021 13:05:57 +0200
+Subject: iwlwifi: pcie: use jiffies for memory read spin time limit
+
+From: Johannes Berg <johannes.berg@intel.com>
+
+[ Upstream commit 6701317476bbfb1f341aa935ddf75eb73af784f9 ]
+
+There's no reason to use ktime_get() since we don't need any better
+precision than jiffies, and since we no longer disable interrupts
+around this code (when grabbing NIC access), jiffies will work fine.
+Use jiffies instead of ktime_get().
+
+This cleanup is preparation for the following patch "iwlwifi: pcie: reschedule
+in long-running memory reads". The code gets simpler with the weird clock use
+etc. removed before we add cond_resched().
+
+Signed-off-by: Johannes Berg <johannes.berg@intel.com>
+Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
+Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
+Link: https://lore.kernel.org/r/iwlwifi.20210115130253.621c948b1fad.I3ee9f4bc4e74a0c9125d42fb7c35cd80df4698a1@changeid
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/wireless/intel/iwlwifi/pcie/trans.c | 8 ++------
+ 1 file changed, 2 insertions(+), 6 deletions(-)
+
+diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c
+index f48c7cac122e9..fe772b716a8df 100644
+--- a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c
++++ b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c
+@@ -2126,7 +2126,7 @@ static int iwl_trans_pcie_read_mem(struct iwl_trans *trans, u32 addr,
+       while (offs < dwords) {
+               /* limit the time we spin here under lock to 1/2s */
+-              ktime_t timeout = ktime_add_us(ktime_get(), 500 * USEC_PER_MSEC);
++              unsigned long end = jiffies + HZ / 2;
+               if (iwl_trans_grab_nic_access(trans, &flags)) {
+                       iwl_write32(trans, HBUS_TARG_MEM_RADDR,
+@@ -2137,11 +2137,7 @@ static int iwl_trans_pcie_read_mem(struct iwl_trans *trans, u32 addr,
+                                                       HBUS_TARG_MEM_RDAT);
+                               offs++;
+-                              /* calling ktime_get is expensive so
+-                               * do it once in 128 reads
+-                               */
+-                              if (offs % 128 == 0 && ktime_after(ktime_get(),
+-                                                                 timeout))
++                              if (time_after(jiffies, end))
+                                       break;
+                       }
+                       iwl_trans_release_nic_access(trans, &flags);
+-- 
+2.27.0
+
diff --git a/queue-4.19/mac80211-pause-tx-while-changing-interface-type.patch b/queue-4.19/mac80211-pause-tx-while-changing-interface-type.patch
new file mode 100644 (file)
index 0000000..19035b8
--- /dev/null
@@ -0,0 +1,66 @@
+From 47ae45af4241dc2fdcddb3566d4d2c5170e80662 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 22 Jan 2021 17:11:16 +0100
+Subject: mac80211: pause TX while changing interface type
+
+From: Johannes Berg <johannes.berg@intel.com>
+
+[ Upstream commit 054c9939b4800a91475d8d89905827bf9e1ad97a ]
+
+syzbot reported a crash that happened when changing the interface
+type around a lot, and while it might have been easy to fix just
+the symptom there, a little deeper investigation found that really
+the reason is that we allowed packets to be transmitted while in
+the middle of changing the interface type.
+
+Disallow TX by stopping the queues while changing the type.
+
+Fixes: 34d4bc4d41d2 ("mac80211: support runtime interface type changes")
+Reported-by: syzbot+d7a3b15976bf7de2238a@syzkaller.appspotmail.com
+Link: https://lore.kernel.org/r/20210122171115.b321f98f4d4f.I6997841933c17b093535c31d29355be3c0c39628@changeid
+Signed-off-by: Johannes Berg <johannes.berg@intel.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/mac80211/ieee80211_i.h | 1 +
+ net/mac80211/iface.c       | 6 ++++++
+ 2 files changed, 7 insertions(+)
+
+diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
+index a879d8071712b..fc715bba59146 100644
+--- a/net/mac80211/ieee80211_i.h
++++ b/net/mac80211/ieee80211_i.h
+@@ -1051,6 +1051,7 @@ enum queue_stop_reason {
+       IEEE80211_QUEUE_STOP_REASON_FLUSH,
+       IEEE80211_QUEUE_STOP_REASON_TDLS_TEARDOWN,
+       IEEE80211_QUEUE_STOP_REASON_RESERVE_TID,
++      IEEE80211_QUEUE_STOP_REASON_IFTYPE_CHANGE,
+       IEEE80211_QUEUE_STOP_REASONS,
+ };
+diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c
+index 152d4365f9616..511ca6f74239d 100644
+--- a/net/mac80211/iface.c
++++ b/net/mac80211/iface.c
+@@ -1542,6 +1542,10 @@ static int ieee80211_runtime_change_iftype(struct ieee80211_sub_if_data *sdata,
+       if (ret)
+               return ret;
++      ieee80211_stop_vif_queues(local, sdata,
++                                IEEE80211_QUEUE_STOP_REASON_IFTYPE_CHANGE);
++      synchronize_net();
++
+       ieee80211_do_stop(sdata, false);
+       ieee80211_teardown_sdata(sdata);
+@@ -1562,6 +1566,8 @@ static int ieee80211_runtime_change_iftype(struct ieee80211_sub_if_data *sdata,
+       err = ieee80211_do_open(&sdata->wdev, false);
+       WARN(err, "type change: do_open returned %d", err);
++      ieee80211_wake_vif_queues(local, sdata,
++                                IEEE80211_QUEUE_STOP_REASON_IFTYPE_CHANGE);
+       return ret;
+ }
+-- 
+2.27.0
+
diff --git a/queue-4.19/net-mlx5-fix-memory-leak-on-flow-table-creation-erro.patch b/queue-4.19/net-mlx5-fix-memory-leak-on-flow-table-creation-erro.patch
new file mode 100644 (file)
index 0000000..42434a6
--- /dev/null
@@ -0,0 +1,36 @@
+From 4d9810d67091a08343610afdaa0fd7e35e3297f7 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 12 Jan 2021 14:04:29 +0200
+Subject: net/mlx5: Fix memory leak on flow table creation error flow
+
+From: Roi Dayan <roid@nvidia.com>
+
+[ Upstream commit 487c6ef81eb98d0a43cb08be91b1fcc9b4250626 ]
+
+When we create the ft object we also init rhltable in ft->fgs_hash.
+So in error flow before kfree of ft we need to destroy that rhltable.
+
+Fixes: 693c6883bbc4 ("net/mlx5: Add hash table for flow groups in flow table")
+Signed-off-by: Roi Dayan <roid@nvidia.com>
+Reviewed-by: Maor Dickman <maord@nvidia.com>
+Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/fs_core.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
+index b16e0f45d28c5..a38a0c86705ab 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
+@@ -1004,6 +1004,7 @@ static struct mlx5_flow_table *__mlx5_create_flow_table(struct mlx5_flow_namespa
+ destroy_ft:
+       root->cmds->destroy_flow_table(root->dev, ft);
+ free_ft:
++      rhltable_destroy(&ft->fgs_hash);
+       kfree(ft);
+ unlock_root:
+       mutex_unlock(&root->chain_lock);
+-- 
+2.27.0
+
diff --git a/queue-4.19/pnfs-nfsv4-fix-a-layout-segment-leak-in-pnfs_layout_.patch b/queue-4.19/pnfs-nfsv4-fix-a-layout-segment-leak-in-pnfs_layout_.patch
new file mode 100644 (file)
index 0000000..b304bb7
--- /dev/null
@@ -0,0 +1,35 @@
+From 44bafcd0c6f61dd0117f9777fd4be0109a6773e7 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 21 Jan 2021 16:34:37 -0500
+Subject: pNFS/NFSv4: Fix a layout segment leak in pnfs_layout_process()
+
+From: Trond Myklebust <trond.myklebust@hammerspace.com>
+
+[ Upstream commit 814b84971388cd5fb182f2e914265b3827758455 ]
+
+If the server returns a new stateid that does not match the one in our
+cache, then pnfs_layout_process() will leak the layout segments returned
+by pnfs_mark_layout_stateid_invalid().
+
+Fixes: 9888d837f3cf ("pNFS: Force a retry of LAYOUTGET if the stateid doesn't match our cache")
+Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/nfs/pnfs.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
+index 46ca5592b8b0d..4b165aa5a2561 100644
+--- a/fs/nfs/pnfs.c
++++ b/fs/nfs/pnfs.c
+@@ -2320,6 +2320,7 @@ out_forget:
+       spin_unlock(&ino->i_lock);
+       lseg->pls_layout = lo;
+       NFS_SERVER(ino)->pnfs_curr_ld->free_lseg(lseg);
++      pnfs_free_lseg_list(&free_me);
+       return ERR_PTR(-EAGAIN);
+ }
+-- 
+2.27.0
+
diff --git a/queue-4.19/rdma-cxgb4-fix-the-reported-max_recv_sge-value.patch b/queue-4.19/rdma-cxgb4-fix-the-reported-max_recv_sge-value.patch
new file mode 100644 (file)
index 0000000..3a054d4
--- /dev/null
@@ -0,0 +1,39 @@
+From b050382a69a9d2e4a68deaf4c408caea41f4b628 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 14 Jan 2021 21:14:23 +0200
+Subject: RDMA/cxgb4: Fix the reported max_recv_sge value
+
+From: Kamal Heib <kamalheib1@gmail.com>
+
+[ Upstream commit a372173bf314d374da4dd1155549d8ca7fc44709 ]
+
+The max_recv_sge value is wrongly reported when calling query_qp. This is
+happening due to a typo when assigning the max_recv_sge value: the value
+of sq_max_sges was assigned instead of rq_max_sges.
+
+Fixes: 3e5c02c9ef9a ("iw_cxgb4: Support query_qp() verb")
+Link: https://lore.kernel.org/r/20210114191423.423529-1-kamalheib1@gmail.com
+Signed-off-by: Kamal Heib <kamalheib1@gmail.com>
+Reviewed-by: Potnuri Bharat Teja <bharat@chelsio.com>
+Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/infiniband/hw/cxgb4/qp.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c
+index a9e3a11bea54a..caa6a502c37e2 100644
+--- a/drivers/infiniband/hw/cxgb4/qp.c
++++ b/drivers/infiniband/hw/cxgb4/qp.c
+@@ -2485,7 +2485,7 @@ int c4iw_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
+       init_attr->cap.max_send_wr = qhp->attr.sq_num_entries;
+       init_attr->cap.max_recv_wr = qhp->attr.rq_num_entries;
+       init_attr->cap.max_send_sge = qhp->attr.sq_max_sges;
+-      init_attr->cap.max_recv_sge = qhp->attr.sq_max_sges;
++      init_attr->cap.max_recv_sge = qhp->attr.rq_max_sges;
+       init_attr->cap.max_inline_data = T4_MAX_SEND_INLINE;
+       init_attr->sq_sig_type = qhp->sq_sig_all ? IB_SIGNAL_ALL_WR : 0;
+       return 0;
+-- 
+2.27.0
+
index 3e8ce83dea29241ad1ab967665ff77515e5521b4..ed62f86df4d063e5b156d6a9a1108bfc6dc1668b 100644 (file)
@@ -19,3 +19,12 @@ mt7601u-fix-rx-buffer-refcounting.patch
 xen-blkfront-allow-discard-nodes-to-be-optional.patch
 arm-imx-build-suspend-imx6.s-with-arm-instruction-set.patch
 netfilter-nft_dynset-add-timeout-extension-to-template.patch
+xfrm-fix-oops-in-xfrm_replay_advance_bmp.patch
+xfrm-fix-disable_xfrm-sysctl-when-used-on-xfrm-inter.patch
+rdma-cxgb4-fix-the-reported-max_recv_sge-value.patch
+pnfs-nfsv4-fix-a-layout-segment-leak-in-pnfs_layout_.patch
+iwlwifi-pcie-use-jiffies-for-memory-read-spin-time-l.patch
+iwlwifi-pcie-reschedule-in-long-running-memory-reads.patch
+mac80211-pause-tx-while-changing-interface-type.patch
+net-mlx5-fix-memory-leak-on-flow-table-creation-erro.patch
+can-dev-prevent-potential-information-leak-in-can_fi.patch
diff --git a/queue-4.19/xfrm-fix-disable_xfrm-sysctl-when-used-on-xfrm-inter.patch b/queue-4.19/xfrm-fix-disable_xfrm-sysctl-when-used-on-xfrm-inter.patch
new file mode 100644 (file)
index 0000000..971d79b
--- /dev/null
@@ -0,0 +1,53 @@
+From 5f421a7d0cd7998ea8119d86b30db59810e70b12 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 23 Dec 2020 17:00:46 +0200
+Subject: xfrm: fix disable_xfrm sysctl when used on xfrm interfaces
+
+From: Eyal Birger <eyal.birger@gmail.com>
+
+[ Upstream commit 9f8550e4bd9d78a8436c2061ad2530215f875376 ]
+
+The disable_xfrm flag signals that xfrm should not be performed during
+routing towards a device before reaching device xmit.
+
+For xfrm interfaces this is usually desired as they perform the outbound
+policy lookup as part of their xmit using their if_id.
+
+Before this change enabling this flag on xfrm interfaces prevented them
+from xmitting as xfrm_lookup_with_ifid() would not perform a policy lookup
+in case the original dst had the DST_NOXFRM flag.
+
+This optimization is incorrect when the lookup is done by the xfrm
+interface xmit logic.
+
+Fix by performing policy lookup when invoked by xfrmi as if_id != 0.
+
+Similarly it's unlikely for the 'no policy exists on net' check to yield
+any performance benefits when invoked from xfrmi.
+
+Fixes: f203b76d7809 ("xfrm: Add virtual xfrm interfaces")
+Signed-off-by: Eyal Birger <eyal.birger@gmail.com>
+Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/xfrm/xfrm_policy.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
+index 939f3adf075aa..e9aea82f370de 100644
+--- a/net/xfrm/xfrm_policy.c
++++ b/net/xfrm/xfrm_policy.c
+@@ -2101,8 +2101,8 @@ struct dst_entry *xfrm_lookup_with_ifid(struct net *net,
+               xflo.flags = flags;
+               /* To accelerate a bit...  */
+-              if ((dst_orig->flags & DST_NOXFRM) ||
+-                  !net->xfrm.policy_count[XFRM_POLICY_OUT])
++              if (!if_id && ((dst_orig->flags & DST_NOXFRM) ||
++                             !net->xfrm.policy_count[XFRM_POLICY_OUT]))
+                       goto nopol;
+               xdst = xfrm_bundle_lookup(net, fl, family, dir, &xflo, if_id);
+-- 
+2.27.0
+
diff --git a/queue-4.19/xfrm-fix-oops-in-xfrm_replay_advance_bmp.patch b/queue-4.19/xfrm-fix-oops-in-xfrm_replay_advance_bmp.patch
new file mode 100644 (file)
index 0000000..0184198
--- /dev/null
@@ -0,0 +1,92 @@
+From 161569ffead38f8278b06da17eefba28133790dd Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 14 Dec 2020 15:38:32 +0200
+Subject: xfrm: Fix oops in xfrm_replay_advance_bmp
+
+From: Shmulik Ladkani <shmulik@metanetworks.com>
+
+[ Upstream commit 56ce7c25ae1525d83cf80a880cf506ead1914250 ]
+
+When setting xfrm replay_window to values higher than 32, a rare
+page-fault occurs in xfrm_replay_advance_bmp:
+
+  BUG: unable to handle page fault for address: ffff8af350ad7920
+  #PF: supervisor write access in kernel mode
+  #PF: error_code(0x0002) - not-present page
+  PGD ad001067 P4D ad001067 PUD 0
+  Oops: 0002 [#1] SMP PTI
+  CPU: 3 PID: 30 Comm: ksoftirqd/3 Kdump: loaded Not tainted 5.4.52-050452-generic #202007160732
+  Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.11.0-2.el7 04/01/2014
+  RIP: 0010:xfrm_replay_advance_bmp+0xbb/0x130
+  RSP: 0018:ffffa1304013ba40 EFLAGS: 00010206
+  RAX: 000000000000010d RBX: 0000000000000002 RCX: 00000000ffffff4b
+  RDX: 0000000000000018 RSI: 00000000004c234c RDI: 00000000ffb3dbff
+  RBP: ffffa1304013ba50 R08: ffff8af330ad7920 R09: 0000000007fffffa
+  R10: 0000000000000800 R11: 0000000000000010 R12: ffff8af29d6258c0
+  R13: ffff8af28b95c700 R14: 0000000000000000 R15: ffff8af29d6258fc
+  FS:  0000000000000000(0000) GS:ffff8af339ac0000(0000) knlGS:0000000000000000
+  CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+  CR2: ffff8af350ad7920 CR3: 0000000015ee4000 CR4: 00000000001406e0
+  Call Trace:
+   xfrm_input+0x4e5/0xa10
+   xfrm4_rcv_encap+0xb5/0xe0
+   xfrm4_udp_encap_rcv+0x140/0x1c0
+
+Analysis revealed offending code is when accessing:
+
+       replay_esn->bmp[nr] |= (1U << bitnr);
+
+with 'nr' being 0x07fffffa.
+
+This happened in an SMP system when reordering of packets was present;
+a packet arrived with a "too old" sequence number (outside the window,
+i.e. 'diff > replay_window'), and therefore the following calculation:
+
+                       bitnr = replay_esn->replay_window - (diff - pos);
+
+yields a negative result, but since bitnr is u32 we get a large unsigned
+quantity (in crash dump above: 0xffffff4b seen in ecx).
+
+This was supposed to be protected by xfrm_input()'s former call to:
+
+               if (x->repl->check(x, skb, seq)) {
+
+However, the state's spinlock x->lock is *released* after '->check()'
+is performed, and gets re-acquired before '->advance()' - which gives a
+chance for a different core to update the xfrm state, e.g. by advancing
+'replay_esn->seq' when it encounters more packets - leading to a
+'diff > replay_window' situation when original core continues to
+xfrm_replay_advance_bmp().
+
+An attempt to fix this issue was suggested in commit bcf66bf54aab
+("xfrm: Perform a replay check after return from async codepaths"),
+by calling 'x->repl->recheck()' after lock is re-acquired, but that fix
+was applied only to asynchronous crypto algorithms.
+
+Augment the fix, by *always* calling 'recheck()' - irrespective of whether
+we're using async crypto.
+
+Fixes: 0ebea8ef3559 ("[IPSEC]: Move state lock into x->type->input")
+Signed-off-by: Shmulik Ladkani <shmulik.ladkani@gmail.com>
+Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/xfrm/xfrm_input.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c
+index 0ee13d12782fb..fcba8a139f61e 100644
+--- a/net/xfrm/xfrm_input.c
++++ b/net/xfrm/xfrm_input.c
+@@ -420,7 +420,7 @@ resume:
+               /* only the first xfrm gets the encap type */
+               encap_type = 0;
+-              if (async && x->repl->recheck(x, skb, seq)) {
++              if (x->repl->recheck(x, skb, seq)) {
+                       XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATESEQERROR);
+                       goto drop_unlock;
+               }
+-- 
+2.27.0
+