+From fadead80fe4c033b5e514fcbadd20b55c4494112 Mon Sep 17 00:00:00 2001
+From: Jacob Keller <jacob.e.keller@intel.com>
+Date: Mon, 7 Feb 2022 10:23:29 -0800
+Subject: ice: fix concurrent reset and removal of VFs
+
+From: Jacob Keller <jacob.e.keller@intel.com>
+
+commit fadead80fe4c033b5e514fcbadd20b55c4494112 upstream.
+
+Commit c503e63200c6 ("ice: Stop processing VF messages during teardown")
+introduced a driver state flag, ICE_VF_DEINIT_IN_PROGRESS, which is
+intended to prevent some issues with concurrently handling messages from
+VFs while tearing down the VFs.
+
+This change was motivated by crashes caused while tearing down and
+bringing up VFs in rapid succession.
+
+It turns out that the fix actually introduces issues with the VF
+driver, because the PF no longer responds to any messages sent by the
+VF during its .remove routine. This results in the VF potentially
+removing its DMA memory before the PF has shut down the device queues.
+
+Additionally, the fix doesn't actually resolve concurrency issues
+within the ice driver. It is possible for a VF to initiate a reset
+just prior to the ice driver removing VFs. This can result in the
+remove task operating concurrently with the VF reset, producing memory
+corruption and panics similar to those the commit purported to fix.
+
+Fix this concurrency at its root by protecting both the reset and
+removal flows using the existing VF cfg_lock. This ensures that we
+cannot remove the VF while any outstanding critical tasks such as a
+virtchnl message or a reset are occurring.
+
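+In sketch form, every path that can reset a VF now takes the VF's
+cfg_lock around the critical section (names as in the diff below):
+
+	mutex_lock(&vf->cfg_lock);
+	ice_reset_vf(vf, false);
+	mutex_unlock(&vf->cfg_lock);
+
+and ice_reset_vf() carries a lockdep_assert_held(&vf->cfg_lock), so
+any caller that misses the lock is flagged when lockdep is enabled.
+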
+This locking change also fixes the root cause originally fixed by commit
+c503e63200c6 ("ice: Stop processing VF messages during teardown"), so we
+can simply revert it.
+
+Note that I kept these two changes together because reverting the
+original commit alone would leave the driver vulnerable to even worse
+race conditions.
+
+Fixes: c503e63200c6 ("ice: Stop processing VF messages during teardown")
+Signed-off-by: Jacob Keller <jacob.e.keller@intel.com>
+Tested-by: Konrad Jankowski <konrad0.jankowski@intel.com>
+Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/intel/ice/ice_main.c | 2 +
+ drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c | 35 ++++++++++++++++-------
+ 2 files changed, 27 insertions(+), 10 deletions(-)
+
+--- a/drivers/net/ethernet/intel/ice/ice_main.c
++++ b/drivers/net/ethernet/intel/ice/ice_main.c
+@@ -1602,7 +1602,9 @@ static void ice_handle_mdd_event(struct
+ * reset, so print the event prior to reset.
+ */
+ ice_print_vf_rx_mdd_event(vf);
++ mutex_lock(&pf->vf[i].cfg_lock);
+ ice_reset_vf(&pf->vf[i], false);
++ mutex_unlock(&pf->vf[i].cfg_lock);
+ }
+ }
+ }
+--- a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c
++++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c
+@@ -360,22 +360,26 @@ void ice_free_vfs(struct ice_pf *pf)
+ else
+ dev_warn(dev, "VFs are assigned - not disabling SR-IOV\n");
+
+- /* Avoid wait time by stopping all VFs at the same time */
+- ice_for_each_vf(pf, i)
+- ice_dis_vf_qs(&pf->vf[i]);
+-
+ tmp = pf->num_alloc_vfs;
+ pf->num_qps_per_vf = 0;
+ pf->num_alloc_vfs = 0;
+ for (i = 0; i < tmp; i++) {
+- if (test_bit(ICE_VF_STATE_INIT, pf->vf[i].vf_states)) {
++ struct ice_vf *vf = &pf->vf[i];
++
++ mutex_lock(&vf->cfg_lock);
++
++ ice_dis_vf_qs(vf);
++
++ if (test_bit(ICE_VF_STATE_INIT, vf->vf_states)) {
+ /* disable VF qp mappings and set VF disable state */
+- ice_dis_vf_mappings(&pf->vf[i]);
+- set_bit(ICE_VF_STATE_DIS, pf->vf[i].vf_states);
+- ice_free_vf_res(&pf->vf[i]);
++ ice_dis_vf_mappings(vf);
++ set_bit(ICE_VF_STATE_DIS, vf->vf_states);
++ ice_free_vf_res(vf);
+ }
+
+- mutex_destroy(&pf->vf[i].cfg_lock);
++ mutex_unlock(&vf->cfg_lock);
++
++ mutex_destroy(&vf->cfg_lock);
+ }
+
+ if (ice_sriov_free_msix_res(pf))
+@@ -1223,9 +1227,13 @@ bool ice_reset_all_vfs(struct ice_pf *pf
+ ice_for_each_vf(pf, v) {
+ vf = &pf->vf[v];
+
++ mutex_lock(&vf->cfg_lock);
++
+ ice_vf_pre_vsi_rebuild(vf);
+ ice_vf_rebuild_vsi(vf);
+ ice_vf_post_vsi_rebuild(vf);
++
++ mutex_unlock(&vf->cfg_lock);
+ }
+
+ ice_flush(hw);
+@@ -1272,6 +1280,8 @@ bool ice_reset_vf(struct ice_vf *vf, boo
+ u32 reg;
+ int i;
+
++ lockdep_assert_held(&vf->cfg_lock);
++
+ dev = ice_pf_to_dev(pf);
+
+ if (test_bit(__ICE_VF_RESETS_DISABLED, pf->state)) {
+@@ -1725,9 +1735,12 @@ void ice_process_vflr_event(struct ice_p
+ bit_idx = (hw->func_caps.vf_base_id + vf_id) % 32;
+ /* read GLGEN_VFLRSTAT register to find out the flr VFs */
+ reg = rd32(hw, GLGEN_VFLRSTAT(reg_idx));
+- if (reg & BIT(bit_idx))
++ if (reg & BIT(bit_idx)) {
+ /* GLGEN_VFLRSTAT bit will be cleared in ice_reset_vf */
++ mutex_lock(&vf->cfg_lock);
+ ice_reset_vf(vf, true);
++ mutex_unlock(&vf->cfg_lock);
++ }
+ }
+ }
+
+@@ -1804,7 +1817,9 @@ ice_vf_lan_overflow_event(struct ice_pf
+ if (!vf)
+ return;
+
++ mutex_lock(&vf->cfg_lock);
+ ice_vc_reset_vf(vf);
++ mutex_unlock(&vf->cfg_lock);
+ }
+
+ /**
+From e6ba5273d4ede03d075d7a116b8edad1f6115f4d Mon Sep 17 00:00:00 2001
+From: Brett Creeley <brett.creeley@intel.com>
+Date: Thu, 9 Sep 2021 14:38:09 -0700
+Subject: ice: Fix race conditions between virtchnl handling and VF ndo ops
+
+From: Brett Creeley <brett.creeley@intel.com>
+
+commit e6ba5273d4ede03d075d7a116b8edad1f6115f4d upstream.
+
+The VF can be configured via the PF's ndo ops at the same time the PF
+is receiving/handling virtchnl messages. This has many issues; one of
+them is that an ndo op could be actively resetting a VF (i.e. resetting
+it to the default state and deleting/re-adding the VF's VSI) while a
+virtchnl message is being handled. The following error was seen when a
+VF ndo op was used to change a VF's trust setting while a
+VIRTCHNL_OP_CONFIG_VSI_QUEUES message was being handled:
+
+[35274.192484] ice 0000:88:00.0: Failed to set LAN Tx queue context, error: ICE_ERR_PARAM
+[35274.193074] ice 0000:88:00.0: VF 0 failed opcode 6, retval: -5
+[35274.193640] iavf 0000:88:01.0: PF returned error -5 (IAVF_ERR_PARAM) to our request 6
+
+Fix this by making sure the virtchnl handling and the VF ndo ops that
+trigger VF resets cannot run concurrently. This is done by adding a
+struct mutex cfg_lock to each VF structure. The VF ndo ops take the
+mutex around their critical operations and the VFR they trigger. Since
+an ndo op holding the mutex implies an imminent VFR, the virtchnl
+thread uses mutex_trylock(): if another thread (i.e. a VF ndo op)
+holds the mutex, the VF message currently being handled is no longer
+valid and is simply dropped.
+
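+A minimal sketch of the two paths (names as in the diff below):
+
+	/* ndo op path: wait for the lock, then trigger the VFR */
+	mutex_lock(&vf->cfg_lock);
+	vf->trusted = trusted;
+	ice_vc_reset_vf(vf);
+	mutex_unlock(&vf->cfg_lock);
+
+	/* virtchnl path: a contended lock means a VFR is imminent,
+	 * so the message is stale and can be dropped
+	 */
+	if (!mutex_trylock(&vf->cfg_lock))
+		return;
+	/* ... dispatch the v_opcode handler ... */
+	mutex_unlock(&vf->cfg_lock);
+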
+This issue can be seen using the following commands:
+
+for i in {0..50}; do
+ rmmod ice
+ modprobe ice
+
+ sleep 1
+
+ echo 1 > /sys/class/net/ens785f0/device/sriov_numvfs
+ echo 1 > /sys/class/net/ens785f1/device/sriov_numvfs
+
+ ip link set ens785f1 vf 0 trust on
+ ip link set ens785f0 vf 0 trust on
+
+ sleep 2
+
+ echo 0 > /sys/class/net/ens785f0/device/sriov_numvfs
+ echo 0 > /sys/class/net/ens785f1/device/sriov_numvfs
+ sleep 1
+ echo 1 > /sys/class/net/ens785f0/device/sriov_numvfs
+ echo 1 > /sys/class/net/ens785f1/device/sriov_numvfs
+
+ ip link set ens785f1 vf 0 trust on
+ ip link set ens785f0 vf 0 trust on
+done
+
+Fixes: 7c710869d64e ("ice: Add handlers for VF netdevice operations")
+Signed-off-by: Brett Creeley <brett.creeley@intel.com>
+Tested-by: Konrad Jankowski <konrad0.jankowski@intel.com>
+Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
+Signed-off-by: Jacob Keller <jacob.e.keller@intel.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c | 25 +++++++++++++++++++++++
+ drivers/net/ethernet/intel/ice/ice_virtchnl_pf.h | 5 ++++
+ 2 files changed, 30 insertions(+)
+
+--- a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c
++++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c
+@@ -374,6 +374,8 @@ void ice_free_vfs(struct ice_pf *pf)
+ set_bit(ICE_VF_STATE_DIS, pf->vf[i].vf_states);
+ ice_free_vf_res(&pf->vf[i]);
+ }
++
++ mutex_destroy(&pf->vf[i].cfg_lock);
+ }
+
+ if (ice_sriov_free_msix_res(pf))
+@@ -1518,6 +1520,8 @@ static void ice_set_dflt_settings_vfs(st
+ set_bit(ICE_VIRTCHNL_VF_CAP_L2, &vf->vf_caps);
+ vf->spoofchk = true;
+ vf->num_vf_qs = pf->num_qps_per_vf;
++
++ mutex_init(&vf->cfg_lock);
+ }
+ }
+
+@@ -3345,6 +3349,8 @@ ice_set_vf_port_vlan(struct net_device *
+ return 0;
+ }
+
++ mutex_lock(&vf->cfg_lock);
++
+ vf->port_vlan_info = vlanprio;
+
+ if (vf->port_vlan_info)
+@@ -3354,6 +3360,7 @@ ice_set_vf_port_vlan(struct net_device *
+ dev_info(dev, "Clearing port VLAN on VF %d\n", vf_id);
+
+ ice_vc_reset_vf(vf);
++ mutex_unlock(&vf->cfg_lock);
+
+ return 0;
+ }
+@@ -3719,6 +3726,15 @@ error_handler:
+ return;
+ }
+
++ /* VF is being configured in another context that triggers a VFR, so no
++ * need to process this message
++ */
++ if (!mutex_trylock(&vf->cfg_lock)) {
++ dev_info(dev, "VF %u is being configured in another context that will trigger a VFR, so there is no need to handle this message\n",
++ vf->vf_id);
++ return;
++ }
++
+ switch (v_opcode) {
+ case VIRTCHNL_OP_VERSION:
+ err = ice_vc_get_ver_msg(vf, msg);
+@@ -3795,6 +3811,8 @@ error_handler:
+ dev_info(dev, "PF failed to honor VF %d, opcode %d, error %d\n",
+ vf_id, v_opcode, err);
+ }
++
++ mutex_unlock(&vf->cfg_lock);
+ }
+
+ /**
+@@ -3909,6 +3927,8 @@ int ice_set_vf_mac(struct net_device *ne
+ return -EINVAL;
+ }
+
++ mutex_lock(&vf->cfg_lock);
++
+ /* VF is notified of its new MAC via the PF's response to the
+ * VIRTCHNL_OP_GET_VF_RESOURCES message after the VF has been reset
+ */
+@@ -3926,6 +3946,7 @@ int ice_set_vf_mac(struct net_device *ne
+ }
+
+ ice_vc_reset_vf(vf);
++ mutex_unlock(&vf->cfg_lock);
+ return 0;
+ }
+
+@@ -3955,11 +3976,15 @@ int ice_set_vf_trust(struct net_device *
+ if (trusted == vf->trusted)
+ return 0;
+
++ mutex_lock(&vf->cfg_lock);
++
+ vf->trusted = trusted;
+ ice_vc_reset_vf(vf);
+ dev_info(ice_pf_to_dev(pf), "VF %u is now %strusted\n",
+ vf_id, trusted ? "" : "un");
+
++ mutex_unlock(&vf->cfg_lock);
++
+ return 0;
+ }
+
+--- a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.h
++++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.h
+@@ -68,6 +68,11 @@ struct ice_mdd_vf_events {
+ struct ice_vf {
+ struct ice_pf *pf;
+
++ /* Used during virtchnl message handling and NDO ops against the VF
++ * that will trigger a VFR
++ */
++ struct mutex cfg_lock;
++
+ u16 vf_id; /* VF ID in the PF space */
+ u16 lan_vsi_idx; /* index into PF struct */
+ /* first vector index of this VF in the PF space */
+From b2fcf2102049f6e56981e0ab3d9b633b8e2741da Mon Sep 17 00:00:00 2001
+From: Frederic Weisbecker <frederic@kernel.org>
+Date: Tue, 23 Feb 2021 01:09:59 +0100
+Subject: rcu/nocb: Fix missed nocb_timer requeue
+
+From: Frederic Weisbecker <frederic@kernel.org>
+
+commit b2fcf2102049f6e56981e0ab3d9b633b8e2741da upstream.
+
+This sequence of events can lead to a failure to requeue a CPU's
+->nocb_timer:
+
+1. There are no callbacks queued for any CPU covered by CPU 0-2's
+ ->nocb_gp_kthread. Note that ->nocb_gp_kthread is associated
+ with CPU 0.
+
+2. CPU 1 enqueues its first callback with interrupts disabled, and
+ thus must defer awakening its ->nocb_gp_kthread. It therefore
+ queues its rcu_data structure's ->nocb_timer. At this point,
+ CPU 1's rdp->nocb_defer_wakeup is RCU_NOCB_WAKE.
+
+3. CPU 2, which shares the same ->nocb_gp_kthread, also enqueues a
+ callback, but with interrupts enabled, allowing it to directly
+ awaken the ->nocb_gp_kthread.
+
+4. The newly awakened ->nocb_gp_kthread associates both CPU 1's
+ and CPU 2's callbacks with a future grace period and arranges
+ for that grace period to be started.
+
+5. This ->nocb_gp_kthread goes to sleep waiting for the end of this
+ future grace period.
+
+6. This grace period elapses before CPU 1's timer fires. This is
+   normally improbable given that the timer is set for only one jiffy,
+   but timers can be delayed. Besides, it is possible that the kernel
+   was built with CONFIG_RCU_STRICT_GRACE_PERIOD=y.
+
+7. The grace period ends, so rcu_gp_kthread awakens the
+ ->nocb_gp_kthread, which in turn awakens both CPU 1's and
+   CPU 2's ->nocb_cb_kthread. Then ->nocb_gp_kthread sleeps
+ waiting for more newly queued callbacks.
+
+8. CPU 1's ->nocb_cb_kthread invokes its callback, then sleeps
+ waiting for more invocable callbacks.
+
+9. Note that neither kthread updated any ->nocb_timer state,
+ so CPU 1's ->nocb_defer_wakeup is still set to RCU_NOCB_WAKE.
+
+10. CPU 1 enqueues its second callback, this time with interrupts
+    enabled so it can directly wake ->nocb_gp_kthread. It does so by
+    calling wake_nocb_gp(), which also cancels the pending timer that
+    was queued in step 2. But that doesn't reset CPU 1's
+    ->nocb_defer_wakeup, which is still set to RCU_NOCB_WAKE. So
+    CPU 1's ->nocb_defer_wakeup and its ->nocb_timer are now
+    desynchronized.
+
+11. ->nocb_gp_kthread associates the callback queued in step 10 with a
+    new grace period, arranges for that grace period to start and
+    sleeps waiting for it to complete.
+
+12. The grace period ends, rcu_gp_kthread awakens ->nocb_gp_kthread,
+    which in turn wakes up CPU 1's ->nocb_cb_kthread, which then
+    invokes the callback queued in step 10.
+
+13. CPU 1 enqueues its third callback, this time with interrupts
+    disabled so it must queue a timer for a deferred wakeup. However,
+    the value of its ->nocb_defer_wakeup is RCU_NOCB_WAKE, which
+    incorrectly indicates that a timer is already queued. In fact,
+    CPU 1's ->nocb_timer was cancelled in step 10. CPU 1 therefore
+    fails to queue the ->nocb_timer.
+
+14. CPU 1's callback is now pending and may go unnoticed until some
+    other CPU wakes up ->nocb_gp_kthread or CPU 1 itself performs an
+    explicit deferred wakeup, for example during idle entry.
+
+This commit fixes this bug by resetting rdp->nocb_defer_wakeup every
+time we delete the ->nocb_timer.
+
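+In sketch form, wake_nocb_gp() now updates the deferred-wakeup state
+and the timer together (as in the diff below):
+
+	if (READ_ONCE(rdp->nocb_defer_wakeup) > RCU_NOCB_WAKE_NOT) {
+		WRITE_ONCE(rdp->nocb_defer_wakeup, RCU_NOCB_WAKE_NOT);
+		del_timer(&rdp->nocb_timer);
+	}
+
+while do_nocb_deferred_wakeup_common() stops clearing
+->nocb_defer_wakeup itself, leaving wake_nocb_gp() as the single place
+where the two are kept in sync.
+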
+It is quite possible that there is a similar scenario involving
+->nocb_bypass_timer and ->nocb_defer_wakeup. However, despite some
+effort from several people, a failure scenario has not yet been
+located, though that by no means guarantees that no such scenario
+exists. Finding one is left as an exercise for the reader, and the
+"Fixes:" tag below relates to ->nocb_bypass_timer rather than
+->nocb_timer.
+
+Fixes: d1b222c6be1f ("rcu/nocb: Add bypass callback queueing")
+Cc: <stable@vger.kernel.org>
+Cc: Josh Triplett <josh@joshtriplett.org>
+Cc: Lai Jiangshan <jiangshanlai@gmail.com>
+Cc: Joel Fernandes <joel@joelfernandes.org>
+Cc: Boqun Feng <boqun.feng@gmail.com>
+Reviewed-by: Neeraj Upadhyay <neeraju@codeaurora.org>
+Signed-off-by: Frederic Weisbecker <frederic@kernel.org>
+Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
+Signed-off-by: Zhen Lei <thunder.leizhen@huawei.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/rcu/tree_plugin.h | 7 +++++--
+ 1 file changed, 5 insertions(+), 2 deletions(-)
+
+--- a/kernel/rcu/tree_plugin.h
++++ b/kernel/rcu/tree_plugin.h
+@@ -1646,7 +1646,11 @@ static void wake_nocb_gp(struct rcu_data
+ rcu_nocb_unlock_irqrestore(rdp, flags);
+ return;
+ }
+- del_timer(&rdp->nocb_timer);
++
++ if (READ_ONCE(rdp->nocb_defer_wakeup) > RCU_NOCB_WAKE_NOT) {
++ WRITE_ONCE(rdp->nocb_defer_wakeup, RCU_NOCB_WAKE_NOT);
++ del_timer(&rdp->nocb_timer);
++ }
+ rcu_nocb_unlock_irqrestore(rdp, flags);
+ raw_spin_lock_irqsave(&rdp_gp->nocb_gp_lock, flags);
+ if (force || READ_ONCE(rdp_gp->nocb_gp_sleep)) {
+@@ -2164,7 +2168,6 @@ static void do_nocb_deferred_wakeup_comm
+ return;
+ }
+ ndw = READ_ONCE(rdp->nocb_defer_wakeup);
+- WRITE_ONCE(rdp->nocb_defer_wakeup, RCU_NOCB_WAKE_NOT);
+ wake_nocb_gp(rdp, ndw == RCU_NOCB_WAKE_FORCE, flags);
+ trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, TPS("DeferredWake"));
+ }
net-smc-fix-connection-leak.patch
net-smc-fix-unexpected-smc_clc_decl_err_regrmb-error-generated-by-client.patch
net-smc-fix-unexpected-smc_clc_decl_err_regrmb-error-cause-by-server.patch
+rcu-nocb-fix-missed-nocb_timer-requeue.patch
+ice-fix-race-conditions-between-virtchnl-handling-and-vf-ndo-ops.patch
+ice-fix-concurrent-reset-and-removal-of-vfs.patch