From: Greg Kroah-Hartman Date: Sat, 5 Mar 2022 13:45:39 +0000 (+0100) Subject: 5.10-stable patches X-Git-Tag: v4.9.305~65 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=059327ed1ea8f7735d948669d71acb2ebf990a8e;p=thirdparty%2Fkernel%2Fstable-queue.git 5.10-stable patches added patches: ice-fix-concurrent-reset-and-removal-of-vfs.patch ice-fix-race-conditions-between-virtchnl-handling-and-vf-ndo-ops.patch rcu-nocb-fix-missed-nocb_timer-requeue.patch --- diff --git a/queue-5.10/ice-fix-concurrent-reset-and-removal-of-vfs.patch b/queue-5.10/ice-fix-concurrent-reset-and-removal-of-vfs.patch new file mode 100644 index 00000000000..8f3640a9aa3 --- /dev/null +++ b/queue-5.10/ice-fix-concurrent-reset-and-removal-of-vfs.patch @@ -0,0 +1,148 @@ +From fadead80fe4c033b5e514fcbadd20b55c4494112 Mon Sep 17 00:00:00 2001 +From: Jacob Keller +Date: Mon, 7 Feb 2022 10:23:29 -0800 +Subject: ice: fix concurrent reset and removal of VFs + +From: Jacob Keller + +commit fadead80fe4c033b5e514fcbadd20b55c4494112 upstream. + +Commit c503e63200c6 ("ice: Stop processing VF messages during teardown") +introduced a driver state flag, ICE_VF_DEINIT_IN_PROGRESS, which is +intended to prevent some issues with concurrently handling messages from +VFs while tearing down the VFs. + +This change was motivated by crashes caused while tearing down and +bringing up VFs in rapid succession. + +It turns out that the fix actually introduces issues with the VF driver +caused because the PF no longer responds to any messages sent by the VF +during its .remove routine. This results in the VF potentially removing +its DMA memory before the PF has shut down the device queues. + +Additionally, the fix doesn't actually resolve concurrency issues within +the ice driver. It is possible for a VF to initiate a reset just prior +to the ice driver removing VFs. This can result in the remove task +concurrently operating while the VF is being reset. This results in +similar memory corruption and panics purportedly fixed by that commit. + +Fix this concurrency at its root by protecting both the reset and +removal flows using the existing VF cfg_lock. This ensures that we +cannot remove the VF while any outstanding critical tasks such as a +virtchnl message or a reset are occurring. + +This locking change also fixes the root cause originally fixed by commit +c503e63200c6 ("ice: Stop processing VF messages during teardown"), so we +can simply revert it. + +Note that I kept these two changes together because simply reverting the +original commit alone would leave the driver vulnerable to worse race +conditions. + +Fixes: c503e63200c6 ("ice: Stop processing VF messages during teardown") +Signed-off-by: Jacob Keller +Tested-by: Konrad Jankowski +Signed-off-by: Tony Nguyen +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/intel/ice/ice_main.c | 2 + + drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c | 35 ++++++++++++++++------- + 2 files changed, 27 insertions(+), 10 deletions(-) + +--- a/drivers/net/ethernet/intel/ice/ice_main.c ++++ b/drivers/net/ethernet/intel/ice/ice_main.c +@@ -1602,7 +1602,9 @@ static void ice_handle_mdd_event(struct + * reset, so print the event prior to reset. + */ + ice_print_vf_rx_mdd_event(vf); ++ mutex_lock(&pf->vf[i].cfg_lock); + ice_reset_vf(&pf->vf[i], false); ++ mutex_unlock(&pf->vf[i].cfg_lock); + } + } + } +--- a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c ++++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c +@@ -360,22 +360,26 @@ void ice_free_vfs(struct ice_pf *pf) + else + dev_warn(dev, "VFs are assigned - not disabling SR-IOV\n"); + +- /* Avoid wait time by stopping all VFs at the same time */ +- ice_for_each_vf(pf, i) +- ice_dis_vf_qs(&pf->vf[i]); +- + tmp = pf->num_alloc_vfs; + pf->num_qps_per_vf = 0; + pf->num_alloc_vfs = 0; + for (i = 0; i < tmp; i++) { +- if (test_bit(ICE_VF_STATE_INIT, pf->vf[i].vf_states)) { ++ struct ice_vf *vf = &pf->vf[i]; ++ ++ mutex_lock(&vf->cfg_lock); ++ ++ ice_dis_vf_qs(vf); ++ ++ if (test_bit(ICE_VF_STATE_INIT, vf->vf_states)) { + /* disable VF qp mappings and set VF disable state */ +- ice_dis_vf_mappings(&pf->vf[i]); +- set_bit(ICE_VF_STATE_DIS, pf->vf[i].vf_states); +- ice_free_vf_res(&pf->vf[i]); ++ ice_dis_vf_mappings(vf); ++ set_bit(ICE_VF_STATE_DIS, vf->vf_states); ++ ice_free_vf_res(vf); + } + +- mutex_destroy(&pf->vf[i].cfg_lock); ++ mutex_unlock(&vf->cfg_lock); ++ ++ mutex_destroy(&vf->cfg_lock); + } + + if (ice_sriov_free_msix_res(pf)) +@@ -1223,9 +1227,13 @@ bool ice_reset_all_vfs(struct ice_pf *pf + ice_for_each_vf(pf, v) { + vf = &pf->vf[v]; + ++ mutex_lock(&vf->cfg_lock); ++ + ice_vf_pre_vsi_rebuild(vf); + ice_vf_rebuild_vsi(vf); + ice_vf_post_vsi_rebuild(vf); ++ ++ mutex_unlock(&vf->cfg_lock); + } + + ice_flush(hw); +@@ -1272,6 +1280,8 @@ bool ice_reset_vf(struct ice_vf *vf, boo + u32 reg; + int i; + ++ lockdep_assert_held(&vf->cfg_lock); ++ + dev = ice_pf_to_dev(pf); + + if (test_bit(__ICE_VF_RESETS_DISABLED, pf->state)) { +@@ -1725,9 +1735,12 @@ void ice_process_vflr_event(struct ice_p + bit_idx = (hw->func_caps.vf_base_id + vf_id) % 32; + /* read GLGEN_VFLRSTAT register to find out the flr VFs */ + reg = rd32(hw, GLGEN_VFLRSTAT(reg_idx)); +- if (reg & BIT(bit_idx)) ++ if (reg & BIT(bit_idx)) { + /* GLGEN_VFLRSTAT bit will be cleared in ice_reset_vf */ ++ mutex_lock(&vf->cfg_lock); + ice_reset_vf(vf, true); ++ mutex_unlock(&vf->cfg_lock); ++ } + } + } + +@@ -1804,7 +1817,9 @@ ice_vf_lan_overflow_event(struct ice_pf + if (!vf) + return; + ++ mutex_lock(&vf->cfg_lock); + ice_vc_reset_vf(vf); ++ mutex_unlock(&vf->cfg_lock); + } + + /** diff --git a/queue-5.10/ice-fix-race-conditions-between-virtchnl-handling-and-vf-ndo-ops.patch b/queue-5.10/ice-fix-race-conditions-between-virtchnl-handling-and-vf-ndo-ops.patch new file mode 100644 index 00000000000..deedfbc2a7b --- /dev/null +++ b/queue-5.10/ice-fix-race-conditions-between-virtchnl-handling-and-vf-ndo-ops.patch @@ -0,0 +1,176 @@ +From e6ba5273d4ede03d075d7a116b8edad1f6115f4d Mon Sep 17 00:00:00 2001 +From: Brett Creeley +Date: Thu, 9 Sep 2021 14:38:09 -0700 +Subject: ice: Fix race conditions between virtchnl handling and VF ndo ops + +From: Brett Creeley + +commit e6ba5273d4ede03d075d7a116b8edad1f6115f4d upstream. + +The VF can be configured via the PF's ndo ops at the same time the PF is +receiving/handling virtchnl messages. This has many issues, with +one of them being the ndo op could be actively resetting a VF (i.e. +resetting it to the default state and deleting/re-adding the VF's VSI) +while a virtchnl message is being handled. The following error was seen +because a VF ndo op was used to change a VF's trust setting while the +VIRTCHNL_OP_CONFIG_VSI_QUEUES was ongoing: + +[35274.192484] ice 0000:88:00.0: Failed to set LAN Tx queue context, error: ICE_ERR_PARAM +[35274.193074] ice 0000:88:00.0: VF 0 failed opcode 6, retval: -5 +[35274.193640] iavf 0000:88:01.0: PF returned error -5 (IAVF_ERR_PARAM) to our request 6 + +Fix this by making sure the virtchnl handling and VF ndo ops that +trigger VF resets cannot run concurrently. This is done by adding a +struct mutex cfg_lock to each VF structure. For VF ndo ops, the mutex +will be locked around the critical operations and VFR. Since the ndo ops +will trigger a VFR, the virtchnl thread will use mutex_trylock(). This +is done because if any other thread (i.e. VF ndo op) has the mutex, then +that means the current VF message being handled is no longer valid, so +just ignore it. + +This issue can be seen using the following commands: + +for i in {0..50}; do + rmmod ice + modprobe ice + + sleep 1 + + echo 1 > /sys/class/net/ens785f0/device/sriov_numvfs + echo 1 > /sys/class/net/ens785f1/device/sriov_numvfs + + ip link set ens785f1 vf 0 trust on + ip link set ens785f0 vf 0 trust on + + sleep 2 + + echo 0 > /sys/class/net/ens785f0/device/sriov_numvfs + echo 0 > /sys/class/net/ens785f1/device/sriov_numvfs + sleep 1 + echo 1 > /sys/class/net/ens785f0/device/sriov_numvfs + echo 1 > /sys/class/net/ens785f1/device/sriov_numvfs + + ip link set ens785f1 vf 0 trust on + ip link set ens785f0 vf 0 trust on +done + +Fixes: 7c710869d64e ("ice: Add handlers for VF netdevice operations") +Signed-off-by: Brett Creeley +Tested-by: Konrad Jankowski +Signed-off-by: Tony Nguyen +Signed-off-by: Jacob Keller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c | 25 +++++++++++++++++++++++ + drivers/net/ethernet/intel/ice/ice_virtchnl_pf.h | 5 ++++ + 2 files changed, 30 insertions(+) + +--- a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c ++++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c +@@ -374,6 +374,8 @@ void ice_free_vfs(struct ice_pf *pf) + set_bit(ICE_VF_STATE_DIS, pf->vf[i].vf_states); + ice_free_vf_res(&pf->vf[i]); + } ++ ++ mutex_destroy(&pf->vf[i].cfg_lock); + } + + if (ice_sriov_free_msix_res(pf)) +@@ -1518,6 +1520,8 @@ static void ice_set_dflt_settings_vfs(st + set_bit(ICE_VIRTCHNL_VF_CAP_L2, &vf->vf_caps); + vf->spoofchk = true; + vf->num_vf_qs = pf->num_qps_per_vf; ++ ++ mutex_init(&vf->cfg_lock); + } + } + +@@ -3345,6 +3349,8 @@ ice_set_vf_port_vlan(struct net_device * + return 0; + } + ++ mutex_lock(&vf->cfg_lock); ++ + vf->port_vlan_info = vlanprio; + + if (vf->port_vlan_info) +@@ -3354,6 +3360,7 @@ ice_set_vf_port_vlan(struct net_device * + dev_info(dev, "Clearing port VLAN on VF %d\n", vf_id); + + ice_vc_reset_vf(vf); ++ mutex_unlock(&vf->cfg_lock); + + return 0; + } +@@ -3719,6 +3726,15 @@ error_handler: + return; + } + ++ /* VF is being configured in another context that triggers a VFR, so no ++ * need to process this message ++ */ ++ if (!mutex_trylock(&vf->cfg_lock)) { ++ dev_info(dev, "VF %u is being configured in another context that will trigger a VFR, so there is no need to handle this message\n", ++ vf->vf_id); ++ return; ++ } ++ + switch (v_opcode) { + case VIRTCHNL_OP_VERSION: + err = ice_vc_get_ver_msg(vf, msg); +@@ -3795,6 +3811,8 @@ error_handler: + dev_info(dev, "PF failed to honor VF %d, opcode %d, error %d\n", + vf_id, v_opcode, err); + } ++ ++ mutex_unlock(&vf->cfg_lock); + } + + /** +@@ -3909,6 +3927,8 @@ int ice_set_vf_mac(struct net_device *ne + return -EINVAL; + } + ++ mutex_lock(&vf->cfg_lock); ++ + /* VF is notified of its new MAC via the PF's response to the + * VIRTCHNL_OP_GET_VF_RESOURCES message after the VF has been reset + */ +@@ -3926,6 +3946,7 @@ int ice_set_vf_mac(struct net_device *ne + } + + ice_vc_reset_vf(vf); ++ mutex_unlock(&vf->cfg_lock); + return 0; + } + +@@ -3955,11 +3976,15 @@ int ice_set_vf_trust(struct net_device * + if (trusted == vf->trusted) + return 0; + ++ mutex_lock(&vf->cfg_lock); ++ + vf->trusted = trusted; + ice_vc_reset_vf(vf); + dev_info(ice_pf_to_dev(pf), "VF %u is now %strusted\n", + vf_id, trusted ? "" : "un"); + ++ mutex_unlock(&vf->cfg_lock); ++ + return 0; + } + +--- a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.h ++++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.h +@@ -68,6 +68,11 @@ struct ice_mdd_vf_events { + struct ice_vf { + struct ice_pf *pf; + ++ /* Used during virtchnl message handling and NDO ops against the VF ++ * that will trigger a VFR ++ */ ++ struct mutex cfg_lock; ++ + u16 vf_id; /* VF ID in the PF space */ + u16 lan_vsi_idx; /* index into PF struct */ + /* first vector index of this VF in the PF space */ diff --git a/queue-5.10/rcu-nocb-fix-missed-nocb_timer-requeue.patch b/queue-5.10/rcu-nocb-fix-missed-nocb_timer-requeue.patch new file mode 100644 index 00000000000..4b951b00330 --- /dev/null +++ b/queue-5.10/rcu-nocb-fix-missed-nocb_timer-requeue.patch @@ -0,0 +1,123 @@ +From b2fcf2102049f6e56981e0ab3d9b633b8e2741da Mon Sep 17 00:00:00 2001 +From: Frederic Weisbecker +Date: Tue, 23 Feb 2021 01:09:59 +0100 +Subject: rcu/nocb: Fix missed nocb_timer requeue + +From: Frederic Weisbecker + +commit b2fcf2102049f6e56981e0ab3d9b633b8e2741da upstream. + +This sequence of events can lead to a failure to requeue a CPU's +->nocb_timer: + +1. There are no callbacks queued for any CPU covered by CPU 0-2's + ->nocb_gp_kthread. Note that ->nocb_gp_kthread is associated + with CPU 0. + +2. CPU 1 enqueues its first callback with interrupts disabled, and + thus must defer awakening its ->nocb_gp_kthread. It therefore + queues its rcu_data structure's ->nocb_timer. At this point, + CPU 1's rdp->nocb_defer_wakeup is RCU_NOCB_WAKE. + +3. CPU 2, which shares the same ->nocb_gp_kthread, also enqueues a + callback, but with interrupts enabled, allowing it to directly + awaken the ->nocb_gp_kthread. + +4. The newly awakened ->nocb_gp_kthread associates both CPU 1's + and CPU 2's callbacks with a future grace period and arranges + for that grace period to be started. + +5. This ->nocb_gp_kthread goes to sleep waiting for the end of this + future grace period. + +6. This grace period elapses before the CPU 1's timer fires. + This is normally improbably given that the timer is set for only + one jiffy, but timers can be delayed. Besides, it is possible + that kernel was built with CONFIG_RCU_STRICT_GRACE_PERIOD=y. + +7. The grace period ends, so rcu_gp_kthread awakens the + ->nocb_gp_kthread, which in turn awakens both CPU 1's and + CPU 2's ->nocb_cb_kthread. Then ->nocb_gb_kthread sleeps + waiting for more newly queued callbacks. + +8. CPU 1's ->nocb_cb_kthread invokes its callback, then sleeps + waiting for more invocable callbacks. + +9. Note that neither kthread updated any ->nocb_timer state, + so CPU 1's ->nocb_defer_wakeup is still set to RCU_NOCB_WAKE. + +10. CPU 1 enqueues its second callback, this time with interrupts + enabled so it can wake directly ->nocb_gp_kthread. + It does so with calling wake_nocb_gp() which also cancels the + pending timer that got queued in step 2. But that doesn't reset + CPU 1's ->nocb_defer_wakeup which is still set to RCU_NOCB_WAKE. + So CPU 1's ->nocb_defer_wakeup and its ->nocb_timer are now + desynchronized. + +11. ->nocb_gp_kthread associates the callback queued in 10 with a new + grace period, arranges for that grace period to start and sleeps + waiting for it to complete. + +12. The grace period ends, rcu_gp_kthread awakens ->nocb_gp_kthread, + which in turn wakes up CPU 1's ->nocb_cb_kthread which then + invokes the callback queued in 10. + +13. CPU 1 enqueues its third callback, this time with interrupts + disabled so it must queue a timer for a deferred wakeup. However + the value of its ->nocb_defer_wakeup is RCU_NOCB_WAKE which + incorrectly indicates that a timer is already queued. Instead, + CPU 1's ->nocb_timer was cancelled in 10. CPU 1 therefore fails + to queue the ->nocb_timer. + +14. CPU 1 has its pending callback and it may go unnoticed until + some other CPU ever wakes up ->nocb_gp_kthread or CPU 1 ever + calls an explicit deferred wakeup, for example, during idle entry. + +This commit fixes this bug by resetting rdp->nocb_defer_wakeup everytime +we delete the ->nocb_timer. + +It is quite possible that there is a similar scenario involving +->nocb_bypass_timer and ->nocb_defer_wakeup. However, despite some +effort from several people, a failure scenario has not yet been located. +However, that by no means guarantees that no such scenario exists. +Finding a failure scenario is left as an exercise for the reader, and the +"Fixes:" tag below relates to ->nocb_bypass_timer instead of ->nocb_timer. + +Fixes: d1b222c6be1f (rcu/nocb: Add bypass callback queueing) +Cc: +Cc: Josh Triplett +Cc: Lai Jiangshan +Cc: Joel Fernandes +Cc: Boqun Feng +Reviewed-by: Neeraj Upadhyay +Signed-off-by: Frederic Weisbecker +Signed-off-by: Paul E. McKenney +Signed-off-by: Zhen Lei +Signed-off-by: Greg Kroah-Hartman +--- + kernel/rcu/tree_plugin.h | 7 +++++-- + 1 file changed, 5 insertions(+), 2 deletions(-) + +--- a/kernel/rcu/tree_plugin.h ++++ b/kernel/rcu/tree_plugin.h +@@ -1646,7 +1646,11 @@ static void wake_nocb_gp(struct rcu_data + rcu_nocb_unlock_irqrestore(rdp, flags); + return; + } +- del_timer(&rdp->nocb_timer); ++ ++ if (READ_ONCE(rdp->nocb_defer_wakeup) > RCU_NOCB_WAKE_NOT) { ++ WRITE_ONCE(rdp->nocb_defer_wakeup, RCU_NOCB_WAKE_NOT); ++ del_timer(&rdp->nocb_timer); ++ } + rcu_nocb_unlock_irqrestore(rdp, flags); + raw_spin_lock_irqsave(&rdp_gp->nocb_gp_lock, flags); + if (force || READ_ONCE(rdp_gp->nocb_gp_sleep)) { +@@ -2164,7 +2168,6 @@ static void do_nocb_deferred_wakeup_comm + return; + } + ndw = READ_ONCE(rdp->nocb_defer_wakeup); +- WRITE_ONCE(rdp->nocb_defer_wakeup, RCU_NOCB_WAKE_NOT); + wake_nocb_gp(rdp, ndw == RCU_NOCB_WAKE_FORCE, flags); + trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, TPS("DeferredWake")); + } diff --git a/queue-5.10/series b/queue-5.10/series index d32bfd5a27c..ad804b153ca 100644 --- a/queue-5.10/series +++ b/queue-5.10/series @@ -51,3 +51,6 @@ net-dcb-flush-lingering-app-table-entries-for-unregistered-devices.patch net-smc-fix-connection-leak.patch net-smc-fix-unexpected-smc_clc_decl_err_regrmb-error-generated-by-client.patch net-smc-fix-unexpected-smc_clc_decl_err_regrmb-error-cause-by-server.patch +rcu-nocb-fix-missed-nocb_timer-requeue.patch +ice-fix-race-conditions-between-virtchnl-handling-and-vf-ndo-ops.patch +ice-fix-concurrent-reset-and-removal-of-vfs.patch