From: Greg Kroah-Hartman Date: Mon, 28 Feb 2022 11:23:23 +0000 (+0100) Subject: 5.15-stable patches X-Git-Tag: v4.9.304~2 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=06c3d9684f6bd8f488f1922b465328254be5e5ab;p=thirdparty%2Fkernel%2Fstable-queue.git 5.15-stable patches added patches: ice-fix-concurrent-reset-and-removal-of-vfs.patch ice-fix-race-conditions-between-virtchnl-handling-and-vf-ndo-ops.patch --- diff --git a/queue-5.15/ice-fix-concurrent-reset-and-removal-of-vfs.patch b/queue-5.15/ice-fix-concurrent-reset-and-removal-of-vfs.patch new file mode 100644 index 00000000000..a09a1bc26d9 --- /dev/null +++ b/queue-5.15/ice-fix-concurrent-reset-and-removal-of-vfs.patch @@ -0,0 +1,196 @@ +From foo@baz Mon Feb 28 12:22:21 PM CET 2022 +From: Jacob Keller +Date: Fri, 25 Feb 2022 12:21:01 -0800 +Subject: ice: fix concurrent reset and removal of VFs +To: stable@vger.kernel.org +Cc: Jacob Keller , Konrad Jankowski , Tony Nguyen +Message-ID: <20220225202101.4077712-2-jacob.e.keller@intel.com> + +From: Jacob Keller + +commit fadead80fe4c033b5e514fcbadd20b55c4494112 upstream. + +Commit c503e63200c6 ("ice: Stop processing VF messages during teardown") +introduced a driver state flag, ICE_VF_DEINIT_IN_PROGRESS, which is +intended to prevent some issues with concurrently handling messages from +VFs while tearing down the VFs. + +This change was motivated by crashes caused while tearing down and +bringing up VFs in rapid succession. + +It turns out that the fix actually introduces issues with the VF driver +caused because the PF no longer responds to any messages sent by the VF +during its .remove routine. This results in the VF potentially removing +its DMA memory before the PF has shut down the device queues. + +Additionally, the fix doesn't actually resolve concurrency issues within +the ice driver. It is possible for a VF to initiate a reset just prior +to the ice driver removing VFs. This can result in the remove task +concurrently operating while the VF is being reset. This results in +similar memory corruption and panics purportedly fixed by that commit. + +Fix this concurrency at its root by protecting both the reset and +removal flows using the existing VF cfg_lock. This ensures that we +cannot remove the VF while any outstanding critical tasks such as a +virtchnl message or a reset are occurring. + +This locking change also fixes the root cause originally fixed by commit +c503e63200c6 ("ice: Stop processing VF messages during teardown"), so we +can simply revert it. + +Note that I kept these two changes together because simply reverting the +original commit alone would leave the driver vulnerable to worse race +conditions. + +Fixes: c503e63200c6 ("ice: Stop processing VF messages during teardown") +Cc: # e6ba5273d4ed: ice: Fix race conditions between virtchnl handling and VF ndo ops +Cc: +Signed-off-by: Jacob Keller +Tested-by: Konrad Jankowski +Signed-off-by: Tony Nguyen +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/intel/ice/ice.h | 1 + drivers/net/ethernet/intel/ice/ice_main.c | 2 + + drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c | 42 +++++++++++++---------- + 3 files changed, 27 insertions(+), 18 deletions(-) + +--- a/drivers/net/ethernet/intel/ice/ice.h ++++ b/drivers/net/ethernet/intel/ice/ice.h +@@ -231,7 +231,6 @@ enum ice_pf_state { + ICE_VFLR_EVENT_PENDING, + ICE_FLTR_OVERFLOW_PROMISC, + ICE_VF_DIS, +- ICE_VF_DEINIT_IN_PROGRESS, + ICE_CFG_BUSY, + ICE_SERVICE_SCHED, + ICE_SERVICE_DIS, +--- a/drivers/net/ethernet/intel/ice/ice_main.c ++++ b/drivers/net/ethernet/intel/ice/ice_main.c +@@ -1679,7 +1679,9 @@ static void ice_handle_mdd_event(struct + * reset, so print the event prior to reset. + */ + ice_print_vf_rx_mdd_event(vf); ++ mutex_lock(&pf->vf[i].cfg_lock); + ice_reset_vf(&pf->vf[i], false); ++ mutex_unlock(&pf->vf[i].cfg_lock); + } + } + } +--- a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c ++++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c +@@ -615,8 +615,6 @@ void ice_free_vfs(struct ice_pf *pf) + struct ice_hw *hw = &pf->hw; + unsigned int tmp, i; + +- set_bit(ICE_VF_DEINIT_IN_PROGRESS, pf->state); +- + if (!pf->vf) + return; + +@@ -632,22 +630,26 @@ void ice_free_vfs(struct ice_pf *pf) + else + dev_warn(dev, "VFs are assigned - not disabling SR-IOV\n"); + +- /* Avoid wait time by stopping all VFs at the same time */ +- ice_for_each_vf(pf, i) +- ice_dis_vf_qs(&pf->vf[i]); +- + tmp = pf->num_alloc_vfs; + pf->num_qps_per_vf = 0; + pf->num_alloc_vfs = 0; + for (i = 0; i < tmp; i++) { +- if (test_bit(ICE_VF_STATE_INIT, pf->vf[i].vf_states)) { ++ struct ice_vf *vf = &pf->vf[i]; ++ ++ mutex_lock(&vf->cfg_lock); ++ ++ ice_dis_vf_qs(vf); ++ ++ if (test_bit(ICE_VF_STATE_INIT, vf->vf_states)) { + /* disable VF qp mappings and set VF disable state */ +- ice_dis_vf_mappings(&pf->vf[i]); +- set_bit(ICE_VF_STATE_DIS, pf->vf[i].vf_states); +- ice_free_vf_res(&pf->vf[i]); ++ ice_dis_vf_mappings(vf); ++ set_bit(ICE_VF_STATE_DIS, vf->vf_states); ++ ice_free_vf_res(vf); + } + +- mutex_destroy(&pf->vf[i].cfg_lock); ++ mutex_unlock(&vf->cfg_lock); ++ ++ mutex_destroy(&vf->cfg_lock); + } + + if (ice_sriov_free_msix_res(pf)) +@@ -683,7 +685,6 @@ void ice_free_vfs(struct ice_pf *pf) + i); + + clear_bit(ICE_VF_DIS, pf->state); +- clear_bit(ICE_VF_DEINIT_IN_PROGRESS, pf->state); + clear_bit(ICE_FLAG_SRIOV_ENA, pf->flags); + } + +@@ -1567,6 +1568,8 @@ bool ice_reset_all_vfs(struct ice_pf *pf + ice_for_each_vf(pf, v) { + vf = &pf->vf[v]; + ++ mutex_lock(&vf->cfg_lock); ++ + vf->driver_caps = 0; + ice_vc_set_default_allowlist(vf); + +@@ -1581,6 +1584,8 @@ bool ice_reset_all_vfs(struct ice_pf *pf + ice_vf_pre_vsi_rebuild(vf); + ice_vf_rebuild_vsi(vf); + ice_vf_post_vsi_rebuild(vf); ++ ++ mutex_unlock(&vf->cfg_lock); + } + + ice_flush(hw); +@@ -1627,6 +1632,8 @@ bool ice_reset_vf(struct ice_vf *vf, boo + u32 reg; + int i; + ++ lockdep_assert_held(&vf->cfg_lock); ++ + dev = ice_pf_to_dev(pf); + + if (test_bit(ICE_VF_RESETS_DISABLED, pf->state)) { +@@ -2113,9 +2120,12 @@ void ice_process_vflr_event(struct ice_p + bit_idx = (hw->func_caps.vf_base_id + vf_id) % 32; + /* read GLGEN_VFLRSTAT register to find out the flr VFs */ + reg = rd32(hw, GLGEN_VFLRSTAT(reg_idx)); +- if (reg & BIT(bit_idx)) ++ if (reg & BIT(bit_idx)) { + /* GLGEN_VFLRSTAT bit will be cleared in ice_reset_vf */ ++ mutex_lock(&vf->cfg_lock); + ice_reset_vf(vf, true); ++ mutex_unlock(&vf->cfg_lock); ++ } + } + } + +@@ -2192,7 +2202,9 @@ ice_vf_lan_overflow_event(struct ice_pf + if (!vf) + return; + ++ mutex_lock(&vf->cfg_lock); + ice_vc_reset_vf(vf); ++ mutex_unlock(&vf->cfg_lock); + } + + /** +@@ -4429,10 +4441,6 @@ void ice_vc_process_vf_msg(struct ice_pf + struct device *dev; + int err = 0; + +- /* if de-init is underway, don't process messages from VF */ +- if (test_bit(ICE_VF_DEINIT_IN_PROGRESS, pf->state)) +- return; +- + dev = ice_pf_to_dev(pf); + if (ice_validate_vf_id(pf, vf_id)) { + err = -EINVAL; diff --git a/queue-5.15/ice-fix-race-conditions-between-virtchnl-handling-and-vf-ndo-ops.patch b/queue-5.15/ice-fix-race-conditions-between-virtchnl-handling-and-vf-ndo-ops.patch new file mode 100644 index 00000000000..99bba7f5073 --- /dev/null +++ b/queue-5.15/ice-fix-race-conditions-between-virtchnl-handling-and-vf-ndo-ops.patch @@ -0,0 +1,182 @@ +From foo@baz Mon Feb 28 12:22:21 PM CET 2022 +From: Jacob Keller +Date: Fri, 25 Feb 2022 12:21:00 -0800 +Subject: ice: Fix race conditions between virtchnl handling and VF ndo ops +To: stable@vger.kernel.org +Cc: Brett Creeley , Konrad Jankowski , Tony Nguyen , Jacob Keller +Message-ID: <20220225202101.4077712-1-jacob.e.keller@intel.com> + +From: Brett Creeley + +commit e6ba5273d4ede03d075d7a116b8edad1f6115f4d upstream. + +The VF can be configured via the PF's ndo ops at the same time the PF is +receiving/handling virtchnl messages. This has many issues, with +one of them being the ndo op could be actively resetting a VF (i.e. +resetting it to the default state and deleting/re-adding the VF's VSI) +while a virtchnl message is being handled. The following error was seen +because a VF ndo op was used to change a VF's trust setting while the +VIRTCHNL_OP_CONFIG_VSI_QUEUES was ongoing: + +[35274.192484] ice 0000:88:00.0: Failed to set LAN Tx queue context, error: ICE_ERR_PARAM +[35274.193074] ice 0000:88:00.0: VF 0 failed opcode 6, retval: -5 +[35274.193640] iavf 0000:88:01.0: PF returned error -5 (IAVF_ERR_PARAM) to our request 6 + +Fix this by making sure the virtchnl handling and VF ndo ops that +trigger VF resets cannot run concurrently. This is done by adding a +struct mutex cfg_lock to each VF structure. For VF ndo ops, the mutex +will be locked around the critical operations and VFR. Since the ndo ops +will trigger a VFR, the virtchnl thread will use mutex_trylock(). This +is done because if any other thread (i.e. VF ndo op) has the mutex, then +that means the current VF message being handled is no longer valid, so +just ignore it. + +This issue can be seen using the following commands: + +for i in {0..50}; do + rmmod ice + modprobe ice + + sleep 1 + + echo 1 > /sys/class/net/ens785f0/device/sriov_numvfs + echo 1 > /sys/class/net/ens785f1/device/sriov_numvfs + + ip link set ens785f1 vf 0 trust on + ip link set ens785f0 vf 0 trust on + + sleep 2 + + echo 0 > /sys/class/net/ens785f0/device/sriov_numvfs + echo 0 > /sys/class/net/ens785f1/device/sriov_numvfs + sleep 1 + echo 1 > /sys/class/net/ens785f0/device/sriov_numvfs + echo 1 > /sys/class/net/ens785f1/device/sriov_numvfs + + ip link set ens785f1 vf 0 trust on + ip link set ens785f0 vf 0 trust on +done + +Fixes: 7c710869d64e ("ice: Add handlers for VF netdevice operations") +Cc: +Signed-off-by: Brett Creeley +Tested-by: Konrad Jankowski +Signed-off-by: Tony Nguyen +[I had to fix the cherry-pick manually as the patch added a line around +some context that was missing.] +Signed-off-by: Jacob Keller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c | 25 +++++++++++++++++++++++ + drivers/net/ethernet/intel/ice/ice_virtchnl_pf.h | 5 ++++ + 2 files changed, 30 insertions(+) + +--- a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c ++++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c +@@ -646,6 +646,8 @@ void ice_free_vfs(struct ice_pf *pf) + set_bit(ICE_VF_STATE_DIS, pf->vf[i].vf_states); + ice_free_vf_res(&pf->vf[i]); + } ++ ++ mutex_destroy(&pf->vf[i].cfg_lock); + } + + if (ice_sriov_free_msix_res(pf)) +@@ -1894,6 +1896,8 @@ static void ice_set_dflt_settings_vfs(st + */ + ice_vf_ctrl_invalidate_vsi(vf); + ice_vf_fdir_init(vf); ++ ++ mutex_init(&vf->cfg_lock); + } + } + +@@ -4082,6 +4086,8 @@ ice_set_vf_port_vlan(struct net_device * + return 0; + } + ++ mutex_lock(&vf->cfg_lock); ++ + vf->port_vlan_info = vlanprio; + + if (vf->port_vlan_info) +@@ -4091,6 +4097,7 @@ ice_set_vf_port_vlan(struct net_device * + dev_info(dev, "Clearing port VLAN on VF %d\n", vf_id); + + ice_vc_reset_vf(vf); ++ mutex_unlock(&vf->cfg_lock); + + return 0; + } +@@ -4465,6 +4472,15 @@ error_handler: + return; + } + ++ /* VF is being configured in another context that triggers a VFR, so no ++ * need to process this message ++ */ ++ if (!mutex_trylock(&vf->cfg_lock)) { ++ dev_info(dev, "VF %u is being configured in another context that will trigger a VFR, so there is no need to handle this message\n", ++ vf->vf_id); ++ return; ++ } ++ + switch (v_opcode) { + case VIRTCHNL_OP_VERSION: + err = ice_vc_get_ver_msg(vf, msg); +@@ -4553,6 +4569,8 @@ error_handler: + dev_info(dev, "PF failed to honor VF %d, opcode %d, error %d\n", + vf_id, v_opcode, err); + } ++ ++ mutex_unlock(&vf->cfg_lock); + } + + /** +@@ -4668,6 +4686,8 @@ int ice_set_vf_mac(struct net_device *ne + return -EINVAL; + } + ++ mutex_lock(&vf->cfg_lock); ++ + /* VF is notified of its new MAC via the PF's response to the + * VIRTCHNL_OP_GET_VF_RESOURCES message after the VF has been reset + */ +@@ -4686,6 +4706,7 @@ int ice_set_vf_mac(struct net_device *ne + } + + ice_vc_reset_vf(vf); ++ mutex_unlock(&vf->cfg_lock); + return 0; + } + +@@ -4715,11 +4736,15 @@ int ice_set_vf_trust(struct net_device * + if (trusted == vf->trusted) + return 0; + ++ mutex_lock(&vf->cfg_lock); ++ + vf->trusted = trusted; + ice_vc_reset_vf(vf); + dev_info(ice_pf_to_dev(pf), "VF %u is now %strusted\n", + vf_id, trusted ? "" : "un"); + ++ mutex_unlock(&vf->cfg_lock); ++ + return 0; + } + +--- a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.h ++++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.h +@@ -74,6 +74,11 @@ struct ice_mdd_vf_events { + struct ice_vf { + struct ice_pf *pf; + ++ /* Used during virtchnl message handling and NDO ops against the VF ++ * that will trigger a VFR ++ */ ++ struct mutex cfg_lock; ++ + u16 vf_id; /* VF ID in the PF space */ + u16 lan_vsi_idx; /* index into PF struct */ + u16 ctrl_vsi_idx; diff --git a/queue-5.15/series b/queue-5.15/series index f254ab8edfa..fb84a01d9ee 100644 --- a/queue-5.15/series +++ b/queue-5.15/series @@ -135,3 +135,5 @@ pinctrl-fix-loop-in-k210_pinconf_get_drive.patch pinctrl-k210-fix-bias-pull-up.patch gpio-tegra186-fix-chip_data-type-confusion.patch memblock-use-kfree-to-release-kmalloced-memblock-regions.patch +ice-fix-race-conditions-between-virtchnl-handling-and-vf-ndo-ops.patch +ice-fix-concurrent-reset-and-removal-of-vfs.patch