]> git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
5.10-stable patches
authorGreg Kroah-Hartman <gregkh@linuxfoundation.org>
Thu, 27 Jan 2022 16:02:05 +0000 (17:02 +0100)
committerGreg Kroah-Hartman <gregkh@linuxfoundation.org>
Thu, 27 Jan 2022 16:02:05 +0000 (17:02 +0100)
added patches:
bnx2x-invalidate-fastpath-hsi-version-for-vfs.patch
bnx2x-utilize-firmware-7.13.21.0.patch
kvm-x86-mmu-fix-write-protection-of-pts-mapped-by-the-tdp-mmu.patch
rcu-tighten-rcu_advance_cbs_nowake-checks.patch
select-fix-indefinitely-sleeping-task-in-poll_schedule_timeout.patch

queue-5.10/bnx2x-invalidate-fastpath-hsi-version-for-vfs.patch [new file with mode: 0644]
queue-5.10/bnx2x-utilize-firmware-7.13.21.0.patch [new file with mode: 0644]
queue-5.10/kvm-x86-mmu-fix-write-protection-of-pts-mapped-by-the-tdp-mmu.patch [new file with mode: 0644]
queue-5.10/rcu-tighten-rcu_advance_cbs_nowake-checks.patch [new file with mode: 0644]
queue-5.10/select-fix-indefinitely-sleeping-task-in-poll_schedule_timeout.patch [new file with mode: 0644]
queue-5.10/series

diff --git a/queue-5.10/bnx2x-invalidate-fastpath-hsi-version-for-vfs.patch b/queue-5.10/bnx2x-invalidate-fastpath-hsi-version-for-vfs.patch
new file mode 100644 (file)
index 0000000..e707f03
--- /dev/null
@@ -0,0 +1,56 @@
+From foo@baz Thu Jan 27 04:23:09 PM CET 2022
+From: Manish Chopra <manishc@marvell.com>
+Date: Tue, 25 Jan 2022 10:57:49 -0800
+Subject: bnx2x: Invalidate fastpath HSI version for VFs
+To: <stable@vger.kernel.org>
+Cc: <aelior@marvell.com>, <gregkh@linuxfoundation.org>, <manishc@marvell.com>
+Message-ID: <20220125185749.26774-2-manishc@marvell.com>
+
+From: Manish Chopra <manishc@marvell.com>
+
+commit 802d4d207e75d7208ff75adb712b556c1e91cf1c upstream
+
+Commit 0a6890b9b4df ("bnx2x: Utilize FW 7.13.15.0.")
+added validation for fastpath HSI versions for different
+client init which was not meant for SR-IOV VF clients, which
+resulted in firmware asserts when running VF clients with
+different fastpath HSI version.
+
+This patch along with the new firmware support in patch #1
+fixes this behavior in order to not validate fastpath HSI
+version for the VFs.
+
+Fixes: 0a6890b9b4df ("bnx2x: Utilize FW 7.13.15.0.")
+Signed-off-by: Manish Chopra <manishc@marvell.com>
+Signed-off-by: Prabhakar Kushwaha <pkushwaha@marvell.com>
+Signed-off-by: Alok Prasad <palok@marvell.com>
+Signed-off-by: Ariel Elior <aelior@marvell.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c |   13 +++++++++++--
+ 1 file changed, 11 insertions(+), 2 deletions(-)
+
+--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c
++++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c
+@@ -758,9 +758,18 @@ static void bnx2x_vf_igu_reset(struct bn
+ void bnx2x_vf_enable_access(struct bnx2x *bp, u8 abs_vfid)
+ {
++      u16 abs_fid;
++
++      abs_fid = FW_VF_HANDLE(abs_vfid);
++
+       /* set the VF-PF association in the FW */
+-      storm_memset_vf_to_pf(bp, FW_VF_HANDLE(abs_vfid), BP_FUNC(bp));
+-      storm_memset_func_en(bp, FW_VF_HANDLE(abs_vfid), 1);
++      storm_memset_vf_to_pf(bp, abs_fid, BP_FUNC(bp));
++      storm_memset_func_en(bp, abs_fid, 1);
++
++      /* Invalidate fp_hsi version for vfs */
++      if (bp->fw_cap & FW_CAP_INVALIDATE_VF_FP_HSI)
++              REG_WR8(bp, BAR_XSTRORM_INTMEM +
++                          XSTORM_ETH_FUNCTION_INFO_FP_HSI_VALID_E2_OFFSET(abs_fid), 0);
+       /* clear vf errors*/
+       bnx2x_vf_semi_clear_err(bp, abs_vfid);
diff --git a/queue-5.10/bnx2x-utilize-firmware-7.13.21.0.patch b/queue-5.10/bnx2x-utilize-firmware-7.13.21.0.patch
new file mode 100644 (file)
index 0000000..e48a8f3
--- /dev/null
@@ -0,0 +1,257 @@
+From foo@baz Thu Jan 27 04:23:09 PM CET 2022
+From: Manish Chopra <manishc@marvell.com>
+Date: Tue, 25 Jan 2022 10:57:48 -0800
+Subject: bnx2x: Utilize firmware 7.13.21.0
+To: <stable@vger.kernel.org>
+Cc: <aelior@marvell.com>, <gregkh@linuxfoundation.org>, <manishc@marvell.com>
+Message-ID: <20220125185749.26774-1-manishc@marvell.com>
+
+From: Manish Chopra <manishc@marvell.com>
+
+commit b7a49f73059fe6147b6b78e8f674ce0d21237432 upstream
+
+This new firmware addresses a few important issues and enhancements
+as mentioned below -
+
+- Support direct invalidation of FP HSI Ver per function ID, required for
+  invalidating FP HSI Ver prior to each VF start, as there is no VF start
+- BRB hardware block parity error detection support for the driver
+- Fix the FCOE underrun flow
+- Fix PSOD during FCoE BFS over the NIC ports after preboot driver
+- Maintains backward compatibility
+
+This patch incorporates this new firmware 7.13.21.0 in bnx2x driver.
+
+Signed-off-by: Manish Chopra <manishc@marvell.com>
+Signed-off-by: Prabhakar Kushwaha <pkushwaha@marvell.com>
+Signed-off-by: Alok Prasad <palok@marvell.com>
+Signed-off-by: Ariel Elior <aelior@marvell.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/broadcom/bnx2x/bnx2x.h         |   11 ++
+ drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c     |    6 -
+ drivers/net/ethernet/broadcom/bnx2x/bnx2x_fw_defs.h |    2 
+ drivers/net/ethernet/broadcom/bnx2x/bnx2x_hsi.h     |    3 
+ drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c    |   75 ++++++++++++++------
+ 5 files changed, 69 insertions(+), 28 deletions(-)
+
+--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h
++++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h
+@@ -1850,6 +1850,14 @@ struct bnx2x {
+       /* Vxlan/Geneve related information */
+       u16 udp_tunnel_ports[BNX2X_UDP_PORT_MAX];
++
++#define FW_CAP_INVALIDATE_VF_FP_HSI   BIT(0)
++      u32 fw_cap;
++
++      u32 fw_major;
++      u32 fw_minor;
++      u32 fw_rev;
++      u32 fw_eng;
+ };
+ /* Tx queues may be less or equal to Rx queues */
+@@ -2526,5 +2534,6 @@ void bnx2x_register_phc(struct bnx2x *bp
+  * Meant for implicit re-load flows.
+  */
+ int bnx2x_vlan_reconfigure_vid(struct bnx2x *bp);
+-
++int bnx2x_init_firmware(struct bnx2x *bp);
++void bnx2x_release_firmware(struct bnx2x *bp);
+ #endif /* bnx2x.h */
+--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
++++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
+@@ -2364,10 +2364,8 @@ int bnx2x_compare_fw_ver(struct bnx2x *b
+       if (load_code != FW_MSG_CODE_DRV_LOAD_COMMON_CHIP &&
+           load_code != FW_MSG_CODE_DRV_LOAD_COMMON) {
+               /* build my FW version dword */
+-              u32 my_fw = (BCM_5710_FW_MAJOR_VERSION) +
+-                      (BCM_5710_FW_MINOR_VERSION << 8) +
+-                      (BCM_5710_FW_REVISION_VERSION << 16) +
+-                      (BCM_5710_FW_ENGINEERING_VERSION << 24);
++              u32 my_fw = (bp->fw_major) + (bp->fw_minor << 8) +
++                              (bp->fw_rev << 16) + (bp->fw_eng << 24);
+               /* read loaded FW from chip */
+               u32 loaded_fw = REG_RD(bp, XSEM_REG_PRAM);
+--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_fw_defs.h
++++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_fw_defs.h
+@@ -241,6 +241,8 @@
+       IRO[221].m2))
+ #define XSTORM_VF_TO_PF_OFFSET(funcId) \
+       (IRO[48].base + ((funcId) * IRO[48].m1))
++#define XSTORM_ETH_FUNCTION_INFO_FP_HSI_VALID_E2_OFFSET(fid)  \
++      (IRO[386].base + ((fid) * IRO[386].m1))
+ #define COMMON_ASM_INVALID_ASSERT_OPCODE 0x0
+ /* eth hsi version */
+--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_hsi.h
++++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_hsi.h
+@@ -3024,7 +3024,8 @@ struct afex_stats {
+ #define BCM_5710_FW_MAJOR_VERSION                     7
+ #define BCM_5710_FW_MINOR_VERSION                     13
+-#define BCM_5710_FW_REVISION_VERSION          15
++#define BCM_5710_FW_REVISION_VERSION          21
++#define BCM_5710_FW_REVISION_VERSION_V15      15
+ #define BCM_5710_FW_ENGINEERING_VERSION               0
+ #define BCM_5710_FW_COMPILE_FLAGS                     1
+--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
++++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
+@@ -74,9 +74,19 @@
+       __stringify(BCM_5710_FW_MINOR_VERSION) "."      \
+       __stringify(BCM_5710_FW_REVISION_VERSION) "."   \
+       __stringify(BCM_5710_FW_ENGINEERING_VERSION)
++
++#define FW_FILE_VERSION_V15                           \
++      __stringify(BCM_5710_FW_MAJOR_VERSION) "."      \
++      __stringify(BCM_5710_FW_MINOR_VERSION) "."      \
++      __stringify(BCM_5710_FW_REVISION_VERSION_V15) "."       \
++      __stringify(BCM_5710_FW_ENGINEERING_VERSION)
++
+ #define FW_FILE_NAME_E1               "bnx2x/bnx2x-e1-" FW_FILE_VERSION ".fw"
+ #define FW_FILE_NAME_E1H      "bnx2x/bnx2x-e1h-" FW_FILE_VERSION ".fw"
+ #define FW_FILE_NAME_E2               "bnx2x/bnx2x-e2-" FW_FILE_VERSION ".fw"
++#define FW_FILE_NAME_E1_V15   "bnx2x/bnx2x-e1-" FW_FILE_VERSION_V15 ".fw"
++#define FW_FILE_NAME_E1H_V15  "bnx2x/bnx2x-e1h-" FW_FILE_VERSION_V15 ".fw"
++#define FW_FILE_NAME_E2_V15   "bnx2x/bnx2x-e2-" FW_FILE_VERSION_V15 ".fw"
+ /* Time in jiffies before concluding the transmitter is hung */
+ #define TX_TIMEOUT            (5*HZ)
+@@ -747,9 +757,7 @@ static int bnx2x_mc_assert(struct bnx2x
+                 CHIP_IS_E1(bp) ? "everest1" :
+                 CHIP_IS_E1H(bp) ? "everest1h" :
+                 CHIP_IS_E2(bp) ? "everest2" : "everest3",
+-                BCM_5710_FW_MAJOR_VERSION,
+-                BCM_5710_FW_MINOR_VERSION,
+-                BCM_5710_FW_REVISION_VERSION);
++                bp->fw_major, bp->fw_minor, bp->fw_rev);
+       return rc;
+ }
+@@ -12355,6 +12363,15 @@ static int bnx2x_init_bp(struct bnx2x *b
+       bnx2x_read_fwinfo(bp);
++      if (IS_PF(bp)) {
++              rc = bnx2x_init_firmware(bp);
++
++              if (rc) {
++                      bnx2x_free_mem_bp(bp);
++                      return rc;
++              }
++      }
++
+       func = BP_FUNC(bp);
+       /* need to reset chip if undi was active */
+@@ -12367,6 +12384,7 @@ static int bnx2x_init_bp(struct bnx2x *b
+               rc = bnx2x_prev_unload(bp);
+               if (rc) {
++                      bnx2x_release_firmware(bp);
+                       bnx2x_free_mem_bp(bp);
+                       return rc;
+               }
+@@ -13366,16 +13384,11 @@ static int bnx2x_check_firmware(struct b
+       /* Check FW version */
+       offset = be32_to_cpu(fw_hdr->fw_version.offset);
+       fw_ver = firmware->data + offset;
+-      if ((fw_ver[0] != BCM_5710_FW_MAJOR_VERSION) ||
+-          (fw_ver[1] != BCM_5710_FW_MINOR_VERSION) ||
+-          (fw_ver[2] != BCM_5710_FW_REVISION_VERSION) ||
+-          (fw_ver[3] != BCM_5710_FW_ENGINEERING_VERSION)) {
++      if (fw_ver[0] != bp->fw_major || fw_ver[1] != bp->fw_minor ||
++          fw_ver[2] != bp->fw_rev || fw_ver[3] != bp->fw_eng) {
+               BNX2X_ERR("Bad FW version:%d.%d.%d.%d. Should be %d.%d.%d.%d\n",
+-                     fw_ver[0], fw_ver[1], fw_ver[2], fw_ver[3],
+-                     BCM_5710_FW_MAJOR_VERSION,
+-                     BCM_5710_FW_MINOR_VERSION,
+-                     BCM_5710_FW_REVISION_VERSION,
+-                     BCM_5710_FW_ENGINEERING_VERSION);
++                        fw_ver[0], fw_ver[1], fw_ver[2], fw_ver[3],
++                        bp->fw_major, bp->fw_minor, bp->fw_rev, bp->fw_eng);
+               return -EINVAL;
+       }
+@@ -13453,34 +13466,51 @@ do {                                                                 \
+            (u8 *)bp->arr, len);                                       \
+ } while (0)
+-static int bnx2x_init_firmware(struct bnx2x *bp)
++int bnx2x_init_firmware(struct bnx2x *bp)
+ {
+-      const char *fw_file_name;
++      const char *fw_file_name, *fw_file_name_v15;
+       struct bnx2x_fw_file_hdr *fw_hdr;
+       int rc;
+       if (bp->firmware)
+               return 0;
+-      if (CHIP_IS_E1(bp))
++      if (CHIP_IS_E1(bp)) {
+               fw_file_name = FW_FILE_NAME_E1;
+-      else if (CHIP_IS_E1H(bp))
++              fw_file_name_v15 = FW_FILE_NAME_E1_V15;
++      } else if (CHIP_IS_E1H(bp)) {
+               fw_file_name = FW_FILE_NAME_E1H;
+-      else if (!CHIP_IS_E1x(bp))
++              fw_file_name_v15 = FW_FILE_NAME_E1H_V15;
++      } else if (!CHIP_IS_E1x(bp)) {
+               fw_file_name = FW_FILE_NAME_E2;
+-      else {
++              fw_file_name_v15 = FW_FILE_NAME_E2_V15;
++      } else {
+               BNX2X_ERR("Unsupported chip revision\n");
+               return -EINVAL;
+       }
++
+       BNX2X_DEV_INFO("Loading %s\n", fw_file_name);
+       rc = request_firmware(&bp->firmware, fw_file_name, &bp->pdev->dev);
+       if (rc) {
+-              BNX2X_ERR("Can't load firmware file %s\n",
+-                        fw_file_name);
+-              goto request_firmware_exit;
++              BNX2X_DEV_INFO("Trying to load older fw %s\n", fw_file_name_v15);
++
++              /* try to load prev version */
++              rc = request_firmware(&bp->firmware, fw_file_name_v15, &bp->pdev->dev);
++
++              if (rc)
++                      goto request_firmware_exit;
++
++              bp->fw_rev = BCM_5710_FW_REVISION_VERSION_V15;
++      } else {
++              bp->fw_cap |= FW_CAP_INVALIDATE_VF_FP_HSI;
++              bp->fw_rev = BCM_5710_FW_REVISION_VERSION;
+       }
++      bp->fw_major = BCM_5710_FW_MAJOR_VERSION;
++      bp->fw_minor = BCM_5710_FW_MINOR_VERSION;
++      bp->fw_eng = BCM_5710_FW_ENGINEERING_VERSION;
++
+       rc = bnx2x_check_firmware(bp);
+       if (rc) {
+               BNX2X_ERR("Corrupt firmware file %s\n", fw_file_name);
+@@ -13536,7 +13566,7 @@ request_firmware_exit:
+       return rc;
+ }
+-static void bnx2x_release_firmware(struct bnx2x *bp)
++void bnx2x_release_firmware(struct bnx2x *bp)
+ {
+       kfree(bp->init_ops_offsets);
+       kfree(bp->init_ops);
+@@ -14053,6 +14083,7 @@ static int bnx2x_init_one(struct pci_dev
+       return 0;
+ init_one_freemem:
++      bnx2x_release_firmware(bp);
+       bnx2x_free_mem_bp(bp);
+ init_one_exit:
diff --git a/queue-5.10/kvm-x86-mmu-fix-write-protection-of-pts-mapped-by-the-tdp-mmu.patch b/queue-5.10/kvm-x86-mmu-fix-write-protection-of-pts-mapped-by-the-tdp-mmu.patch
new file mode 100644 (file)
index 0000000..29613b6
--- /dev/null
@@ -0,0 +1,56 @@
+From 7c8a4742c4abe205ec9daf416c9d42fd6b406e8e Mon Sep 17 00:00:00 2001
+From: David Matlack <dmatlack@google.com>
+Date: Thu, 13 Jan 2022 23:30:17 +0000
+Subject: KVM: x86/mmu: Fix write-protection of PTs mapped by the TDP MMU
+
+From: David Matlack <dmatlack@google.com>
+
+commit 7c8a4742c4abe205ec9daf416c9d42fd6b406e8e upstream.
+
+When the TDP MMU is write-protecting GFNs for page table protection (as
+opposed to for dirty logging, or due to the HVA not being writable), it
+checks if the SPTE is already write-protected and if so skips modifying
+the SPTE and the TLB flush.
+
+This behavior is incorrect because it fails to check if the SPTE
+is write-protected for page table protection, i.e. fails to check
+that MMU-writable is '0'.  If the SPTE was write-protected for dirty
+logging but not page table protection, the SPTE could locklessly be made
+writable, and vCPUs could still be running with writable mappings cached
+in their TLB.
+
+Fix this by only skipping setting the SPTE if the SPTE is already
+write-protected *and* MMU-writable is already clear.  Technically,
+checking only MMU-writable would suffice; a SPTE cannot be writable
+without MMU-writable being set.  But check both to be paranoid and
+because it arguably yields more readable code.
+
+Fixes: 46044f72c382 ("kvm: x86/mmu: Support write protection for nesting in tdp MMU")
+Cc: stable@vger.kernel.org
+Signed-off-by: David Matlack <dmatlack@google.com>
+Message-Id: <20220113233020.3986005-2-dmatlack@google.com>
+Reviewed-by: Sean Christopherson <seanjc@google.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kvm/mmu/tdp_mmu.c |    6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+--- a/arch/x86/kvm/mmu/tdp_mmu.c
++++ b/arch/x86/kvm/mmu/tdp_mmu.c
+@@ -1130,12 +1130,12 @@ static bool write_protect_gfn(struct kvm
+       bool spte_set = false;
+       tdp_root_for_each_leaf_pte(iter, root, gfn, gfn + 1) {
+-              if (!is_writable_pte(iter.old_spte))
+-                      break;
+-
+               new_spte = iter.old_spte &
+                       ~(PT_WRITABLE_MASK | SPTE_MMU_WRITEABLE);
++              if (new_spte == iter.old_spte)
++                      break;
++
+               tdp_mmu_set_spte(kvm, &iter, new_spte);
+               spte_set = true;
+       }
diff --git a/queue-5.10/rcu-tighten-rcu_advance_cbs_nowake-checks.patch b/queue-5.10/rcu-tighten-rcu_advance_cbs_nowake-checks.patch
new file mode 100644 (file)
index 0000000..9fcb399
--- /dev/null
@@ -0,0 +1,43 @@
+From 614ddad17f22a22e035e2ea37a04815f50362017 Mon Sep 17 00:00:00 2001
+From: "Paul E. McKenney" <paulmck@kernel.org>
+Date: Fri, 17 Sep 2021 15:04:48 -0700
+Subject: rcu: Tighten rcu_advance_cbs_nowake() checks
+
+From: Paul E. McKenney <paulmck@kernel.org>
+
+commit 614ddad17f22a22e035e2ea37a04815f50362017 upstream.
+
+Currently, rcu_advance_cbs_nowake() checks that a grace period is in
+progress, however, that grace period could end just after the check.
+This commit rechecks that a grace period is still in progress while
+holding the rcu_node structure's lock.  The grace period cannot end while
+the current CPU's rcu_node structure's ->lock is held, thus avoiding
+false positives from the WARN_ON_ONCE().
+
+As Daniel Vacek noted, it is not necessary for the rcu_node structure
+to have a CPU that has not yet passed through its quiescent state.
+
+Tested-by: Guillaume Morin <guillaume@morinfr.org>
+Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/rcu/tree.c |    7 ++++---
+ 1 file changed, 4 insertions(+), 3 deletions(-)
+
+--- a/kernel/rcu/tree.c
++++ b/kernel/rcu/tree.c
+@@ -1581,10 +1581,11 @@ static void __maybe_unused rcu_advance_c
+                                                 struct rcu_data *rdp)
+ {
+       rcu_lockdep_assert_cblist_protected(rdp);
+-      if (!rcu_seq_state(rcu_seq_current(&rnp->gp_seq)) ||
+-          !raw_spin_trylock_rcu_node(rnp))
++      if (!rcu_seq_state(rcu_seq_current(&rnp->gp_seq)) || !raw_spin_trylock_rcu_node(rnp))
+               return;
+-      WARN_ON_ONCE(rcu_advance_cbs(rnp, rdp));
++      // The grace period cannot end while we hold the rcu_node lock.
++      if (rcu_seq_state(rcu_seq_current(&rnp->gp_seq)))
++              WARN_ON_ONCE(rcu_advance_cbs(rnp, rdp));
+       raw_spin_unlock_rcu_node(rnp);
+ }
diff --git a/queue-5.10/select-fix-indefinitely-sleeping-task-in-poll_schedule_timeout.patch b/queue-5.10/select-fix-indefinitely-sleeping-task-in-poll_schedule_timeout.patch
new file mode 100644 (file)
index 0000000..9d11e52
--- /dev/null
@@ -0,0 +1,135 @@
+From 68514dacf2715d11b91ca50d88de047c086fea9c Mon Sep 17 00:00:00 2001
+From: Jan Kara <jack@suse.cz>
+Date: Mon, 10 Jan 2022 19:19:23 +0100
+Subject: select: Fix indefinitely sleeping task in poll_schedule_timeout()
+
+From: Jan Kara <jack@suse.cz>
+
+commit 68514dacf2715d11b91ca50d88de047c086fea9c upstream.
+
+A task can end up indefinitely sleeping in do_select() ->
+poll_schedule_timeout() when the following race happens:
+
+  TASK1 (thread1)             TASK2                   TASK1 (thread2)
+  do_select()
+    setup poll_wqueues table
+    with 'fd'
+                              write data to 'fd'
+                                pollwake()
+                                  table->triggered = 1
+                                                      closes 'fd' thread1 is
+                                                        waiting for
+    poll_schedule_timeout()
+      - sees table->triggered
+      table->triggered = 0
+      return -EINTR
+    loop back in do_select()
+
+But at this point when TASK1 loops back, the fdget() in the setup of
+poll_wqueues fails.  So now we never find that 'fd' is ready for reading
+and sleep in poll_schedule_timeout() indefinitely.
+
+Treat an fd that got closed as a fd on which some event happened.  This
+makes sure we cannot block indefinitely in do_select().
+
+Another option would be to return -EBADF in this case but that has a
+potential of subtly breaking applications that exercise this behavior
+and it happens to work for them.  So returning fd as active seems like a
+safer choice.
+
+Suggested-by: Linus Torvalds <torvalds@linux-foundation.org>
+CC: stable@vger.kernel.org
+Signed-off-by: Jan Kara <jack@suse.cz>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/select.c |   63 +++++++++++++++++++++++++++++++-----------------------------
+ 1 file changed, 33 insertions(+), 30 deletions(-)
+
+--- a/fs/select.c
++++ b/fs/select.c
+@@ -458,9 +458,11 @@ get_max:
+       return max;
+ }
+-#define POLLIN_SET (EPOLLRDNORM | EPOLLRDBAND | EPOLLIN | EPOLLHUP | EPOLLERR)
+-#define POLLOUT_SET (EPOLLWRBAND | EPOLLWRNORM | EPOLLOUT | EPOLLERR)
+-#define POLLEX_SET (EPOLLPRI)
++#define POLLIN_SET (EPOLLRDNORM | EPOLLRDBAND | EPOLLIN | EPOLLHUP | EPOLLERR |\
++                      EPOLLNVAL)
++#define POLLOUT_SET (EPOLLWRBAND | EPOLLWRNORM | EPOLLOUT | EPOLLERR |\
++                       EPOLLNVAL)
++#define POLLEX_SET (EPOLLPRI | EPOLLNVAL)
+ static inline void wait_key_set(poll_table *wait, unsigned long in,
+                               unsigned long out, unsigned long bit,
+@@ -527,6 +529,7 @@ static int do_select(int n, fd_set_bits
+                                       break;
+                               if (!(bit & all_bits))
+                                       continue;
++                              mask = EPOLLNVAL;
+                               f = fdget(i);
+                               if (f.file) {
+                                       wait_key_set(wait, in, out, bit,
+@@ -534,34 +537,34 @@ static int do_select(int n, fd_set_bits
+                                       mask = vfs_poll(f.file, wait);
+                                       fdput(f);
+-                                      if ((mask & POLLIN_SET) && (in & bit)) {
+-                                              res_in |= bit;
+-                                              retval++;
+-                                              wait->_qproc = NULL;
+-                                      }
+-                                      if ((mask & POLLOUT_SET) && (out & bit)) {
+-                                              res_out |= bit;
+-                                              retval++;
+-                                              wait->_qproc = NULL;
+-                                      }
+-                                      if ((mask & POLLEX_SET) && (ex & bit)) {
+-                                              res_ex |= bit;
+-                                              retval++;
+-                                              wait->_qproc = NULL;
+-                                      }
+-                                      /* got something, stop busy polling */
+-                                      if (retval) {
+-                                              can_busy_loop = false;
+-                                              busy_flag = 0;
+-
+-                                      /*
+-                                       * only remember a returned
+-                                       * POLL_BUSY_LOOP if we asked for it
+-                                       */
+-                                      } else if (busy_flag & mask)
+-                                              can_busy_loop = true;
+-
+                               }
++                              if ((mask & POLLIN_SET) && (in & bit)) {
++                                      res_in |= bit;
++                                      retval++;
++                                      wait->_qproc = NULL;
++                              }
++                              if ((mask & POLLOUT_SET) && (out & bit)) {
++                                      res_out |= bit;
++                                      retval++;
++                                      wait->_qproc = NULL;
++                              }
++                              if ((mask & POLLEX_SET) && (ex & bit)) {
++                                      res_ex |= bit;
++                                      retval++;
++                                      wait->_qproc = NULL;
++                              }
++                              /* got something, stop busy polling */
++                              if (retval) {
++                                      can_busy_loop = false;
++                                      busy_flag = 0;
++
++                              /*
++                               * only remember a returned
++                               * POLL_BUSY_LOOP if we asked for it
++                               */
++                              } else if (busy_flag & mask)
++                                      can_busy_loop = true;
++
+                       }
+                       if (res_in)
+                               *rinp = res_in;
index 4f89d40aca8a2b04061a946fe2817706b7995418..0598e2918e1e50f6276ef120f64a1dd1a5c3509f 100644 (file)
@@ -1 +1,6 @@
 drm-i915-flush-tlbs-before-releasing-backing-store.patch
+bnx2x-utilize-firmware-7.13.21.0.patch
+bnx2x-invalidate-fastpath-hsi-version-for-vfs.patch
+rcu-tighten-rcu_advance_cbs_nowake-checks.patch
+kvm-x86-mmu-fix-write-protection-of-pts-mapped-by-the-tdp-mmu.patch
+select-fix-indefinitely-sleeping-task-in-poll_schedule_timeout.patch