From 0bec6a84d7034e182c3b82953a35bcb16280cd5c Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Thu, 27 Jan 2022 16:48:53 +0100 Subject: [PATCH] 5.16-stable patches added patches: bnx2x-invalidate-fastpath-hsi-version-for-vfs.patch bnx2x-utilize-firmware-7.13.21.0.patch io_uring-fix-not-released-cached-task-refs.patch memcg-better-bounds-on-the-memcg-stats-updates.patch rcu-tighten-rcu_advance_cbs_nowake-checks.patch select-fix-indefinitely-sleeping-task-in-poll_schedule_timeout.patch --- ...alidate-fastpath-hsi-version-for-vfs.patch | 56 ++++ .../bnx2x-utilize-firmware-7.13.21.0.patch | 257 ++++++++++++++++++ ...ng-fix-not-released-cached-task-refs.patch | 93 +++++++ ...er-bounds-on-the-memcg-stats-updates.patch | 99 +++++++ ...ighten-rcu_advance_cbs_nowake-checks.patch | 43 +++ ...eeping-task-in-poll_schedule_timeout.patch | 135 +++++++++ queue-5.16/series | 6 + 7 files changed, 689 insertions(+) create mode 100644 queue-5.16/bnx2x-invalidate-fastpath-hsi-version-for-vfs.patch create mode 100644 queue-5.16/bnx2x-utilize-firmware-7.13.21.0.patch create mode 100644 queue-5.16/io_uring-fix-not-released-cached-task-refs.patch create mode 100644 queue-5.16/memcg-better-bounds-on-the-memcg-stats-updates.patch create mode 100644 queue-5.16/rcu-tighten-rcu_advance_cbs_nowake-checks.patch create mode 100644 queue-5.16/select-fix-indefinitely-sleeping-task-in-poll_schedule_timeout.patch diff --git a/queue-5.16/bnx2x-invalidate-fastpath-hsi-version-for-vfs.patch b/queue-5.16/bnx2x-invalidate-fastpath-hsi-version-for-vfs.patch new file mode 100644 index 00000000000..aac0d7b51b2 --- /dev/null +++ b/queue-5.16/bnx2x-invalidate-fastpath-hsi-version-for-vfs.patch @@ -0,0 +1,56 @@ +From foo@baz Thu Jan 27 04:22:04 PM CET 2022 +From: Manish Chopra +Date: Tue, 25 Jan 2022 10:57:49 -0800 +Subject: bnx2x: Invalidate fastpath HSI version for VFs +To: +Cc: , , +Message-ID: <20220125185749.26774-2-manishc@marvell.com> + +From: Manish Chopra + +commit 
802d4d207e75d7208ff75adb712b556c1e91cf1c upstream + +Commit 0a6890b9b4df ("bnx2x: Utilize FW 7.13.15.0.") +added validation for fastpath HSI versions for different +client init which was not meant for SR-IOV VF clients, which +resulted in firmware asserts when running VF clients with +different fastpath HSI version. + +This patch along with the new firmware support in patch #1 +fixes this behavior in order to not validate fastpath HSI +version for the VFs. + +Fixes: 0a6890b9b4df ("bnx2x: Utilize FW 7.13.15.0.") +Signed-off-by: Manish Chopra +Signed-off-by: Prabhakar Kushwaha +Signed-off-by: Alok Prasad +Signed-off-by: Ariel Elior +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c | 13 +++++++++++-- + 1 file changed, 11 insertions(+), 2 deletions(-) + +--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c ++++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c +@@ -758,9 +758,18 @@ static void bnx2x_vf_igu_reset(struct bn + + void bnx2x_vf_enable_access(struct bnx2x *bp, u8 abs_vfid) + { ++ u16 abs_fid; ++ ++ abs_fid = FW_VF_HANDLE(abs_vfid); ++ + /* set the VF-PF association in the FW */ +- storm_memset_vf_to_pf(bp, FW_VF_HANDLE(abs_vfid), BP_FUNC(bp)); +- storm_memset_func_en(bp, FW_VF_HANDLE(abs_vfid), 1); ++ storm_memset_vf_to_pf(bp, abs_fid, BP_FUNC(bp)); ++ storm_memset_func_en(bp, abs_fid, 1); ++ ++ /* Invalidate fp_hsi version for vfs */ ++ if (bp->fw_cap & FW_CAP_INVALIDATE_VF_FP_HSI) ++ REG_WR8(bp, BAR_XSTRORM_INTMEM + ++ XSTORM_ETH_FUNCTION_INFO_FP_HSI_VALID_E2_OFFSET(abs_fid), 0); + + /* clear vf errors*/ + bnx2x_vf_semi_clear_err(bp, abs_vfid); diff --git a/queue-5.16/bnx2x-utilize-firmware-7.13.21.0.patch b/queue-5.16/bnx2x-utilize-firmware-7.13.21.0.patch new file mode 100644 index 00000000000..47dd234eb71 --- /dev/null +++ b/queue-5.16/bnx2x-utilize-firmware-7.13.21.0.patch @@ -0,0 +1,257 @@ +From foo@baz Thu Jan 27 04:22:04 PM CET 2022 +From: Manish Chopra +Date: Tue, 25 Jan 
2022 10:57:48 -0800 +Subject: bnx2x: Utilize firmware 7.13.21.0 +To: +Cc: , , +Message-ID: <20220125185749.26774-1-manishc@marvell.com> + +From: Manish Chopra + +commit b7a49f73059fe6147b6b78e8f674ce0d21237432 upstream + +This new firmware addresses few important issues and enhancements +as mentioned below - + +- Support direct invalidation of FP HSI Ver per function ID, required for + invalidating FP HSI Ver prior to each VF start, as there is no VF start +- BRB hardware block parity error detection support for the driver +- Fix the FCOE underrun flow +- Fix PSOD during FCoE BFS over the NIC ports after preboot driver +- Maintains backward compatibility + +This patch incorporates this new firmware 7.13.21.0 in bnx2x driver. + +Signed-off-by: Manish Chopra +Signed-off-by: Prabhakar Kushwaha +Signed-off-by: Alok Prasad +Signed-off-by: Ariel Elior +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/broadcom/bnx2x/bnx2x.h | 11 ++ + drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c | 6 - + drivers/net/ethernet/broadcom/bnx2x/bnx2x_fw_defs.h | 2 + drivers/net/ethernet/broadcom/bnx2x/bnx2x_hsi.h | 3 + drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c | 75 ++++++++++++++------ + 5 files changed, 69 insertions(+), 28 deletions(-) + +--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h ++++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h +@@ -1850,6 +1850,14 @@ struct bnx2x { + + /* Vxlan/Geneve related information */ + u16 udp_tunnel_ports[BNX2X_UDP_PORT_MAX]; ++ ++#define FW_CAP_INVALIDATE_VF_FP_HSI BIT(0) ++ u32 fw_cap; ++ ++ u32 fw_major; ++ u32 fw_minor; ++ u32 fw_rev; ++ u32 fw_eng; + }; + + /* Tx queues may be less or equal to Rx queues */ +@@ -2525,5 +2533,6 @@ void bnx2x_register_phc(struct bnx2x *bp + * Meant for implicit re-load flows. 
+ */ + int bnx2x_vlan_reconfigure_vid(struct bnx2x *bp); +- ++int bnx2x_init_firmware(struct bnx2x *bp); ++void bnx2x_release_firmware(struct bnx2x *bp); + #endif /* bnx2x.h */ +--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c ++++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c +@@ -2364,10 +2364,8 @@ int bnx2x_compare_fw_ver(struct bnx2x *b + if (load_code != FW_MSG_CODE_DRV_LOAD_COMMON_CHIP && + load_code != FW_MSG_CODE_DRV_LOAD_COMMON) { + /* build my FW version dword */ +- u32 my_fw = (BCM_5710_FW_MAJOR_VERSION) + +- (BCM_5710_FW_MINOR_VERSION << 8) + +- (BCM_5710_FW_REVISION_VERSION << 16) + +- (BCM_5710_FW_ENGINEERING_VERSION << 24); ++ u32 my_fw = (bp->fw_major) + (bp->fw_minor << 8) + ++ (bp->fw_rev << 16) + (bp->fw_eng << 24); + + /* read loaded FW from chip */ + u32 loaded_fw = REG_RD(bp, XSEM_REG_PRAM); +--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_fw_defs.h ++++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_fw_defs.h +@@ -241,6 +241,8 @@ + IRO[221].m2)) + #define XSTORM_VF_TO_PF_OFFSET(funcId) \ + (IRO[48].base + ((funcId) * IRO[48].m1)) ++#define XSTORM_ETH_FUNCTION_INFO_FP_HSI_VALID_E2_OFFSET(fid) \ ++ (IRO[386].base + ((fid) * IRO[386].m1)) + #define COMMON_ASM_INVALID_ASSERT_OPCODE 0x0 + + /* eth hsi version */ +--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_hsi.h ++++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_hsi.h +@@ -3024,7 +3024,8 @@ struct afex_stats { + + #define BCM_5710_FW_MAJOR_VERSION 7 + #define BCM_5710_FW_MINOR_VERSION 13 +-#define BCM_5710_FW_REVISION_VERSION 15 ++#define BCM_5710_FW_REVISION_VERSION 21 ++#define BCM_5710_FW_REVISION_VERSION_V15 15 + #define BCM_5710_FW_ENGINEERING_VERSION 0 + #define BCM_5710_FW_COMPILE_FLAGS 1 + +--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c ++++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c +@@ -74,9 +74,19 @@ + __stringify(BCM_5710_FW_MINOR_VERSION) "." \ + __stringify(BCM_5710_FW_REVISION_VERSION) "." 
\ + __stringify(BCM_5710_FW_ENGINEERING_VERSION) ++ ++#define FW_FILE_VERSION_V15 \ ++ __stringify(BCM_5710_FW_MAJOR_VERSION) "." \ ++ __stringify(BCM_5710_FW_MINOR_VERSION) "." \ ++ __stringify(BCM_5710_FW_REVISION_VERSION_V15) "." \ ++ __stringify(BCM_5710_FW_ENGINEERING_VERSION) ++ + #define FW_FILE_NAME_E1 "bnx2x/bnx2x-e1-" FW_FILE_VERSION ".fw" + #define FW_FILE_NAME_E1H "bnx2x/bnx2x-e1h-" FW_FILE_VERSION ".fw" + #define FW_FILE_NAME_E2 "bnx2x/bnx2x-e2-" FW_FILE_VERSION ".fw" ++#define FW_FILE_NAME_E1_V15 "bnx2x/bnx2x-e1-" FW_FILE_VERSION_V15 ".fw" ++#define FW_FILE_NAME_E1H_V15 "bnx2x/bnx2x-e1h-" FW_FILE_VERSION_V15 ".fw" ++#define FW_FILE_NAME_E2_V15 "bnx2x/bnx2x-e2-" FW_FILE_VERSION_V15 ".fw" + + /* Time in jiffies before concluding the transmitter is hung */ + #define TX_TIMEOUT (5*HZ) +@@ -747,9 +757,7 @@ static int bnx2x_mc_assert(struct bnx2x + CHIP_IS_E1(bp) ? "everest1" : + CHIP_IS_E1H(bp) ? "everest1h" : + CHIP_IS_E2(bp) ? "everest2" : "everest3", +- BCM_5710_FW_MAJOR_VERSION, +- BCM_5710_FW_MINOR_VERSION, +- BCM_5710_FW_REVISION_VERSION); ++ bp->fw_major, bp->fw_minor, bp->fw_rev); + + return rc; + } +@@ -12308,6 +12316,15 @@ static int bnx2x_init_bp(struct bnx2x *b + + bnx2x_read_fwinfo(bp); + ++ if (IS_PF(bp)) { ++ rc = bnx2x_init_firmware(bp); ++ ++ if (rc) { ++ bnx2x_free_mem_bp(bp); ++ return rc; ++ } ++ } ++ + func = BP_FUNC(bp); + + /* need to reset chip if undi was active */ +@@ -12320,6 +12337,7 @@ static int bnx2x_init_bp(struct bnx2x *b + + rc = bnx2x_prev_unload(bp); + if (rc) { ++ bnx2x_release_firmware(bp); + bnx2x_free_mem_bp(bp); + return rc; + } +@@ -13317,16 +13335,11 @@ static int bnx2x_check_firmware(struct b + /* Check FW version */ + offset = be32_to_cpu(fw_hdr->fw_version.offset); + fw_ver = firmware->data + offset; +- if ((fw_ver[0] != BCM_5710_FW_MAJOR_VERSION) || +- (fw_ver[1] != BCM_5710_FW_MINOR_VERSION) || +- (fw_ver[2] != BCM_5710_FW_REVISION_VERSION) || +- (fw_ver[3] != BCM_5710_FW_ENGINEERING_VERSION)) { ++ if 
(fw_ver[0] != bp->fw_major || fw_ver[1] != bp->fw_minor || ++ fw_ver[2] != bp->fw_rev || fw_ver[3] != bp->fw_eng) { + BNX2X_ERR("Bad FW version:%d.%d.%d.%d. Should be %d.%d.%d.%d\n", +- fw_ver[0], fw_ver[1], fw_ver[2], fw_ver[3], +- BCM_5710_FW_MAJOR_VERSION, +- BCM_5710_FW_MINOR_VERSION, +- BCM_5710_FW_REVISION_VERSION, +- BCM_5710_FW_ENGINEERING_VERSION); ++ fw_ver[0], fw_ver[1], fw_ver[2], fw_ver[3], ++ bp->fw_major, bp->fw_minor, bp->fw_rev, bp->fw_eng); + return -EINVAL; + } + +@@ -13404,34 +13417,51 @@ do { \ + (u8 *)bp->arr, len); \ + } while (0) + +-static int bnx2x_init_firmware(struct bnx2x *bp) ++int bnx2x_init_firmware(struct bnx2x *bp) + { +- const char *fw_file_name; ++ const char *fw_file_name, *fw_file_name_v15; + struct bnx2x_fw_file_hdr *fw_hdr; + int rc; + + if (bp->firmware) + return 0; + +- if (CHIP_IS_E1(bp)) ++ if (CHIP_IS_E1(bp)) { + fw_file_name = FW_FILE_NAME_E1; +- else if (CHIP_IS_E1H(bp)) ++ fw_file_name_v15 = FW_FILE_NAME_E1_V15; ++ } else if (CHIP_IS_E1H(bp)) { + fw_file_name = FW_FILE_NAME_E1H; +- else if (!CHIP_IS_E1x(bp)) ++ fw_file_name_v15 = FW_FILE_NAME_E1H_V15; ++ } else if (!CHIP_IS_E1x(bp)) { + fw_file_name = FW_FILE_NAME_E2; +- else { ++ fw_file_name_v15 = FW_FILE_NAME_E2_V15; ++ } else { + BNX2X_ERR("Unsupported chip revision\n"); + return -EINVAL; + } ++ + BNX2X_DEV_INFO("Loading %s\n", fw_file_name); + + rc = request_firmware(&bp->firmware, fw_file_name, &bp->pdev->dev); + if (rc) { +- BNX2X_ERR("Can't load firmware file %s\n", +- fw_file_name); +- goto request_firmware_exit; ++ BNX2X_DEV_INFO("Trying to load older fw %s\n", fw_file_name_v15); ++ ++ /* try to load prev version */ ++ rc = request_firmware(&bp->firmware, fw_file_name_v15, &bp->pdev->dev); ++ ++ if (rc) ++ goto request_firmware_exit; ++ ++ bp->fw_rev = BCM_5710_FW_REVISION_VERSION_V15; ++ } else { ++ bp->fw_cap |= FW_CAP_INVALIDATE_VF_FP_HSI; ++ bp->fw_rev = BCM_5710_FW_REVISION_VERSION; + } + ++ bp->fw_major = BCM_5710_FW_MAJOR_VERSION; ++ bp->fw_minor = 
BCM_5710_FW_MINOR_VERSION; ++ bp->fw_eng = BCM_5710_FW_ENGINEERING_VERSION; ++ + rc = bnx2x_check_firmware(bp); + if (rc) { + BNX2X_ERR("Corrupt firmware file %s\n", fw_file_name); +@@ -13487,7 +13517,7 @@ request_firmware_exit: + return rc; + } + +-static void bnx2x_release_firmware(struct bnx2x *bp) ++void bnx2x_release_firmware(struct bnx2x *bp) + { + kfree(bp->init_ops_offsets); + kfree(bp->init_ops); +@@ -14004,6 +14034,7 @@ static int bnx2x_init_one(struct pci_dev + return 0; + + init_one_freemem: ++ bnx2x_release_firmware(bp); + bnx2x_free_mem_bp(bp); + + init_one_exit: diff --git a/queue-5.16/io_uring-fix-not-released-cached-task-refs.patch b/queue-5.16/io_uring-fix-not-released-cached-task-refs.patch new file mode 100644 index 00000000000..b3db2629f94 --- /dev/null +++ b/queue-5.16/io_uring-fix-not-released-cached-task-refs.patch @@ -0,0 +1,93 @@ +From 3cc7fdb9f90a25ae92250bf9e6cf3b9556b230e9 Mon Sep 17 00:00:00 2001 +From: Pavel Begunkov +Date: Sun, 9 Jan 2022 00:53:22 +0000 +Subject: io_uring: fix not released cached task refs + +From: Pavel Begunkov + +commit 3cc7fdb9f90a25ae92250bf9e6cf3b9556b230e9 upstream. + +tctx_task_work() may get run after io_uring cancellation and so there +will be no one to put the task refs cached in tctx that may have been added +back by tw handlers using inline completion infra. Call +io_uring_drop_tctx_refs() at the end of the main tw handler to release +them.
+ +Cc: stable@vger.kernel.org # 5.15+ +Reported-by: Lukas Bulwahn +Fixes: e98e49b2bbf7 ("io_uring: extend task put optimisations") +Signed-off-by: Pavel Begunkov +Link: https://lore.kernel.org/r/69f226b35fbdb996ab799a8bbc1c06bf634ccec1.1641688805.git.asml.silence@gmail.com +Signed-off-by: Jens Axboe +Signed-off-by: Greg Kroah-Hartman +--- + fs/io_uring.c | 34 +++++++++++++++++++++------------- + 1 file changed, 21 insertions(+), 13 deletions(-) + +--- a/fs/io_uring.c ++++ b/fs/io_uring.c +@@ -1830,6 +1830,18 @@ static inline void io_get_task_refs(int + io_task_refs_refill(tctx); + } + ++static __cold void io_uring_drop_tctx_refs(struct task_struct *task) ++{ ++ struct io_uring_task *tctx = task->io_uring; ++ unsigned int refs = tctx->cached_refs; ++ ++ if (refs) { ++ tctx->cached_refs = 0; ++ percpu_counter_sub(&tctx->inflight, refs); ++ put_task_struct_many(task, refs); ++ } ++} ++ + static bool io_cqring_event_overflow(struct io_ring_ctx *ctx, u64 user_data, + s32 res, u32 cflags) + { +@@ -2250,6 +2262,10 @@ static void tctx_task_work(struct callba + } + + ctx_flush_and_put(ctx, &locked); ++ ++ /* relaxed read is enough as only the task itself sets ->in_idle */ ++ if (unlikely(atomic_read(&tctx->in_idle))) ++ io_uring_drop_tctx_refs(current); + } + + static void io_req_task_work_add(struct io_kiocb *req) +@@ -9818,18 +9834,6 @@ static s64 tctx_inflight(struct io_uring + return percpu_counter_sum(&tctx->inflight); + } + +-static __cold void io_uring_drop_tctx_refs(struct task_struct *task) +-{ +- struct io_uring_task *tctx = task->io_uring; +- unsigned int refs = tctx->cached_refs; +- +- if (refs) { +- tctx->cached_refs = 0; +- percpu_counter_sub(&tctx->inflight, refs); +- put_task_struct_many(task, refs); +- } +-} +- + /* + * Find any io_uring ctx that this task has registered or done IO on, and cancel + * requests. @sqd should be not-null IFF it's an SQPOLL thread cancellation. 
+@@ -9887,10 +9891,14 @@ static __cold void io_uring_cancel_gener + schedule(); + finish_wait(&tctx->wait, &wait); + } while (1); +- atomic_dec(&tctx->in_idle); + + io_uring_clean_tctx(tctx); + if (cancel_all) { ++ /* ++ * We shouldn't run task_works after cancel, so just leave ++ * ->in_idle set for normal exit. ++ */ ++ atomic_dec(&tctx->in_idle); + /* for exec all current's requests should be gone, kill tctx */ + __io_uring_free(current); + } diff --git a/queue-5.16/memcg-better-bounds-on-the-memcg-stats-updates.patch b/queue-5.16/memcg-better-bounds-on-the-memcg-stats-updates.patch new file mode 100644 index 00000000000..5928c350858 --- /dev/null +++ b/queue-5.16/memcg-better-bounds-on-the-memcg-stats-updates.patch @@ -0,0 +1,99 @@ +From 5b3be698a872c490dbed524f3e2463701ab21339 Mon Sep 17 00:00:00 2001 +From: Shakeel Butt +Date: Fri, 14 Jan 2022 14:05:39 -0800 +Subject: memcg: better bounds on the memcg stats updates +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Shakeel Butt + +commit 5b3be698a872c490dbed524f3e2463701ab21339 upstream. + +Commit 11192d9c124d ("memcg: flush stats only if updated") added +tracking of memcg stats updates which is used by the readers to flush +only if the updates are over a certain threshold. However each +individual update can correspond to a large value change for a given +stat. For example adding or removing a hugepage to an LRU changes the +stat by thp_nr_pages (512 on x86_64). + +Treating the update related to THP as one can keep the stat off, in +theory, by (thp_nr_pages * nr_cpus * CHARGE_BATCH) before flush. + +To handle such scenarios, this patch adds consideration of the stat +update value as well instead of just the update event. In addition let +the async flusher unconditionally flush the stats to put a time limit on +the stats skew and hopefully a lot less readers would need to flush.
+ +Link: https://lkml.kernel.org/r/20211118065350.697046-1-shakeelb@google.com +Signed-off-by: Shakeel Butt +Cc: Johannes Weiner +Cc: Michal Hocko +Cc: "Michal Koutný" +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Cc: Ivan Babrou +Signed-off-by: Greg Kroah-Hartman +--- + mm/memcontrol.c | 20 +++++++++++++------- + 1 file changed, 13 insertions(+), 7 deletions(-) + +--- a/mm/memcontrol.c ++++ b/mm/memcontrol.c +@@ -629,11 +629,17 @@ static DEFINE_SPINLOCK(stats_flush_lock) + static DEFINE_PER_CPU(unsigned int, stats_updates); + static atomic_t stats_flush_threshold = ATOMIC_INIT(0); + +-static inline void memcg_rstat_updated(struct mem_cgroup *memcg) ++static inline void memcg_rstat_updated(struct mem_cgroup *memcg, int val) + { ++ unsigned int x; ++ + cgroup_rstat_updated(memcg->css.cgroup, smp_processor_id()); +- if (!(__this_cpu_inc_return(stats_updates) % MEMCG_CHARGE_BATCH)) +- atomic_inc(&stats_flush_threshold); ++ ++ x = __this_cpu_add_return(stats_updates, abs(val)); ++ if (x > MEMCG_CHARGE_BATCH) { ++ atomic_add(x / MEMCG_CHARGE_BATCH, &stats_flush_threshold); ++ __this_cpu_write(stats_updates, 0); ++ } + } + + static void __mem_cgroup_flush_stats(void) +@@ -656,7 +662,7 @@ void mem_cgroup_flush_stats(void) + + static void flush_memcg_stats_dwork(struct work_struct *w) + { +- mem_cgroup_flush_stats(); ++ __mem_cgroup_flush_stats(); + queue_delayed_work(system_unbound_wq, &stats_flush_dwork, 2UL*HZ); + } + +@@ -672,7 +678,7 @@ void __mod_memcg_state(struct mem_cgroup + return; + + __this_cpu_add(memcg->vmstats_percpu->state[idx], val); +- memcg_rstat_updated(memcg); ++ memcg_rstat_updated(memcg, val); + } + + /* idx can be of type enum memcg_stat_item or node_stat_item. 
*/ +@@ -705,7 +711,7 @@ void __mod_memcg_lruvec_state(struct lru + /* Update lruvec */ + __this_cpu_add(pn->lruvec_stats_percpu->state[idx], val); + +- memcg_rstat_updated(memcg); ++ memcg_rstat_updated(memcg, val); + } + + /** +@@ -789,7 +795,7 @@ void __count_memcg_events(struct mem_cgr + return; + + __this_cpu_add(memcg->vmstats_percpu->events[idx], count); +- memcg_rstat_updated(memcg); ++ memcg_rstat_updated(memcg, count); + } + + static unsigned long memcg_events(struct mem_cgroup *memcg, int event) diff --git a/queue-5.16/rcu-tighten-rcu_advance_cbs_nowake-checks.patch b/queue-5.16/rcu-tighten-rcu_advance_cbs_nowake-checks.patch new file mode 100644 index 00000000000..0f0d742d6b0 --- /dev/null +++ b/queue-5.16/rcu-tighten-rcu_advance_cbs_nowake-checks.patch @@ -0,0 +1,43 @@ +From 614ddad17f22a22e035e2ea37a04815f50362017 Mon Sep 17 00:00:00 2001 +From: "Paul E. McKenney" +Date: Fri, 17 Sep 2021 15:04:48 -0700 +Subject: rcu: Tighten rcu_advance_cbs_nowake() checks + +From: Paul E. McKenney + +commit 614ddad17f22a22e035e2ea37a04815f50362017 upstream. + +Currently, rcu_advance_cbs_nowake() checks that a grace period is in +progress, however, that grace period could end just after the check. +This commit rechecks that a grace period is still in progress while +holding the rcu_node structure's lock. The grace period cannot end while +the current CPU's rcu_node structure's ->lock is held, thus avoiding +false positives from the WARN_ON_ONCE(). + +As Daniel Vacek noted, it is not necessary for the rcu_node structure +to have a CPU that has not yet passed through its quiescent state. + +Tested-by: Guillaume Morin +Signed-off-by: Paul E. 
McKenney +Signed-off-by: Greg Kroah-Hartman +--- + kernel/rcu/tree.c | 7 ++++--- + 1 file changed, 4 insertions(+), 3 deletions(-) + +--- a/kernel/rcu/tree.c ++++ b/kernel/rcu/tree.c +@@ -1590,10 +1590,11 @@ static void __maybe_unused rcu_advance_c + struct rcu_data *rdp) + { + rcu_lockdep_assert_cblist_protected(rdp); +- if (!rcu_seq_state(rcu_seq_current(&rnp->gp_seq)) || +- !raw_spin_trylock_rcu_node(rnp)) ++ if (!rcu_seq_state(rcu_seq_current(&rnp->gp_seq)) || !raw_spin_trylock_rcu_node(rnp)) + return; +- WARN_ON_ONCE(rcu_advance_cbs(rnp, rdp)); ++ // The grace period cannot end while we hold the rcu_node lock. ++ if (rcu_seq_state(rcu_seq_current(&rnp->gp_seq))) ++ WARN_ON_ONCE(rcu_advance_cbs(rnp, rdp)); + raw_spin_unlock_rcu_node(rnp); + } + diff --git a/queue-5.16/select-fix-indefinitely-sleeping-task-in-poll_schedule_timeout.patch b/queue-5.16/select-fix-indefinitely-sleeping-task-in-poll_schedule_timeout.patch new file mode 100644 index 00000000000..9d11e525237 --- /dev/null +++ b/queue-5.16/select-fix-indefinitely-sleeping-task-in-poll_schedule_timeout.patch @@ -0,0 +1,135 @@ +From 68514dacf2715d11b91ca50d88de047c086fea9c Mon Sep 17 00:00:00 2001 +From: Jan Kara +Date: Mon, 10 Jan 2022 19:19:23 +0100 +Subject: select: Fix indefinitely sleeping task in poll_schedule_timeout() + +From: Jan Kara + +commit 68514dacf2715d11b91ca50d88de047c086fea9c upstream. + +A task can end up indefinitely sleeping in do_select() -> +poll_schedule_timeout() when the following race happens: + + TASK1 (thread1) TASK2 TASK1 (thread2) + do_select() + setup poll_wqueues table + with 'fd' + write data to 'fd' + pollwake() + table->triggered = 1 + closes 'fd' thread1 is + waiting for + poll_schedule_timeout() + - sees table->triggered + table->triggered = 0 + return -EINTR + loop back in do_select() + +But at this point when TASK1 loops back, the fdget() in the setup of +poll_wqueues fails. 
So now we never find 'fd' is ready for reading +and sleep in poll_schedule_timeout() indefinitely. + +Treat an fd that got closed as a fd on which some event happened. This +makes sure we cannot block indefinitely in do_select(). + +Another option would be to return -EBADF in this case but that has a +potential of subtly breaking applications that exercise this behavior +and it happens to work for them. So returning fd as active seems like a +safer choice. + +Suggested-by: Linus Torvalds +CC: stable@vger.kernel.org +Signed-off-by: Jan Kara +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman +--- + fs/select.c | 63 +++++++++++++++++++++++++++++++----------------------------- + 1 file changed, 33 insertions(+), 30 deletions(-) + +--- a/fs/select.c ++++ b/fs/select.c +@@ -458,9 +458,11 @@ get_max: + return max; + } + +-#define POLLIN_SET (EPOLLRDNORM | EPOLLRDBAND | EPOLLIN | EPOLLHUP | EPOLLERR) +-#define POLLOUT_SET (EPOLLWRBAND | EPOLLWRNORM | EPOLLOUT | EPOLLERR) +-#define POLLEX_SET (EPOLLPRI) ++#define POLLIN_SET (EPOLLRDNORM | EPOLLRDBAND | EPOLLIN | EPOLLHUP | EPOLLERR |\ ++ EPOLLNVAL) ++#define POLLOUT_SET (EPOLLWRBAND | EPOLLWRNORM | EPOLLOUT | EPOLLERR |\ ++ EPOLLNVAL) ++#define POLLEX_SET (EPOLLPRI | EPOLLNVAL) + + static inline void wait_key_set(poll_table *wait, unsigned long in, + unsigned long out, unsigned long bit, +@@ -527,6 +529,7 @@ static int do_select(int n, fd_set_bits + break; + if (!(bit & all_bits)) + continue; ++ mask = EPOLLNVAL; + f = fdget(i); + if (f.file) { + wait_key_set(wait, in, out, bit, +@@ -534,34 +537,34 @@ static int do_select(int n, fd_set_bits + mask = vfs_poll(f.file, wait); + + fdput(f); +- if ((mask & POLLIN_SET) && (in & bit)) { +- res_in |= bit; +- retval++; +- wait->_qproc = NULL; +- } +- if ((mask & POLLOUT_SET) && (out & bit)) { +- res_out |= bit; +- retval++; +- wait->_qproc = NULL; +- } +- if ((mask & POLLEX_SET) && (ex & bit)) { +- res_ex |= bit; +- retval++; +- wait->_qproc = NULL; +- } +- /* got
something, stop busy polling */ +- if (retval) { +- can_busy_loop = false; +- busy_flag = 0; +- +- /* +- * only remember a returned +- * POLL_BUSY_LOOP if we asked for it +- */ +- } else if (busy_flag & mask) +- can_busy_loop = true; +- + } ++ if ((mask & POLLIN_SET) && (in & bit)) { ++ res_in |= bit; ++ retval++; ++ wait->_qproc = NULL; ++ } ++ if ((mask & POLLOUT_SET) && (out & bit)) { ++ res_out |= bit; ++ retval++; ++ wait->_qproc = NULL; ++ } ++ if ((mask & POLLEX_SET) && (ex & bit)) { ++ res_ex |= bit; ++ retval++; ++ wait->_qproc = NULL; ++ } ++ /* got something, stop busy polling */ ++ if (retval) { ++ can_busy_loop = false; ++ busy_flag = 0; ++ ++ /* ++ * only remember a returned ++ * POLL_BUSY_LOOP if we asked for it ++ */ ++ } else if (busy_flag & mask) ++ can_busy_loop = true; ++ + } + if (res_in) + *rinp = res_in; diff --git a/queue-5.16/series b/queue-5.16/series index 638a593af76..48c91daa53e 100644 --- a/queue-5.16/series +++ b/queue-5.16/series @@ -1,2 +1,8 @@ drm-i915-flush-tlbs-before-releasing-backing-store.patch drm-amd-display-reset-dcn31-smu-mailbox-on-failures.patch +io_uring-fix-not-released-cached-task-refs.patch +bnx2x-utilize-firmware-7.13.21.0.patch +bnx2x-invalidate-fastpath-hsi-version-for-vfs.patch +memcg-better-bounds-on-the-memcg-stats-updates.patch +rcu-tighten-rcu_advance_cbs_nowake-checks.patch +select-fix-indefinitely-sleeping-task-in-poll_schedule_timeout.patch -- 2.47.2