From e2f328d69869c8a4debccb05a569e81d0982fb6e Mon Sep 17 00:00:00 2001 From: Sasha Levin Date: Sun, 28 Jan 2024 19:17:59 -0500 Subject: [PATCH] Fixes for 5.10 Signed-off-by: Sasha Levin --- ...exynos4210-i9100-unconditionally-ena.patch | 47 +++ ...c7180-fix-usb-wakeup-interrupt-types.patch | 41 ++ ...c7180-use-pdc-interrupts-for-usb-ins.patch | 45 +++ ...ing-btrfs_root_subvol_dead-flag-to-s.patch | 122 ++++++ ...-variable-from-btrfs_delete_subvolum.patch | 123 ++++++ ...e-move-check-to-pipe_has_watch_queue.patch | 96 +++++ ...ix-use-after-free-bug-due-to-error-p.patch | 75 ++++ ...-race-in-accessing-memory_section-us.patch | 210 ++++++++++ ...to_section-instead-of-open-coding-it.patch | 47 +++ ...ting-comment-for-nfsd4_release_locko.patch | 73 ++++ queue-5.10/nfsd-fix-release_lockowner.patch | 149 +++++++ ...sd-modernize-nfsd4_release_lockowner.patch | 86 ++++ ...keup-wr_wait-after-setting-max_usage.patch | 62 +++ ...-remove-unnecessary-void-conversions.patch | 83 ++++ ...alling-put_device-under-dpm_list_mtx.patch | 371 ++++++++++++++++++ ...sible-deadlocks-in-core-system-wide-.patch | 329 ++++++++++++++++ ...m-sleep-use-dev_printk-when-possible.patch | 57 +++ queue-5.10/series | 17 + 18 files changed, 2033 insertions(+) create mode 100644 queue-5.10/arm-dts-samsung-exynos4210-i9100-unconditionally-ena.patch create mode 100644 queue-5.10/arm64-dts-qcom-sc7180-fix-usb-wakeup-interrupt-types.patch create mode 100644 queue-5.10/arm64-dts-qcom-sc7180-use-pdc-interrupts-for-usb-ins.patch create mode 100644 queue-5.10/btrfs-avoid-copying-btrfs_root_subvol_dead-flag-to-s.patch create mode 100644 queue-5.10/btrfs-remove-err-variable-from-btrfs_delete_subvolum.patch create mode 100644 queue-5.10/fs-pipe-move-check-to-pipe_has_watch_queue.patch create mode 100644 queue-5.10/media-mtk-jpeg-fix-use-after-free-bug-due-to-error-p.patch create mode 100644 queue-5.10/mm-sparsemem-fix-race-in-accessing-memory_section-us.patch create mode 100644 
queue-5.10/mm-use-__pfn_to_section-instead-of-open-coding-it.patch create mode 100644 queue-5.10/nfsd-add-documenting-comment-for-nfsd4_release_locko.patch create mode 100644 queue-5.10/nfsd-fix-release_lockowner.patch create mode 100644 queue-5.10/nfsd-modernize-nfsd4_release_lockowner.patch create mode 100644 queue-5.10/pipe-wakeup-wr_wait-after-setting-max_usage.patch create mode 100644 queue-5.10/pm-core-remove-unnecessary-void-conversions.patch create mode 100644 queue-5.10/pm-sleep-avoid-calling-put_device-under-dpm_list_mtx.patch create mode 100644 queue-5.10/pm-sleep-fix-possible-deadlocks-in-core-system-wide-.patch create mode 100644 queue-5.10/pm-sleep-use-dev_printk-when-possible.patch diff --git a/queue-5.10/arm-dts-samsung-exynos4210-i9100-unconditionally-ena.patch b/queue-5.10/arm-dts-samsung-exynos4210-i9100-unconditionally-ena.patch new file mode 100644 index 00000000000..795ed969bdb --- /dev/null +++ b/queue-5.10/arm-dts-samsung-exynos4210-i9100-unconditionally-ena.patch @@ -0,0 +1,47 @@ +From d6243ff10b345aba4e89598987ac638f718ff269 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 6 Dec 2023 23:15:54 +0100 +Subject: ARM: dts: samsung: exynos4210-i9100: Unconditionally enable LDO12 + +From: Paul Cercueil + +[ Upstream commit 84228d5e29dbc7a6be51e221000e1d122125826c ] + +The kernel hangs for a good 12 seconds without any info being printed to +dmesg, very early in the boot process, if this regulator is not enabled. + +Force-enable it to work around this issue, until we know more about the +underlying problem. 
+ +Signed-off-by: Paul Cercueil +Fixes: 8620cc2f99b7 ("ARM: dts: exynos: Add devicetree file for the Galaxy S2") +Cc: stable@vger.kernel.org # v5.8+ +Link: https://lore.kernel.org/r/20231206221556.15348-2-paul@crapouillou.net +Signed-off-by: Krzysztof Kozlowski +Signed-off-by: Sasha Levin +--- + arch/arm/boot/dts/exynos4210-i9100.dts | 8 ++++++++ + 1 file changed, 8 insertions(+) + +diff --git a/arch/arm/boot/dts/exynos4210-i9100.dts b/arch/arm/boot/dts/exynos4210-i9100.dts +index d186b93144e3..525618197197 100644 +--- a/arch/arm/boot/dts/exynos4210-i9100.dts ++++ b/arch/arm/boot/dts/exynos4210-i9100.dts +@@ -464,6 +464,14 @@ vtcam_reg: LDO12 { + regulator-name = "VT_CAM_1.8V"; + regulator-min-microvolt = <1800000>; + regulator-max-microvolt = <1800000>; ++ ++ /* ++ * Force-enable this regulator; otherwise the ++ * kernel hangs very early in the boot process ++ * for about 12 seconds, without apparent ++ * reason. ++ */ ++ regulator-always-on; + }; + + vcclcd_reg: LDO13 { +-- +2.43.0 + diff --git a/queue-5.10/arm64-dts-qcom-sc7180-fix-usb-wakeup-interrupt-types.patch b/queue-5.10/arm64-dts-qcom-sc7180-fix-usb-wakeup-interrupt-types.patch new file mode 100644 index 00000000000..e1ca4a6d7ec --- /dev/null +++ b/queue-5.10/arm64-dts-qcom-sc7180-fix-usb-wakeup-interrupt-types.patch @@ -0,0 +1,41 @@ +From b64708a11f428950131f80afb963d497c093cc04 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 20 Nov 2023 17:43:23 +0100 +Subject: arm64: dts: qcom: sc7180: fix USB wakeup interrupt types + +From: Johan Hovold + +[ Upstream commit 9b956999bf725fd62613f719c3178fdbee6e5f47 ] + +The DP/DM wakeup interrupts are edge triggered and which edge to trigger +on depends on use-case and whether a Low speed or Full/High speed device +is connected. 
+ +Fixes: 0b766e7fe5a2 ("arm64: dts: qcom: sc7180: Add USB related nodes") +Cc: stable@vger.kernel.org # 5.10 +Signed-off-by: Johan Hovold +Link: https://lore.kernel.org/r/20231120164331.8116-4-johan+linaro@kernel.org +Signed-off-by: Bjorn Andersson +Signed-off-by: Sasha Levin +--- + arch/arm64/boot/dts/qcom/sc7180.dtsi | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/arch/arm64/boot/dts/qcom/sc7180.dtsi b/arch/arm64/boot/dts/qcom/sc7180.dtsi +index 6eb82699a4a1..be40821dfeb9 100644 +--- a/arch/arm64/boot/dts/qcom/sc7180.dtsi ++++ b/arch/arm64/boot/dts/qcom/sc7180.dtsi +@@ -2690,8 +2690,8 @@ usb_1: usb@a6f8800 { + + interrupts-extended = <&intc GIC_SPI 131 IRQ_TYPE_LEVEL_HIGH>, + <&pdc 6 IRQ_TYPE_LEVEL_HIGH>, +- <&pdc 8 IRQ_TYPE_LEVEL_HIGH>, +- <&pdc 9 IRQ_TYPE_LEVEL_HIGH>; ++ <&pdc 8 IRQ_TYPE_EDGE_BOTH>, ++ <&pdc 9 IRQ_TYPE_EDGE_BOTH>; + interrupt-names = "hs_phy_irq", "ss_phy_irq", + "dm_hs_phy_irq", "dp_hs_phy_irq"; + +-- +2.43.0 + diff --git a/queue-5.10/arm64-dts-qcom-sc7180-use-pdc-interrupts-for-usb-ins.patch b/queue-5.10/arm64-dts-qcom-sc7180-use-pdc-interrupts-for-usb-ins.patch new file mode 100644 index 00000000000..a0cdae8be90 --- /dev/null +++ b/queue-5.10/arm64-dts-qcom-sc7180-use-pdc-interrupts-for-usb-ins.patch @@ -0,0 +1,45 @@ +From df6ff1bf654f59f28077cbca2f45862d7f8f1eb6 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 9 Jul 2020 00:40:17 +0530 +Subject: arm64: dts: qcom: sc7180: Use pdc interrupts for USB instead of GIC + interrupts + +From: Sandeep Maheswaram + +[ Upstream commit 1e6e6e7a080ca3c1e807473e067ef04d4d005097 ] + +Using pdc interrupts for USB instead of GIC interrupts to +support wake up in case xo shutdown. 
+ +Reviewed-by: Stephen Boyd +Signed-off-by: Sandeep Maheswaram +Link: https://lore.kernel.org/r/1594235417-23066-4-git-send-email-sanm@codeaurora.org +Signed-off-by: Bjorn Andersson +Stable-dep-of: 9b956999bf72 ("arm64: dts: qcom: sc7180: fix USB wakeup interrupt types") +Signed-off-by: Sasha Levin +--- + arch/arm64/boot/dts/qcom/sc7180.dtsi | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +diff --git a/arch/arm64/boot/dts/qcom/sc7180.dtsi b/arch/arm64/boot/dts/qcom/sc7180.dtsi +index eb07a882d43b..6eb82699a4a1 100644 +--- a/arch/arm64/boot/dts/qcom/sc7180.dtsi ++++ b/arch/arm64/boot/dts/qcom/sc7180.dtsi +@@ -2688,10 +2688,10 @@ usb_1: usb@a6f8800 { + <&gcc GCC_USB30_PRIM_MASTER_CLK>; + assigned-clock-rates = <19200000>, <150000000>; + +- interrupts = , +- , +- , +- ; ++ interrupts-extended = <&intc GIC_SPI 131 IRQ_TYPE_LEVEL_HIGH>, ++ <&pdc 6 IRQ_TYPE_LEVEL_HIGH>, ++ <&pdc 8 IRQ_TYPE_LEVEL_HIGH>, ++ <&pdc 9 IRQ_TYPE_LEVEL_HIGH>; + interrupt-names = "hs_phy_irq", "ss_phy_irq", + "dm_hs_phy_irq", "dp_hs_phy_irq"; + +-- +2.43.0 + diff --git a/queue-5.10/btrfs-avoid-copying-btrfs_root_subvol_dead-flag-to-s.patch b/queue-5.10/btrfs-avoid-copying-btrfs_root_subvol_dead-flag-to-s.patch new file mode 100644 index 00000000000..13279be4071 --- /dev/null +++ b/queue-5.10/btrfs-avoid-copying-btrfs_root_subvol_dead-flag-to-s.patch @@ -0,0 +1,122 @@ +From a04c41c4fb924df5e7062ac41d00dfc9459abb9d Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 4 Jan 2024 11:48:47 -0800 +Subject: btrfs: avoid copying BTRFS_ROOT_SUBVOL_DEAD flag to snapshot of + subvolume being deleted + +From: Omar Sandoval + +[ Upstream commit 3324d0547861b16cf436d54abba7052e0c8aa9de ] + +Sweet Tea spotted a race between subvolume deletion and snapshotting +that can result in the root item for the snapshot having the +BTRFS_ROOT_SUBVOL_DEAD flag set. 
The race is: + +Thread 1 | Thread 2 +----------------------------------------------|---------- +btrfs_delete_subvolume | + btrfs_set_root_flags(BTRFS_ROOT_SUBVOL_DEAD)| + |btrfs_mksubvol + | down_read(subvol_sem) + | create_snapshot + | ... + | create_pending_snapshot + | copy root item from source + down_write(subvol_sem) | + +This flag is only checked in send and swap activate, which this would +cause to fail mysteriously. + +create_snapshot() now checks the root refs to reject a deleted +subvolume, so we can fix this by locking subvol_sem earlier so that the +BTRFS_ROOT_SUBVOL_DEAD flag and the root refs are updated atomically. + +CC: stable@vger.kernel.org # 4.14+ +Reported-by: Sweet Tea Dorminy +Reviewed-by: Sweet Tea Dorminy +Reviewed-by: Anand Jain +Signed-off-by: Omar Sandoval +Reviewed-by: David Sterba +Signed-off-by: David Sterba +Signed-off-by: Sasha Levin +--- + fs/btrfs/inode.c | 22 +++++++++++++--------- + 1 file changed, 13 insertions(+), 9 deletions(-) + +diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c +index c6b861f5bcfe..250b6064876d 100644 +--- a/fs/btrfs/inode.c ++++ b/fs/btrfs/inode.c +@@ -4008,6 +4008,8 @@ int btrfs_delete_subvolume(struct inode *dir, struct dentry *dentry) + u64 root_flags; + int ret; + ++ down_write(&fs_info->subvol_sem); ++ + /* + * Don't allow to delete a subvolume with send in progress. 
This is + * inside the inode lock so the error handling that has to drop the bit +@@ -4019,25 +4021,25 @@ int btrfs_delete_subvolume(struct inode *dir, struct dentry *dentry) + btrfs_warn(fs_info, + "attempt to delete subvolume %llu during send", + dest->root_key.objectid); +- return -EPERM; ++ ret = -EPERM; ++ goto out_up_write; + } + if (atomic_read(&dest->nr_swapfiles)) { + spin_unlock(&dest->root_item_lock); + btrfs_warn(fs_info, + "attempt to delete subvolume %llu with active swapfile", + root->root_key.objectid); +- return -EPERM; ++ ret = -EPERM; ++ goto out_up_write; + } + root_flags = btrfs_root_flags(&dest->root_item); + btrfs_set_root_flags(&dest->root_item, + root_flags | BTRFS_ROOT_SUBVOL_DEAD); + spin_unlock(&dest->root_item_lock); + +- down_write(&fs_info->subvol_sem); +- + ret = may_destroy_subvol(dest); + if (ret) +- goto out_up_write; ++ goto out_undead; + + btrfs_init_block_rsv(&block_rsv, BTRFS_BLOCK_RSV_TEMP); + /* +@@ -4047,7 +4049,7 @@ int btrfs_delete_subvolume(struct inode *dir, struct dentry *dentry) + */ + ret = btrfs_subvolume_reserve_metadata(root, &block_rsv, 5, true); + if (ret) +- goto out_up_write; ++ goto out_undead; + + trans = btrfs_start_transaction(root, 0); + if (IS_ERR(trans)) { +@@ -4109,15 +4111,17 @@ int btrfs_delete_subvolume(struct inode *dir, struct dentry *dentry) + inode->i_flags |= S_DEAD; + out_release: + btrfs_subvolume_release_metadata(root, &block_rsv); +-out_up_write: +- up_write(&fs_info->subvol_sem); ++out_undead: + if (ret) { + spin_lock(&dest->root_item_lock); + root_flags = btrfs_root_flags(&dest->root_item); + btrfs_set_root_flags(&dest->root_item, + root_flags & ~BTRFS_ROOT_SUBVOL_DEAD); + spin_unlock(&dest->root_item_lock); +- } else { ++ } ++out_up_write: ++ up_write(&fs_info->subvol_sem); ++ if (!ret) { + d_invalidate(dentry); + btrfs_prune_dentries(dest); + ASSERT(dest->send_in_progress == 0); +-- +2.43.0 + diff --git a/queue-5.10/btrfs-remove-err-variable-from-btrfs_delete_subvolum.patch 
b/queue-5.10/btrfs-remove-err-variable-from-btrfs_delete_subvolum.patch new file mode 100644 index 00000000000..df861eadb3b --- /dev/null +++ b/queue-5.10/btrfs-remove-err-variable-from-btrfs_delete_subvolum.patch @@ -0,0 +1,123 @@ +From f8d42dc3e8639bec236f77654c5c600cbb57794d Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 24 Nov 2020 17:49:30 +0200 +Subject: btrfs: remove err variable from btrfs_delete_subvolume + +From: Nikolay Borisov + +[ Upstream commit ee0d904fd9c5662c58a737c77384f8959fdc8d12 ] + +Use only a single 'ret' to control whether we should abort the +transaction or not. That's fine, because if we abort a transaction then +btrfs_end_transaction will return the same value as passed to +btrfs_abort_transaction. No semantic changes. + +Signed-off-by: Nikolay Borisov +Reviewed-by: David Sterba +Signed-off-by: David Sterba +Stable-dep-of: 3324d0547861 ("btrfs: avoid copying BTRFS_ROOT_SUBVOL_DEAD flag to snapshot of subvolume being deleted") +Signed-off-by: Sasha Levin +--- + fs/btrfs/inode.c | 21 +++++++-------------- + 1 file changed, 7 insertions(+), 14 deletions(-) + +diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c +index c900a39666e3..c6b861f5bcfe 100644 +--- a/fs/btrfs/inode.c ++++ b/fs/btrfs/inode.c +@@ -4007,7 +4007,6 @@ int btrfs_delete_subvolume(struct inode *dir, struct dentry *dentry) + struct btrfs_block_rsv block_rsv; + u64 root_flags; + int ret; +- int err; + + /* + * Don't allow to delete a subvolume with send in progress. This is +@@ -4036,8 +4035,8 @@ int btrfs_delete_subvolume(struct inode *dir, struct dentry *dentry) + + down_write(&fs_info->subvol_sem); + +- err = may_destroy_subvol(dest); +- if (err) ++ ret = may_destroy_subvol(dest); ++ if (ret) + goto out_up_write; + + btrfs_init_block_rsv(&block_rsv, BTRFS_BLOCK_RSV_TEMP); +@@ -4046,13 +4045,13 @@ int btrfs_delete_subvolume(struct inode *dir, struct dentry *dentry) + * two for dir entries, + * two for root ref/backref. 
+ */ +- err = btrfs_subvolume_reserve_metadata(root, &block_rsv, 5, true); +- if (err) ++ ret = btrfs_subvolume_reserve_metadata(root, &block_rsv, 5, true); ++ if (ret) + goto out_up_write; + + trans = btrfs_start_transaction(root, 0); + if (IS_ERR(trans)) { +- err = PTR_ERR(trans); ++ ret = PTR_ERR(trans); + goto out_release; + } + trans->block_rsv = &block_rsv; +@@ -4062,7 +4061,6 @@ int btrfs_delete_subvolume(struct inode *dir, struct dentry *dentry) + + ret = btrfs_unlink_subvol(trans, dir, dentry); + if (ret) { +- err = ret; + btrfs_abort_transaction(trans, ret); + goto out_end_trans; + } +@@ -4080,7 +4078,6 @@ int btrfs_delete_subvolume(struct inode *dir, struct dentry *dentry) + dest->root_key.objectid); + if (ret) { + btrfs_abort_transaction(trans, ret); +- err = ret; + goto out_end_trans; + } + } +@@ -4090,7 +4087,6 @@ int btrfs_delete_subvolume(struct inode *dir, struct dentry *dentry) + dest->root_key.objectid); + if (ret && ret != -ENOENT) { + btrfs_abort_transaction(trans, ret); +- err = ret; + goto out_end_trans; + } + if (!btrfs_is_empty_uuid(dest->root_item.received_uuid)) { +@@ -4100,7 +4096,6 @@ int btrfs_delete_subvolume(struct inode *dir, struct dentry *dentry) + dest->root_key.objectid); + if (ret && ret != -ENOENT) { + btrfs_abort_transaction(trans, ret); +- err = ret; + goto out_end_trans; + } + } +@@ -4111,14 +4106,12 @@ int btrfs_delete_subvolume(struct inode *dir, struct dentry *dentry) + trans->block_rsv = NULL; + trans->bytes_reserved = 0; + ret = btrfs_end_transaction(trans); +- if (ret && !err) +- err = ret; + inode->i_flags |= S_DEAD; + out_release: + btrfs_subvolume_release_metadata(root, &block_rsv); + out_up_write: + up_write(&fs_info->subvol_sem); +- if (err) { ++ if (ret) { + spin_lock(&dest->root_item_lock); + root_flags = btrfs_root_flags(&dest->root_item); + btrfs_set_root_flags(&dest->root_item, +@@ -4136,7 +4129,7 @@ int btrfs_delete_subvolume(struct inode *dir, struct dentry *dentry) + } + } + +- return err; ++ return ret; 
+ } + + static int btrfs_rmdir(struct inode *dir, struct dentry *dentry) +-- +2.43.0 + diff --git a/queue-5.10/fs-pipe-move-check-to-pipe_has_watch_queue.patch b/queue-5.10/fs-pipe-move-check-to-pipe_has_watch_queue.patch new file mode 100644 index 00000000000..219194e8732 --- /dev/null +++ b/queue-5.10/fs-pipe-move-check-to-pipe_has_watch_queue.patch @@ -0,0 +1,96 @@ +From 7cd9eb3b5dad6933775395a0edf7a8023fb62989 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 21 Sep 2023 09:57:53 +0200 +Subject: fs/pipe: move check to pipe_has_watch_queue() + +From: Max Kellermann + +[ Upstream commit b4bd6b4bac8edd61eb8f7b836969d12c0c6af165 ] + +This declutters the code by reducing the number of #ifdefs and makes +the watch_queue checks simpler. This has no runtime effect; the +machine code is identical. + +Signed-off-by: Max Kellermann +Message-Id: <20230921075755.1378787-2-max.kellermann@ionos.com> +Reviewed-by: David Howells +Signed-off-by: Christian Brauner +Stable-dep-of: e95aada4cb93 ("pipe: wakeup wr_wait after setting max_usage") +Signed-off-by: Sasha Levin +--- + fs/pipe.c | 12 +++--------- + include/linux/pipe_fs_i.h | 16 ++++++++++++++++ + 2 files changed, 19 insertions(+), 9 deletions(-) + +diff --git a/fs/pipe.c b/fs/pipe.c +index dbb090e1b026..7b3e94baba21 100644 +--- a/fs/pipe.c ++++ b/fs/pipe.c +@@ -435,12 +435,10 @@ pipe_write(struct kiocb *iocb, struct iov_iter *from) + goto out; + } + +-#ifdef CONFIG_WATCH_QUEUE +- if (pipe->watch_queue) { ++ if (pipe_has_watch_queue(pipe)) { + ret = -EXDEV; + goto out; + } +-#endif + + /* + * If it wasn't empty we try to merge new data into +@@ -1319,10 +1317,8 @@ static long pipe_set_size(struct pipe_inode_info *pipe, unsigned long arg) + unsigned int nr_slots, size; + long ret = 0; + +-#ifdef CONFIG_WATCH_QUEUE +- if (pipe->watch_queue) ++ if (pipe_has_watch_queue(pipe)) + return -EBUSY; +-#endif + + size = round_pipe_size(arg); + nr_slots = size >> PAGE_SHIFT; +@@ -1375,10 +1371,8 @@ struct pipe_inode_info 
*get_pipe_info(struct file *file, bool for_splice) + + if (file->f_op != &pipefifo_fops || !pipe) + return NULL; +-#ifdef CONFIG_WATCH_QUEUE +- if (for_splice && pipe->watch_queue) ++ if (for_splice && pipe_has_watch_queue(pipe)) + return NULL; +-#endif + return pipe; + } + +diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h +index ef236dbaa294..7b72d93c2653 100644 +--- a/include/linux/pipe_fs_i.h ++++ b/include/linux/pipe_fs_i.h +@@ -124,6 +124,22 @@ struct pipe_buf_operations { + bool (*get)(struct pipe_inode_info *, struct pipe_buffer *); + }; + ++/** ++ * pipe_has_watch_queue - Check whether the pipe is a watch_queue, ++ * i.e. it was created with O_NOTIFICATION_PIPE ++ * @pipe: The pipe to check ++ * ++ * Return: true if pipe is a watch queue, false otherwise. ++ */ ++static inline bool pipe_has_watch_queue(const struct pipe_inode_info *pipe) ++{ ++#ifdef CONFIG_WATCH_QUEUE ++ return pipe->watch_queue != NULL; ++#else ++ return false; ++#endif ++} ++ + /** + * pipe_empty - Return true if the pipe is empty + * @head: The pipe ring head pointer +-- +2.43.0 + diff --git a/queue-5.10/media-mtk-jpeg-fix-use-after-free-bug-due-to-error-p.patch b/queue-5.10/media-mtk-jpeg-fix-use-after-free-bug-due-to-error-p.patch new file mode 100644 index 00000000000..30dd43c4b93 --- /dev/null +++ b/queue-5.10/media-mtk-jpeg-fix-use-after-free-bug-due-to-error-p.patch @@ -0,0 +1,75 @@ +From 0274220202d1ad2eb270daaba9e29425a4f50222 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 6 Nov 2023 15:48:10 +0100 +Subject: media: mtk-jpeg: Fix use after free bug due to error path handling in + mtk_jpeg_dec_device_run + +From: Zheng Wang + +[ Upstream commit 206c857dd17d4d026de85866f1b5f0969f2a109e ] + +In mtk_jpeg_probe, &jpeg->job_timeout_work is bound with +mtk_jpeg_job_timeout_work. + +In mtk_jpeg_dec_device_run, if error happens in +mtk_jpeg_set_dec_dst, it will finally start the worker while +mark the job as finished by invoking v4l2_m2m_job_finish. 
+ +There are two methods to trigger the bug. If we remove the +module, it will call mtk_jpeg_remove to perform cleanup. +The possible sequence is as follows, which will cause a +use-after-free bug. + +CPU0 CPU1 +mtk_jpeg_dec_... | + start worker | + |mtk_jpeg_job_timeout_work +mtk_jpeg_remove | + v4l2_m2m_release | + kfree(m2m_dev); | + | + | v4l2_m2m_get_curr_priv + | m2m_dev->curr_ctx //use + +If we close the file descriptor, mtk_jpeg_release is called and +a similar sequence follows. + +Fix this bug by starting the timeout worker only if the jpegdec worker +was started successfully. Then v4l2_m2m_job_finish will only be called in +either mtk_jpeg_job_timeout_work or mtk_jpeg_dec_device_run. + +Fixes: b2f0d2724ba4 ("[media] vcodec: mediatek: Add Mediatek JPEG Decoder Driver") +Signed-off-by: Zheng Wang +Signed-off-by: Dmitry Osipenko +Cc: stable@vger.kernel.org +Signed-off-by: Hans Verkuil +Signed-off-by: Mauro Carvalho Chehab +Signed-off-by: Sasha Levin +--- + drivers/media/platform/mtk-jpeg/mtk_jpeg_core.c | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +diff --git a/drivers/media/platform/mtk-jpeg/mtk_jpeg_core.c b/drivers/media/platform/mtk-jpeg/mtk_jpeg_core.c +index 36109c324cb6..3519c2252ae8 100644 +--- a/drivers/media/platform/mtk-jpeg/mtk_jpeg_core.c ++++ b/drivers/media/platform/mtk-jpeg/mtk_jpeg_core.c +@@ -977,13 +977,13 @@ static void mtk_jpeg_dec_device_run(void *priv) + if (ret < 0) + goto dec_end; + +- schedule_delayed_work(&jpeg->job_timeout_work, +- msecs_to_jiffies(MTK_JPEG_HW_TIMEOUT_MSEC)); +- + mtk_jpeg_set_dec_src(ctx, &src_buf->vb2_buf, &bs); + if (mtk_jpeg_set_dec_dst(ctx, &jpeg_src_buf->dec_param, &dst_buf->vb2_buf, &fb)) + goto dec_end; + ++ schedule_delayed_work(&jpeg->job_timeout_work, ++ msecs_to_jiffies(MTK_JPEG_HW_TIMEOUT_MSEC)); ++ + spin_lock_irqsave(&jpeg->hw_lock, flags); + mtk_jpeg_dec_reset(jpeg->reg_base); + mtk_jpeg_dec_set_config(jpeg->reg_base, +-- +2.43.0 + diff --git
a/queue-5.10/mm-sparsemem-fix-race-in-accessing-memory_section-us.patch b/queue-5.10/mm-sparsemem-fix-race-in-accessing-memory_section-us.patch new file mode 100644 index 00000000000..862a3031e3a --- /dev/null +++ b/queue-5.10/mm-sparsemem-fix-race-in-accessing-memory_section-us.patch @@ -0,0 +1,210 @@ +From 5ac16cc7de85c725aa8d9ed0b9955938de957eac Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 13 Oct 2023 18:34:27 +0530 +Subject: mm/sparsemem: fix race in accessing memory_section->usage + +From: Charan Teja Kalla + +[ Upstream commit 5ec8e8ea8b7783fab150cf86404fc38cb4db8800 ] + +The below race is observed on a PFN which falls into the device memory +region with the system memory configuration where PFN's are such that +[ZONE_NORMAL ZONE_DEVICE ZONE_NORMAL]. Since normal zone start and end +pfn contains the device memory PFN's as well, the compaction triggered +will try on the device memory PFN's too though they end up in NOP(because +pfn_to_online_page() returns NULL for ZONE_DEVICE memory sections). When, +from another core, the section mappings are being removed for the +ZONE_DEVICE region that the PFN in question belongs to, and on which +compaction is currently operating, the result is a kernel crash +with CONFIG_SPARSEMEM_VMEMMAP enabled. The crash logs can be seen at [1]. + +compact_zone() memunmap_pages +------------- --------------- +__pageblock_pfn_to_page + ...... + (a)pfn_valid(): + valid_section()//return true + (b)__remove_pages()-> + sparse_remove_section()-> + section_deactivate(): + [Free the array ms->usage and set + ms->usage = NULL] + pfn_section_valid() + [Access ms->usage which + is NULL] + +NOTE: From the above it can be said that the race is reduced to between +the pfn_valid()/pfn_section_valid() and the section deactivate with +SPARSEMEM_VMEMMAP enabled. 
+ +The commit b943f045a9af("mm/sparse: fix kernel crash with +pfn_section_valid check") tried to address the same problem by clearing +the SECTION_HAS_MEM_MAP with the expectation that valid_section() returns +false, so that ms->usage is not accessed. + +Fix this issue by the below steps: + +a) Clear SECTION_HAS_MEM_MAP before freeing the ->usage. + +b) RCU protected read side critical section will either return NULL + when SECTION_HAS_MEM_MAP is cleared or can successfully access ->usage. + +c) Free the ->usage with kfree_rcu() and set ms->usage = NULL. No + attempt will be made to access ->usage after this as the + SECTION_HAS_MEM_MAP is cleared thus valid_section() returns false. + +Thanks to David/Pavan for their inputs on this patch. + +[1] https://lore.kernel.org/linux-mm/994410bb-89aa-d987-1f50-f514903c55aa@quicinc.com/ + +On Snapdragon SoC, with the mentioned memory configuration of PFN's as +[ZONE_NORMAL ZONE_DEVICE ZONE_NORMAL], we are able to see a bunch of +issues daily while testing on a device farm. + +For this particular issue below is the log. Though the below log does +not directly point to the pfn_section_valid(){ ms->usage;}, when we +loaded this dump on the T32 Lauterbach tool, it does point there. 
+ +[ 540.578056] Unable to handle kernel NULL pointer dereference at +virtual address 0000000000000000 +[ 540.578068] Mem abort info: +[ 540.578070] ESR = 0x0000000096000005 +[ 540.578073] EC = 0x25: DABT (current EL), IL = 32 bits +[ 540.578077] SET = 0, FnV = 0 +[ 540.578080] EA = 0, S1PTW = 0 +[ 540.578082] FSC = 0x05: level 1 translation fault +[ 540.578085] Data abort info: +[ 540.578086] ISV = 0, ISS = 0x00000005 +[ 540.578088] CM = 0, WnR = 0 +[ 540.579431] pstate: 82400005 (Nzcv daif +PAN -UAO +TCO -DIT -SSBSBTYPE=--) +[ 540.579436] pc : __pageblock_pfn_to_page+0x6c/0x14c +[ 540.579454] lr : compact_zone+0x994/0x1058 +[ 540.579460] sp : ffffffc03579b510 +[ 540.579463] x29: ffffffc03579b510 x28: 0000000000235800 x27:000000000000000c +[ 540.579470] x26: 0000000000235c00 x25: 0000000000000068 x24:ffffffc03579b640 +[ 540.579477] x23: 0000000000000001 x22: ffffffc03579b660 x21:0000000000000000 +[ 540.579483] x20: 0000000000235bff x19: ffffffdebf7e3940 x18:ffffffdebf66d140 +[ 540.579489] x17: 00000000739ba063 x16: 00000000739ba063 x15:00000000009f4bff +[ 540.579495] x14: 0000008000000000 x13: 0000000000000000 x12:0000000000000001 +[ 540.579501] x11: 0000000000000000 x10: 0000000000000000 x9 :ffffff897d2cd440 +[ 540.579507] x8 : 0000000000000000 x7 : 0000000000000000 x6 :ffffffc03579b5b4 +[ 540.579512] x5 : 0000000000027f25 x4 : ffffffc03579b5b8 x3 :0000000000000001 +[ 540.579518] x2 : ffffffdebf7e3940 x1 : 0000000000235c00 x0 :0000000000235800 +[ 540.579524] Call trace: +[ 540.579527] __pageblock_pfn_to_page+0x6c/0x14c +[ 540.579533] compact_zone+0x994/0x1058 +[ 540.579536] try_to_compact_pages+0x128/0x378 +[ 540.579540] __alloc_pages_direct_compact+0x80/0x2b0 +[ 540.579544] __alloc_pages_slowpath+0x5c0/0xe10 +[ 540.579547] __alloc_pages+0x250/0x2d0 +[ 540.579550] __iommu_dma_alloc_noncontiguous+0x13c/0x3fc +[ 540.579561] iommu_dma_alloc+0xa0/0x320 +[ 540.579565] dma_alloc_attrs+0xd4/0x108 + +[quic_charante@quicinc.com: use kfree_rcu() in place of 
synchronize_rcu(), per David] + Link: https://lkml.kernel.org/r/1698403778-20938-1-git-send-email-quic_charante@quicinc.com +Link: https://lkml.kernel.org/r/1697202267-23600-1-git-send-email-quic_charante@quicinc.com +Fixes: f46edbd1b151 ("mm/sparsemem: add helpers track active portions of a section at boot") +Signed-off-by: Charan Teja Kalla +Cc: Aneesh Kumar K.V +Cc: Dan Williams +Cc: David Hildenbrand +Cc: Mel Gorman +Cc: Oscar Salvador +Cc: Vlastimil Babka +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Sasha Levin +--- + include/linux/mmzone.h | 14 +++++++++++--- + mm/sparse.c | 17 +++++++++-------- + 2 files changed, 20 insertions(+), 11 deletions(-) + +diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h +index 11fa11c31fda..ffae2b330818 100644 +--- a/include/linux/mmzone.h ++++ b/include/linux/mmzone.h +@@ -1188,6 +1188,7 @@ static inline unsigned long section_nr_to_pfn(unsigned long sec) + #define SUBSECTION_ALIGN_DOWN(pfn) ((pfn) & PAGE_SUBSECTION_MASK) + + struct mem_section_usage { ++ struct rcu_head rcu; + #ifdef CONFIG_SPARSEMEM_VMEMMAP + DECLARE_BITMAP(subsection_map, SUBSECTIONS_PER_SECTION); + #endif +@@ -1353,7 +1354,7 @@ static inline int pfn_section_valid(struct mem_section *ms, unsigned long pfn) + { + int idx = subsection_map_index(pfn); + +- return test_bit(idx, ms->usage->subsection_map); ++ return test_bit(idx, READ_ONCE(ms->usage)->subsection_map); + } + #else + static inline int pfn_section_valid(struct mem_section *ms, unsigned long pfn) +@@ -1366,17 +1367,24 @@ static inline int pfn_section_valid(struct mem_section *ms, unsigned long pfn) + static inline int pfn_valid(unsigned long pfn) + { + struct mem_section *ms; ++ int ret; + + if (pfn_to_section_nr(pfn) >= NR_MEM_SECTIONS) + return 0; + ms = __pfn_to_section(pfn); +- if (!valid_section(ms)) ++ rcu_read_lock(); ++ if (!valid_section(ms)) { ++ rcu_read_unlock(); + return 0; ++ } + /* + * Traditionally early sections always returned pfn_valid() for + * the entire 
section-sized span. + */ +- return early_section(ms) || pfn_section_valid(ms, pfn); ++ ret = early_section(ms) || pfn_section_valid(ms, pfn); ++ rcu_read_unlock(); ++ ++ return ret; + } + #endif + +diff --git a/mm/sparse.c b/mm/sparse.c +index 33406ea2ecc4..db0a7c53775b 100644 +--- a/mm/sparse.c ++++ b/mm/sparse.c +@@ -809,6 +809,13 @@ static void section_deactivate(unsigned long pfn, unsigned long nr_pages, + if (empty) { + unsigned long section_nr = pfn_to_section_nr(pfn); + ++ /* ++ * Mark the section invalid so that valid_section() ++ * return false. This prevents code from dereferencing ++ * ms->usage array. ++ */ ++ ms->section_mem_map &= ~SECTION_HAS_MEM_MAP; ++ + /* + * When removing an early section, the usage map is kept (as the + * usage maps of other sections fall into the same page). It +@@ -817,16 +824,10 @@ static void section_deactivate(unsigned long pfn, unsigned long nr_pages, + * was allocated during boot. + */ + if (!PageReserved(virt_to_page(ms->usage))) { +- kfree(ms->usage); +- ms->usage = NULL; ++ kfree_rcu(ms->usage, rcu); ++ WRITE_ONCE(ms->usage, NULL); + } + memmap = sparse_decode_mem_map(ms->section_mem_map, section_nr); +- /* +- * Mark the section invalid so that valid_section() +- * return false. This prevents code from dereferencing +- * ms->usage array. 
+- */ +- ms->section_mem_map &= ~SECTION_HAS_MEM_MAP; + } + + /* +-- +2.43.0 + diff --git a/queue-5.10/mm-use-__pfn_to_section-instead-of-open-coding-it.patch b/queue-5.10/mm-use-__pfn_to_section-instead-of-open-coding-it.patch new file mode 100644 index 00000000000..d53fa95f39c --- /dev/null +++ b/queue-5.10/mm-use-__pfn_to_section-instead-of-open-coding-it.patch @@ -0,0 +1,47 @@ +From b3c6ce29108eb706dc9a5440e6edd9596e779f5b Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 5 Nov 2021 13:38:15 -0700 +Subject: mm: use __pfn_to_section() instead of open coding it + +From: Rolf Eike Beer + +[ Upstream commit f1dc0db296bd25960273649fc6ef2ecbf5aaa0e0 ] + +It is defined in the same file just a few lines above. + +Link: https://lkml.kernel.org/r/4598487.Rc0NezkW7i@mobilepool36.emlix.com +Signed-off-by: Rolf Eike Beer +Reviewed-by: Andrew Morton +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Stable-dep-of: 5ec8e8ea8b77 ("mm/sparsemem: fix race in accessing memory_section->usage") +Signed-off-by: Sasha Levin +--- + include/linux/mmzone.h | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h +index b2e4599b8883..11fa11c31fda 100644 +--- a/include/linux/mmzone.h ++++ b/include/linux/mmzone.h +@@ -1369,7 +1369,7 @@ static inline int pfn_valid(unsigned long pfn) + + if (pfn_to_section_nr(pfn) >= NR_MEM_SECTIONS) + return 0; +- ms = __nr_to_section(pfn_to_section_nr(pfn)); ++ ms = __pfn_to_section(pfn); + if (!valid_section(ms)) + return 0; + /* +@@ -1384,7 +1384,7 @@ static inline int pfn_in_present_section(unsigned long pfn) + { + if (pfn_to_section_nr(pfn) >= NR_MEM_SECTIONS) + return 0; +- return present_section(__nr_to_section(pfn_to_section_nr(pfn))); ++ return present_section(__pfn_to_section(pfn)); + } + + static inline unsigned long next_present_section_nr(unsigned long section_nr) +-- +2.43.0 + diff --git a/queue-5.10/nfsd-add-documenting-comment-for-nfsd4_release_locko.patch 
b/queue-5.10/nfsd-add-documenting-comment-for-nfsd4_release_locko.patch new file mode 100644 index 00000000000..00d47109b5f --- /dev/null +++ b/queue-5.10/nfsd-add-documenting-comment-for-nfsd4_release_locko.patch @@ -0,0 +1,73 @@ +From a2cb252f9ce3c3a5d8e92e88b23550e02c0e0b22 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sun, 22 May 2022 12:34:38 -0400 +Subject: NFSD: Add documenting comment for nfsd4_release_lockowner() + +From: Chuck Lever + +[ Upstream commit 043862b09cc00273e35e6c3a6389957953a34207 ] + +And return explicit nfserr values that match what is documented in the +new comment / API contract. + +Signed-off-by: Chuck Lever +Stable-dep-of: edcf9725150e ("nfsd: fix RELEASE_LOCKOWNER") +Signed-off-by: Sasha Levin +--- + fs/nfsd/nfs4state.c | 23 ++++++++++++++++++++--- + 1 file changed, 20 insertions(+), 3 deletions(-) + +diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c +index 1b40b2197ce6..b6480be7b5e6 100644 +--- a/fs/nfsd/nfs4state.c ++++ b/fs/nfsd/nfs4state.c +@@ -7107,6 +7107,23 @@ check_for_locks(struct nfs4_file *fp, struct nfs4_lockowner *lowner) + return status; + } + ++/** ++ * nfsd4_release_lockowner - process NFSv4.0 RELEASE_LOCKOWNER operations ++ * @rqstp: RPC transaction ++ * @cstate: NFSv4 COMPOUND state ++ * @u: RELEASE_LOCKOWNER arguments ++ * ++ * The lockowner's so_count is bumped when a lock record is added ++ * or when copying a conflicting lock. The latter case is brief, ++ * but can lead to fleeting false positives when looking for ++ * locks-in-use. 
++ * ++ * Return values: ++ * %nfs_ok: lockowner released or not found ++ * %nfserr_locks_held: lockowner still in use ++ * %nfserr_stale_clientid: clientid no longer active ++ * %nfserr_expired: clientid not recognized ++ */ + __be32 + nfsd4_release_lockowner(struct svc_rqst *rqstp, + struct nfsd4_compound_state *cstate, +@@ -7133,7 +7150,7 @@ nfsd4_release_lockowner(struct svc_rqst *rqstp, + lo = find_lockowner_str_locked(clp, &rlockowner->rl_owner); + if (!lo) { + spin_unlock(&clp->cl_lock); +- return status; ++ return nfs_ok; + } + if (atomic_read(&lo->lo_owner.so_count) != 2) { + spin_unlock(&clp->cl_lock); +@@ -7149,11 +7166,11 @@ nfsd4_release_lockowner(struct svc_rqst *rqstp, + put_ol_stateid_locked(stp, &reaplist); + } + spin_unlock(&clp->cl_lock); ++ + free_ol_stateid_reaplist(&reaplist); + remove_blocked_locks(lo); + nfs4_put_stateowner(&lo->lo_owner); +- +- return status; ++ return nfs_ok; + } + + static inline struct nfs4_client_reclaim * +-- +2.43.0 + diff --git a/queue-5.10/nfsd-fix-release_lockowner.patch b/queue-5.10/nfsd-fix-release_lockowner.patch new file mode 100644 index 00000000000..093879ef2d3 --- /dev/null +++ b/queue-5.10/nfsd-fix-release_lockowner.patch @@ -0,0 +1,149 @@ +From cb99b2854e2c04fd668e5ea9fec4625f970bc3c5 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 22 Jan 2024 14:58:16 +1100 +Subject: nfsd: fix RELEASE_LOCKOWNER + +From: NeilBrown + +[ Upstream commit edcf9725150e42beeca42d085149f4c88fa97afd ] + +The test on so_count in nfsd4_release_lockowner() is nonsense and +harmful. Revert to using check_for_locks(), changing that to not sleep. + +First: harmful. +As is documented in the kdoc comment for nfsd4_release_lockowner(), the +test on so_count can transiently return a false positive resulting in a +return of NFS4ERR_LOCKS_HELD when in fact no locks are held. This is +clearly a protocol violation and with the Linux NFS client it can cause +incorrect behaviour. 
+ +If RELEASE_LOCKOWNER is sent while some other thread is still +processing a LOCK request which failed because, at the time that request +was received, the given owner held a conflicting lock, then the nfsd +thread processing that LOCK request can hold a reference (conflock) to +the lock owner that causes nfsd4_release_lockowner() to return an +incorrect error. + +The Linux NFS client ignores that NFS4ERR_LOCKS_HELD error because it +never sends NFS4_RELEASE_LOCKOWNER without first releasing any locks, so +it knows that the error is impossible. It assumes the lock owner was in +fact released so it feels free to use the same lock owner identifier in +some later locking request. + +When it does reuse a lock owner identifier for which a previous RELEASE +failed, it will naturally use a lock_seqid of zero. However the server, +which didn't release the lock owner, will expect a larger lock_seqid and +so will respond with NFS4ERR_BAD_SEQID. + +So clearly it is harmful to allow a false positive, which testing +so_count allows. + +The test is nonsense because ... well... it doesn't mean anything. + +so_count is the sum of three different counts. +1/ the set of states listed on so_stateids +2/ the set of active vfs locks owned by any of those states +3/ various transient counts such as for conflicting locks. + +When it is tested against '2' it is clear that one of these is the +transient reference obtained by find_lockowner_str_locked(). It is not +clear what the other one is expected to be. + +In practice, the count is often 2 because there is precisely one state +on so_stateids. If there were more, this would fail. + +In my testing I see two circumstances when RELEASE_LOCKOWNER is called. +In one case, CLOSE is called before RELEASE_LOCKOWNER. That results in +all the lock states being removed, and so the lockowner being discarded +(it is removed when there are no more references which usually happens +when the lock state is discarded). 
When nfsd4_release_lockowner() finds +that the lock owner doesn't exist, it returns success. + +The other case shows an so_count of '2' and precisely one state listed +in so_stateid. It appears that the Linux client uses a separate lock +owner for each file resulting in one lock state per lock owner, so this +test on '2' is safe. For another client it might not be safe. + +So this patch changes check_for_locks() to use the (newish) +find_any_file_locked() so that it doesn't take a reference on the +nfs4_file and so never calls nfsd_file_put(), and so never sleeps. With +this check it is safe to restore the use of check_for_locks() rather +than testing so_count against the mysterious '2'. + +Fixes: ce3c4ad7f4ce ("NFSD: Fix possible sleep during nfsd4_release_lockowner()") +Signed-off-by: NeilBrown +Reviewed-by: Jeff Layton +Cc: stable@vger.kernel.org # v6.2+ +Signed-off-by: Chuck Lever +Signed-off-by: Sasha Levin +--- + fs/nfsd/nfs4state.c | 26 +++++++++++++++----------- + 1 file changed, 15 insertions(+), 11 deletions(-) + +diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c +index b6480be7b5e6..16b073c63798 100644 +--- a/fs/nfsd/nfs4state.c ++++ b/fs/nfsd/nfs4state.c +@@ -7080,14 +7080,16 @@ check_for_locks(struct nfs4_file *fp, struct nfs4_lockowner *lowner) + { + struct file_lock *fl; + int status = false; +- struct nfsd_file *nf = find_any_file(fp); ++ struct nfsd_file *nf; + struct inode *inode; + struct file_lock_context *flctx; + ++ spin_lock(&fp->fi_lock); ++ nf = find_any_file_locked(fp); + if (!nf) { + /* Any valid lock stateid should have some sort of access */ + WARN_ON_ONCE(1); +- return status; ++ goto out; + } + + inode = locks_inode(nf->nf_file); +@@ -7103,7 +7105,8 @@ check_for_locks(struct nfs4_file *fp, struct nfs4_lockowner *lowner) + } + spin_unlock(&flctx->flc_lock); + } +- nfsd_file_put(nf); ++out: ++ spin_unlock(&fp->fi_lock); + return status; + } + +@@ -7113,10 +7116,8 @@ check_for_locks(struct nfs4_file *fp, struct nfs4_lockowner
*lowner) + * @cstate: NFSv4 COMPOUND state + * @u: RELEASE_LOCKOWNER arguments + * +- * The lockowner's so_count is bumped when a lock record is added +- * or when copying a conflicting lock. The latter case is brief, +- * but can lead to fleeting false positives when looking for +- * locks-in-use. ++ * Check if theree are any locks still held and if not - free the lockowner ++ * and any lock state that is owned. + * + * Return values: + * %nfs_ok: lockowner released or not found +@@ -7152,10 +7153,13 @@ nfsd4_release_lockowner(struct svc_rqst *rqstp, + spin_unlock(&clp->cl_lock); + return nfs_ok; + } +- if (atomic_read(&lo->lo_owner.so_count) != 2) { +- spin_unlock(&clp->cl_lock); +- nfs4_put_stateowner(&lo->lo_owner); +- return nfserr_locks_held; ++ ++ list_for_each_entry(stp, &lo->lo_owner.so_stateids, st_perstateowner) { ++ if (check_for_locks(stp->st_stid.sc_file, lo)) { ++ spin_unlock(&clp->cl_lock); ++ nfs4_put_stateowner(&lo->lo_owner); ++ return nfserr_locks_held; ++ } + } + unhash_lockowner_locked(lo); + while (!list_empty(&lo->lo_owner.so_stateids)) { +-- +2.43.0 + diff --git a/queue-5.10/nfsd-modernize-nfsd4_release_lockowner.patch b/queue-5.10/nfsd-modernize-nfsd4_release_lockowner.patch new file mode 100644 index 00000000000..93544a58484 --- /dev/null +++ b/queue-5.10/nfsd-modernize-nfsd4_release_lockowner.patch @@ -0,0 +1,86 @@ +From f49f08cf70085090d80b1e00688a7d4ffa3f7c94 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sun, 22 May 2022 12:07:18 -0400 +Subject: NFSD: Modernize nfsd4_release_lockowner() + +From: Chuck Lever + +[ Upstream commit bd8fdb6e545f950f4654a9a10d7e819ad48146e5 ] + +Refactor: Use existing helpers that other lock operations use. This +change removes several automatic variables, so re-organize the +variable declarations for readability. 
+ +Signed-off-by: Chuck Lever +Stable-dep-of: edcf9725150e ("nfsd: fix RELEASE_LOCKOWNER") +Signed-off-by: Sasha Levin +--- + fs/nfsd/nfs4state.c | 36 +++++++++++------------------------- + 1 file changed, 11 insertions(+), 25 deletions(-) + +diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c +index d402ca0b535f..1b40b2197ce6 100644 +--- a/fs/nfsd/nfs4state.c ++++ b/fs/nfsd/nfs4state.c +@@ -7113,16 +7113,13 @@ nfsd4_release_lockowner(struct svc_rqst *rqstp, + union nfsd4_op_u *u) + { + struct nfsd4_release_lockowner *rlockowner = &u->release_lockowner; ++ struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); + clientid_t *clid = &rlockowner->rl_clientid; +- struct nfs4_stateowner *sop; +- struct nfs4_lockowner *lo = NULL; + struct nfs4_ol_stateid *stp; +- struct xdr_netobj *owner = &rlockowner->rl_owner; +- unsigned int hashval = ownerstr_hashval(owner); +- __be32 status; +- struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); ++ struct nfs4_lockowner *lo; + struct nfs4_client *clp; +- LIST_HEAD (reaplist); ++ LIST_HEAD(reaplist); ++ __be32 status; + + dprintk("nfsd4_release_lockowner clientid: (%08x/%08x):\n", + clid->cl_boot, clid->cl_id); +@@ -7130,30 +7127,19 @@ nfsd4_release_lockowner(struct svc_rqst *rqstp, + status = lookup_clientid(clid, cstate, nn, false); + if (status) + return status; +- + clp = cstate->clp; +- /* Find the matching lock stateowner */ +- spin_lock(&clp->cl_lock); +- list_for_each_entry(sop, &clp->cl_ownerstr_hashtbl[hashval], +- so_strhash) { + +- if (sop->so_is_open_owner || !same_owner_str(sop, owner)) +- continue; +- +- if (atomic_read(&sop->so_count) != 1) { +- spin_unlock(&clp->cl_lock); +- return nfserr_locks_held; +- } +- +- lo = lockowner(sop); +- nfs4_get_stateowner(sop); +- break; +- } ++ spin_lock(&clp->cl_lock); ++ lo = find_lockowner_str_locked(clp, &rlockowner->rl_owner); + if (!lo) { + spin_unlock(&clp->cl_lock); + return status; + } +- ++ if (atomic_read(&lo->lo_owner.so_count) != 2) { ++ 
spin_unlock(&clp->cl_lock); ++ nfs4_put_stateowner(&lo->lo_owner); ++ return nfserr_locks_held; ++ } + unhash_lockowner_locked(lo); + while (!list_empty(&lo->lo_owner.so_stateids)) { + stp = list_first_entry(&lo->lo_owner.so_stateids, +-- +2.43.0 + diff --git a/queue-5.10/pipe-wakeup-wr_wait-after-setting-max_usage.patch b/queue-5.10/pipe-wakeup-wr_wait-after-setting-max_usage.patch new file mode 100644 index 00000000000..b482aa62b93 --- /dev/null +++ b/queue-5.10/pipe-wakeup-wr_wait-after-setting-max_usage.patch @@ -0,0 +1,62 @@ +From 83c5b2c0278624b50c7080a2a3601d84f0f73716 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 1 Dec 2023 11:11:28 +0100 +Subject: pipe: wakeup wr_wait after setting max_usage + +From: Lukas Schauer + +[ Upstream commit e95aada4cb93d42e25c30a0ef9eb2923d9711d4a ] + +In commit c73be61cede5 ("pipe: Add general notification queue support") a +regression was introduced that would lock up resized pipes under certain +conditions. See the reproducer in [1]. + +The commit resizing the pipe ring size was moved to a different +function, doing that moved the wakeup for pipe->wr_wait before actually +raising pipe->max_usage. If a pipe was full before the resize occurred it +would result in the wakeup never actually triggering pipe_write. + +Set @max_usage and @nr_accounted before waking writers if this isn't a +watch queue.
+ +Link: https://bugzilla.kernel.org/show_bug.cgi?id=212295 [1] +Link: https://lore.kernel.org/r/20231201-orchideen-modewelt-e009de4562c6@brauner +Fixes: c73be61cede5 ("pipe: Add general notification queue support") +Reviewed-by: David Howells +Cc: +Signed-off-by: Lukas Schauer +[Christian Brauner : rewrite to account for watch queues] +Signed-off-by: Christian Brauner +Signed-off-by: Sasha Levin +--- + fs/pipe.c | 7 +++++-- + 1 file changed, 5 insertions(+), 2 deletions(-) + +diff --git a/fs/pipe.c b/fs/pipe.c +index 7b3e94baba21..588fe37d8d95 100644 +--- a/fs/pipe.c ++++ b/fs/pipe.c +@@ -1300,6 +1300,11 @@ int pipe_resize_ring(struct pipe_inode_info *pipe, unsigned int nr_slots) + pipe->tail = tail; + pipe->head = head; + ++ if (!pipe_has_watch_queue(pipe)) { ++ pipe->max_usage = nr_slots; ++ pipe->nr_accounted = nr_slots; ++ } ++ + spin_unlock_irq(&pipe->rd_wait.lock); + + /* This might have made more room for writers */ +@@ -1351,8 +1356,6 @@ static long pipe_set_size(struct pipe_inode_info *pipe, unsigned long arg) + if (ret < 0) + goto out_revert_acct; + +- pipe->max_usage = nr_slots; +- pipe->nr_accounted = nr_slots; + return pipe->max_usage * PAGE_SIZE; + + out_revert_acct: +-- +2.43.0 + diff --git a/queue-5.10/pm-core-remove-unnecessary-void-conversions.patch b/queue-5.10/pm-core-remove-unnecessary-void-conversions.patch new file mode 100644 index 00000000000..0890487d4ef --- /dev/null +++ b/queue-5.10/pm-core-remove-unnecessary-void-conversions.patch @@ -0,0 +1,83 @@ +From 706120e74e04d5df664a3218572dc75fc194cebc Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sun, 26 Mar 2023 06:19:35 +0800 +Subject: PM: core: Remove unnecessary (void *) conversions + +From: Li zeming + +[ Upstream commit 73d73f5ee7fb0c42ff87091d105bee720a9565f1 ] + +Assignments from pointer variables of type (void *) do not require +explicit type casts, so remove such type casts from the code in +drivers/base/power/main.c where applicable.
+ +Signed-off-by: Li zeming +[ rjw: Subject and changelog edits ] +Signed-off-by: Rafael J. Wysocki +Stable-dep-of: 7839d0078e0d ("PM: sleep: Fix possible deadlocks in core system-wide PM code") +Signed-off-by: Sasha Levin +--- + drivers/base/power/main.c | 12 ++++++------ + 1 file changed, 6 insertions(+), 6 deletions(-) + +diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c +index 6c334a65644c..402f3c4e3668 100644 +--- a/drivers/base/power/main.c ++++ b/drivers/base/power/main.c +@@ -683,7 +683,7 @@ static bool dpm_async_fn(struct device *dev, async_func_t func) + + static void async_resume_noirq(void *data, async_cookie_t cookie) + { +- struct device *dev = (struct device *)data; ++ struct device *dev = data; + int error; + + error = device_resume_noirq(dev, pm_transition, true); +@@ -822,7 +822,7 @@ static int device_resume_early(struct device *dev, pm_message_t state, bool asyn + + static void async_resume_early(void *data, async_cookie_t cookie) + { +- struct device *dev = (struct device *)data; ++ struct device *dev = data; + int error; + + error = device_resume_early(dev, pm_transition, true); +@@ -986,7 +986,7 @@ static int device_resume(struct device *dev, pm_message_t state, bool async) + + static void async_resume(void *data, async_cookie_t cookie) + { +- struct device *dev = (struct device *)data; ++ struct device *dev = data; + int error; + + error = device_resume(dev, pm_transition, true); +@@ -1275,7 +1275,7 @@ static int __device_suspend_noirq(struct device *dev, pm_message_t state, bool a + + static void async_suspend_noirq(void *data, async_cookie_t cookie) + { +- struct device *dev = (struct device *)data; ++ struct device *dev = data; + int error; + + error = __device_suspend_noirq(dev, pm_transition, true); +@@ -1458,7 +1458,7 @@ static int __device_suspend_late(struct device *dev, pm_message_t state, bool as + + static void async_suspend_late(void *data, async_cookie_t cookie) + { +- struct device *dev = (struct device *)data; 
++ struct device *dev = data; + int error; + + error = __device_suspend_late(dev, pm_transition, true); +@@ -1734,7 +1734,7 @@ static int __device_suspend(struct device *dev, pm_message_t state, bool async) + + static void async_suspend(void *data, async_cookie_t cookie) + { +- struct device *dev = (struct device *)data; ++ struct device *dev = data; + int error; + + error = __device_suspend(dev, pm_transition, true); +-- +2.43.0 + diff --git a/queue-5.10/pm-sleep-avoid-calling-put_device-under-dpm_list_mtx.patch b/queue-5.10/pm-sleep-avoid-calling-put_device-under-dpm_list_mtx.patch new file mode 100644 index 00000000000..fd5819f3b1a --- /dev/null +++ b/queue-5.10/pm-sleep-avoid-calling-put_device-under-dpm_list_mtx.patch @@ -0,0 +1,371 @@ +From 13ac8ae047259f5e3af818063951b0453d01bcb4 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 4 Nov 2021 18:26:26 +0100 +Subject: PM: sleep: Avoid calling put_device() under dpm_list_mtx + +From: Rafael J. Wysocki + +[ Upstream commit 2aa36604e8243698ff22bd5fef0dd0c6bb07ba92 ] + +It is generally unsafe to call put_device() with dpm_list_mtx held, +because the given device's release routine may carry out an action +depending on that lock which then may deadlock, so modify the +system-wide suspend and resume of devices to always drop dpm_list_mtx +before calling put_device() (and adjust white space somewhat while +at it). 
+ +For instance, this prevents the following splat from showing up in +the kernel log after a system resume in certain configurations: + +[ 3290.969514] ====================================================== +[ 3290.969517] WARNING: possible circular locking dependency detected +[ 3290.969519] 5.15.0+ #2420 Tainted: G S +[ 3290.969523] ------------------------------------------------------ +[ 3290.969525] systemd-sleep/4553 is trying to acquire lock: +[ 3290.969529] ffff888117ab1138 ((wq_completion)hci0#2){+.+.}-{0:0}, at: flush_workqueue+0x87/0x4a0 +[ 3290.969554] + but task is already holding lock: +[ 3290.969556] ffffffff8280fca8 (dpm_list_mtx){+.+.}-{3:3}, at: dpm_resume+0x12e/0x3e0 +[ 3290.969571] + which lock already depends on the new lock. + +[ 3290.969573] + the existing dependency chain (in reverse order) is: +[ 3290.969575] + -> #3 (dpm_list_mtx){+.+.}-{3:3}: +[ 3290.969583] __mutex_lock+0x9d/0xa30 +[ 3290.969591] device_pm_add+0x2e/0xe0 +[ 3290.969597] device_add+0x4d5/0x8f0 +[ 3290.969605] hci_conn_add_sysfs+0x43/0xb0 [bluetooth] +[ 3290.969689] hci_conn_complete_evt.isra.71+0x124/0x750 [bluetooth] +[ 3290.969747] hci_event_packet+0xd6c/0x28a0 [bluetooth] +[ 3290.969798] hci_rx_work+0x213/0x640 [bluetooth] +[ 3290.969842] process_one_work+0x2aa/0x650 +[ 3290.969851] worker_thread+0x39/0x400 +[ 3290.969859] kthread+0x142/0x170 +[ 3290.969865] ret_from_fork+0x22/0x30 +[ 3290.969872] + -> #2 (&hdev->lock){+.+.}-{3:3}: +[ 3290.969881] __mutex_lock+0x9d/0xa30 +[ 3290.969887] hci_event_packet+0xba/0x28a0 [bluetooth] +[ 3290.969935] hci_rx_work+0x213/0x640 [bluetooth] +[ 3290.969978] process_one_work+0x2aa/0x650 +[ 3290.969985] worker_thread+0x39/0x400 +[ 3290.969993] kthread+0x142/0x170 +[ 3290.969999] ret_from_fork+0x22/0x30 +[ 3290.970004] + -> #1 ((work_completion)(&hdev->rx_work)){+.+.}-{0:0}: +[ 3290.970013] process_one_work+0x27d/0x650 +[ 3290.970020] worker_thread+0x39/0x400 +[ 3290.970028] kthread+0x142/0x170 +[ 3290.970033] ret_from_fork+0x22/0x30 
+[ 3290.970038] + -> #0 ((wq_completion)hci0#2){+.+.}-{0:0}: +[ 3290.970047] __lock_acquire+0x15cb/0x1b50 +[ 3290.970054] lock_acquire+0x26c/0x300 +[ 3290.970059] flush_workqueue+0xae/0x4a0 +[ 3290.970066] drain_workqueue+0xa1/0x130 +[ 3290.970073] destroy_workqueue+0x34/0x1f0 +[ 3290.970081] hci_release_dev+0x49/0x180 [bluetooth] +[ 3290.970130] bt_host_release+0x1d/0x30 [bluetooth] +[ 3290.970195] device_release+0x33/0x90 +[ 3290.970201] kobject_release+0x63/0x160 +[ 3290.970211] dpm_resume+0x164/0x3e0 +[ 3290.970215] dpm_resume_end+0xd/0x20 +[ 3290.970220] suspend_devices_and_enter+0x1a4/0xba0 +[ 3290.970229] pm_suspend+0x26b/0x310 +[ 3290.970236] state_store+0x42/0x90 +[ 3290.970243] kernfs_fop_write_iter+0x135/0x1b0 +[ 3290.970251] new_sync_write+0x125/0x1c0 +[ 3290.970257] vfs_write+0x360/0x3c0 +[ 3290.970263] ksys_write+0xa7/0xe0 +[ 3290.970269] do_syscall_64+0x3a/0x80 +[ 3290.970276] entry_SYSCALL_64_after_hwframe+0x44/0xae +[ 3290.970284] + other info that might help us debug this: + +[ 3290.970285] Chain exists of: + (wq_completion)hci0#2 --> &hdev->lock --> dpm_list_mtx + +[ 3290.970297] Possible unsafe locking scenario: + +[ 3290.970299] CPU0 CPU1 +[ 3290.970300] ---- ---- +[ 3290.970302] lock(dpm_list_mtx); +[ 3290.970306] lock(&hdev->lock); +[ 3290.970310] lock(dpm_list_mtx); +[ 3290.970314] lock((wq_completion)hci0#2); +[ 3290.970319] + *** DEADLOCK *** + +[ 3290.970321] 7 locks held by systemd-sleep/4553: +[ 3290.970325] #0: ffff888103bcd448 (sb_writers#4){.+.+}-{0:0}, at: ksys_write+0xa7/0xe0 +[ 3290.970341] #1: ffff888115a14488 (&of->mutex){+.+.}-{3:3}, at: kernfs_fop_write_iter+0x103/0x1b0 +[ 3290.970355] #2: ffff888100f719e0 (kn->active#233){.+.+}-{0:0}, at: kernfs_fop_write_iter+0x10c/0x1b0 +[ 3290.970369] #3: ffffffff82661048 (autosleep_lock){+.+.}-{3:3}, at: state_store+0x12/0x90 +[ 3290.970384] #4: ffffffff82658ac8 (system_transition_mutex){+.+.}-{3:3}, at: pm_suspend+0x9f/0x310 +[ 3290.970399] #5: ffffffff827f2a48 
(acpi_scan_lock){+.+.}-{3:3}, at: acpi_suspend_begin+0x4c/0x80 +[ 3290.970416] #6: ffffffff8280fca8 (dpm_list_mtx){+.+.}-{3:3}, at: dpm_resume+0x12e/0x3e0 +[ 3290.970428] + stack backtrace: +[ 3290.970431] CPU: 3 PID: 4553 Comm: systemd-sleep Tainted: G S 5.15.0+ #2420 +[ 3290.970438] Hardware name: Dell Inc. XPS 13 9380/0RYJWW, BIOS 1.5.0 06/03/2019 +[ 3290.970441] Call Trace: +[ 3290.970446] dump_stack_lvl+0x44/0x57 +[ 3290.970454] check_noncircular+0x105/0x120 +[ 3290.970468] ? __lock_acquire+0x15cb/0x1b50 +[ 3290.970474] __lock_acquire+0x15cb/0x1b50 +[ 3290.970487] lock_acquire+0x26c/0x300 +[ 3290.970493] ? flush_workqueue+0x87/0x4a0 +[ 3290.970503] ? __raw_spin_lock_init+0x3b/0x60 +[ 3290.970510] ? lockdep_init_map_type+0x58/0x240 +[ 3290.970519] flush_workqueue+0xae/0x4a0 +[ 3290.970526] ? flush_workqueue+0x87/0x4a0 +[ 3290.970544] ? drain_workqueue+0xa1/0x130 +[ 3290.970552] drain_workqueue+0xa1/0x130 +[ 3290.970561] destroy_workqueue+0x34/0x1f0 +[ 3290.970572] hci_release_dev+0x49/0x180 [bluetooth] +[ 3290.970624] bt_host_release+0x1d/0x30 [bluetooth] +[ 3290.970687] device_release+0x33/0x90 +[ 3290.970695] kobject_release+0x63/0x160 +[ 3290.970705] dpm_resume+0x164/0x3e0 +[ 3290.970710] ? 
dpm_resume_early+0x251/0x3b0 +[ 3290.970718] dpm_resume_end+0xd/0x20 +[ 3290.970723] suspend_devices_and_enter+0x1a4/0xba0 +[ 3290.970737] pm_suspend+0x26b/0x310 +[ 3290.970746] state_store+0x42/0x90 +[ 3290.970755] kernfs_fop_write_iter+0x135/0x1b0 +[ 3290.970764] new_sync_write+0x125/0x1c0 +[ 3290.970777] vfs_write+0x360/0x3c0 +[ 3290.970785] ksys_write+0xa7/0xe0 +[ 3290.970794] do_syscall_64+0x3a/0x80 +[ 3290.970803] entry_SYSCALL_64_after_hwframe+0x44/0xae +[ 3290.970811] RIP: 0033:0x7f41b1328164 +[ 3290.970819] Code: 00 f7 d8 64 89 02 48 c7 c0 ff ff ff ff eb b7 0f 1f 80 00 00 00 00 8b 05 4a d2 2c 00 48 63 ff 85 c0 75 13 b8 01 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 54 f3 c3 66 90 55 53 48 89 d5 48 89 f3 48 83 +[ 3290.970824] RSP: 002b:00007ffe6ae21b28 EFLAGS: 00000246 ORIG_RAX: 0000000000000001 +[ 3290.970831] RAX: ffffffffffffffda RBX: 0000000000000004 RCX: 00007f41b1328164 +[ 3290.970836] RDX: 0000000000000004 RSI: 000055965e651070 RDI: 0000000000000004 +[ 3290.970839] RBP: 000055965e651070 R08: 000055965e64f390 R09: 00007f41b1e3d1c0 +[ 3290.970843] R10: 000000000000000a R11: 0000000000000246 R12: 0000000000000004 +[ 3290.970846] R13: 0000000000000001 R14: 000055965e64f2b0 R15: 0000000000000004 + +Cc: All applicable +Signed-off-by: Rafael J. 
Wysocki +Stable-dep-of: 7839d0078e0d ("PM: sleep: Fix possible deadlocks in core system-wide PM code") +Signed-off-by: Sasha Levin +--- + drivers/base/power/main.c | 84 ++++++++++++++++++++++++++------------- + 1 file changed, 57 insertions(+), 27 deletions(-) + +diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c +index a4714a025315..6c334a65644c 100644 +--- a/drivers/base/power/main.c ++++ b/drivers/base/power/main.c +@@ -714,6 +714,7 @@ static void dpm_noirq_resume_devices(pm_message_t state) + dev = to_device(dpm_noirq_list.next); + get_device(dev); + list_move_tail(&dev->power.entry, &dpm_late_early_list); ++ + mutex_unlock(&dpm_list_mtx); + + if (!is_async(dev)) { +@@ -728,8 +729,9 @@ static void dpm_noirq_resume_devices(pm_message_t state) + } + } + +- mutex_lock(&dpm_list_mtx); + put_device(dev); ++ ++ mutex_lock(&dpm_list_mtx); + } + mutex_unlock(&dpm_list_mtx); + async_synchronize_full(); +@@ -855,6 +857,7 @@ void dpm_resume_early(pm_message_t state) + dev = to_device(dpm_late_early_list.next); + get_device(dev); + list_move_tail(&dev->power.entry, &dpm_suspended_list); ++ + mutex_unlock(&dpm_list_mtx); + + if (!is_async(dev)) { +@@ -868,8 +871,10 @@ void dpm_resume_early(pm_message_t state) + pm_dev_err(dev, state, " early", error); + } + } +- mutex_lock(&dpm_list_mtx); ++ + put_device(dev); ++ ++ mutex_lock(&dpm_list_mtx); + } + mutex_unlock(&dpm_list_mtx); + async_synchronize_full(); +@@ -1032,7 +1037,12 @@ void dpm_resume(pm_message_t state) + } + if (!list_empty(&dev->power.entry)) + list_move_tail(&dev->power.entry, &dpm_prepared_list); ++ ++ mutex_unlock(&dpm_list_mtx); ++ + put_device(dev); ++ ++ mutex_lock(&dpm_list_mtx); + } + mutex_unlock(&dpm_list_mtx); + async_synchronize_full(); +@@ -1110,14 +1120,16 @@ void dpm_complete(pm_message_t state) + get_device(dev); + dev->power.is_prepared = false; + list_move(&dev->power.entry, &list); ++ + mutex_unlock(&dpm_list_mtx); + + trace_device_pm_callback_start(dev, "", state.event); + 
device_complete(dev, state); + trace_device_pm_callback_end(dev, 0); + +- mutex_lock(&dpm_list_mtx); + put_device(dev); ++ ++ mutex_lock(&dpm_list_mtx); + } + list_splice(&list, &dpm_list); + mutex_unlock(&dpm_list_mtx); +@@ -1302,17 +1314,21 @@ static int dpm_noirq_suspend_devices(pm_message_t state) + error = device_suspend_noirq(dev); + + mutex_lock(&dpm_list_mtx); ++ + if (error) { + pm_dev_err(dev, state, " noirq", error); + dpm_save_failed_dev(dev_name(dev)); +- put_device(dev); +- break; +- } +- if (!list_empty(&dev->power.entry)) ++ } else if (!list_empty(&dev->power.entry)) { + list_move(&dev->power.entry, &dpm_noirq_list); ++ } ++ ++ mutex_unlock(&dpm_list_mtx); ++ + put_device(dev); + +- if (async_error) ++ mutex_lock(&dpm_list_mtx); ++ ++ if (error || async_error) + break; + } + mutex_unlock(&dpm_list_mtx); +@@ -1479,23 +1495,28 @@ int dpm_suspend_late(pm_message_t state) + struct device *dev = to_device(dpm_suspended_list.prev); + + get_device(dev); ++ + mutex_unlock(&dpm_list_mtx); + + error = device_suspend_late(dev); + + mutex_lock(&dpm_list_mtx); ++ + if (!list_empty(&dev->power.entry)) + list_move(&dev->power.entry, &dpm_late_early_list); + + if (error) { + pm_dev_err(dev, state, " late", error); + dpm_save_failed_dev(dev_name(dev)); +- put_device(dev); +- break; + } ++ ++ mutex_unlock(&dpm_list_mtx); ++ + put_device(dev); + +- if (async_error) ++ mutex_lock(&dpm_list_mtx); ++ ++ if (error || async_error) + break; + } + mutex_unlock(&dpm_list_mtx); +@@ -1755,21 +1776,27 @@ int dpm_suspend(pm_message_t state) + struct device *dev = to_device(dpm_prepared_list.prev); + + get_device(dev); ++ + mutex_unlock(&dpm_list_mtx); + + error = device_suspend(dev); + + mutex_lock(&dpm_list_mtx); ++ + if (error) { + pm_dev_err(dev, state, "", error); + dpm_save_failed_dev(dev_name(dev)); +- put_device(dev); +- break; +- } +- if (!list_empty(&dev->power.entry)) ++ } else if (!list_empty(&dev->power.entry)) { + list_move(&dev->power.entry, &dpm_suspended_list); ++ 
} ++ ++ mutex_unlock(&dpm_list_mtx); ++ + put_device(dev); +- if (async_error) ++ ++ mutex_lock(&dpm_list_mtx); ++ ++ if (error || async_error) + break; + } + mutex_unlock(&dpm_list_mtx); +@@ -1886,6 +1913,7 @@ int dpm_prepare(pm_message_t state) + struct device *dev = to_device(dpm_list.next); + + get_device(dev); ++ + mutex_unlock(&dpm_list_mtx); + + trace_device_pm_callback_start(dev, "", state.event); +@@ -1893,21 +1921,23 @@ int dpm_prepare(pm_message_t state) + trace_device_pm_callback_end(dev, error); + + mutex_lock(&dpm_list_mtx); +- if (error) { +- if (error == -EAGAIN) { +- put_device(dev); +- error = 0; +- continue; +- } ++ ++ if (!error) { ++ dev->power.is_prepared = true; ++ if (!list_empty(&dev->power.entry)) ++ list_move_tail(&dev->power.entry, &dpm_prepared_list); ++ } else if (error == -EAGAIN) { ++ error = 0; ++ } else { + dev_info(dev, "not prepared for power transition: code %d\n", + error); +- put_device(dev); +- break; + } +- dev->power.is_prepared = true; +- if (!list_empty(&dev->power.entry)) +- list_move_tail(&dev->power.entry, &dpm_prepared_list); ++ ++ mutex_unlock(&dpm_list_mtx); ++ + put_device(dev); ++ ++ mutex_lock(&dpm_list_mtx); + } + mutex_unlock(&dpm_list_mtx); + trace_suspend_resume(TPS("dpm_prepare"), state.event, false); +-- +2.43.0 + diff --git a/queue-5.10/pm-sleep-fix-possible-deadlocks-in-core-system-wide-.patch b/queue-5.10/pm-sleep-fix-possible-deadlocks-in-core-system-wide-.patch new file mode 100644 index 00000000000..060f42ad9fd --- /dev/null +++ b/queue-5.10/pm-sleep-fix-possible-deadlocks-in-core-system-wide-.patch @@ -0,0 +1,329 @@ +From c396cc5821d1d3b5d9fb188e7de296ba34803b18 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 27 Dec 2023 21:41:06 +0100 +Subject: PM: sleep: Fix possible deadlocks in core system-wide PM code + +From: Rafael J. 
Wysocki + +[ Upstream commit 7839d0078e0d5e6cc2fa0b0dfbee71de74f1e557 ] + +It is reported that in low-memory situations the system-wide resume core +code deadlocks, because async_schedule_dev() executes its argument +function synchronously if it cannot allocate memory (and not only in +that case) and that function attempts to acquire a mutex that is already +held. Executing the argument function synchronously from within +dpm_async_fn() may also be problematic for ordering reasons (it may +cause a consumer device's resume callback to be invoked before a +requisite supplier device's one, for example). + +Address this by changing the code in question to use +async_schedule_dev_nocall() for scheduling the asynchronous +execution of device suspend and resume functions and to directly +run them synchronously if async_schedule_dev_nocall() returns false. + +Link: https://lore.kernel.org/linux-pm/ZYvjiqX6EsL15moe@perf/ +Reported-by: Youngmin Nam +Signed-off-by: Rafael J. Wysocki +Reviewed-by: Stanislaw Gruszka +Tested-by: Youngmin Nam +Reviewed-by: Ulf Hansson +Cc: 5.7+ # 5.7+: 6aa09a5bccd8 async: Split async_schedule_node_domain() +Cc: 5.7+ # 5.7+: 7d4b5d7a37bd async: Introduce async_schedule_dev_nocall() +Cc: 5.7+ # 5.7+ +Signed-off-by: Sasha Levin +--- + drivers/base/power/main.c | 148 ++++++++++++++++++-------------------- + 1 file changed, 68 insertions(+), 80 deletions(-) + +diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c +index 402f3c4e3668..c493e48e420f 100644 +--- a/drivers/base/power/main.c ++++ b/drivers/base/power/main.c +@@ -583,7 +583,7 @@ bool dev_pm_skip_resume(struct device *dev) + } + + /** +- * device_resume_noirq - Execute a "noirq resume" callback for given device. ++ * __device_resume_noirq - Execute a "noirq resume" callback for given device. + * @dev: Device to handle. + * @state: PM transition of the system being carried out. + * @async: If true, the device is being resumed asynchronously. 
+@@ -591,7 +591,7 @@ bool dev_pm_skip_resume(struct device *dev) + * The driver of @dev will not receive interrupts while this function is being + * executed. + */ +-static int device_resume_noirq(struct device *dev, pm_message_t state, bool async) ++static void __device_resume_noirq(struct device *dev, pm_message_t state, bool async) + { + pm_callback_t callback = NULL; + const char *info = NULL; +@@ -659,7 +659,13 @@ static int device_resume_noirq(struct device *dev, pm_message_t state, bool asyn + Out: + complete_all(&dev->power.completion); + TRACE_RESUME(error); +- return error; ++ ++ if (error) { ++ suspend_stats.failed_resume_noirq++; ++ dpm_save_failed_step(SUSPEND_RESUME_NOIRQ); ++ dpm_save_failed_dev(dev_name(dev)); ++ pm_dev_err(dev, state, async ? " async noirq" : " noirq", error); ++ } + } + + static bool is_async(struct device *dev) +@@ -672,11 +678,15 @@ static bool dpm_async_fn(struct device *dev, async_func_t func) + { + reinit_completion(&dev->power.completion); + +- if (is_async(dev)) { +- get_device(dev); +- async_schedule_dev(func, dev); ++ if (!is_async(dev)) ++ return false; ++ ++ get_device(dev); ++ ++ if (async_schedule_dev_nocall(func, dev)) + return true; +- } ++ ++ put_device(dev); + + return false; + } +@@ -684,15 +694,19 @@ static bool dpm_async_fn(struct device *dev, async_func_t func) + static void async_resume_noirq(void *data, async_cookie_t cookie) + { + struct device *dev = data; +- int error; +- +- error = device_resume_noirq(dev, pm_transition, true); +- if (error) +- pm_dev_err(dev, pm_transition, " async", error); + ++ __device_resume_noirq(dev, pm_transition, true); + put_device(dev); + } + ++static void device_resume_noirq(struct device *dev) ++{ ++ if (dpm_async_fn(dev, async_resume_noirq)) ++ return; ++ ++ __device_resume_noirq(dev, pm_transition, false); ++} ++ + static void dpm_noirq_resume_devices(pm_message_t state) + { + struct device *dev; +@@ -702,14 +716,6 @@ static void dpm_noirq_resume_devices(pm_message_t 
state) + mutex_lock(&dpm_list_mtx); + pm_transition = state; + +- /* +- * Advanced the async threads upfront, +- * in case the starting of async threads is +- * delayed by non-async resuming devices. +- */ +- list_for_each_entry(dev, &dpm_noirq_list, power.entry) +- dpm_async_fn(dev, async_resume_noirq); +- + while (!list_empty(&dpm_noirq_list)) { + dev = to_device(dpm_noirq_list.next); + get_device(dev); +@@ -717,17 +723,7 @@ static void dpm_noirq_resume_devices(pm_message_t state) + + mutex_unlock(&dpm_list_mtx); + +- if (!is_async(dev)) { +- int error; +- +- error = device_resume_noirq(dev, state, false); +- if (error) { +- suspend_stats.failed_resume_noirq++; +- dpm_save_failed_step(SUSPEND_RESUME_NOIRQ); +- dpm_save_failed_dev(dev_name(dev)); +- pm_dev_err(dev, state, " noirq", error); +- } +- } ++ device_resume_noirq(dev); + + put_device(dev); + +@@ -757,14 +753,14 @@ void dpm_resume_noirq(pm_message_t state) + } + + /** +- * device_resume_early - Execute an "early resume" callback for given device. ++ * __device_resume_early - Execute an "early resume" callback for given device. + * @dev: Device to handle. + * @state: PM transition of the system being carried out. + * @async: If true, the device is being resumed asynchronously. + * + * Runtime PM is disabled for @dev while this function is being executed. + */ +-static int device_resume_early(struct device *dev, pm_message_t state, bool async) ++static void __device_resume_early(struct device *dev, pm_message_t state, bool async) + { + pm_callback_t callback = NULL; + const char *info = NULL; +@@ -817,21 +813,31 @@ static int device_resume_early(struct device *dev, pm_message_t state, bool asyn + + pm_runtime_enable(dev); + complete_all(&dev->power.completion); +- return error; ++ ++ if (error) { ++ suspend_stats.failed_resume_early++; ++ dpm_save_failed_step(SUSPEND_RESUME_EARLY); ++ dpm_save_failed_dev(dev_name(dev)); ++ pm_dev_err(dev, state, async ? 
" async early" : " early", error); ++ } + } + + static void async_resume_early(void *data, async_cookie_t cookie) + { + struct device *dev = data; +- int error; +- +- error = device_resume_early(dev, pm_transition, true); +- if (error) +- pm_dev_err(dev, pm_transition, " async", error); + ++ __device_resume_early(dev, pm_transition, true); + put_device(dev); + } + ++static void device_resume_early(struct device *dev) ++{ ++ if (dpm_async_fn(dev, async_resume_early)) ++ return; ++ ++ __device_resume_early(dev, pm_transition, false); ++} ++ + /** + * dpm_resume_early - Execute "early resume" callbacks for all devices. + * @state: PM transition of the system being carried out. +@@ -845,14 +851,6 @@ void dpm_resume_early(pm_message_t state) + mutex_lock(&dpm_list_mtx); + pm_transition = state; + +- /* +- * Advanced the async threads upfront, +- * in case the starting of async threads is +- * delayed by non-async resuming devices. +- */ +- list_for_each_entry(dev, &dpm_late_early_list, power.entry) +- dpm_async_fn(dev, async_resume_early); +- + while (!list_empty(&dpm_late_early_list)) { + dev = to_device(dpm_late_early_list.next); + get_device(dev); +@@ -860,17 +858,7 @@ void dpm_resume_early(pm_message_t state) + + mutex_unlock(&dpm_list_mtx); + +- if (!is_async(dev)) { +- int error; +- +- error = device_resume_early(dev, state, false); +- if (error) { +- suspend_stats.failed_resume_early++; +- dpm_save_failed_step(SUSPEND_RESUME_EARLY); +- dpm_save_failed_dev(dev_name(dev)); +- pm_dev_err(dev, state, " early", error); +- } +- } ++ device_resume_early(dev); + + put_device(dev); + +@@ -894,12 +882,12 @@ void dpm_resume_start(pm_message_t state) + EXPORT_SYMBOL_GPL(dpm_resume_start); + + /** +- * device_resume - Execute "resume" callbacks for given device. ++ * __device_resume - Execute "resume" callbacks for given device. + * @dev: Device to handle. + * @state: PM transition of the system being carried out. 
+ * @async: If true, the device is being resumed asynchronously. + */ +-static int device_resume(struct device *dev, pm_message_t state, bool async) ++static void __device_resume(struct device *dev, pm_message_t state, bool async) + { + pm_callback_t callback = NULL; + const char *info = NULL; +@@ -981,20 +969,30 @@ static int device_resume(struct device *dev, pm_message_t state, bool async) + + TRACE_RESUME(error); + +- return error; ++ if (error) { ++ suspend_stats.failed_resume++; ++ dpm_save_failed_step(SUSPEND_RESUME); ++ dpm_save_failed_dev(dev_name(dev)); ++ pm_dev_err(dev, state, async ? " async" : "", error); ++ } + } + + static void async_resume(void *data, async_cookie_t cookie) + { + struct device *dev = data; +- int error; + +- error = device_resume(dev, pm_transition, true); +- if (error) +- pm_dev_err(dev, pm_transition, " async", error); ++ __device_resume(dev, pm_transition, true); + put_device(dev); + } + ++static void device_resume(struct device *dev) ++{ ++ if (dpm_async_fn(dev, async_resume)) ++ return; ++ ++ __device_resume(dev, pm_transition, false); ++} ++ + /** + * dpm_resume - Execute "resume" callbacks for non-sysdev devices. + * @state: PM transition of the system being carried out. 
+@@ -1014,27 +1012,17 @@ void dpm_resume(pm_message_t state) + pm_transition = state; + async_error = 0; + +- list_for_each_entry(dev, &dpm_suspended_list, power.entry) +- dpm_async_fn(dev, async_resume); +- + while (!list_empty(&dpm_suspended_list)) { + dev = to_device(dpm_suspended_list.next); ++ + get_device(dev); +- if (!is_async(dev)) { +- int error; + +- mutex_unlock(&dpm_list_mtx); ++ mutex_unlock(&dpm_list_mtx); ++ ++ device_resume(dev); + +- error = device_resume(dev, state, false); +- if (error) { +- suspend_stats.failed_resume++; +- dpm_save_failed_step(SUSPEND_RESUME); +- dpm_save_failed_dev(dev_name(dev)); +- pm_dev_err(dev, state, "", error); +- } ++ mutex_lock(&dpm_list_mtx); + +- mutex_lock(&dpm_list_mtx); +- } + if (!list_empty(&dev->power.entry)) + list_move_tail(&dev->power.entry, &dpm_prepared_list); + +-- +2.43.0 + diff --git a/queue-5.10/pm-sleep-use-dev_printk-when-possible.patch b/queue-5.10/pm-sleep-use-dev_printk-when-possible.patch new file mode 100644 index 00000000000..3438b5c5bb4 --- /dev/null +++ b/queue-5.10/pm-sleep-use-dev_printk-when-possible.patch @@ -0,0 +1,57 @@ +From 1edbd5dd5ab4cf0706ef635587483ed79583a3f8 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 26 Jan 2021 15:29:40 -0600 +Subject: PM: sleep: Use dev_printk() when possible + +From: Bjorn Helgaas + +[ Upstream commit eb23d91af55bc2369fe3f0aa6997e72eb20e16fe ] + +Use dev_printk() when possible to make messages more consistent with other +device-related messages. + +Signed-off-by: Bjorn Helgaas +Signed-off-by: Rafael J. 
Wysocki +Stable-dep-of: 7839d0078e0d ("PM: sleep: Fix possible deadlocks in core system-wide PM code") +Signed-off-by: Sasha Levin +--- + drivers/base/power/main.c | 9 +++++---- + 1 file changed, 5 insertions(+), 4 deletions(-) + +diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c +index 1dbaaddf540e..a4714a025315 100644 +--- a/drivers/base/power/main.c ++++ b/drivers/base/power/main.c +@@ -16,6 +16,7 @@ + */ + + #define pr_fmt(fmt) "PM: " fmt ++#define dev_fmt pr_fmt + + #include + #include +@@ -449,8 +450,8 @@ static void pm_dev_dbg(struct device *dev, pm_message_t state, const char *info) + static void pm_dev_err(struct device *dev, pm_message_t state, const char *info, + int error) + { +- pr_err("Device %s failed to %s%s: error %d\n", +- dev_name(dev), pm_verb(state.event), info, error); ++ dev_err(dev, "failed to %s%s: error %d\n", pm_verb(state.event), info, ++ error); + } + + static void dpm_show_time(ktime_t starttime, pm_message_t state, int error, +@@ -1898,8 +1899,8 @@ int dpm_prepare(pm_message_t state) + error = 0; + continue; + } +- pr_info("Device %s not prepared for power transition: code %d\n", +- dev_name(dev), error); ++ dev_info(dev, "not prepared for power transition: code %d\n", ++ error); + put_device(dev); + break; + } +-- +2.43.0 + diff --git a/queue-5.10/series b/queue-5.10/series index 74ec799cbf0..a610130dc94 100644 --- a/queue-5.10/series +++ b/queue-5.10/series @@ -74,3 +74,20 @@ drm-don-t-unref-the-same-fb-many-times-by-mistake-due-to-deadlock-handling.patch drm-bridge-nxp-ptn3460-fix-i2c_master_send-error-checking.patch drm-tidss-fix-atomic_flush-check.patch drm-bridge-nxp-ptn3460-simplify-some-error-checking.patch +pm-sleep-use-dev_printk-when-possible.patch +pm-sleep-avoid-calling-put_device-under-dpm_list_mtx.patch +pm-core-remove-unnecessary-void-conversions.patch +pm-sleep-fix-possible-deadlocks-in-core-system-wide-.patch +fs-pipe-move-check-to-pipe_has_watch_queue.patch 
+pipe-wakeup-wr_wait-after-setting-max_usage.patch +arm-dts-samsung-exynos4210-i9100-unconditionally-ena.patch +arm64-dts-qcom-sc7180-use-pdc-interrupts-for-usb-ins.patch +arm64-dts-qcom-sc7180-fix-usb-wakeup-interrupt-types.patch +media-mtk-jpeg-fix-use-after-free-bug-due-to-error-p.patch +mm-use-__pfn_to_section-instead-of-open-coding-it.patch +mm-sparsemem-fix-race-in-accessing-memory_section-us.patch +btrfs-remove-err-variable-from-btrfs_delete_subvolum.patch +btrfs-avoid-copying-btrfs_root_subvol_dead-flag-to-s.patch +nfsd-modernize-nfsd4_release_lockowner.patch +nfsd-add-documenting-comment-for-nfsd4_release_locko.patch +nfsd-fix-release_lockowner.patch -- 2.47.3