From: Greg Kroah-Hartman Date: Mon, 15 Nov 2021 13:49:09 +0000 (+0100) Subject: 5.10-stable patches X-Git-Tag: v5.4.160~55 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=588df6a15ced0f65fe04f5f0da30a3f95c7fb0e2;p=thirdparty%2Fkernel%2Fstable-queue.git 5.10-stable patches added patches: block-hold-invalidate_lock-in-blkdiscard-ioctl.patch block-hold-invalidate_lock-in-blkresetzone-ioctl.patch block-hold-invalidate_lock-in-blkzeroout-ioctl.patch f2fs-should-use-gfp_nofs-for-directory-inodes.patch irqchip-sifive-plic-fixup-eoi-failed-when-masked.patch posix-cpu-timers-clear-task-posix_cputimers_work-in-copy_process.patch --- diff --git a/queue-5.10/block-hold-invalidate_lock-in-blkdiscard-ioctl.patch b/queue-5.10/block-hold-invalidate_lock-in-blkdiscard-ioctl.patch new file mode 100644 index 00000000000..1284ecb185b --- /dev/null +++ b/queue-5.10/block-hold-invalidate_lock-in-blkdiscard-ioctl.patch @@ -0,0 +1,59 @@ +From 7607c44c157d343223510c8ffdf7206fdd2a6213 Mon Sep 17 00:00:00 2001 +From: Shin'ichiro Kawasaki +Date: Tue, 9 Nov 2021 19:47:22 +0900 +Subject: block: Hold invalidate_lock in BLKDISCARD ioctl + +From: Shin'ichiro Kawasaki + +commit 7607c44c157d343223510c8ffdf7206fdd2a6213 upstream. + +When BLKDISCARD ioctl and data read race, the data read leaves stale +page cache. To avoid the stale page cache, hold invalidate_lock of the +block device file mapping. The stale page cache is observed when +blktests test case block/009 is repeated hundreds of times. + +This patch can be applied back to the stable kernel version v5.15.y +with slight patch edit. Rework is required for older stable kernels. 
+ +Fixes: 351499a172c0 ("block: Invalidate cache on discard v2") +Signed-off-by: Shin'ichiro Kawasaki +Cc: stable@vger.kernel.org # v5.15 +Reviewed-by: Jan Kara +Link: https://lore.kernel.org/r/20211109104723.835533-2-shinichiro.kawasaki@wdc.com +Signed-off-by: Jens Axboe +Signed-off-by: Greg Kroah-Hartman +--- + block/ioctl.c | 12 +++++++++--- + 1 file changed, 9 insertions(+), 3 deletions(-) + +--- a/block/ioctl.c ++++ b/block/ioctl.c +@@ -121,6 +121,7 @@ static int blk_ioctl_discard(struct bloc + uint64_t range[2]; + uint64_t start, len; + struct request_queue *q = bdev_get_queue(bdev); ++ struct inode *inode = bdev->bd_inode; + int err; + + if (!(mode & FMODE_WRITE)) +@@ -143,12 +144,17 @@ static int blk_ioctl_discard(struct bloc + if (start + len > i_size_read(bdev->bd_inode)) + return -EINVAL; + ++ filemap_invalidate_lock(inode->i_mapping); + err = truncate_bdev_range(bdev, mode, start, start + len - 1); + if (err) +- return err; ++ goto fail; + +- return blkdev_issue_discard(bdev, start >> 9, len >> 9, +- GFP_KERNEL, flags); ++ err = blkdev_issue_discard(bdev, start >> 9, len >> 9, ++ GFP_KERNEL, flags); ++ ++fail: ++ filemap_invalidate_unlock(inode->i_mapping); ++ return err; + } + + static int blk_ioctl_zeroout(struct block_device *bdev, fmode_t mode, diff --git a/queue-5.10/block-hold-invalidate_lock-in-blkresetzone-ioctl.patch b/queue-5.10/block-hold-invalidate_lock-in-blkresetzone-ioctl.patch new file mode 100644 index 00000000000..7b272279575 --- /dev/null +++ b/queue-5.10/block-hold-invalidate_lock-in-blkresetzone-ioctl.patch @@ -0,0 +1,61 @@ +From 86399ea071099ec8ee0a83ac9ad67f7df96a50ad Mon Sep 17 00:00:00 2001 +From: Shin'ichiro Kawasaki +Date: Thu, 11 Nov 2021 17:52:38 +0900 +Subject: block: Hold invalidate_lock in BLKRESETZONE ioctl + +From: Shin'ichiro Kawasaki + +commit 86399ea071099ec8ee0a83ac9ad67f7df96a50ad upstream. + +When BLKRESETZONE ioctl and data read race, the data read leaves stale +page cache. 
The commit e5113505904e ("block: Discard page cache of zone +reset target range") added page cache truncation to avoid stale page +cache after the ioctl. However, the stale page cache still can be read +during the reset zone operation for the ioctl. To avoid the stale page +cache completely, hold invalidate_lock of the block device file mapping. + +Fixes: e5113505904e ("block: Discard page cache of zone reset target range") +Signed-off-by: Shin'ichiro Kawasaki +Cc: stable@vger.kernel.org # v5.15 +Reviewed-by: Jan Kara +Reviewed-by: Ming Lei +Link: https://lore.kernel.org/r/20211111085238.942492-1-shinichiro.kawasaki@wdc.com +Signed-off-by: Jens Axboe +Signed-off-by: Greg Kroah-Hartman +--- + block/blk-zoned.c | 15 +++++---------- + 1 file changed, 5 insertions(+), 10 deletions(-) + +--- a/block/blk-zoned.c ++++ b/block/blk-zoned.c +@@ -365,9 +365,10 @@ int blkdev_zone_mgmt_ioctl(struct block_ + op = REQ_OP_ZONE_RESET; + + /* Invalidate the page cache, including dirty pages. */ ++ filemap_invalidate_lock(bdev->bd_inode->i_mapping); + ret = blkdev_truncate_zone_range(bdev, mode, &zrange); + if (ret) +- return ret; ++ goto fail; + break; + case BLKOPENZONE: + op = REQ_OP_ZONE_OPEN; +@@ -385,15 +386,9 @@ int blkdev_zone_mgmt_ioctl(struct block_ + ret = blkdev_zone_mgmt(bdev, op, zrange.sector, zrange.nr_sectors, + GFP_KERNEL); + +- /* +- * Invalidate the page cache again for zone reset: writes can only be +- * direct for zoned devices so concurrent writes would not add any page +- * to the page cache after/during reset. The page cache may be filled +- * again due to concurrent reads though and dropping the pages for +- * these is fine. 
+- */ +- if (!ret && cmd == BLKRESETZONE) +- ret = blkdev_truncate_zone_range(bdev, mode, &zrange); ++fail: ++ if (cmd == BLKRESETZONE) ++ filemap_invalidate_unlock(bdev->bd_inode->i_mapping); + + return ret; + } diff --git a/queue-5.10/block-hold-invalidate_lock-in-blkzeroout-ioctl.patch b/queue-5.10/block-hold-invalidate_lock-in-blkzeroout-ioctl.patch new file mode 100644 index 00000000000..99cdcb82194 --- /dev/null +++ b/queue-5.10/block-hold-invalidate_lock-in-blkzeroout-ioctl.patch @@ -0,0 +1,60 @@ +From 35e4c6c1a2fc2eb11b9306e95cda1fa06a511948 Mon Sep 17 00:00:00 2001 +From: Shin'ichiro Kawasaki +Date: Tue, 9 Nov 2021 19:47:23 +0900 +Subject: block: Hold invalidate_lock in BLKZEROOUT ioctl + +From: Shin'ichiro Kawasaki + +commit 35e4c6c1a2fc2eb11b9306e95cda1fa06a511948 upstream. + +When BLKZEROOUT ioctl and data read race, the data read leaves stale +page cache. To avoid the stale page cache, hold invalidate_lock of the +block device file mapping. The stale page cache is observed when +blktests test case block/009 is modified to call "blkdiscard -z" command +and repeated hundreds of times. + +This patch can be applied back to the stable kernel version v5.15.y. +Rework is required for older stable kernels. 
+ +Fixes: 22dd6d356628 ("block: invalidate the page cache when issuing BLKZEROOUT") +Signed-off-by: Shin'ichiro Kawasaki +Cc: stable@vger.kernel.org # v5.15 +Reviewed-by: Jan Kara +Link: https://lore.kernel.org/r/20211109104723.835533-3-shinichiro.kawasaki@wdc.com +Signed-off-by: Jens Axboe +Signed-off-by: Greg Kroah-Hartman +--- + block/ioctl.c | 12 +++++++++--- + 1 file changed, 9 insertions(+), 3 deletions(-) + +--- a/block/ioctl.c ++++ b/block/ioctl.c +@@ -162,6 +162,7 @@ static int blk_ioctl_zeroout(struct bloc + { + uint64_t range[2]; + uint64_t start, end, len; ++ struct inode *inode = bdev->bd_inode; + int err; + + if (!(mode & FMODE_WRITE)) +@@ -184,12 +185,17 @@ static int blk_ioctl_zeroout(struct bloc + return -EINVAL; + + /* Invalidate the page cache, including dirty pages */ ++ filemap_invalidate_lock(inode->i_mapping); + err = truncate_bdev_range(bdev, mode, start, end); + if (err) +- return err; ++ goto fail; + +- return blkdev_issue_zeroout(bdev, start >> 9, len >> 9, GFP_KERNEL, +- BLKDEV_ZERO_NOUNMAP); ++ err = blkdev_issue_zeroout(bdev, start >> 9, len >> 9, GFP_KERNEL, ++ BLKDEV_ZERO_NOUNMAP); ++ ++fail: ++ filemap_invalidate_unlock(inode->i_mapping); ++ return err; + } + + static int put_ushort(unsigned short __user *argp, unsigned short val) diff --git a/queue-5.10/f2fs-should-use-gfp_nofs-for-directory-inodes.patch b/queue-5.10/f2fs-should-use-gfp_nofs-for-directory-inodes.patch new file mode 100644 index 00000000000..9c706c4f464 --- /dev/null +++ b/queue-5.10/f2fs-should-use-gfp_nofs-for-directory-inodes.patch @@ -0,0 +1,104 @@ +From 92d602bc7177325e7453189a22e0c8764ed3453e Mon Sep 17 00:00:00 2001 +From: Jaegeuk Kim +Date: Tue, 7 Sep 2021 10:24:21 -0700 +Subject: f2fs: should use GFP_NOFS for directory inodes + +From: Jaegeuk Kim + +commit 92d602bc7177325e7453189a22e0c8764ed3453e upstream. + +We use inline_dentry which requires to allocate dentry page when adding a link. 
+If we allow to reclaim memory from filesystem, we do down_read(&sbi->cp_rwsem) +twice by f2fs_lock_op(). I think this should be okay, but how about stopping +the lockdep complaint [1]? + +f2fs_create() + - f2fs_lock_op() + - f2fs_do_add_link() + - __f2fs_find_entry + - f2fs_get_read_data_page() + -> kswapd + - shrink_node + - f2fs_evict_inode + - f2fs_lock_op() + +[1] + +fs_reclaim +){+.+.}-{0:0} +: +kswapd0: lock_acquire+0x114/0x394 +kswapd0: __fs_reclaim_acquire+0x40/0x50 +kswapd0: prepare_alloc_pages+0x94/0x1ec +kswapd0: __alloc_pages_nodemask+0x78/0x1b0 +kswapd0: pagecache_get_page+0x2e0/0x57c +kswapd0: f2fs_get_read_data_page+0xc0/0x394 +kswapd0: f2fs_find_data_page+0xa4/0x23c +kswapd0: find_in_level+0x1a8/0x36c +kswapd0: __f2fs_find_entry+0x70/0x100 +kswapd0: f2fs_do_add_link+0x84/0x1ec +kswapd0: f2fs_mkdir+0xe4/0x1e4 +kswapd0: vfs_mkdir+0x110/0x1c0 +kswapd0: do_mkdirat+0xa4/0x160 +kswapd0: __arm64_sys_mkdirat+0x24/0x34 +kswapd0: el0_svc_common.llvm.17258447499513131576+0xc4/0x1e8 +kswapd0: do_el0_svc+0x28/0xa0 +kswapd0: el0_svc+0x24/0x38 +kswapd0: el0_sync_handler+0x88/0xec +kswapd0: el0_sync+0x1c0/0x200 +kswapd0: +-> #1 +( +&sbi->cp_rwsem +){++++}-{3:3} +: +kswapd0: lock_acquire+0x114/0x394 +kswapd0: down_read+0x7c/0x98 +kswapd0: f2fs_do_truncate_blocks+0x78/0x3dc +kswapd0: f2fs_truncate+0xc8/0x128 +kswapd0: f2fs_evict_inode+0x2b8/0x8b8 +kswapd0: evict+0xd4/0x2f8 +kswapd0: iput+0x1c0/0x258 +kswapd0: do_unlinkat+0x170/0x2a0 +kswapd0: __arm64_sys_unlinkat+0x4c/0x68 +kswapd0: el0_svc_common.llvm.17258447499513131576+0xc4/0x1e8 +kswapd0: do_el0_svc+0x28/0xa0 +kswapd0: el0_svc+0x24/0x38 +kswapd0: el0_sync_handler+0x88/0xec +kswapd0: el0_sync+0x1c0/0x200 + +Cc: stable@vger.kernel.org +Fixes: bdbc90fa55af ("f2fs: don't put dentry page in pagecache into highmem") +Reviewed-by: Chao Yu +Reviewed-by: Stanley Chu +Reviewed-by: Light Hsieh +Tested-by: Light Hsieh +Signed-off-by: Jaegeuk Kim +Signed-off-by: Greg Kroah-Hartman +--- + fs/f2fs/inode.c | 2 +- + 
fs/f2fs/namei.c | 2 +- + 2 files changed, 2 insertions(+), 2 deletions(-) + +--- a/fs/f2fs/inode.c ++++ b/fs/f2fs/inode.c +@@ -511,7 +511,7 @@ make_now: + inode->i_op = &f2fs_dir_inode_operations; + inode->i_fop = &f2fs_dir_operations; + inode->i_mapping->a_ops = &f2fs_dblock_aops; +- inode_nohighmem(inode); ++ mapping_set_gfp_mask(inode->i_mapping, GFP_NOFS); + } else if (S_ISLNK(inode->i_mode)) { + if (file_is_encrypt(inode)) + inode->i_op = &f2fs_encrypted_symlink_inode_operations; +--- a/fs/f2fs/namei.c ++++ b/fs/f2fs/namei.c +@@ -744,7 +744,7 @@ static int f2fs_mkdir(struct inode *dir, + inode->i_op = &f2fs_dir_inode_operations; + inode->i_fop = &f2fs_dir_operations; + inode->i_mapping->a_ops = &f2fs_dblock_aops; +- inode_nohighmem(inode); ++ mapping_set_gfp_mask(inode->i_mapping, GFP_NOFS); + + set_inode_flag(inode, FI_INC_LINK); + f2fs_lock_op(sbi); diff --git a/queue-5.10/irqchip-sifive-plic-fixup-eoi-failed-when-masked.patch b/queue-5.10/irqchip-sifive-plic-fixup-eoi-failed-when-masked.patch new file mode 100644 index 00000000000..a239c12ccb3 --- /dev/null +++ b/queue-5.10/irqchip-sifive-plic-fixup-eoi-failed-when-masked.patch @@ -0,0 +1,65 @@ +From 69ea463021be0d159ab30f96195fb0dd18ee2272 Mon Sep 17 00:00:00 2001 +From: Guo Ren +Date: Fri, 5 Nov 2021 17:47:48 +0800 +Subject: irqchip/sifive-plic: Fixup EOI failed when masked + +From: Guo Ren + +commit 69ea463021be0d159ab30f96195fb0dd18ee2272 upstream. + +When using "devm_request_threaded_irq(,,,,IRQF_ONESHOT,,)" in a driver, +only the first interrupt is handled, and following interrupts are never +delivered (initially reported in [1]). + +That's because the RISC-V PLIC cannot EOI masked interrupts, as explained +in the description of Interrupt Completion in the PLIC spec [2]: + + +The PLIC signals it has completed executing an interrupt handler by +writing the interrupt ID it received from the claim to the claim/complete +register. 
The PLIC does not check whether the completion ID is the same +as the last claim ID for that target. If the completion ID does not match +an interrupt source that *is currently enabled* for the target, the +completion is silently ignored. + + +Re-enable the interrupt before completion if it has been masked during +the handling, and remask it afterwards. + +[1] http://lists.infradead.org/pipermail/linux-riscv/2021-July/007441.html +[2] https://github.com/riscv/riscv-plic-spec/blob/8bc15a35d07c9edf7b5d23fec9728302595ffc4d/riscv-plic.adoc + +Fixes: bb0fed1c60cc ("irqchip/sifive-plic: Switch to fasteoi flow") +Reported-by: Vincent Pelletier +Tested-by: Nikita Shubin +Signed-off-by: Guo Ren +Cc: stable@vger.kernel.org +Cc: Thomas Gleixner +Cc: Palmer Dabbelt +Cc: Atish Patra +Reviewed-by: Anup Patel +[maz: amended commit message] +Signed-off-by: Marc Zyngier +Link: https://lore.kernel.org/r/20211105094748.3894453-1-guoren@kernel.org +Signed-off-by: Greg Kroah-Hartman +--- + drivers/irqchip/irq-sifive-plic.c | 8 +++++++- + 1 file changed, 7 insertions(+), 1 deletion(-) + +--- a/drivers/irqchip/irq-sifive-plic.c ++++ b/drivers/irqchip/irq-sifive-plic.c +@@ -163,7 +163,13 @@ static void plic_irq_eoi(struct irq_data + { + struct plic_handler *handler = this_cpu_ptr(&plic_handlers); + +- writel(d->hwirq, handler->hart_base + CONTEXT_CLAIM); ++ if (irqd_irq_masked(d)) { ++ plic_irq_unmask(d); ++ writel(d->hwirq, handler->hart_base + CONTEXT_CLAIM); ++ plic_irq_mask(d); ++ } else { ++ writel(d->hwirq, handler->hart_base + CONTEXT_CLAIM); ++ } + } + + static struct irq_chip plic_chip = { diff --git a/queue-5.10/posix-cpu-timers-clear-task-posix_cputimers_work-in-copy_process.patch b/queue-5.10/posix-cpu-timers-clear-task-posix_cputimers_work-in-copy_process.patch new file mode 100644 index 00000000000..ebb351cbdc5 --- /dev/null +++ b/queue-5.10/posix-cpu-timers-clear-task-posix_cputimers_work-in-copy_process.patch @@ -0,0 +1,111 @@ +From ca7752caeaa70bd31d1714af566c9809688544af 
Mon Sep 17 00:00:00 2001 +From: Michael Pratt +Date: Mon, 1 Nov 2021 17:06:15 -0400 +Subject: posix-cpu-timers: Clear task::posix_cputimers_work in copy_process() + +From: Michael Pratt + +commit ca7752caeaa70bd31d1714af566c9809688544af upstream. + +copy_process currently copies task_struct.posix_cputimers_work as-is. If a +timer interrupt arrives while handling clone and before dup_task_struct +completes then the child task will have: + +1. posix_cputimers_work.scheduled = true +2. posix_cputimers_work.work queued. + +copy_process clears task_struct.task_works, so (2) will have no effect and +posix_cpu_timers_work will never run (not to mention it doesn't make sense +for two tasks to share a common linked list). + +Since posix_cpu_timers_work never runs, posix_cputimers_work.scheduled is +never cleared. Since scheduled is set, future timer interrupts will skip +scheduling work, with the ultimate result that the task will never receive +timer expirations. + +Together, the complete flow is: + +1. Task 1 calls clone(), enters kernel. +2. Timer interrupt fires, schedules task work on Task 1. + 2a. task_struct.posix_cputimers_work.scheduled = true + 2b. task_struct.posix_cputimers_work.work added to + task_struct.task_works. +3. dup_task_struct() copies Task 1 to Task 2. +4. copy_process() clears task_struct.task_works for Task 2. +5. Future timer interrupts on Task 2 see + task_struct.posix_cputimers_work.scheduled = true and skip scheduling + work. + +Fix this by explicitly clearing contents of task_struct.posix_cputimers_work +in copy_process(). This was never meant to be shared or inherited across +tasks in the first place. 
+ +Fixes: 1fb497dd0030 ("posix-cpu-timers: Provide mechanisms to defer timer handling to task_work") +Reported-by: Rhys Hiltner +Signed-off-by: Michael Pratt +Signed-off-by: Thomas Gleixner +Cc: stable@vger.kernel.org +Link: https://lore.kernel.org/r/20211101210615.716522-1-mpratt@google.com +Signed-off-by: Greg Kroah-Hartman +--- + include/linux/posix-timers.h | 2 ++ + kernel/fork.c | 1 + + kernel/time/posix-cpu-timers.c | 19 +++++++++++++++++-- + 3 files changed, 20 insertions(+), 2 deletions(-) + +--- a/include/linux/posix-timers.h ++++ b/include/linux/posix-timers.h +@@ -177,8 +177,10 @@ static inline void posix_cputimers_group + #endif + + #ifdef CONFIG_POSIX_CPU_TIMERS_TASK_WORK ++void clear_posix_cputimers_work(struct task_struct *p); + void posix_cputimers_init_work(void); + #else ++static inline void clear_posix_cputimers_work(struct task_struct *p) { } + static inline void posix_cputimers_init_work(void) { } + #endif + +--- a/kernel/fork.c ++++ b/kernel/fork.c +@@ -2191,6 +2191,7 @@ static __latent_entropy struct task_stru + p->pdeath_signal = 0; + INIT_LIST_HEAD(&p->thread_group); + p->task_works = NULL; ++ clear_posix_cputimers_work(p); + + /* + * Ensure that the cgroup subsystem policies allow the new process to be +--- a/kernel/time/posix-cpu-timers.c ++++ b/kernel/time/posix-cpu-timers.c +@@ -1101,13 +1101,28 @@ static void posix_cpu_timers_work(struct + } + + /* ++ * Clear existing posix CPU timers task work. ++ */ ++void clear_posix_cputimers_work(struct task_struct *p) ++{ ++ /* ++ * A copied work entry from the old task is not meaningful, clear it. ++ * N.B. init_task_work will not do this. ++ */ ++ memset(&p->posix_cputimers_work.work, 0, ++ sizeof(p->posix_cputimers_work.work)); ++ init_task_work(&p->posix_cputimers_work.work, ++ posix_cpu_timers_work); ++ p->posix_cputimers_work.scheduled = false; ++} ++ ++/* + * Initialize posix CPU timers task work in init task. Out of line to + * keep the callback static and to avoid header recursion hell. 
+ */ + void __init posix_cputimers_init_work(void) + { +- init_task_work(&current->posix_cputimers_work.work, +- posix_cpu_timers_work); ++ clear_posix_cputimers_work(current); + } + + /* diff --git a/queue-5.10/series b/queue-5.10/series index a543f70ce7a..2571523adfd 100644 --- a/queue-5.10/series +++ b/queue-5.10/series @@ -541,3 +541,9 @@ arm-9156-1-drop-cc-option-fallbacks-for-architecture-selection.patch parisc-fix-backtrace-to-always-include-init-funtion-names.patch mips-fix-assembly-error-from-mipsr2-code-used-within-mips_isa_arch_level.patch x86-mce-add-errata-workaround-for-skylake-skx37.patch +posix-cpu-timers-clear-task-posix_cputimers_work-in-copy_process.patch +irqchip-sifive-plic-fixup-eoi-failed-when-masked.patch +f2fs-should-use-gfp_nofs-for-directory-inodes.patch +block-hold-invalidate_lock-in-blkdiscard-ioctl.patch +block-hold-invalidate_lock-in-blkzeroout-ioctl.patch +block-hold-invalidate_lock-in-blkresetzone-ioctl.patch