--- /dev/null
+From 7607c44c157d343223510c8ffdf7206fdd2a6213 Mon Sep 17 00:00:00 2001
+From: Shin'ichiro Kawasaki <shinichiro.kawasaki@wdc.com>
+Date: Tue, 9 Nov 2021 19:47:22 +0900
+Subject: block: Hold invalidate_lock in BLKDISCARD ioctl
+
+From: Shin'ichiro Kawasaki <shinichiro.kawasaki@wdc.com>
+
+commit 7607c44c157d343223510c8ffdf7206fdd2a6213 upstream.
+
+When BLKDISCARD ioctl and data read race, the data read leaves stale
+page cache. To avoid the stale page cache, hold invalidate_lock of the
+block device file mapping. The stale page cache is observed when
+blktests test case block/009 is repeated hundreds of times.
+
+This patch can be applied back to the stable kernel version v5.15.y
+with slight patch edit. Rework is required for older stable kernels.
+
+Fixes: 351499a172c0 ("block: Invalidate cache on discard v2")
+Signed-off-by: Shin'ichiro Kawasaki <shinichiro.kawasaki@wdc.com>
+Cc: stable@vger.kernel.org # v5.15
+Reviewed-by: Jan Kara <jack@suse.cz>
+Link: https://lore.kernel.org/r/20211109104723.835533-2-shinichiro.kawasaki@wdc.com
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ block/ioctl.c | 12 +++++++++---
+ 1 file changed, 9 insertions(+), 3 deletions(-)
+
+--- a/block/ioctl.c
++++ b/block/ioctl.c
+@@ -121,6 +121,7 @@ static int blk_ioctl_discard(struct bloc
+ uint64_t range[2];
+ uint64_t start, len;
+ struct request_queue *q = bdev_get_queue(bdev);
++ struct inode *inode = bdev->bd_inode;
+ int err;
+
+ if (!(mode & FMODE_WRITE))
+@@ -143,12 +144,17 @@ static int blk_ioctl_discard(struct bloc
+ if (start + len > i_size_read(bdev->bd_inode))
+ return -EINVAL;
+
++ filemap_invalidate_lock(inode->i_mapping);
+ err = truncate_bdev_range(bdev, mode, start, start + len - 1);
+ if (err)
+- return err;
++ goto fail;
+
+- return blkdev_issue_discard(bdev, start >> 9, len >> 9,
+- GFP_KERNEL, flags);
++ err = blkdev_issue_discard(bdev, start >> 9, len >> 9,
++ GFP_KERNEL, flags);
++
++fail:
++ filemap_invalidate_unlock(inode->i_mapping);
++ return err;
+ }
+
+ static int blk_ioctl_zeroout(struct block_device *bdev, fmode_t mode,
--- /dev/null
+From 86399ea071099ec8ee0a83ac9ad67f7df96a50ad Mon Sep 17 00:00:00 2001
+From: Shin'ichiro Kawasaki <shinichiro.kawasaki@wdc.com>
+Date: Thu, 11 Nov 2021 17:52:38 +0900
+Subject: block: Hold invalidate_lock in BLKRESETZONE ioctl
+
+From: Shin'ichiro Kawasaki <shinichiro.kawasaki@wdc.com>
+
+commit 86399ea071099ec8ee0a83ac9ad67f7df96a50ad upstream.
+
+When BLKRESETZONE ioctl and data read race, the data read leaves stale
+page cache. The commit e5113505904e ("block: Discard page cache of zone
+reset target range") added page cache truncation to avoid stale page
+cache after the ioctl. However, the stale page cache still can be read
+during the reset zone operation for the ioctl. To avoid the stale page
+cache completely, hold invalidate_lock of the block device file mapping.
+
+Fixes: e5113505904e ("block: Discard page cache of zone reset target range")
+Signed-off-by: Shin'ichiro Kawasaki <shinichiro.kawasaki@wdc.com>
+Cc: stable@vger.kernel.org # v5.15
+Reviewed-by: Jan Kara <jack@suse.cz>
+Reviewed-by: Ming Lei <ming.lei@redhat.com>
+Link: https://lore.kernel.org/r/20211111085238.942492-1-shinichiro.kawasaki@wdc.com
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ block/blk-zoned.c | 15 +++++----------
+ 1 file changed, 5 insertions(+), 10 deletions(-)
+
+--- a/block/blk-zoned.c
++++ b/block/blk-zoned.c
+@@ -365,9 +365,10 @@ int blkdev_zone_mgmt_ioctl(struct block_
+ op = REQ_OP_ZONE_RESET;
+
+ /* Invalidate the page cache, including dirty pages. */
++ filemap_invalidate_lock(bdev->bd_inode->i_mapping);
+ ret = blkdev_truncate_zone_range(bdev, mode, &zrange);
+ if (ret)
+- return ret;
++ goto fail;
+ break;
+ case BLKOPENZONE:
+ op = REQ_OP_ZONE_OPEN;
+@@ -385,15 +386,9 @@ int blkdev_zone_mgmt_ioctl(struct block_
+ ret = blkdev_zone_mgmt(bdev, op, zrange.sector, zrange.nr_sectors,
+ GFP_KERNEL);
+
+- /*
+- * Invalidate the page cache again for zone reset: writes can only be
+- * direct for zoned devices so concurrent writes would not add any page
+- * to the page cache after/during reset. The page cache may be filled
+- * again due to concurrent reads though and dropping the pages for
+- * these is fine.
+- */
+- if (!ret && cmd == BLKRESETZONE)
+- ret = blkdev_truncate_zone_range(bdev, mode, &zrange);
++fail:
++ if (cmd == BLKRESETZONE)
++ filemap_invalidate_unlock(bdev->bd_inode->i_mapping);
+
+ return ret;
+ }
--- /dev/null
+From 35e4c6c1a2fc2eb11b9306e95cda1fa06a511948 Mon Sep 17 00:00:00 2001
+From: Shin'ichiro Kawasaki <shinichiro.kawasaki@wdc.com>
+Date: Tue, 9 Nov 2021 19:47:23 +0900
+Subject: block: Hold invalidate_lock in BLKZEROOUT ioctl
+
+From: Shin'ichiro Kawasaki <shinichiro.kawasaki@wdc.com>
+
+commit 35e4c6c1a2fc2eb11b9306e95cda1fa06a511948 upstream.
+
+When BLKZEROOUT ioctl and data read race, the data read leaves stale
+page cache. To avoid the stale page cache, hold invalidate_lock of the
+block device file mapping. The stale page cache is observed when
+blktests test case block/009 is modified to call "blkdiscard -z" command
+and repeated hundreds of times.
+
+This patch can be applied back to the stable kernel version v5.15.y.
+Rework is required for older stable kernels.
+
+Fixes: 22dd6d356628 ("block: invalidate the page cache when issuing BLKZEROOUT")
+Signed-off-by: Shin'ichiro Kawasaki <shinichiro.kawasaki@wdc.com>
+Cc: stable@vger.kernel.org # v5.15
+Reviewed-by: Jan Kara <jack@suse.cz>
+Link: https://lore.kernel.org/r/20211109104723.835533-3-shinichiro.kawasaki@wdc.com
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ block/ioctl.c | 12 +++++++++---
+ 1 file changed, 9 insertions(+), 3 deletions(-)
+
+--- a/block/ioctl.c
++++ b/block/ioctl.c
+@@ -162,6 +162,7 @@ static int blk_ioctl_zeroout(struct bloc
+ {
+ uint64_t range[2];
+ uint64_t start, end, len;
++ struct inode *inode = bdev->bd_inode;
+ int err;
+
+ if (!(mode & FMODE_WRITE))
+@@ -184,12 +185,17 @@ static int blk_ioctl_zeroout(struct bloc
+ return -EINVAL;
+
+ /* Invalidate the page cache, including dirty pages */
++ filemap_invalidate_lock(inode->i_mapping);
+ err = truncate_bdev_range(bdev, mode, start, end);
+ if (err)
+- return err;
++ goto fail;
+
+- return blkdev_issue_zeroout(bdev, start >> 9, len >> 9, GFP_KERNEL,
+- BLKDEV_ZERO_NOUNMAP);
++ err = blkdev_issue_zeroout(bdev, start >> 9, len >> 9, GFP_KERNEL,
++ BLKDEV_ZERO_NOUNMAP);
++
++fail:
++ filemap_invalidate_unlock(inode->i_mapping);
++ return err;
+ }
+
+ static int put_ushort(unsigned short __user *argp, unsigned short val)
--- /dev/null
+From 92d602bc7177325e7453189a22e0c8764ed3453e Mon Sep 17 00:00:00 2001
+From: Jaegeuk Kim <jaegeuk@kernel.org>
+Date: Tue, 7 Sep 2021 10:24:21 -0700
+Subject: f2fs: should use GFP_NOFS for directory inodes
+
+From: Jaegeuk Kim <jaegeuk@kernel.org>
+
+commit 92d602bc7177325e7453189a22e0c8764ed3453e upstream.
+
+We use inline_dentry which requires to allocate dentry page when adding a link.
+If we allow to reclaim memory from filesystem, we do down_read(&sbi->cp_rwsem)
+twice by f2fs_lock_op(). I think this should be okay, but how about stopping
+the lockdep complaint [1]?
+
+f2fs_create()
+ - f2fs_lock_op()
+ - f2fs_do_add_link()
+ - __f2fs_find_entry
+ - f2fs_get_read_data_page()
+ -> kswapd
+ - shrink_node
+ - f2fs_evict_inode
+ - f2fs_lock_op()
+
+[1]
+
+fs_reclaim
+){+.+.}-{0:0}
+:
+kswapd0: lock_acquire+0x114/0x394
+kswapd0: __fs_reclaim_acquire+0x40/0x50
+kswapd0: prepare_alloc_pages+0x94/0x1ec
+kswapd0: __alloc_pages_nodemask+0x78/0x1b0
+kswapd0: pagecache_get_page+0x2e0/0x57c
+kswapd0: f2fs_get_read_data_page+0xc0/0x394
+kswapd0: f2fs_find_data_page+0xa4/0x23c
+kswapd0: find_in_level+0x1a8/0x36c
+kswapd0: __f2fs_find_entry+0x70/0x100
+kswapd0: f2fs_do_add_link+0x84/0x1ec
+kswapd0: f2fs_mkdir+0xe4/0x1e4
+kswapd0: vfs_mkdir+0x110/0x1c0
+kswapd0: do_mkdirat+0xa4/0x160
+kswapd0: __arm64_sys_mkdirat+0x24/0x34
+kswapd0: el0_svc_common.llvm.17258447499513131576+0xc4/0x1e8
+kswapd0: do_el0_svc+0x28/0xa0
+kswapd0: el0_svc+0x24/0x38
+kswapd0: el0_sync_handler+0x88/0xec
+kswapd0: el0_sync+0x1c0/0x200
+kswapd0:
+-> #1
+(
+&sbi->cp_rwsem
+){++++}-{3:3}
+:
+kswapd0: lock_acquire+0x114/0x394
+kswapd0: down_read+0x7c/0x98
+kswapd0: f2fs_do_truncate_blocks+0x78/0x3dc
+kswapd0: f2fs_truncate+0xc8/0x128
+kswapd0: f2fs_evict_inode+0x2b8/0x8b8
+kswapd0: evict+0xd4/0x2f8
+kswapd0: iput+0x1c0/0x258
+kswapd0: do_unlinkat+0x170/0x2a0
+kswapd0: __arm64_sys_unlinkat+0x4c/0x68
+kswapd0: el0_svc_common.llvm.17258447499513131576+0xc4/0x1e8
+kswapd0: do_el0_svc+0x28/0xa0
+kswapd0: el0_svc+0x24/0x38
+kswapd0: el0_sync_handler+0x88/0xec
+kswapd0: el0_sync+0x1c0/0x200
+
+Cc: stable@vger.kernel.org
+Fixes: bdbc90fa55af ("f2fs: don't put dentry page in pagecache into highmem")
+Reviewed-by: Chao Yu <chao@kernel.org>
+Reviewed-by: Stanley Chu <stanley.chu@mediatek.com>
+Reviewed-by: Light Hsieh <light.hsieh@mediatek.com>
+Tested-by: Light Hsieh <light.hsieh@mediatek.com>
+Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/f2fs/inode.c | 2 +-
+ fs/f2fs/namei.c | 2 +-
+ 2 files changed, 2 insertions(+), 2 deletions(-)
+
+--- a/fs/f2fs/inode.c
++++ b/fs/f2fs/inode.c
+@@ -511,7 +511,7 @@ make_now:
+ inode->i_op = &f2fs_dir_inode_operations;
+ inode->i_fop = &f2fs_dir_operations;
+ inode->i_mapping->a_ops = &f2fs_dblock_aops;
+- inode_nohighmem(inode);
++ mapping_set_gfp_mask(inode->i_mapping, GFP_NOFS);
+ } else if (S_ISLNK(inode->i_mode)) {
+ if (file_is_encrypt(inode))
+ inode->i_op = &f2fs_encrypted_symlink_inode_operations;
+--- a/fs/f2fs/namei.c
++++ b/fs/f2fs/namei.c
+@@ -744,7 +744,7 @@ static int f2fs_mkdir(struct inode *dir,
+ inode->i_op = &f2fs_dir_inode_operations;
+ inode->i_fop = &f2fs_dir_operations;
+ inode->i_mapping->a_ops = &f2fs_dblock_aops;
+- inode_nohighmem(inode);
++ mapping_set_gfp_mask(inode->i_mapping, GFP_NOFS);
+
+ set_inode_flag(inode, FI_INC_LINK);
+ f2fs_lock_op(sbi);
--- /dev/null
+From 69ea463021be0d159ab30f96195fb0dd18ee2272 Mon Sep 17 00:00:00 2001
+From: Guo Ren <guoren@linux.alibaba.com>
+Date: Fri, 5 Nov 2021 17:47:48 +0800
+Subject: irqchip/sifive-plic: Fixup EOI failed when masked
+
+From: Guo Ren <guoren@linux.alibaba.com>
+
+commit 69ea463021be0d159ab30f96195fb0dd18ee2272 upstream.
+
+When using "devm_request_threaded_irq(,,,,IRQF_ONESHOT,,)" in a driver,
+only the first interrupt is handled, and following interrupts are never
+delivered (initially reported in [1]).
+
+That's because the RISC-V PLIC cannot EOI masked interrupts, as explained
+in the description of Interrupt Completion in the PLIC spec [2]:
+
+<quote>
+The PLIC signals it has completed executing an interrupt handler by
+writing the interrupt ID it received from the claim to the claim/complete
+register. The PLIC does not check whether the completion ID is the same
+as the last claim ID for that target. If the completion ID does not match
+an interrupt source that *is currently enabled* for the target, the
+completion is silently ignored.
+</quote>
+
+Re-enable the interrupt before completion if it has been masked during
+the handling, and remask it afterwards.
+
+[1] http://lists.infradead.org/pipermail/linux-riscv/2021-July/007441.html
+[2] https://github.com/riscv/riscv-plic-spec/blob/8bc15a35d07c9edf7b5d23fec9728302595ffc4d/riscv-plic.adoc
+
+Fixes: bb0fed1c60cc ("irqchip/sifive-plic: Switch to fasteoi flow")
+Reported-by: Vincent Pelletier <plr.vincent@gmail.com>
+Tested-by: Nikita Shubin <nikita.shubin@maquefel.me>
+Signed-off-by: Guo Ren <guoren@linux.alibaba.com>
+Cc: stable@vger.kernel.org
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Cc: Palmer Dabbelt <palmer@dabbelt.com>
+Cc: Atish Patra <atish.patra@wdc.com>
+Reviewed-by: Anup Patel <anup@brainfault.org>
+[maz: amended commit message]
+Signed-off-by: Marc Zyngier <maz@kernel.org>
+Link: https://lore.kernel.org/r/20211105094748.3894453-1-guoren@kernel.org
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/irqchip/irq-sifive-plic.c | 8 +++++++-
+ 1 file changed, 7 insertions(+), 1 deletion(-)
+
+--- a/drivers/irqchip/irq-sifive-plic.c
++++ b/drivers/irqchip/irq-sifive-plic.c
+@@ -163,7 +163,13 @@ static void plic_irq_eoi(struct irq_data
+ {
+ struct plic_handler *handler = this_cpu_ptr(&plic_handlers);
+
+- writel(d->hwirq, handler->hart_base + CONTEXT_CLAIM);
++ if (irqd_irq_masked(d)) {
++ plic_irq_unmask(d);
++ writel(d->hwirq, handler->hart_base + CONTEXT_CLAIM);
++ plic_irq_mask(d);
++ } else {
++ writel(d->hwirq, handler->hart_base + CONTEXT_CLAIM);
++ }
+ }
+
+ static struct irq_chip plic_chip = {
--- /dev/null
+From ca7752caeaa70bd31d1714af566c9809688544af Mon Sep 17 00:00:00 2001
+From: Michael Pratt <mpratt@google.com>
+Date: Mon, 1 Nov 2021 17:06:15 -0400
+Subject: posix-cpu-timers: Clear task::posix_cputimers_work in copy_process()
+
+From: Michael Pratt <mpratt@google.com>
+
+commit ca7752caeaa70bd31d1714af566c9809688544af upstream.
+
+copy_process currently copies task_struct.posix_cputimers_work as-is. If a
+timer interrupt arrives while handling clone and before dup_task_struct
+completes then the child task will have:
+
+1. posix_cputimers_work.scheduled = true
+2. posix_cputimers_work.work queued.
+
+copy_process clears task_struct.task_works, so (2) will have no effect and
+posix_cpu_timers_work will never run (not to mention it doesn't make sense
+for two tasks to share a common linked list).
+
+Since posix_cpu_timers_work never runs, posix_cputimers_work.scheduled is
+never cleared. Since scheduled is set, future timer interrupts will skip
+scheduling work, with the ultimate result that the task will never receive
+timer expirations.
+
+Together, the complete flow is:
+
+1. Task 1 calls clone(), enters kernel.
+2. Timer interrupt fires, schedules task work on Task 1.
+ 2a. task_struct.posix_cputimers_work.scheduled = true
+ 2b. task_struct.posix_cputimers_work.work added to
+ task_struct.task_works.
+3. dup_task_struct() copies Task 1 to Task 2.
+4. copy_process() clears task_struct.task_works for Task 2.
+5. Future timer interrupts on Task 2 see
+ task_struct.posix_cputimers_work.scheduled = true and skip scheduling
+ work.
+
+Fix this by explicitly clearing contents of task_struct.posix_cputimers_work
+in copy_process(). This was never meant to be shared or inherited across
+tasks in the first place.
+
+Fixes: 1fb497dd0030 ("posix-cpu-timers: Provide mechanisms to defer timer handling to task_work")
+Reported-by: Rhys Hiltner <rhys@justin.tv>
+Signed-off-by: Michael Pratt <mpratt@google.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: <stable@vger.kernel.org>
+Link: https://lore.kernel.org/r/20211101210615.716522-1-mpratt@google.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/linux/posix-timers.h | 2 ++
+ kernel/fork.c | 1 +
+ kernel/time/posix-cpu-timers.c | 19 +++++++++++++++++--
+ 3 files changed, 20 insertions(+), 2 deletions(-)
+
+--- a/include/linux/posix-timers.h
++++ b/include/linux/posix-timers.h
+@@ -177,8 +177,10 @@ static inline void posix_cputimers_group
+ #endif
+
+ #ifdef CONFIG_POSIX_CPU_TIMERS_TASK_WORK
++void clear_posix_cputimers_work(struct task_struct *p);
+ void posix_cputimers_init_work(void);
+ #else
++static inline void clear_posix_cputimers_work(struct task_struct *p) { }
+ static inline void posix_cputimers_init_work(void) { }
+ #endif
+
+--- a/kernel/fork.c
++++ b/kernel/fork.c
+@@ -2191,6 +2191,7 @@ static __latent_entropy struct task_stru
+ p->pdeath_signal = 0;
+ INIT_LIST_HEAD(&p->thread_group);
+ p->task_works = NULL;
++ clear_posix_cputimers_work(p);
+
+ /*
+ * Ensure that the cgroup subsystem policies allow the new process to be
+--- a/kernel/time/posix-cpu-timers.c
++++ b/kernel/time/posix-cpu-timers.c
+@@ -1101,13 +1101,28 @@ static void posix_cpu_timers_work(struct
+ }
+
+ /*
++ * Clear existing posix CPU timers task work.
++ */
++void clear_posix_cputimers_work(struct task_struct *p)
++{
++ /*
++ * A copied work entry from the old task is not meaningful, clear it.
++ * N.B. init_task_work will not do this.
++ */
++ memset(&p->posix_cputimers_work.work, 0,
++ sizeof(p->posix_cputimers_work.work));
++ init_task_work(&p->posix_cputimers_work.work,
++ posix_cpu_timers_work);
++ p->posix_cputimers_work.scheduled = false;
++}
++
++/*
+ * Initialize posix CPU timers task work in init task. Out of line to
+ * keep the callback static and to avoid header recursion hell.
+ */
+ void __init posix_cputimers_init_work(void)
+ {
+- init_task_work(&current->posix_cputimers_work.work,
+- posix_cpu_timers_work);
++ clear_posix_cputimers_work(current);
+ }
+
+ /*
parisc-fix-backtrace-to-always-include-init-funtion-names.patch
mips-fix-assembly-error-from-mipsr2-code-used-within-mips_isa_arch_level.patch
x86-mce-add-errata-workaround-for-skylake-skx37.patch
+posix-cpu-timers-clear-task-posix_cputimers_work-in-copy_process.patch
+irqchip-sifive-plic-fixup-eoi-failed-when-masked.patch
+f2fs-should-use-gfp_nofs-for-directory-inodes.patch
+block-hold-invalidate_lock-in-blkdiscard-ioctl.patch
+block-hold-invalidate_lock-in-blkzeroout-ioctl.patch
+block-hold-invalidate_lock-in-blkresetzone-ioctl.patch